From b70d44475886b6ea0ccf496266f5a7703d4fedba Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 09:03:34 +0000
Subject: [PATCH 01/10] Initial plan


From 419d0d4d7dfdd75b28d51c800d7d7d5e6cb4c4eb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 09:17:11 +0000
Subject: [PATCH 02/10] feat: add comprehensive Linux kernel learning guide (10
 chapters)

Agent-Logs-Url: https://github.com/YYCB/how_to_learn_linux/sessions/6cf56f5d-3b40-45cb-b817-802ce8f88c67

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                | 199 ++++++++
 .../README.md"                                | 223 +++++++++
 .../README.md"                                | 284 ++++++++++++
 .../README.md"                                | 437 ++++++++++++++++++
 .../README.md"                                | 355 ++++++++++++++
 .../README.md"                                | 426 +++++++++++++++++
 .../README.md"                                | 360 +++++++++++++++
 .../README.md"                                | 419 +++++++++++++++++
 .../README.md"                                | 315 +++++++++++++
 .../README.md"                                | 361 +++++++++++++++
 README.md                                     |  86 +++-
 11 files changed, 3464 insertions(+), 1 deletion(-)
 create mode 100644 "00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
 create mode 100644 "01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
 create mode 100644 "02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
 create mode 100644 "03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
 create mode 100644 "04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
 create mode 100644 "05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
 create mode 100644 "06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
 create mode 100644 "07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
 create mode 100644 "08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
 create mode 100644 "09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"

diff --git "a/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md" "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
new file mode 100644
index 0000000..5014473
--- /dev/null
+++ "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
@@ -0,0 +1,199 @@
+# 00 — Linux 内核学习路线
+
+> 本章给出一条从零基础到能够阅读并修改内核源码的完整路线，
+> 包含各阶段目标、推荐时间、学习方法与检验标准。
+
+---
+
+## 总览：四个阶段
+
+```
+阶段一            阶段二              阶段三              阶段四
+基础准备      经典版本精读        现代内核对比         实战 & 贡献
+(4~6 周)      (8~12 周)          (8~12 周)           (持续进行)
+   │               │                  │                   │
+C语言复习  ──► Linux 0.11 全读  ──► Linux 2.6 核心  ──► 写驱动/模块
+汇编入门       调试环境实验        与现代内核对比       提交 patch
+OS 理论        子系统笔记          性能分析工具        参与 LKML
+```
+
+---
+
+## 阶段一：基础准备（4~6 周）
+
+### 1.1 C 语言与内核编程习惯
+
+| 知识点 | 要求 |
+|--------|------|
+| 指针、函数指针 | **必须熟练**，内核大量使用 |
+| 位操作、宏 | 理解 `#define`、`do { } while(0)` 等惯用法 |
+| 内联汇编（GCC AT&T 语法） | 能读懂 `asm volatile` |
+| 链接脚本（vmlinux.lds） | 了解段布局即可 |
+
+```c
+/* 内核中的典型宏写法示例 */
+#define container_of(ptr, type, member) ({          \
+    const typeof(((type *)0)->member) *__mptr = (ptr); \
+    (type *)((char *)__mptr - offsetof(type, member)); })
+```
+
+### 1.2 操作系统理论
+
+重点掌握以下概念（对照书本 + 自己画图）：
+
+- **进程 vs 线程**：PCB 结构、状态机
+- **虚拟内存**：页表、TLB、缺页中断
+- **调度算法**：FIFO、Round-Robin、CFS 思想
+- **文件系统**：inode、目录树、VFS 抽象
+- **同步原语**：互斥量、信号量、条件变量
+
+### 1.3 工具链熟悉
+
+```bash
+# 必须会用的工具
+gcc / clang       # 编译
+gdb               # 调试（含 remote target）
+make / kbuild     # 内核构建系统
+objdump / nm      # 符号与反汇编
+readelf           # ELF 格式分析
+strace / ltrace   # 系统调用跟踪
+perf              # 性能分析
+```
+
+---
+
+## 阶段二：Linux 0.11 精读（8~12 周）
+
+### 为什么选 Linux 0.11？
+
+| 特性 | Linux 0.11 | 现代内核 |
+|------|-----------|---------|
+| 代码行数 | ~14,000 行 | ~3,000 万行 |
+| 架构 | 仅 x86 | 多架构 |
+| 调度器 | 简单时间片 | CFS + 实时 |
+| 内存管理 | 段页式 | 纯分页 + NUMA |
+| 文件系统 | Minix FS | ext4/btrfs/... |
+| 学习曲线 | **平缓** | 陡峭 |
+
+Linux 0.11 是 **涵盖进程、内存、文件系统、设备驱动等核心机制的最小完整实现**（不含网络、SMP 等），
+非常适合初学者完整读完并在脑海中形成整体图像。
+
+### 0.11 源码目录结构
+
+```
+linux-0.11/
+├── boot/           # 启动代码（bootsect.s, setup.s, head.s）
+├── init/           # main.c — 内核入口
+├── kernel/         # 核心：进程、调度、信号、系统调用
+│   ├── sched.c     # 调度器
+│   ├── fork.c      # 进程创建
+│   ├── exit.c      # 进程退出
+│   ├── signal.c    # 信号处理
+│   └── sys.c       # 系统调用实现
+├── mm/             # 内存管理
+│   ├── memory.c    # 物理内存与页表
+│   └── page.s      # 缺页中断汇编入口
+├── fs/             # 文件系统（Minix FS）
+│   ├── inode.c
+│   ├── namei.c
+│   └── buffer.c
+├── lib/            # 内核库函数
+├── include/        # 头文件
+└── Makefile
+```
+
+### 8 周精读计划
+
+| 周次 | 内容 | 对应源文件 |
+|------|------|-----------|
+| 第 1 周 | 启动流程：BIOS → 实模式 → 保护模式 | `boot/bootsect.s`, `boot/setup.s`, `boot/head.s` |
+| 第 2 周 | 内核入口与初始化 | `init/main.c` |
+| 第 3 周 | 内存管理：段页式 + 缺页 | `mm/memory.c`, `mm/page.s` |
+| 第 4 周 | 进程创建与切换 | `kernel/fork.c`, `kernel/sched.c` |
+| 第 5 周 | 系统调用机制 | `kernel/system_call.s`, `kernel/sys.c` |
+| 第 6 周 | 信号与进程间通信 | `kernel/signal.c`, `kernel/exit.c` |
+| 第 7 周 | 文件系统：Minix FS | `fs/` 全部文件 |
+| 第 8 周 | 设备驱动：磁盘、终端 | `kernel/blk_drv/`, `kernel/chr_drv/` |
+
+---
+
+## 阶段三：Linux 2.6.0 核心精读（8~12 周）
+
+### 为什么选 2.6.0？
+
+- 2003 年发布，是**现代内核架构成型的里程碑版本**
+- 引入了 O(1) 调度器（2.6.23 起被 CFS 替代，但 per-CPU 运行队列等框架得以保留）
+- 引入了 `kobject`/`sysfs` 设备模型
+- 代码量约 600 万行，子系统边界清晰
+- 与现代内核（5.x/6.x）差异可对比学习
+
+### 重点子系统对比学习路径
+
+```
+Linux 0.11                    Linux 2.6.0
+─────────                     ───────────
+sched.c (简单轮转)    ──►   kernel/sched.c (O1 调度器)
+                                    │
+                                    ▼
+                            Linux 5.x: kernel/sched/fair.c (CFS)
+
+fork.c               ──►   kernel/fork.c (引入线程、命名空间)
+
+mm/memory.c          ──►   mm/memory.c + mm/slab.c + mm/vmalloc.c
+
+fs/ (Minix FS)       ──►   fs/ext2/ + fs/*.c (完整 VFS 层，无独立 vfs/ 目录)
+```
+
+---
+
+## 阶段四：实战与贡献
+
+### 4.1 写内核模块
+
+```c
+/* 最简单的 Hello World 内核模块 */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+static int __init hello_init(void)
+{
+    printk(KERN_INFO "Hello, Kernel World!\n");
+    return 0;
+}
+
+static void __exit hello_exit(void)
+{
+    printk(KERN_INFO "Goodbye, Kernel World!\n");
+}
+
+module_init(hello_init);
+module_exit(hello_exit);
+MODULE_LICENSE("GPL");
+```
+
+### 4.2 参与社区
+
+1. 订阅 LKML（Linux Kernel Mailing List）
+2. 从修复文档错误、Checkpatch 警告开始
+3. 找到感兴趣的子系统，跟踪其 git log
+4. 提交 trivial fix，学习邮件 patch 格式
+
+---
+
+## 每日学习建议
+
+```
+┌─────────────────────────────────────────────┐
+│  每天 2~3 小时的建议安排                       │
+│                                             │
+│  0:00 ─ 0:30  复习昨天笔记，提炼关键概念       │
+│  0:30 ─ 1:30  精读源码（不超过 200 行/天）     │
+│  1:30 ─ 2:00  画出当天阅读部分的数据结构图     │
+│  2:00 ─ 2:30  在 QEMU 中验证（打印、断点）     │
+└─────────────────────────────────────────────┘
+```
+
+> **核心原则**：不要只读，要 **画图 + 实验**。
+> 每读完一个函数，先画出它操作的数据结构，
+> 再在 QEMU 中用 GDB 跟踪验证自己的理解。
diff --git "a/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md" "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
new file mode 100644
index 0000000..5d7aa06
--- /dev/null
+++ "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
@@ -0,0 +1,223 @@
+# 01 — 经典 Linux 版本选择
+
+> 本章对比分析各个"里程碑"内核版本，帮助你选择最适合学习的版本，
+> 并说明每个版本在现代内核中的"基因"贡献。
+
+---
+
+## 版本演进时间线
+
+```
+1991        1994        1996        2001        2003        2011        2015        2024
+  │           │           │           │           │           │           │           │
+  ▼           ▼           ▼           ▼           ▼           ▼           ▼           ▼
+Linux 0.01  Linux 1.0   Linux 2.0   Linux 2.4   Linux 2.6   Linux 3.0   Linux 4.0   Linux 6.x
+ ·10K行      ·17万行     ·78万行     ·300万行    ·600万行    ·1500万行   ·2000万行   ·3000万行
+ ·386 only   ·TCP/IP     ·SMP/多架构 ·完整VM     ·现代框架   ·统一版本号  ·热补丁     ·Rust支持
+```
+
+---
+
+## 推荐学习版本对比
+
+### 🥇 Linux 0.11（强烈推荐入门）
+
+**发布时间**：1991 年 12 月  
+**代码行数**：约 14,000 行  
+**下载地址**：`https://github.com/karottc/linux-0.11`
+
+```
+优点：
+  ✅ 代码极少，可以完整通读
+  ✅ 包含操作系统所有核心概念：进程/内存/文件/IO
+  ✅ 注释丰富（配合《Linux内核完全注释》）
+  ✅ 可在 QEMU 上轻松运行并调试
+
+缺点：
+  ❌ x86 16位/32位混合，汇编代码较多
+  ❌ 部分设计已被现代内核废弃（如段式内存管理）
+  ❌ 不支持 SMP（多处理器）
+```
+
+**架构图（0.11 内核整体）**：
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│                    Linux 0.11 内核                            │
+│                                                              │
+│  ┌────────────┐    ┌─────────────┐    ┌──────────────────┐   │
+│  │  进程管理   │    │   内存管理   │    │    文件系统       │   │
+│  │            │    │             │    │                  │   │
+│  │ task[64]   │    │ mem_map[]   │    │  Minix FS        │   │
+│  │ schedule() │    │ get_free_p()│    │  buffer_head     │   │
+│  │ fork()     │    │ copy_page() │    │  inode_table     │   │
+│  └─────┬──────┘    └──────┬──────┘    └────────┬─────────┘   │
+│        │                  │                    │              │
+│  ┌─────▼──────────────────▼────────────────────▼──────────┐  │
+│  │              中断与系统调用接口                           │  │
+│  │   system_call.s   idt[]   do_divide_error() ...         │  │
+│  └────────────────────────────────────────────────────────┘  │
+│                                                              │
+│  ┌────────────────────────────────────────────────────────┐  │
+│  │              设备驱动                                    │  │
+│  │   hd.c（硬盘）  floppy.c（软盘）  tty_io.c（终端）       │  │
+│  └────────────────────────────────────────────────────────┘  │
+└──────────────────────────────────────────────────────────────┘
+```
+
+---
+
+### 🥈 Linux 2.6.0（推荐进阶）
+
+**发布时间**：2003 年 12 月  
+**代码行数**：约 600 万行  
+**下载地址**：`https://mirrors.edge.kernel.org/pub/linux/kernel/v2.6/linux-2.6.0.tar.gz`
+
+```
+优点：
+  ✅ 现代内核框架的原型（VFS/kobject/workqueue/RCU 均在此成型）
+  ✅ 引入 O(1) 调度器 → 理解 CFS 的基础
+  ✅ 子系统边界清晰，代码质量高
+  ✅ 大量注释和文档
+
+缺点：
+  ❌ 代码量大，需要有 0.11 基础
+  ❌ 部分 API 已在后续版本修改
+```
+
+**2.6.0 源码目录结构**：
+
+```
+linux-2.6.0/
+├── arch/           # 架构相关（x86, arm, mips...）
+│   └── i386/
+│       ├── boot/   # 启动代码
+│       ├── kernel/ # 架构相关内核代码
+│       └── mm/     # 架构相关内存管理
+├── drivers/        # 设备驱动（按类型分目录）
+│   └── block/      # 块设备层（ll_rw_blk.c 等；顶层 block/ 目录到 2.6.15 才独立）
+├── fs/             # 文件系统
+│   ├── ext2/       # ext2 文件系统
+│   ├── proc/       # /proc 文件系统
+│   └── ...
+├── include/        # 头文件
+│   └── linux/      # 内核核心头文件
+├── init/           # 内核初始化
+├── ipc/            # 进程间通信（System V IPC）
+├── kernel/         # 核心内核代码
+│   ├── fork.c
+│   ├── sched.c     # O(1) 调度器
+│   ├── signal.c
+│   └── ...
+├── lib/            # 通用库（红黑树、链表等）
+├── mm/             # 内存管理
+│   ├── memory.c
+│   ├── slab.c      # Slab 分配器
+│   ├── vmalloc.c
+│   └── ...
+├── net/            # 网络子系统
+│   └── ipv4/       # TCP/IP 实现
+├── scripts/        # 构建脚本
+└── Makefile
+```
+
+---
+
+### 🔬 Linux 4.x（对比参考）
+
+**发布时间**：2015 年起  
+**推荐版本**：Linux 4.4（LTS，长期支持）  
+
+2.6.0 之后陆续引入的关键特性（学完 2.6 后对比学习，不限于 4.x）：
+
+| 特性 | 引入版本 | 说明 |
+|------|---------|------|
+| CFS 调度器 | 2.6.23 | 完全公平调度，替代 O(1) |
+| cgroups | 2.6.24 | 资源隔离，容器基础 |
+| 命名空间 | 2.6.24+ | 容器隔离基础 |
+| eBPF | 3.18+ | 可编程内核观测 |
+| io_uring | 5.1 | 高性能异步 IO |
+
+---
+
+## 学习版本决策树
+
+```
+你是否有 OS 理论基础？
+├── 否 → 先读《操作系统：精髓与设计原理》再回来
+└── 是 ↓
+
+你的目标是什么？
+├── 理解 OS 基本原理    → 读 Linux 0.11（完整读完）
+├── 实际内核开发         → 读 Linux 0.11 + Linux 2.6.0
+├── 容器/云原生内核      → 读 Linux 2.6.0 + Linux 4.x
+└── 驱动开发            → 读 Linux 2.6.0（重点 drivers/）
+```
+
+---
+
+## 两个版本核心数据结构对比
+
+### 进程描述符（task_struct）
+
+| 字段含义 | Linux 0.11 | Linux 2.6.0 |
+|---------|-----------|------------|
+| 进程状态 | `state` | `state`（值更多）|
+| 进程 ID | `pid` | `pid` + `tgid`（支持线程）|
+| 内存信息 | `ldt[3]`（段描述符）| `mm_struct *mm`（纯页式）|
+| 调度信息 | `counter`（时间片）| `prio`/`static_prio`/`time_slice`（O(1) 调度器字段）|
+| 文件信息 | `filp[20]`（固定数组）| `files_struct *files`（动态）|
+| 父进程 | `father`（父进程 pid，long）| `parent`（指针）|
+
+```c
+/* Linux 0.11: include/linux/sched.h */
+struct task_struct {
+    long state;          /* -1不可运行, 0可运行, >0停止 */
+    long counter;        /* 运行时间片 */
+    long priority;       /* 静态优先级 */
+    long signal;         /* 信号位图 */
+    ...
+    long pid, father, pgrp, session, leader;
+    ...
+    struct m_inode *pwd, *root, *executable;
+    struct file *filp[NR_OPEN];
+    struct desc_struct ldt[3];  /* 局部描述符表 */
+    struct tss_struct tss;      /* 任务状态段 */
+};
+
+/* Linux 2.6.0: include/linux/sched.h（简化）*/
+struct task_struct {
+    volatile long state;
+    struct thread_info *thread_info;
+    unsigned long flags;
+    int prio, static_prio;
+    struct list_head run_list;
+    ...
+    pid_t pid, tgid;
+    struct task_struct *parent;
+    struct mm_struct *mm;          /* 内存描述符 */
+    struct files_struct *files;    /* 打开文件表 */
+    struct signal_struct *signal;  /* 信号 */
+    struct thread_struct thread;   /* CPU 状态 */
+};
+```
+
+---
+
+## 获取源码
+
+```bash
+# 方法一：从 kernel.org 下载
+wget https://mirrors.edge.kernel.org/pub/linux/kernel/Historic/old-versions/linux-0.11.tar.gz
+wget https://mirrors.edge.kernel.org/pub/linux/kernel/v2.6/linux-2.6.0.tar.gz
+
+# 方法二：使用 git（0.11 的 git 镜像）
+git clone https://github.com/karottc/linux-0.11
+
+# 在线阅读（推荐，支持跳转）
+# https://elixir.bootlin.com/linux/0.11/source
+# https://elixir.bootlin.com/linux/2.6.0/source
+```
+
+> **建议**：学习时两个版本都下载，在阅读 0.11 的同时
+> 偶尔对照 2.6.0 的相同机制，理解演进方向。
diff --git "a/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md" "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
new file mode 100644
index 0000000..e0ef1a0
--- /dev/null
+++ "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
@@ -0,0 +1,284 @@
+# 02 — 环境搭建：QEMU + GDB 调试内核
+
+> 本章介绍如何搭建一个可以**单步调试**内核的实验环境，
+> 让你可以在 GDB 中打断点、查看内核变量、跟踪函数调用。
+
+---
+
+## 总体架构
+
+```
+┌─────────────────────────────────────────────┐
+│           你的开发机（Host）                   │
+│                                             │
+│  ┌─────────────────┐    ┌────────────────┐  │
+│  │   QEMU 虚拟机    │◄──►│  GDB 调试器    │  │
+│  │                 │    │                │  │
+│  │  Linux 内核运行  │    │ 设置断点       │  │
+│  │  （被调试目标）  │    │ 单步执行       │  │
+│  │                 │    │ 查看变量/寄存器 │  │
+│  └─────────────────┘    └────────────────┘  │
+│         ▲                      ▲            │
+│         │   TCP :1234          │            │
+│         └──────────────────────┘            │
+│              GDB Remote Protocol            │
+└─────────────────────────────────────────────┘
+```
+
+---
+
+## 一、安装依赖
+
+```bash
+# Ubuntu / Debian
+sudo apt update
+sudo apt install -y \
+    qemu-system-x86 \
+    gcc \
+    make \
+    gdb \
+    git \
+    build-essential \
+    libncurses-dev \
+    flex \
+    bison \
+    libssl-dev \
+    libelf-dev
+
+# macOS（使用 Homebrew）
+brew install qemu gdb
+```
+
+---
+
+## 二、调试 Linux 0.11
+
+### 2.1 获取带调试符号的 0.11 镜像
+
+```bash
+# 克隆含 Makefile 的仓库
+git clone https://github.com/karottc/linux-0.11
+cd linux-0.11
+
+# 在 Makefile 中添加 -g 调试标志（已在新版 Makefile 中支持）
+# 直接编译
+make
+# 生成文件：Image（内核镜像）、tools/system（含符号的 ELF）
+```
+
+### 2.2 启动 QEMU 并等待 GDB
+
+```bash
+# 终端 1：启动 QEMU（-s 开启 GDB 服务端 :1234，-S 暂停等待 GDB）
+qemu-system-i386 \
+    -m 16M \
+    -boot a \
+    -fda Image \
+    -hda hdc-0.11.img \
+    -s -S
+
+# 终端 2：启动 GDB 并连接
+gdb tools/system
+
+# 在 GDB 中执行：
+(gdb) target remote :1234
+(gdb) break main          # 在 init/main.c:main 设断点
+(gdb) continue
+```
+
+### 2.3 常用 GDB 命令速查
+
+```
+命令                        说明
+──────────────────────────────────────────────
+b <函数名>                  在函数入口设断点
+b <文件>:<行号>             在指定行设断点
+info breakpoints            列出所有断点
+delete <编号>               删除断点
+
+n                           单步执行（不进入函数）
+s                           单步执行（进入函数）
+c                           继续运行
+finish                      运行到当前函数返回
+
+p <变量>                    打印变量值
+p *<指针>                   打印指针指向的结构
+p/x <变量>                  以十六进制打印
+info registers              显示所有寄存器
+x/10i $eip                  反汇编当前指令附近
+
+bt                          打印调用栈
+frame <编号>                切换栈帧
+list                        显示当前源码
+```
+
+---
+
+## 三、调试 Linux 2.6.0
+
+### 3.1 编译内核
+
+```bash
+wget https://mirrors.edge.kernel.org/pub/linux/kernel/v2.6/linux-2.6.0.tar.gz
+tar xf linux-2.6.0.tar.gz
+cd linux-2.6.0
+
+# 生成最小配置（适合 QEMU x86）
+make defconfig
+
+# 开启调试选项（重要！）
+# 编辑 .config，或使用 menuconfig：
+make menuconfig
+# 进入：Kernel hacking
+#   [*] Compile the kernel with debug info
+#   [*] Compile the kernel with frame pointers
+
+# 编译（-j 并行；注意 2.6.0 需要同时代的旧工具链，如 gcc 3.x，现代 GCC 无法直接编译）
+make -j$(nproc)
+
+# 生成文件：
+#   arch/i386/boot/bzImage  （可启动内核）
+#   vmlinux                  （含符号，用于 GDB）
+```
+
+### 3.2 制作最小根文件系统（BusyBox）
+
+```bash
+# 编译 BusyBox（静态链接）
+wget https://busybox.net/downloads/busybox-1.36.0.tar.bz2
+tar xf busybox-1.36.0.tar.bz2
+cd busybox-1.36.0
+make defconfig
+# 设置静态链接：CONFIG_STATIC=y
+sed -i 's/# CONFIG_STATIC is not set/CONFIG_STATIC=y/' .config
+make -j$(nproc)
+make install
+# 安装到 _install/
+
+# 创建 initramfs
+cd _install
+mkdir -p dev proc sys
+sudo mknod dev/console c 5 1
+sudo mknod dev/null c 1 3
+# 创建 init 脚本
+cat > init << 'EOF'
+#!/bin/sh
+mount -t proc none /proc
+mount -t sysfs none /sys
+echo "Welcome to minimal Linux!"
+exec /bin/sh
+EOF
+chmod +x init
+
+# 打包为 initramfs
+find . | cpio -o --format=newc | gzip > ../initramfs.img
+```
+
+### 3.3 启动并调试
+
+```bash
+# 终端 1：启动 QEMU
+qemu-system-i386 \
+    -kernel arch/i386/boot/bzImage \
+    -initrd initramfs.img \
+    -append "console=ttyS0 nokaslr" \
+    -nographic \
+    -s -S
+
+# 终端 2：GDB
+gdb vmlinux
+(gdb) target remote :1234
+(gdb) break start_kernel      # 内核 C 代码入口
+(gdb) continue
+```
+
+---
+
+## 四、推荐的调试工作流
+
+### 4.1 跟踪进程创建（fork）
+
+```gdb
+(gdb) break sys_fork
+(gdb) commands
+> # 每次进入 sys_fork 时自动打印
+> printf "fork() called, current pid=%d\n", current->pid
+> bt
+> continue
+> end
+(gdb) continue
+```
+
+### 4.2 查看页表结构
+
+```gdb
+# 在 Linux 0.11 中，查看进程内存映射
+(gdb) p current->ldt
+(gdb) p *current->mm   # 在 2.6.0 中
+
+# 打印页目录项（Linux 2.6.0）
+(gdb) p/x ((unsigned long *)0xc0101000)[0]
+```
+
+### 4.3 跟踪系统调用
+
+```gdb
+# 在系统调用分发处设断点（0.11）
+(gdb) break system_call
+# 在 2.6.0 中（int 0x80 入口；sysenter 路径对应 sysenter_entry）
+(gdb) break system_call
+```
+
+---
+
+## 五、VS Code + GDB 图形化调试
+
+如果你更喜欢 GUI，可以配置 VS Code：
+
+```json
+// .vscode/launch.json
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Debug Linux Kernel",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/vmlinux",
+            "miDebuggerPath": "/usr/bin/gdb",
+            "miDebuggerServerAddress": "localhost:1234",
+            "stopAtEntry": true,
+            "cwd": "${workspaceFolder}",
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "setupCommands": [
+                {
+                    "description": "设置反汇编风格",
+                    "text": "set disassembly-flavor intel"
+                },
+                {
+                    "description": "关闭确认",
+                    "text": "set confirm off"
+                }
+            ]
+        }
+    ]
+}
+```
+
+安装 VS Code 扩展：`C/C++`（Microsoft）
+
+---
+
+## 六、常见问题
+
+| 问题 | 原因 | 解决方法 |
+|------|------|---------|
+| `SIGTRAP`后无法继续 | 内核断点处理 | 用 `signal 0` 忽略信号 |
+| 符号找不到 | 未加 `-g` 编译 | 重新编译并确认 `CFLAGS += -g` |
+| `bzImage` 无法在 QEMU 启动 | 内核配置问题 | 用 `make defconfig` 重新生成配置 |
+| GDB 连接被拒绝 | QEMU 未启动 `-s` | 检查 QEMU 启动参数 |
+| 断点不命中 | 地址重定位 | 加 `nokaslr` 内核启动参数 |
+
+> **提示**：调试 3.14 及之后（引入 KASLR）的内核时务必加 `nokaslr` 参数（关闭地址随机化），
+> 否则每次启动内核地址不同，GDB 符号表会对不上；0.11 与 2.6.0 没有 KASLR，该参数会被忽略。
diff --git "a/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md" "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
new file mode 100644
index 0000000..0c18b7d
--- /dev/null
+++ "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
@@ -0,0 +1,437 @@
+# 03 — 进程管理
+
+> Linux 的进程管理是内核最核心的子系统之一。
+> 本章从**数据结构 → 生命周期 → 调度算法 → 上下文切换**四个维度，
+> 对照 Linux 0.11 与 Linux 2.6.0 源码逐步拆解。
+
+---
+
+## 1. 进程的本质：task_struct
+
+一个进程在内核中就是一个 `task_struct` 结构体。
+
+### Linux 0.11 的 task_struct
+
+```
+┌────────────────────────────────────────────┐
+│               task_struct (0.11)            │
+├────────────────┬───────────────────────────┤
+│ state          │ 进程状态（运行/就绪/等待）   │
+│ counter        │ 剩余时间片（动态优先级）     │
+│ priority       │ 静态优先级（调度基值）       │
+│ signal         │ 待处理信号位图              │
+│ pid / father   │ 进程 ID / 父进程 ID         │
+│ ldt[3]         │ 局部段描述符（代码段/数据段）│
+│ tss            │ 任务状态段（保存 CPU 状态）  │
+│ filp[NR_OPEN]  │ 打开文件指针数组            │
+│ pwd / root     │ 当前目录 / 根目录 inode      │
+│ start_code/brk │ 代码段起始地址 / 堆顶       │
+└────────────────┴───────────────────────────┘
+大小：约 1KB，直接存储 TSS（很快但不灵活）
+```
+
+### Linux 2.6.0 的 task_struct（精简）
+
+```
+┌─────────────────────────────────────────────────────┐
+│                  task_struct (2.6.0)                 │
+├──────────────────┬──────────────────────────────────┤
+│ state            │ 进程状态                           │
+│ pid / tgid       │ 进程ID / 线程组ID（支持线程）       │
+│ *mm              │ → mm_struct（虚拟内存描述符）       │
+│ *active_mm       │ → 内核线程借用的 mm                │
+│ *files           │ → files_struct（打开文件表）        │
+│ *fs              │ → fs_struct（文件系统信息）         │
+│ *signal          │ → signal_struct（信号）            │
+│ prio/static_prio │ 动态/静态优先级                    │
+│ run_list         │ 调度队列链表节点                   │
+│ *parent          │ → 父进程 task_struct               │
+│ children         │ 子进程链表头                       │
+│ thread           │ CPU 架构相关状态（寄存器等）        │
+└──────────────────┴──────────────────────────────────┘
+大小：约 1.7KB，用指针间接引用（模块化）
+```
+
+**演进对比**：
+
+```
+0.11: task_struct 直接包含 TSS（任务状态段）
+       → CPU 用硬件 TSS 切换，简单但 overhead 大
+
+2.6:  task_struct 包含 thread_struct
+       → 软件保存/恢复寄存器（switch_to 宏），更灵活
+```
+
+---
+
+## 2. 进程状态机
+
+### Linux 0.11 状态
+
+```
+         fork()
+           │
+           ▼
+        TASK_RUNNING ◄────────────────────────────┐
+        (就绪/运行)                               │
+           │                                     │
+    等待资源│                              schedule()
+           ▼                                     │
+     TASK_UNINTERRUPTIBLE ──► 资源到来 ──► TASK_RUNNING
+     TASK_INTERRUPTIBLE   ──► 信号/资源 ──► TASK_RUNNING
+           │
+           │ exit()
+           ▼
+        TASK_ZOMBIE  ──► 父进程 wait() ──► 进程表项清除
+```
+
+### Linux 2.6.0 新增状态
+
+```
+TASK_RUNNING          = 0   就绪或正在 CPU 上运行
+TASK_INTERRUPTIBLE    = 1   可中断睡眠
+TASK_UNINTERRUPTIBLE  = 2   不可中断睡眠（等待 IO）
+TASK_STOPPED          = 4   被信号暂停（如 SIGSTOP）
+TASK_ZOMBIE           = 8   已退出，等待父进程回收
+TASK_DEAD             = 16  完全死亡（2.6 新增）
+```
+
+---
+
+## 3. 进程创建：fork() 源码解析
+
+### 3.1 Linux 0.11 的 fork
+
+```
+用户程序调用 fork()
+      │
+      ▼
+int 0x80（系统调用中断）
+      │
+      ▼
+kernel/system_call.s: system_call
+  → call sys_call_table(,%eax,4)   ; eax = __NR_fork = 2
+      │
+      ▼
+kernel/system_call.s: sys_fork     ; 汇编入口，依次调用 fork.c 中的两个函数
+  → find_empty_process()           ; 找空闲 task 槽位
+  → copy_process(nr, ...)          ; 复制父进程
+      │
+      ▼
+copy_process() 做了什么？
+  1. 申请新 task_struct 内存页
+  2. *p = *current                 ; 整体拷贝父进程
+  3. 修改 pid、state、counter 等
+  4. 复制文件描述符引用计数
+  5. 设置 tss.eax = 0             ; 子进程 fork() 返回 0
+  6. copy_mem()                    ; 设置新的 LDT（写时复制用段限长）
+  7. 将新进程加入 task[] 数组
+  8. state = TASK_RUNNING         ; 加入就绪队列
+```
+
+**关键代码（kernel/fork.c）**：
+
+```c
+/* Linux 0.11: kernel/fork.c */
+int copy_process(int nr, long ebp, long edi, long esi, long gs,
+                 long none, long ebx, long ecx, long edx,
+                 long fs, long es, long ds,
+                 long eip, long cs, long eflags, long esp, long ss)
+{
+    struct task_struct *p;
+    struct file *f;          /* 第 6 步递增文件引用计数时使用 */
+    /* 1. 分配新 task_struct 所在的内存页 */
+    p = (struct task_struct *) get_free_page();
+    if (!p)
+        return -EAGAIN;
+    
+    task[nr] = p;
+    
+    /* 2. 整体拷贝父进程的 task_struct（只复制进程描述符，不涉及页面） */
+    *p = *current;
+    
+    /* 3. 修改子进程特有字段 */
+    p->state = TASK_UNINTERRUPTIBLE;  /* 先不就绪 */
+    p->pid = last_pid;
+    p->father = current->pid;
+    p->counter = p->priority;
+    
+    /* 4. 设置子进程的 TSS（CPU 状态） */
+    p->tss.eax = 0;          /* 子进程 fork() 返回 0 */
+    p->tss.esp = esp;        /* 继承父进程栈指针 */
+    p->tss.eip = eip;        /* 继承父进程指令指针 */
+    
+    /* 5. 复制内存映射（设置 LDT，写时复制） */
+    if (copy_mem(nr, p)) {
+        task[nr] = NULL;
+        free_page((long)p);
+        return -EAGAIN;
+    }
+    
+    /* 6. 复制文件引用 */
+    for (int i = 0; i < NR_OPEN; i++)
+        if (f = p->filp[i])
+            f->f_count++;
+    
+    /* 7. 就绪 */
+    p->state = TASK_RUNNING;
+    return last_pid;
+}
+```
+
+### 3.2 Linux 2.6.0 的 fork/clone
+
+```
+用户调用 fork() / vfork() / clone()
+      │
+      ▼  (glibc → int 0x80 / sysenter)
+kernel/fork.c: do_fork(clone_flags, ...)
+      │
+      ├─ copy_process()
+      │     ├─ dup_task_struct()       ; 分配新 task_struct + 内核栈
+      │     ├─ copy_flags()
+      │     ├─ copy_mm()               ; 复制/共享地址空间
+      │     ├─ copy_files()            ; 复制/共享文件表
+      │     ├─ copy_sighand()          ; 复制/共享信号处理
+      │     ├─ copy_thread()           ; 设置 CPU 寄存器（子进程返回 0）
+      │     └─ pid = alloc_pidmap()    ; 分配新 PID（alloc_pid() 是后续版本的接口）
+      │
+      └─ wake_up_forked_process()      ; 将子进程加入运行队列（后改名 wake_up_new_task）
+```
+
+**clone_flags 控制共享粒度**（线程 vs 进程的本质区别）：
+
+```
+clone_flags 标志          共享的资源
+─────────────────────────────────────────
+CLONE_VM                  虚拟地址空间（线程关键！）
+CLONE_FS                  文件系统信息（根目录等）
+CLONE_FILES               文件描述符表
+CLONE_SIGHAND             信号处理函数
+CLONE_THREAD              同一线程组（共享 tgid）
+
+fork()  = clone(SIGCHLD)
+vfork() = clone(CLONE_VM | CLONE_VFORK | SIGCHLD)
+pthread = clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | ...)
+```
+
+---
+
+## 4. 调度器：从简单轮转到 O(1)
+
+### 4.1 Linux 0.11 调度器
+
+```c
+/* kernel/sched.c — schedule() */
+void schedule(void)
+{
+    int i, next, c;
+    struct task_struct *p;
+    
+    /* 处理定时器与睡眠 */
+    ...
+    
+    /* 找 counter 最大的就绪进程 */
+    while (1) {
+        c = -1;
+        next = 0;
+        i = NR_TASKS;
+        p = &task[NR_TASKS];
+        while (--i) {
+            if (!*--p)
+                continue;
+            if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
+                c = (*p)->counter, next = i;
+        }
+        
+        if (c)      /* 找到时间片非零的进程 */
+            break;
+        
+        /* 所有进程时间片耗尽，重新分配 */
+        for (p = &LAST_TASK; p > &FIRST_TASK; --p)
+            if (*p)
+                (*p)->counter = ((*p)->counter >> 1) + (*p)->priority;
+    }
+    
+    switch_to(next);   /* 切换到 next 号进程 */
+}
+```
+
+**时间复杂度**：O(n)，n 为进程数  
+**问题**：进程多时性能下降；无法保证实时性
+
+### 4.2 Linux 2.6.0 O(1) 调度器
+
+```
+核心思想：用两个优先级数组（active / expired，各带一张位图）替代遍历
+
+每个 CPU 维护一个 runqueue：
+┌─────────────────────────────────────────────┐
+│  runqueue (per-CPU)                          │
+│                                             │
+│  active   ──► prio_array（140 个优先级队列）  │
+│  expired  ──► prio_array（时间片耗尽的进程）  │
+│                                             │
+│  bitmap[5]：140位，标记哪些优先级有进程      │
+└─────────────────────────────────────────────┘
+
+调度时：
+  1. sched_find_first_bit(active->bitmap) → 找最高优先级
+  2. list_entry(queue->next, ...)          → 取队列头进程
+  时间复杂度：O(1)（位操作）
+
+时间片耗尽时：移入 expired
+所有 active 为空时：swap(active, expired) → O(1) 更新
+```
+
+### 4.3 调度时机
+
+```
+触发 schedule() 的时机：
+
+1. 主动让出：
+   sleep_on() / interruptible_sleep_on()
+   wait_event() / msleep()
+
+2. 时钟中断（timer tick）：
+   0.11: do_timer() → --current->counter，减到 0 且被打断的是用户态时直接调用 schedule()
+   2.6:  scheduler_tick() 递减 time_slice，耗尽时置 TIF_NEED_RESCHED
+   2.6 在中断返回路径上检查该标志 → schedule()
+
+3. 系统调用返回：
+   检查 TIF_NEED_RESCHED 标志
+
+4. 中断/异常返回用户态：
+   检查 TIF_NEED_RESCHED 标志
+```
+
+---
+
+## 5. 上下文切换：switch_to 源码解析
+
+### 5.1 Linux 0.11 的 switch_to（汇编宏）
+
+```c
+/* include/linux/sched.h */
+/* 利用 CPU 硬件 TSS 切换 */
+#define switch_to(n) {                      \
+    struct {long a,b;} __tmp;               \
+    __asm__("cmpl %%ecx,current\n\t"        \
+        "je 1f\n\t"                         \
+        "movw %%dx,%1\n\t"                  \
+        "xchgl %%ecx,current\n\t"           \
+        "ljmp *%0\n\t"   /* 远跳转触发 TSS 切换 */ \
+        "cmpl %%ecx,last_task_used_math\n\t"\
+        "jne 1f\n\t"                        \
+        "clts\n"                            \
+        "1:"                                \
+        ::"m" (*&__tmp.a), "m" (*&__tmp.b), \
+        "d" (_TSS(n)), "c" ((long) task[n]));\
+}
+```
+
+**流程**：
+```
+1. ljmp 到新进程的 TSS 选择子
+2. CPU 硬件自动保存当前 TSS（所有寄存器）
+3. CPU 硬件自动加载新进程 TSS（恢复所有寄存器）
+4. 跳到新进程上次被打断的 EIP 处继续执行
+```
+
+### 5.2 Linux 2.6.0 的 switch_to（软件保存）
+
+```c
+/* arch/i386/kernel/process.c */
+void __switch_to(struct task_struct *prev, struct task_struct *next)
+{
+    struct thread_struct *prev_t = &prev->thread;
+    struct thread_struct *next_t = &next->thread;
+    
+    /* 1. 保存/恢复 FPU 状态 */
+    ...
+    
+    /* 2. 更新 TSS 中的 esp0（内核栈指针）*/
+    load_esp0(tss, next_t->esp0);
+    
+    /* 3. 把新进程的 TLS 描述符装入本 CPU 的 GDT */
+    load_TLS(next_t, smp_processor_id());
+    
+    /* 4. 保存/恢复 fs、gs 段寄存器与调试寄存器 */
+    ...
+    
+    /* 5. 按需更新 I/O 权限位图（LDT 不在此处切换，由 switch_mm() 负责） */
+    ...
+}
+
+/* 关键宏：保存/恢复通用寄存器（include/asm-i386/system.h） */
+#define switch_to(prev, next, last)                             \
+    asm volatile(                                               \
+        "pushfl\n\t"          /* 保存 eflags */                 \
+        "pushl %%ebp\n\t"     /* 保存 ebp */                    \
+        "movl %%esp,%0\n\t"   /* 保存 esp → prev->thread.esp */ \
+        "movl %3,%%esp\n\t"   /* 恢复 esp ← next->thread.esp */ \
+        "movl $1f,%1\n\t"     /* 保存返回地址 → prev->thread.eip */\
+        "pushl %4\n\t"        /* 压入新进程返回地址 */           \
+        "jmp __switch_to\n"   /* 跳转（完成 FPU/TLS 等切换）*/  \
+        "1:\t"                /* 新进程在这里开始执行 */         \
+        "popl %%ebp\n\t"      /* 恢复 ebp */                    \
+        "popfl\n"             /* 恢复 eflags */                  \
+        ...)
+```
+
+**软件切换 vs 硬件 TSS 切换**：
+
+```
+               Linux 0.11           Linux 2.6.0
+切换方式       硬件 TSS（ljmp）      软件（push/pop）
+保存内容       全部寄存器（CPU）     仅 esp/eip/ebp/eflags
+速度           较慢（TSS 加载慢）   较快
+灵活性         低（依赖 CPU 架构）  高（可移植）
+```
+
+---
+
+## 6. 内核栈布局
+
+理解上下文切换必须理解内核栈：
+
+```
+进程内核栈（0.11：与 task_struct 共用一页 4KB；2.6.0 i386：默认两页 8KB）：
+
+高地址 ┌──────────────────┐ ← esp0（ring 0 栈顶，存于 TSS）
+       │  pt_regs         │  ← 进程从用户态陷入时保存的寄存器
+       │  (中断帧)        │
+       ├──────────────────┤
+       │                  │
+       │  内核函数调用栈   │
+       │  (向下增长)      │
+       │                  │
+       │                  │
+低地址 └──────────────────┘ ← thread_info（2.6.0: 存于栈底）
+                              task_struct（0.11: 存于栈所在页）
+```
+
+> **关键洞察**：Linux 2.6.0 在内核栈**底部**存放 `thread_info`，
+> 通过 `current_thread_info()` = `esp & ~(THREAD_SIZE-1)` 可
+> 在 O(1) 时间内找到当前进程的 `thread_info`，进而找到 `task_struct`。
+
+---
+
+## 7. 实验：在 GDB 中观察进程切换
+
+```bash
+# 在 schedule() 中设断点，观察 current 的变化
+(gdb) break schedule
+(gdb) commands
+> printf "schedule: current pid=%d, name=%s\n", current->pid, current->comm
+> p current->state
+> continue
+> end
+
+# 在 switch_to 后观察寄存器
+(gdb) break __switch_to
+(gdb) commands
+> printf "switching from pid=%d to pid=%d\n", prev->pid, next->pid
+> continue
+> end
+```
diff --git "a/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md" "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
new file mode 100644
index 0000000..6117468
--- /dev/null
+++ "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
@@ -0,0 +1,355 @@
+# 04 — 内存管理
+
+> 内存管理是内核中最复杂的子系统之一。
+> 本章从**物理内存 → 虚拟地址空间 → 页表 → 缺页处理 → 内存分配器**
+> 五个层次，对照 Linux 0.11 与 2.6.0 源码拆解。
+
+---
+
+## 1. 内存管理全局视图
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│                    虚拟地址空间（每进程 4GB）                  │
+│                                                              │
+│  0xFFFFFFFF ┌───────────────────────────────────────────┐   │
+│             │  内核空间（1GB, 所有进程共享）              │   │
+│             │  内核代码/数据/堆/vmalloc 区域             │   │
+│  0xC0000000 ├───────────────────────────────────────────┤   │
+│             │  用户空间（3GB, 每进程独立）                │   │
+│             │  ┌─────────────────────────────────────┐  │   │
+│             │  │ 栈（向下增长）  [0xBFFF_FFFF 附近]   │  │   │
+│             │  │      ↓                              │  │   │
+│             │  │  ...（空洞）                        │  │   │
+│             │  │      ↑                              │  │   │
+│             │  │ 堆（向上增长）                       │  │   │
+│             │  │ BSS 段（未初始化全局变量）           │  │   │
+│             │  │ 数据段（已初始化全局变量）           │  │   │
+│             │  │ 代码段（只读）     [0x08048000 起]  │  │   │
+│  0x00000000 └─────────────────────────────────────────┘  │   │
+└──────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 2. Linux 0.11：段页式内存管理
+
+### 2.1 为什么用"段页式"？
+
+Linux 0.11 运行在 x86 保护模式，**必须使用段机制**（CPU 强制），
+但同时用页机制实现写时复制（Copy-on-Write）。
+
+```
+                  段机制                    页机制
+逻辑地址 ────────────────► 线性地址 ────────────────► 物理地址
+(段选择子 + 偏移)    GDT/LDT             页目录 + 页表
+```
+
+### 2.2 进程内存布局（0.11）
+
+```
+每个进程有自己的 LDT（局部描述符表），包含：
+  LDT[0]: 空
+  LDT[1]: 代码段  BASE = 进程号 × 64MB, LIMIT = 640KB（初始值，继承自进程 0）
+  LDT[2]: 数据段  BASE = 进程号 × 64MB, LIMIT = 640KB（含堆栈；exec 后扩为 64MB）
+
+因此：
+  进程 0 的线性地址范围：0x00000000 ~ 0x03FFFFFF（0~64MB）
+  进程 1 的线性地址范围：0x04000000 ~ 0x07FFFFFF（64~128MB）
+  进程 2 的线性地址范围：0x08000000 ~ 0x0BFFFFFF（128~192MB）
+  ...
+  最多 64 个进程 × 64MB = 4GB（恰好用满 32 位线性地址）
+```
+
+### 2.3 物理内存管理（mem_map）
+
+```c
+/* mm/memory.c */
+
+/* 全局内存位图：每个字节对应一个 4KB 物理页 */
+/* 值 = 0: 空闲；值 > 0: 引用计数 */
+unsigned char mem_map[PAGING_PAGES] = {0,};
+
+/* 分配一个物理页 */
+unsigned long get_free_page(void)
+{
+    register unsigned long __res asm("ax");
+    __asm__(
+        "std ; repne ; scasb\n\t"   /* 从末尾往前搜索 mem_map 中值为0的字节 */
+        "jne 1f\n\t"
+        "movb $1,1(%%edi)\n\t"      /* 标记为已用（引用计数=1）*/
+        "sall $12,%%ecx\n\t"        /* ecx = 页号 × 4096 = 物理地址 */
+        "addl %2,%%ecx\n\t"
+        "movl %%ecx,%%edx\n\t"
+        "movl $1024,%%ecx\n\t"
+        "leal 4092(%%edx),%%edi\n\t"
+        "rep ; stosl\n\t"           /* 将页面清零 */
+        "movl %%edx,%%eax\n"
+        "1:"
+        :"=a" (__res)
+        :"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES),
+         "D" (mem_map+PAGING_PAGES-1)
+        );
+    return __res;
+}
+```
+
+### 2.4 写时复制（Copy-on-Write）
+
+```
+fork() 时：
+  父子进程共享同一物理页
+  但将页表项设为"只读"
+  同时 mem_map[页号]++ （引用计数+1）
+
+任一进程写该页时：
+  CPU 触发写保护异常（Page Fault, error_code 有写保护位）
+
+do_wp_page() 处理：
+  1. 如果 mem_map[页号] == 1（只有自己引用）：
+     直接将页表项改回可写，不复制
+  2. 如果 mem_map[页号] > 1（有多个引用）：
+     get_free_page() 分配新物理页
+     复制原页内容到新页
+     将自己的页表项指向新页
+     mem_map[原页号]--
+     新页设为可写
+```
+
+**对应源码**（`mm/memory.c`）：
+
+```c
+void do_wp_page(unsigned long error_code, unsigned long address)
+{
+    un_wp_page((unsigned long *)
+        (((address>>10) & 0xffc) + (0xfffff000 &
+        *((unsigned long *) ((address>>20) &0xffc)))));
+}
+
+void un_wp_page(unsigned long *table_entry)
+{
+    unsigned long old_page, new_page;
+    old_page = 0xfffff000 & *table_entry;
+    
+    /* 只有自己引用该页，直接改可写 */
+    if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)] == 1) {
+        *table_entry |= 2;          /* 设置 R/W 位 */
+        invalidate();               /* 刷新 TLB */
+        return;
+    }
+    
+    /* 多个引用，需要复制 */
+    if (!(new_page = get_free_page()))
+        oom();
+    if (old_page >= LOW_MEM)
+        mem_map[MAP_NR(old_page)]--;  /* 减少引用计数 */
+    copy_page(old_page, new_page);    /* 复制内容 */
+    *table_entry = new_page | 7;      /* 新页：P/R/W/U */
+    invalidate();
+}
+```
+
+---
+
+## 3. Linux 2.6.0：纯页式管理 + 三级页表
+
+### 3.1 x86 两级页表结构（内核三级模型中的 PMD 层在非 PAE 下被折叠）
+
+```
+32位地址: [31..22][21..12][11..0]
+           页目录索引  页表索引   页内偏移
+           (10 bits)  (10 bits)  (12 bits)
+
+虚拟地址 → 物理地址转换过程：
+                                        物理内存
+CR3 ──► 页目录（4KB）                      │
+         [PGD 索引] ──► 页表（4KB）         │
+                         [PTE 索引] ──►  物理页（4KB）+ 偏移
+```
+
+### 3.2 mm_struct：进程虚拟内存描述符
+
+```c
+/* include/linux/sched.h（2.6.0 中 mm_struct 定义于此）*/
+struct mm_struct {
+    struct vm_area_struct *mmap;    /* VMA 链表（所有虚拟内存区域）*/
+    struct rb_root mm_rb;           /* VMA 红黑树（快速查找）*/
+    
+    pgd_t *pgd;                     /* 页目录（内核虚拟地址；装入 CR3 时用 __pa() 转为物理地址）*/
+    
+    unsigned long start_code, end_code;   /* 代码段范围 */
+    unsigned long start_data, end_data;   /* 数据段范围 */
+    unsigned long start_brk, brk;         /* 堆范围 */
+    unsigned long start_stack;            /* 栈起始地址 */
+    
+    unsigned long mmap_base;              /* mmap 区域起始 */
+    unsigned long total_vm;               /* 总虚拟页数 */
+    unsigned long rss;                    /* 驻留物理页数 */
+    ...
+};
+```
+
+### 3.3 vm_area_struct：虚拟内存区域（VMA）
+
+```
+mm->mmap 链表：
+
+┌─────────────┐    ┌─────────────┐    ┌─────────────┐
+│ vma: 代码段  │───►│ vma: 数据段  │───►│ vma: 堆     │ ···
+│ vm_start    │    │ vm_start    │    │ vm_start    │
+│ vm_end      │    │ vm_end      │    │ vm_end      │
+│ vm_flags    │    │ vm_flags    │    │ vm_flags    │
+│ (VM_READ    │    │ (VM_READ|   │    │ (VM_READ|   │
+│  VM_EXEC)   │    │  VM_WRITE)  │    │  VM_WRITE)  │
+│ vm_file ────┼──► │ 对应文件     │    │ NULL        │
+└─────────────┘    └─────────────┘    └─────────────┘
+```
+
+### 3.4 缺页中断处理流程
+
+```
+CPU 访问虚拟地址 addr
+      │
+      │ 页表项 P=0（页不在物理内存）
+      ▼
+do_page_fault(regs, error_code)
+      │
+      ├─ find_vma(mm, addr)       → 找到对应 VMA
+      │
+      ├─ 分析错误类型：
+      │   error_code & 1 == 0：页不存在（按需调页，handle_mm_fault）
+      │   error_code & 1 != 0 且 error_code & 2 != 0：写只读页（写保护违规 → 写时复制）
+      │
+      └─ handle_mm_fault(mm, vma, addr, write)
+              │
+              ├─ 文件映射（vma->vm_file != NULL）：
+              │     → 从磁盘读入页面（page cache）
+              │
+              ├─ 匿名映射（堆/栈）：
+              │     → alloc_page() 分配物理页
+              │     → 清零
+              │
+              └─ 写时复制：
+                    → 如果 page->_count > 1：复制页面
+                    → 修改页表项为可写
+```
+
+---
+
+## 4. 物理内存分配器
+
+### 4.1 伙伴系统（Buddy System）
+
+Linux 2.6.0 用伙伴系统管理物理页，解决外部碎片问题：
+
+```
+free_area[0]: 链表，每个块大小 = 2^0 = 1 页（4KB）
+free_area[1]: 链表，每个块大小 = 2^1 = 2 页（8KB）
+free_area[2]: 链表，每个块大小 = 2^2 = 4 页（16KB）
+...
+free_area[10]: 链表，每个块大小 = 2^10 = 1024 页（4MB）
+
+分配 N 页：
+  找最小的 2^k ≥ N 的 free_area[k]
+  如果 free_area[k] 为空，向上借：
+    从 free_area[k+1] 取一块，分成两个 2^k 块（"伙伴"）
+    一块用，一块加入 free_area[k]
+
+释放时：
+  检查"伙伴"是否空闲
+  如果是：合并为 2^(k+1) 块，递归向上合并
+```
+
+### 4.2 Slab 分配器（小对象分配）
+
+伙伴系统以 4KB 页为单位，但内核大量分配小对象（如 task_struct, inode）。
+Slab 解决**内部碎片**问题：
+
+```
+kmem_cache（task_struct 的缓存）：
+┌─────────────────────────────────────────────────────┐
+│  name: "task_struct"                                │
+│  obj_size: 1712 bytes                               │
+│                                                     │
+│  slab_full:   [slab1] → [slab2] → ...               │
+│  slab_partial:[slab3] → [slab4] → ...               │
+│  slab_free:   [slab5] → ...                         │
+└─────────────────────────────────────────────────────┘
+
+每个 slab = 若干连续物理页，切分为固定大小的对象：
+
+┌──────────────────────────────────────┐
+│ slab 控制头                           │
+│ 空闲对象链表                          │
+├──────────────────────────────────────┤
+│ [obj1] [obj2] [obj3] ... [obj_N]     │
+└──────────────────────────────────────┘
+
+优点：
+  · 避免每次 alloc/free 都走伙伴系统（快）
+  · 对象有构造/析构函数（减少初始化开销）
+  · 着色（coloring）避免 cache 行冲突
+```
+
+**关键 API**：
+
+```c
+/* 创建 slab 缓存 */
+struct kmem_cache *kmem_cache_create(
+    const char *name,
+    size_t size,
+    size_t align,
+    unsigned long flags,
+    void (*ctor)(void *, struct kmem_cache *, unsigned long),
+    void (*dtor)(void *, struct kmem_cache *, unsigned long));
+
+/* 从 slab 缓存分配对象 */
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+
+/* 释放对象回 slab 缓存 */
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+/* 通用小内存分配（内部使用 slab）*/
+void *kmalloc(size_t size, int flags);
+void kfree(const void *objp);
+```
+
+---
+
+## 5. 内存管理演进对比
+
+| 特性 | Linux 0.11 | Linux 2.6.0 |
+|------|-----------|------------|
+| 地址转换 | 段 → 线性 → 物理（段页式）| 段基址=0，实质纯页式 |
+| 页表级数 | 2 级（10+10+12）| 2 级（x86）/ 3 级支持（PAE）|
+| 物理内存管理 | mem_map[] 字节数组 | 伙伴系统（zones）|
+| 小对象分配 | 无（直接 get_free_page）| Slab 分配器 |
+| 写时复制 | do_wp_page() | do_wp_page()（更完善）|
+| 虚拟内存区域 | 无 VMA 概念 | vm_area_struct + 红黑树 |
+| 内存映射 | 无 mmap | mmap（文件映射/匿名映射）|
+| 交换（Swap）| 无（0.12 起才引入）| 完整交换子系统 |
+
+---
+
+## 6. 实验：观察内存管理
+
+```bash
+# 查看进程的虚拟内存区域（Linux 系统）
+cat /proc/self/maps
+# 输出示例：
+# 08048000-0804f000 r-xp 00000000 08:01 1234567  /bin/bash  ← 代码段
+# 0804f000-08050000 r--p 00006000 08:01 1234567  /bin/bash  ← 只读数据
+# 08050000-08051000 rw-p 00007000 08:01 1234567  /bin/bash  ← 可写数据
+# 0805f000-08080000 rw-p 00000000 00:00 0        [heap]     ← 堆
+
+# 查看物理内存使用
+cat /proc/buddyinfo    # 伙伴系统各 order 空闲页数
+cat /proc/slabinfo     # Slab 缓存信息
+
+# GDB 中查看页表（Linux 0.11）
+(gdb) break do_no_page
+(gdb) commands
+> printf "page fault at 0x%lx\n", address
+> continue
+> end
+```
diff --git "a/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md" "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
new file mode 100644
index 0000000..874fca9
--- /dev/null
+++ "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
@@ -0,0 +1,426 @@
+# 05 — 文件系统
+
+> 文件系统是操作系统中"持久化"的核心。
+> 本章从 **VFS 抽象层 → inode/dentry 数据结构 → 磁盘布局 → 文件读写路径**
+> 对照 Linux 0.11（Minix FS）与 Linux 2.6.0（ext2 + VFS）展开。
+
+---
+
+## 1. 文件系统层次模型
+
+```
+用户程序
+  open("/etc/passwd", O_RDONLY)
+          │
+          ▼
+   系统调用接口
+  sys_open → sys_read → sys_write → sys_close
+          │
+          ▼
+┌─────────────────────────────────────────────┐
+│             VFS（虚拟文件系统）               │   ← 0.11 没有；后续版本引入，2.6 中已成熟
+│  file → dentry → inode → super_block        │
+│  统一接口：inode_operations / file_operations│
+└─────┬─────────────────────────────────────┘
+      │
+  ┌───┴────────────────────────────────┐
+  │         具体文件系统实现             │
+  │  ext2   ext4   btrfs   tmpfs   ...  │
+  └───┬──────────────────────┬─────────┘
+      │                      │
+      ▼                      ▼
+   磁盘 I/O              内存（页缓存）
+   块设备层              page cache
+```
+
+---
+
+## 2. Linux 0.11：Minix FS（直接实现，无 VFS）
+
+### 2.1 Minix FS 磁盘布局
+
+```
+磁盘布局（以 360KB 软盘为例）：
+
+Block 0:   引导块（Boot Block）       [1 个 1KB 块]
+Block 1:   超级块（Super Block）      [1 个 1KB 块]
+Block 2~:  inode 位图（imap）         [1~8 个块]
+块 ~:      块位图（zmap）             [1~8 个块]
+块 ~:      inode 表（inode table）    [inode数/块大小 个块]
+块 ~:      数据区（data zone）        [剩余所有块]
+
+超级块内容：
+  s_ninodes    = inode 总数
+  s_nzones     = 数据块总数
+  s_imap_blocks= inode 位图块数
+  s_zmap_blocks= 数据块位图块数
+  s_firstdatazone = 第一个数据块号
+  s_max_size   = 最大文件大小
+```
+
+### 2.2 inode 结构（0.11）
+
+```c
+/* include/linux/fs.h — 磁盘上的 inode（32 字节）*/
+struct d_inode {
+    unsigned short i_mode;      /* 文件类型和权限 */
+    unsigned short i_uid;       /* 用户 ID */
+    unsigned long  i_size;      /* 文件大小（字节）*/
+    unsigned long  i_time;      /* 修改时间 */
+    unsigned char  i_gid;       /* 组 ID */
+    unsigned char  i_nlinks;    /* 硬链接数 */
+    unsigned short i_zone[9];   /* 数据块号：
+                                   i_zone[0..6]: 直接块
+                                   i_zone[7]:    一级间接块
+                                   i_zone[8]:    二级间接块 */
+};
+
+/* 内存中的 inode（含额外状态信息）*/
+struct m_inode {
+    /* 与磁盘 inode 相同的字段 */
+    unsigned short i_mode;
+    ...
+    unsigned short i_zone[9];
+    /* 内存特有字段 */
+    struct task_struct *i_wait;  /* 等待该 inode 的进程队列 */
+    unsigned long i_atime;       /* 访问时间（仅内存中）*/
+    unsigned long i_ctime;       /* inode 状态改变时间（仅内存中）*/
+    unsigned short i_dev;        /* 所在设备 */
+    unsigned short i_num;        /* inode 编号 */
+    unsigned short i_count;      /* 引用计数 */
+    unsigned char  i_lock;       /* 锁定标志 */
+    unsigned char  i_dirt;       /* 已修改标志（需要写回磁盘）*/
+    unsigned char  i_pipe;       /* 是否为管道 */
+    unsigned char  i_mount;      /* 是否为挂载点 */
+    unsigned char  i_seek;       /* seek 操作中 */
+    unsigned char  i_update;     /* 需要更新 */
+};
+```
+
+### 2.3 文件数据块访问（0.11 bmap）
+
+```
+文件大小 ≤ 7 × 1KB = 7KB：使用直接块 i_zone[0..6]
+文件大小 ≤ 7 + 512 = 519KB：使用一级间接块 i_zone[7]
+文件大小 ≤ 519 + 512×512 = 262663KB（≈256MB）：使用二级间接块 i_zone[8]
+
+  inode.i_zone[0] ──► 数据块（直接）
+  inode.i_zone[7] ──► 间接块 ──► [指针0, 指针1, ..., 指针511]
+                                        │
+                                        ▼
+                                      数据块
+
+bmap(inode, block_nr):
+  if block_nr < 7:
+    return i_zone[block_nr]
+  elif block_nr < 7 + 512:
+    读 i_zone[7] 所指的间接块
+    返回间接块[block_nr - 7]
+  else:
+    读 i_zone[8] 所指的二级间接块
+    i = (block_nr - 7 - 512) / 512
+    j = (block_nr - 7 - 512) % 512
+    读二级间接块[i]所指的一级间接块
+    返回一级间接块[j]
+```
+
+### 2.4 文件读写路径（0.11）
+
+```
+read() 系统调用
+      │
+      ▼
+sys_read(fd, buf, count)
+      │
+      ├─ 根据 fd 取 file 结构（current->filp[fd]）
+      ├─ 取 inode（file->f_inode）
+      └─ file_read(inode, file, buf, count)
+              │
+              ▼
+        按文件偏移计算数据块号
+        bmap(inode, block) → 物理块号
+              │
+              ▼
+        bread() → buffer_head（读取并缓存磁盘块）
+              │
+              ▼
+        put_fs_byte() 逐字节把 buffer->b_data + offset 处的数据复制到用户缓冲区 buf
+```
+
+### 2.5 目录查找（路径解析）
+
+```c
+/* fs/namei.c */
+/* 解析 pathname：返回最末分量所在目录的 inode，并通过 name/namelen 返回最末分量 */
+struct m_inode *dir_namei(const char *pathname,
+                           int *namelen,
+                           const char **name,
+                           struct m_inode *base)
+{
+    /* 如果路径以 '/' 开头，从根目录开始 */
+    if (c == '/')
+        inode = current->root;
+    else
+        inode = current->pwd;    /* 否则从当前目录开始 */
+    
+    /* 逐段解析路径 */
+    while (1) {
+        取下一个路径分量（/分隔）
+        if (最后一个分量) break;
+        follow_link(inode) → 处理符号链接
+        find_entry(inode, name, len) → 在目录中找条目
+        inode = 目录条目的 inode
+    }
+    return inode;
+}
+```
+
+---
+
+## 3. Linux 2.6.0：VFS + ext2
+
+### 3.1 VFS 核心数据结构
+
+#### super_block — 文件系统元信息
+
+```c
+struct super_block {
+    dev_t s_dev;                    /* 所在设备 */
+    unsigned long s_blocksize;       /* 块大小 */
+    unsigned long long s_maxbytes;   /* 最大文件大小 */
+    struct file_system_type *s_type; /* 文件系统类型 */
+    struct super_operations *s_op;   /* 超级块操作函数 */
+    struct dentry *s_root;           /* 根目录 dentry */
+    struct list_head s_inodes;       /* 所有 inode 链表 */
+    void *s_fs_info;                 /* 具体 FS 的私有数据 */
+    ...
+};
+```
+
+#### inode — 文件/目录的元数据
+
+```c
+struct inode {
+    umode_t i_mode;               /* 文件类型和权限 */
+    uid_t i_uid;                  /* 用户 ID */
+    gid_t i_gid;                  /* 组 ID */
+    loff_t i_size;                /* 文件大小 */
+    struct timespec i_atime, i_mtime, i_ctime;
+    
+    unsigned long i_ino;          /* inode 编号 */
+    unsigned int i_nlink;         /* 硬链接数 */
+    dev_t i_rdev;                 /* 设备文件的设备号 */
+    
+    struct inode_operations *i_op; /* inode 操作（lookup/create/...）*/
+    struct file_operations *i_fop; /* 默认文件操作 */
+    struct address_space *i_mapping; /* 页缓存映射 */
+    
+    struct super_block *i_sb;     /* 所属超级块 */
+    struct list_head i_hash;      /* inode hash 链表 */
+    struct list_head i_list;      /* inode LRU 链表 */
+    
+    void *i_private;              /* 具体 FS 的私有数据 */
+};
+```
+
+#### dentry — 目录项缓存（路径解析缓存）
+
+```c
+struct dentry {
+    unsigned int d_flags;
+    struct inode *d_inode;         /* 对应的 inode */
+    struct dentry *d_parent;       /* 父目录 dentry */
+    struct qstr d_name;            /* 文件名（含 hash）*/
+    
+    struct list_head d_child;      /* 兄弟节点链表 */
+    struct list_head d_subdirs;    /* 子目录链表 */
+    struct list_head d_hash;       /* dcache hash 链表 */
+    struct list_head d_lru;        /* LRU 链表 */
+    
+    struct dentry_operations *d_op;
+    struct super_block *d_sb;
+    void *d_fsdata;                /* FS 私有数据 */
+};
+```
+
+#### file — 打开文件的实例
+
+```c
+struct file {
+    struct dentry *f_dentry;       /* 对应的 dentry */
+    struct vfsmount *f_vfsmnt;     /* 挂载点 */
+    struct file_operations *f_op;  /* 文件操作函数 */
+    loff_t f_pos;                  /* 当前读写位置 */
+    unsigned int f_flags;          /* O_RDONLY/O_WRONLY/... */
+    mode_t f_mode;                 /* 访问模式 */
+    atomic_t f_count;              /* 引用计数 */
+    ...
+};
+```
+
+### 3.2 VFS 四大对象关系图
+
+```
+task_struct
+  └── files_struct
+        └── fd_array[] → file ──────────────────────────┐
+                                                         │
+                                              f_dentry ──▼──── dentry
+                                                         │       │
+                                              f_op ──────┤       │ d_inode
+                                                         │       ▼
+                                                         │     inode ───── super_block
+                                                         │       │               │
+                                                         │       │ i_mapping      │ s_op
+                                                         │       ▼               │
+                                                         └──► address_space     ext2_sb_info
+                                                               (页缓存)
+```
+
+### 3.3 路径解析（path_lookup）
+
+```
+open("/home/user/file.txt", O_RDONLY)
+      │
+      ▼
+sys_open → filp_open → open_namei → path_lookup
+      │
+      ▼
+path_lookup("/home/user/file.txt", ...)
+  1. 从 '/' 开始（current->fs->root）
+  2. 查 dcache：hash(parent_dentry, "home") → 找 dentry("home")
+     · 命中：直接用（快！）
+     · 未命中：调用 inode->i_op->lookup() 从磁盘读
+  3. 继续：hash(dentry("home"), "user") → dentry("user")
+  4. 继续：hash(dentry("user"), "file.txt") → dentry("file.txt")
+  5. 返回最终 dentry
+```
+
+**dcache 是 VFS 性能的关键**：大多数路径查找在内存中完成（O(1) hash）。
+
+### 3.4 ext2 磁盘布局
+
+```
+ext2 磁盘布局（分区）：
+
+[Block 0] 引导扇区（可选）
+[Block Group 0]
+  ├── 超级块（Super Block）          1 块
+  ├── 块组描述符表（Group Descriptor）若干块
+  ├── 块位图（Block Bitmap）         1 块
+  ├── inode 位图（Inode Bitmap）     1 块
+  ├── inode 表（Inode Table）        若干块
+  └── 数据块（Data Blocks）          剩余块
+[Block Group 1]
+  ├── 超级块备份
+  └── ...（同 Group 0 结构）
+...
+[Block Group N]
+```
+
+### 3.5 文件读取路径（2.6.0）
+
+```
+read(fd, buf, count)
+      │
+      ▼
+sys_read → vfs_read → file->f_op->read
+                            │
+                   ┌────────┴─────────────┐
+                   │  generic_file_read() │
+                   └────────┬─────────────┘
+                            │
+              ┌─────────────▼──────────────┐
+              │       页缓存查找            │
+              │  find_get_page(mapping, pg)│
+              └──────┬──────────────┬──────┘
+                     │命中          │未命中
+                     │             ▼
+                     │      alloc_page()
+                     │      → readpage()  ← 具体 FS 实现
+                     │         │ (ext2_readpage)
+                     │         ▼ 提交 bio 到块设备层
+                     │      等待 IO 完成
+                     │
+                     ▼
+              copy_to_user(buf, page_data + offset, len)
+```
+
+---
+
+## 4. 挂载机制（Mount）
+
+### 4.1 概念
+
+```
+挂载 = 将一个文件系统的根目录"覆盖"在另一个目录上
+
+挂载前：
+  /
+  ├── etc/
+  ├── home/
+  └── mnt/         ← 空目录（挂载点）
+
+挂载后（mount /dev/sdb1 /mnt）：
+  /
+  ├── etc/
+  ├── home/
+  └── mnt/         ← /dev/sdb1 的根目录
+       ├── data/
+       └── logs/
+```
+
+### 4.2 vfsmount 结构（2.6.0）
+
+```c
+struct vfsmount {
+    struct list_head mnt_hash;
+    struct vfsmount *mnt_parent;     /* 父挂载点 */
+    struct dentry *mnt_mountpoint;   /* 挂载在父 FS 中的 dentry */
+    struct dentry *mnt_root;         /* 本 FS 的根 dentry */
+    struct super_block *mnt_sb;      /* 超级块 */
+    struct list_head mnt_mounts;     /* 子挂载点列表 */
+    ...
+};
+```
+
+---
+
+## 5. 文件系统核心机制对比
+
+| 特性 | Linux 0.11 (Minix FS) | Linux 2.6.0 (ext2 + VFS) |
+|------|----------------------|--------------------------|
+| 抽象层 | 无 VFS，直接调用 FS 函数 | VFS 统一接口 |
+| 路径缓存 | 无（每次都读磁盘）| dcache（内存中目录树）|
+| 页缓存 | buffer_head（块缓存）| address_space + page cache |
+| 最大文件 | ~256MB（两级间接块的寻址上限；实际受 64MB 分区限制）| ~2TB（ext2 三级间接块）|
+| 最大分区 | ~64MB | ~4TB |
+| inode 大小 | 32 字节（磁盘）| 128 字节（ext2 默认）|
+| 链接 | 硬链接 | 硬链接 + 符号链接（完整）|
+| 挂载 | 基本支持 | 完整挂载命名空间 |
+
+---
+
+## 6. 实验
+
+```bash
+# 查看进程打开的文件
+ls -la /proc/$$/fd
+
+# 查看 dentry 缓存统计
+cat /proc/sys/fs/dentry-state
+
+# 查看 inode 缓存
+cat /proc/sys/fs/inode-state
+
+# 在 GDB 中跟踪文件打开（Linux 0.11）
+(gdb) break sys_open
+(gdb) commands
+> printf "open: %s\n", filename
+> bt
+> continue
+> end
+
+# 查看 ext2 超级块（Linux 系统）
+tune2fs -l /dev/sda1
+```
diff --git "a/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md" "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
new file mode 100644
index 0000000..a3ee743
--- /dev/null
+++ "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
@@ -0,0 +1,360 @@
+# 06 — 系统调用
+
+> 系统调用是用户程序进入内核的唯一合法通道。
+> 本章从**硬件机制 → 内核入口 → 分发 → 返回**完整拆解这条路径，
+> 对照 Linux 0.11 与 Linux 2.6.0 源码说明其演进。
+
+---
+
+## 1. 为什么需要系统调用？
+
+```
+用户态（Ring 3）         内核态（Ring 0）
+──────────────           ────────────────
+只能访问用户内存    ◄──  操作系统内核
+无法直接操作硬件         控制所有硬件
+无法直接调用内核函数      管理所有进程
+
+系统调用 = 受控的"大门"
+用户程序通过约定好的"门"进入内核，
+内核验证参数后执行操作，再返回结果。
+```
+
+---
+
+## 2. Linux 0.11：int 0x80 中断方式
+
+### 2.1 整体流程
+
+```
+用户程序（C 代码）
+  ↓  write(1, "hello", 5);
+  ↓  （0.11 中由 unistd.h 的 _syscall3 宏展开为内联汇编）
+
+      movl $4, %eax    ; __NR_write = 4
+      movl $1, %ebx    ; fd = 1
+      movl $buf, %ecx  ; buffer 地址
+      movl $5, %edx    ; count = 5
+      int  $0x80        ; 触发系统调用中断
+
+  ↓  CPU 自动：
+      保存 cs:eip, ss:esp, eflags 到内核栈
+      查 IDT[0x80]：指向 system_call 处理函数
+      切换到内核态（Ring 0）
+      切换到该进程的内核栈
+
+kernel/system_call.s: system_call
+  ↓  保存所有寄存器（push 系列指令）
+  ↓  call sys_call_table(,%eax,4)  ; 按 eax 索引函数表
+  ↓  sys_write(1, "hello", 5)      ; 执行实际函数
+  ↓  将返回值存入 eax（栈上的 pt_regs.eax）
+  ↓  检查信号、是否需要调度
+  ↓  iret                          ; 返回用户态
+
+用户程序继续执行（write 返回值在 eax 中）
+```
+
+### 2.2 IDT 初始化
+
+```c
+/* kernel/traps.c — Linux 0.11 */
+void trap_init(void)
+{
+    set_trap_gate(0, &divide_error);
+    set_trap_gate(1, &debug);
+    ...
+    set_system_gate(0x80, &system_call); /* 0x80 号中断 = 系统调用 */
+}
+
+/* 区别：
+   set_trap_gate:   DPL=0（只有内核可触发，用于异常）
+   set_system_gate: DPL=3（用户态也可以 int 触发，用于系统调用）
+*/
+```
+
+### 2.3 system_call 汇编代码
+
+```asm
+# kernel/system_call.s — Linux 0.11（精简版）
+.globl system_call
+system_call:
+    cmpl $nr_system_calls-1,%eax   # 检查系统调用号合法性
+    ja bad_sys_call                 # 越界则报错
+    
+    push %ds                        # 保存段寄存器
+    push %es
+    push %fs
+    pushl %edx                      # 保存参数寄存器
+    pushl %ecx
+    pushl %ebx
+    
+    # 将 ds/es 指向内核数据段
+    movl $0x10,%edx
+    mov %dx,%ds
+    mov %dx,%es
+    movl $0x17,%edx                 # fs 指向用户数据段
+    mov %dx,%fs
+    
+    # 通过系统调用号查表并调用
+    call sys_call_table(,%eax,4)    # sys_call_table[eax]()
+    pushl %eax                      # 保存返回值
+    
+    # 检查当前进程是否需要处理信号或重新调度
+    movl current,%eax
+    cmpl $0,state(%eax)             # state != TASK_RUNNING?
+    jne reschedule
+    cmpl $0,counter(%eax)           # counter == 0?
+    je reschedule
+    
+ret_from_sys_call:
+    # 检查信号
+    movl signal(%eax),%ebx          # eax = current
+    movl blocked(%eax),%ecx
+    notl %ecx
+    andl %ebx,%ecx
+    bsfl %ecx,%ecx
+    je 3f
+    btrl %ecx,%ebx
+    movl %ebx,signal(%eax)
+    ...
+    
+3:  popl %eax                       # 恢复返回值
+    popl %ebx
+    popl %ecx
+    popl %edx
+    pop %fs
+    pop %es
+    pop %ds
+    iret                            # 返回用户态
+```
+
+### 2.4 系统调用表
+
+```c
+/* include/linux/sys.h — Linux 0.11 */
+fn_ptr sys_call_table[] = {
+    sys_setup,     /* 0 */
+    sys_exit,      /* 1 */
+    sys_fork,      /* 2 */
+    sys_read,      /* 3 */
+    sys_write,     /* 4 */
+    sys_open,      /* 5 */
+    sys_close,     /* 6 */
+    sys_waitpid,   /* 7 */
+    sys_creat,     /* 8 */
+    sys_link,      /* 9 */
+    sys_unlink,    /* 10 */
+    ...            /* 共 72 个 */
+};
+```
+
+---
+
+## 3. Linux 2.6.0：int 0x80 + sysenter/sysexit
+
+### 3.1 sysenter（快速系统调用）
+
+`int 0x80` 要经过 IDT 门描述符查找和特权级检查，并从 TSS 读取内核栈指针（ss0/esp0），开销较大。
+Pentium II 引入了 `sysenter/sysexit` 指令，专为系统调用优化：
+
+```
+对比：
+              int 0x80          sysenter
+保存的状态    cs/eip/eflags/ss/esp 压栈   不压栈（cs/eip/esp 直接取自 MSR，返回地址由用户态约定）
+特权切换      通过 IDT           通过 MSR 寄存器（SYSENTER_EIP_MSR）
+开销          ~100 ns           ~20 ns
+
+sysenter 使用 MSR（Model Specific Register）：
+  SYSENTER_CS_MSR  = 目标代码段（内核段）
+  SYSENTER_EIP_MSR = 目标 EIP（内核入口 sysenter_entry）
+  SYSENTER_ESP_MSR = 目标 ESP（内核栈）
+```
+
+### 3.2 两种方式的统一处理
+
+```
+用户态调用约定（2.6.0）：
+  · 系统调用号放入 eax
+  · 参数：ebx, ecx, edx, esi, edi, ebp（最多 6 个）
+  · 通过 vsyscall 页面（glibc 自动选择 int 0x80 或 sysenter）
+
+内核入口（arch/i386/kernel/entry.S）：
+
+sysenter_entry:                 # sysenter 路径
+    movl TSS_sysenter_esp0(%esp),%esp   # 切换到内核栈
+    sti
+    pushl $(__USER_DS)
+    ...
+    # 之后与 int 0x80 路径合流
+
+system_call:                    # int 0x80 路径
+    pushl %eax                  # 保存系统调用号
+    SAVE_ALL                    # 保存所有寄存器（宏）
+    ...
+    call *sys_call_table(,%eax,4)
+    movl %eax,EAX(%esp)        # 返回值写回
+    ...
+    RESTORE_INT_REGS
+    iret / sysexit
+```
+
+### 3.3 参数传递与验证
+
+```c
+/* 用户空间指针必须验证！*/
+asmlinkage ssize_t sys_read(unsigned int fd, char __user *buf, size_t count)
+{
+    struct file *file;
+    ssize_t ret = -EBADF;
+    
+    /* 1. 验证 fd 合法性 */
+    file = fget(fd);
+    if (file) {
+        /* 2. 调用 VFS 读取（buf 是用户空间指针）*/
+        ret = vfs_read(file, buf, count, &file->f_pos);
+        fput(file);
+    }
+    return ret;
+}
+
+/* vfs_read 最终调用 copy_to_user 安全复制数据 */
+static inline unsigned long
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+    /* 检查 to 地址是否在用户空间范围内 */
+    if (access_ok(VERIFY_WRITE, to, n))
+        n = __copy_to_user(to, from, n);  /* 实际复制 */
+    return n;
+}
+```
+
+---
+
+## 4. pt_regs：内核栈上的寄存器快照
+
+当用户程序陷入内核时，内核栈顶部存放一个 `pt_regs` 结构体，
+保存了用户态的所有寄存器状态：
+
+```c
+/* arch/i386/kernel/entry.S / include/asm-i386/ptrace.h */
+struct pt_regs {
+    long ebx;         /* 系统调用参数 1 */
+    long ecx;         /* 系统调用参数 2 */
+    long edx;         /* 系统调用参数 3 */
+    long esi;         /* 系统调用参数 4 */
+    long edi;         /* 系统调用参数 5 */
+    long ebp;         /* 系统调用参数 6 */
+    long eax;         /* 系统调用号（入）/ 返回值（出）*/
+    int  xds;
+    int  xes;
+    long orig_eax;    /* 原始系统调用号（用于重启）*/
+    long eip;         /* 用户态返回地址 */
+    int  xcs;         /* 用户态代码段 */
+    long eflags;      /* 标志寄存器 */
+    long esp;         /* 用户态栈指针 */
+    int  xss;         /* 用户态栈段 */
+};
+```
+
+**内核栈布局（系统调用时）**：
+
+```
+内核栈高地址（esp0 = 内核栈页顶）：
+┌──────────────────────┐
+│  ss（用户态栈段）     │  ← CPU 自动压入
+│  esp（用户态栈指针）  │  ← CPU 自动压入
+│  eflags              │  ← CPU 自动压入
+│  cs（用户态代码段）   │  ← CPU 自动压入
+│  eip（用户态返回地址）│  ← CPU 自动压入
+├──────────────────────┤
+│  orig_eax            │  ← SAVE_ALL 压入
+│  ds / es             │  ← SAVE_ALL 压入
+│  eax / ebp / edi ... │  ← SAVE_ALL 压入（pt_regs）
+└──────────────────────┘ ← 此时的 esp（内核栈指针）
+```
+
+---
+
+## 5. 系统调用的完整生命周期
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    系统调用完整流程                           │
+│                                                             │
+│  ① 用户调用 write(1,"hi",2)                                 │
+│          ↓                                                   │
+│  ② glibc: mov $4,%eax; int $0x80                           │
+│          ↓                                                   │
+│  ③ CPU:  保存寄存器 → 查 IDT[0x80] → 跳 system_call        │
+│          ↓                                                   │
+│  ④ SAVE_ALL → 调用 sys_call_table[4] = sys_write()         │
+│          ↓                                                   │
+│  ⑤ sys_write → vfs_write → ext2_write → 写页缓存 → bio     │
+│          ↓                                                   │
+│  ⑥ 返回值写入 pt_regs.eax                                   │
+│          ↓                                                   │
+│  ⑦ 检查信号（do_signal）/ 调度（schedule）                   │
+│          ↓                                                   │
+│  ⑧ RESTORE_ALL → iret → 回到用户态                         │
+│          ↓                                                   │
+│  ⑨ glibc 从 eax 取返回值，返回给 write()                    │
+└─────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 6. 添加一个自定义系统调用（实验）
+
+在 Linux 0.11 中添加 `sys_myhello`：
+
+```c
+/* 第一步：在 kernel/sys.c 中添加实现 */
+int sys_myhello(void)
+{
+    printk("Hello from kernel syscall!\n");
+    return 42;
+}
+
+/* 第二步：在 include/linux/sys.h 中注册 */
+extern int sys_myhello(void);
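+// 在 sys_call_table[] 末尾添加（原表共 72 项，编号 0~71，新项编号为 72）：
+// sys_myhello,   /* 72 */
+
+/* 第三步：在 include/unistd.h 中添加号码（用户态使用的头文件中也要有同样的定义）*/
+#define __NR_myhello 72
+
+/* 第四步：将 kernel/system_call.s 中的 nr_system_calls 由 72 改为 73
+   （否则入口处的调用号越界检查会拒绝 72），然后重新编译内核 */
+
+/* 第五步：用户程序测试（0.11 的 libc 没有 syscall()，用 _syscall0 宏生成桩函数）*/
+#define __LIBRARY__
+#include <unistd.h>
+#include <stdio.h>
+_syscall0(int, myhello)
+int main() {
+    int ret = myhello();
+    printf("syscall returned: %d\n", ret);
+    return 0;
+}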
+```
+
+---
+
+## 7. 系统调用性能分析
+
+```bash
+# 用 strace 跟踪系统调用
+strace -c ls /tmp     # 统计各系统调用次数和耗时
+strace -e trace=open,read,write ls
+
+# 用 perf 分析系统调用开销
+perf stat -e 'syscalls:sys_enter_*' ls
+
+# 查看系统调用表
+ausyscall --dump       # 打印所有系统调用号
+```
+
+| 系统调用 | 典型耗时（现代 x86）|
+|---------|-----------------|
+| getpid  | ~100 ns（可 vDSO 优化到 ~10 ns）|
+| read    | ~500 ns（命中页缓存）|
+| write   | ~500 ns（写页缓存）|
+| open    | ~3 µs（含路径解析）|
+| fork    | ~30 µs（进程创建）|
+| execve  | ~1 ms（加载程序）|
diff --git "a/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md" "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
new file mode 100644
index 0000000..f52c3e0
--- /dev/null
+++ "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
@@ -0,0 +1,419 @@
+# 07 — 设备驱动
+
+> 驱动程序是内核与硬件之间的桥梁。
+> 本章从**设备模型 → 字符设备 → 块设备 → 驱动开发流程**，
+> 对照 Linux 0.11（直接操作）与 Linux 2.6.0（统一设备模型）拆解。
+
+---
+
+## 1. 设备分类
+
+```
+Linux 设备分为三类：
+
+┌──────────────────────────────────────────────────────────┐
+│  字符设备（Character Device）                             │
+│  · 以字节流方式访问                                       │
+│  · 无缓冲（或少缓冲）                                     │
+│  · 例：键盘(/dev/tty)、串口(/dev/ttyS0)、鼠标            │
+│  · 设备文件: /dev/xxx  主设备号:次设备号                  │
+├──────────────────────────────────────────────────────────┤
+│  块设备（Block Device）                                   │
+│  · 以固定大小的块（通常 512B 或 4KB）随机访问             │
+│  · 有内核缓冲（页缓存）                                   │
+│  · 例：硬盘(/dev/sda)、U盘、SSD                          │
+├──────────────────────────────────────────────────────────┤
+│  网络设备（Network Device）                               │
+│  · 通过 socket 接口访问，不在 /dev 中出现                 │
+│  · 例：eth0、lo、wlan0                                    │
+└──────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 2. Linux 0.11：直接操作方式
+
+### 2.1 终端（TTY）驱动结构
+
+```c
+/* include/linux/tty.h（结构定义）；读写逻辑在 kernel/chr_drv/tty_io.c */
+
+/* 每个终端对应一个 tty_struct */
+struct tty_struct {
+    struct termios termios;       /* 终端参数（波特率、回显等）*/
+    int pgrp;                     /* 前台进程组 */
+    int stopped;                  /* 是否停止输出 */
+    void (*write)(struct tty_struct *tty);  /* 输出函数指针 */
+    struct tty_queue read_q;      /* 读缓冲队列 */
+    struct tty_queue write_q;     /* 写缓冲队列 */
+    struct tty_queue secondary;   /* 经行规处理后的队列 */
+};
+
+/* 底层输出函数（tty_write 及 tty_read 的回显路径经 tty->write 指针调用）*/
+void con_write(struct tty_struct *tty);  /* 控制台写（console.c）*/
+void rs_write(struct tty_struct *tty);   /* 串口写（serial.c）*/
+```
+
+### 2.2 硬盘驱动（HD）
+
+```c
+/* kernel/blk_drv/ll_rw_blk.c（请求队列）与 hd.c（硬盘驱动）— Linux 0.11 */
+
+/* 请求队列（I/O 请求）*/
+static struct request request[NR_REQUEST];
+
+/* 提交 I/O 请求 */
+void add_request(struct blk_dev_struct *dev, struct request *req)
+{
+    /* 将请求插入电梯排序队列 */
+    /* 电梯算法：按磁道号排序，减少磁头移动距离 */
+    ...
+    if (!(tmp = dev->current_request)) {
+        dev->current_request = req;
+        (dev->request_fn)();     /* 立即执行 */
+    } else {
+        /* 插入合适位置（按磁道号） */
+        for (; tmp->next; tmp = tmp->next)
+            if ((IN_ORDER(tmp, req) || !IN_ORDER(tmp, tmp->next))
+                && IN_ORDER(req, tmp->next))
+                break;
+        req->next = tmp->next;
+        tmp->next = req;
+    }
+}
+
+/* 硬盘中断处理 */
+void hd_interrupt(void)
+{
+    void (*handler)(void) = do_hd;
+    do_hd = NULL;
+    if (!handler)
+        handler = unexpected_hd_interrupt;
+    handler();                    /* 调用当前操作的完成处理函数 */
+    /* 注：0.11 硬盘用 PIO（port_read/port_write），无 DMA；真实入口是 system_call.s 中的汇编 _hd_interrupt，此处为等价 C 示意 */
+}
+```
+
+---
+
+## 3. Linux 2.6.0：统一设备模型
+
+### 3.1 设备模型的核心对象
+
+```
+kobject（内核对象基类）
+  ├── kset（kobject 的集合）
+  ├── ktype（kobject 的操作集合）
+  └── sysfs 文件系统节点
+
+所有设备都通过 kobject 嵌入：
+
+struct device {
+    struct kobject kobj;          /* ← 内嵌 kobject */
+    struct device *parent;        /* 父设备 */
+    struct bus_type *bus;         /* 所在总线 */
+    struct device_driver *driver; /* 对应驱动 */
+    void *driver_data;            /* 驱动私有数据 */
+    ...
+};
+
+struct device_driver {
+    const char *name;
+    struct bus_type *bus;
+    int (*probe)(struct device *dev);   /* 探测设备 */
+    int (*remove)(struct device *dev);  /* 移除设备 */
+    ...
+};
+```
+
+### 3.2 总线 - 驱动 - 设备 匹配机制
+
+```
+总线（bus_type）
+  ├── 设备链表：USB 键盘、USB 鼠标、USB 网卡 ...
+  └── 驱动链表：usbhid 驱动、usb-storage 驱动 ...
+
+当设备插入时：
+  1. 设备注册到总线设备链表
+  2. 遍历总线驱动链表
+  3. 调用 bus->match(device, driver) 尝试匹配
+  4. 匹配成功 → driver->probe(device) → 驱动初始化硬件
+
+当驱动加载时（insmod）：
+  1. 驱动注册到总线驱动链表
+  2. 遍历总线设备链表，尝试 match
+  3. 找到匹配设备 → probe()
+```
+
+### 3.3 sysfs — 设备模型的可视化
+
+```bash
+# sysfs 将设备树暴露到文件系统
+ls /sys/bus/pci/devices/
+ls /sys/class/block/
+ls /sys/class/net/
+
+# 每个设备目录包含：
+ls /sys/class/block/sda/
+# device/   → 链接到底层设备（其下的 driver 链接指向所绑定的驱动，如 sd）
+# power/    → 电源管理
+# queue/    → 请求队列参数
+# size      → 设备大小（512B 扇区数）
+```
+
+---
+
+## 4. 字符设备驱动开发（2.6.0）
+
+### 4.1 完整驱动模板
+
+```c
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <asm/uaccess.h>          /* 2.6.0 用此头文件；较新内核改为 <linux/uaccess.h> */
+
+#define MYDEV_NAME "mychardev"
+#define MYDEV_MAJOR 240           /* 主设备号（或动态分配）*/
+#define MYDEV_MINOR 0
+
+/* 驱动私有数据 */
+static struct {
+    struct cdev cdev;
+    char buf[256];
+    int buf_len;
+} mydev_data;
+
+/* 1. open：打开设备 */
+static int mydev_open(struct inode *inode, struct file *filp)
+{
+    /* 将驱动私有数据关联到 file */
+    filp->private_data = &mydev_data;
+    printk(KERN_INFO "%s: opened\n", MYDEV_NAME);
+    return 0;
+}
+
+/* 2. release：关闭设备 */
+static int mydev_release(struct inode *inode, struct file *filp)
+{
+    printk(KERN_INFO "%s: closed\n", MYDEV_NAME);
+    return 0;
+}
+
+/* 3. read：从设备读数据（内核 → 用户）*/
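+static ssize_t mydev_read(struct file *filp, char __user *buf,
+                           size_t count, loff_t *ppos)
+{
+    if (*ppos < 0 || *ppos >= mydev_data.buf_len)
+        return 0;                 /* 已读到末尾：返回 0（EOF），否则 cat 会无限循环 */
+    count = min(count, (size_t)(mydev_data.buf_len - *ppos));
+    if (copy_to_user(buf, mydev_data.buf + *ppos, count))
+        return -EFAULT;
+    *ppos += count;               /* 推进文件偏移，下一次 read 从这里继续 */
+    return count;
+}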
+
+/* 4. write：向设备写数据（用户 → 内核）*/
+static ssize_t mydev_write(struct file *filp, const char __user *buf,
+                            size_t count, loff_t *ppos)
+{
+    int len = min(count, sizeof(mydev_data.buf) - 1);
+    
+    if (copy_from_user(mydev_data.buf, buf, len))
+        return -EFAULT;
+    
+    mydev_data.buf[len] = '\0';
+    mydev_data.buf_len = len;
+    return len;
+}
+
+/* 5. ioctl：设备控制命令（2.6.0 接口；2.6.36 起 .ioctl 被移除，需改用 .unlocked_ioctl）*/
+static int mydev_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    switch (cmd) {
+    case 0:  /* 自定义命令 0：清空缓冲 */
+        mydev_data.buf_len = 0;
+        break;
+    default:
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/* file_operations：VFS 与驱动的接口 */
+static struct file_operations mydev_fops = {
+    .owner   = THIS_MODULE,
+    .open    = mydev_open,
+    .release = mydev_release,
+    .read    = mydev_read,
+    .write   = mydev_write,
+    .ioctl   = mydev_ioctl,
+};
+
+/* 模块加载：注册设备 */
+static int __init mydev_init(void)
+{
+    dev_t devno = MKDEV(MYDEV_MAJOR, MYDEV_MINOR);
+    
+    /* 注册设备号 */
+    if (register_chrdev_region(devno, 1, MYDEV_NAME) < 0)
+        return -ENODEV;
+    
+    /* 初始化并注册 cdev */
+    cdev_init(&mydev_data.cdev, &mydev_fops);
+    mydev_data.cdev.owner = THIS_MODULE;
+    if (cdev_add(&mydev_data.cdev, devno, 1) < 0) {
+        unregister_chrdev_region(devno, 1);
+        return -ENODEV;
+    }
+    
+    printk(KERN_INFO "%s: registered (major=%d)\n", MYDEV_NAME, MYDEV_MAJOR);
+    return 0;
+}
+
+/* 模块卸载：注销设备 */
+static void __exit mydev_exit(void)
+{
+    cdev_del(&mydev_data.cdev);
+    unregister_chrdev_region(MKDEV(MYDEV_MAJOR, MYDEV_MINOR), 1);
+    printk(KERN_INFO "%s: unregistered\n", MYDEV_NAME);
+}
+
+module_init(mydev_init);
+module_exit(mydev_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Your Name");
+MODULE_DESCRIPTION("My simple character device");
+```
+
+### 4.2 编译与测试
+
+```bash
+# Makefile（配方行必须以 Tab 开头，用空格会报 "missing separator"）
+obj-m += mychardev.o
+
+all:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+# 编译
+make
+
+# 加载模块
+sudo insmod mychardev.ko
+
+# 创建设备节点
+sudo mknod /dev/mychardev c 240 0
+sudo chmod 666 /dev/mychardev
+
+# 测试
+echo "Hello, Driver!" > /dev/mychardev
+cat /dev/mychardev
+# 输出：Hello, Driver!
+
+# 查看内核日志
+dmesg | tail
+
+# 卸载模块
+sudo rmmod mychardev
+```
+
+---
+
+## 5. 驱动与内核交互机制
+
+### 5.1 中断处理
+
+```
+硬件触发中断
+    │
+    ▼
+CPU 查 IDT → 跳转到 request_irq() 注册的处理函数
+    │
+    ├─ 上半部（interrupt handler）：快速、不可休眠
+    │     关键操作：清除中断状态、读写硬件寄存器
+    │     保存数据到 tasklet/workqueue
+    │
+    └─ 下半部（bottom half）：可以稍微慢一些
+          tasklet：软中断上下文，不可休眠
+          workqueue：进程上下文，可以休眠
+
+/* 注册中断处理函数 */
+int request_irq(unsigned int irq,
+                irqreturn_t (*handler)(int, void *, struct pt_regs *),
+                unsigned long flags,
+                const char *devname,
+                void *dev_id);
+
+/* 处理函数示例 */
+irqreturn_t my_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+    /* 读取硬件状态 */
+    int status = inb(MY_STATUS_PORT);
+    
+    /* 调度 tasklet 处理数据 */
+    tasklet_schedule(&my_tasklet);
+    
+    return IRQ_HANDLED;
+}
+```
+
+### 5.2 DMA（直接内存访问）
+
+```
+不用 DMA：
+  CPU 逐字节从硬件读数据到内存 → CPU 占用率高
+
+用 DMA：
+  1. CPU 设置 DMA 控制器（源、目标、长度）
+  2. DMA 硬件自主搬运数据（CPU 可做其他事）
+  3. DMA 完成后触发中断通知 CPU
+
+/* 申请 DMA 缓冲区（需要物理连续内存）*/
+dma_addr_t dma_handle;
+void *cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
+/* cpu_addr: 内核虚拟地址，dma_handle: DMA 总线地址 */
+
+/* 启动 DMA 传输 */
+writel(dma_handle, dev_base + DMA_ADDR_REG);
+writel(size, dev_base + DMA_SIZE_REG);
+writel(DMA_START, dev_base + DMA_CTRL_REG);
+```
+
+---
+
+## 6. 驱动调试技巧
+
+```c
+/* 内核打印（比 printf 更丰富）*/
+printk(KERN_EMERG   "系统即将崩溃\n");  /* 0: 最高优先级 */
+printk(KERN_ALERT   "需要立即处理\n");  /* 1 */
+printk(KERN_CRIT    "严重错误\n");      /* 2 */
+printk(KERN_ERR     "错误\n");          /* 3 */
+printk(KERN_WARNING "警告\n");          /* 4 */
+printk(KERN_NOTICE  "注意\n");          /* 5 */
+printk(KERN_INFO    "信息\n");          /* 6 */
+printk(KERN_DEBUG   "调试\n");          /* 7 */
+
+/* /proc 接口（暴露驱动内部状态）*/
+static int mydev_proc_show(struct seq_file *m, void *v)
+{
+    seq_printf(m, "buffer: %s\n", mydev_data.buf);
+    seq_printf(m, "length: %d\n", mydev_data.buf_len);
+    return 0;
+}
+/* 注册：proc_create("mychardev", 0, NULL, &mydev_proc_fops) */
+```
+
+```bash
+# 动态调试（Linux 2.6.30+）
+echo "module mychardev +p" > /sys/kernel/debug/dynamic_debug/control
+
+# 用 oops 分析崩溃
+dmesg | grep "Oops"
+# 使用 addr2line 或 gdb 定位崩溃位置
+addr2line -e vmlinux 0xc01234ab
+```
diff --git "a/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md" "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
new file mode 100644
index 0000000..97f972d
--- /dev/null
+++ "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
@@ -0,0 +1,315 @@
+# 08 — 网络子系统
+
+> Linux 网络子系统实现了完整的 TCP/IP 协议栈，是现代互联网的基石。
+> 本章从 **Socket 接口 → 协议栈层次 → 数据包接收/发送路径 → 关键数据结构**
+> 逐层解析，并对照 Linux 2.6.0 源码（0.11 网络功能不完整，跳过）。
+
+---
+
+## 1. 网络子系统层次架构
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                    用户空间                                   │
+│    socket() bind() listen() accept() send() recv()          │
+└──────────────────────┬──────────────────────────────────────┘
+                       │ 系统调用
+┌──────────────────────▼──────────────────────────────────────┐
+│                  Socket 层（BSD Socket API）                  │
+│   sock_create → inet_create → tcp_v4_connect ...            │
+└──────────────────────┬──────────────────────────────────────┘
+                       │
+┌──────────────────────▼──────────────────────────────────────┐
+│                 传输层（Transport Layer）                     │
+│   TCP（tcp.c）          UDP（udp.c）                         │
+│   可靠/有序/流式        不可靠/无序/报文                      │
+└──────────────────────┬──────────────────────────────────────┘
+                       │
+┌──────────────────────▼──────────────────────────────────────┐
+│                 网络层（Network Layer）                       │
+│   IP（ip_input.c, ip_output.c）                             │
+│   路由（route.c）    ARP（arp.c）    ICMP（icmp.c）          │
+└──────────────────────┬──────────────────────────────────────┘
+                       │
+┌──────────────────────▼──────────────────────────────────────┐
+│                 链路层（Link Layer）                          │
+│   以太网帧（eth.c）   网络设备接口（dev.c）                   │
+└──────────────────────┬──────────────────────────────────────┘
+                       │
+┌──────────────────────▼──────────────────────────────────────┐
+│                     网卡驱动                                  │
+│   e100 / e1000 / virtio_net ...                             │
+└──────────────────────┬──────────────────────────────────────┘
+                       │ DMA
+                    物理网线
+```
+
+---
+
+## 2. 核心数据结构
+
+### 2.1 sk_buff（套接字缓冲区）
+
+`sk_buff` 是 Linux 网络子系统中最重要的数据结构，
+代表**网络中的一个数据包**（从用户数据到最终的以太网帧）：
+
+```
+sk_buff 内存布局：
+
+  head ──► ┌────────────────────────────────────────┐
+           │      headroom（为协议头预留空间）         │
+  data ──► ├────────────────────────────────────────┤
+           │  以太网头（14字节）  ← 由驱动填充         │
+           │  IP 头（20字节）    ← 由 IP 层填充        │
+           │  TCP 头（20字节）   ← 由 TCP 层填充       │
+           │  用户数据            ← 应用程序提供        │
+  tail ──► ├────────────────────────────────────────┤
+           │      tailroom（为尾部预留空间）           │
+  end  ──► └────────────────────────────────────────┘
+
+关键指针：
+  skb->head: 缓冲区起始
+  skb->data: 当前数据起始（随协议处理向上移动）
+  skb->tail: 数据结束
+  skb->end:  缓冲区结束
+  skb->len:  data 到 tail 的长度（当前数据长度）
+```
+
+```c
+/* include/linux/skbuff.h（精简）*/
+struct sk_buff {
+    struct sk_buff *next, *prev;    /* 链表 */
+    struct sock *sk;                /* 关联的 socket */
+    struct net_device *dev;         /* 网络设备 */
+    
+    unsigned char *head, *data, *tail, *end;
+    unsigned int len;               /* 数据长度 */
+    unsigned int data_len;          /* 分散/聚集 IO 的额外数据 */
+    
+    __u16 protocol;                 /* 协议类型（ETH_P_IP/ETH_P_ARP...）*/
+    __u8  pkt_type;                 /* 包类型（PACKET_HOST/BROADCAST/...）*/
+    
+    /* 协议头指针（在 skb->data 移动时保存各层头部位置）*/
+    union { ... } h;    /* 传输层头（tcp_header/udp_header）*/
+    union { ... } nh;   /* 网络层头（iphdr）*/
+    union { ... } mac;  /* 链路层头（ethhdr）*/
+    
+    struct dst_entry *dst;          /* 路由信息 */
+};
+
+/* sk_buff 操作 API */
+skb_push(skb, len)   /* 在 data 前添加 len 字节（添加协议头）*/
+skb_pull(skb, len)   /* 从 data 移除 len 字节（去掉协议头）*/
+skb_put(skb, len)    /* 在 tail 后添加 len 字节（添加数据）*/
+skb_reserve(skb, len)/* 在 head 处预留 len 字节（为头部预留空间）*/
+```
+
+### 2.2 sock / socket 结构
+
+```
+用户空间 fd    内核空间
+──────────     ─────────────────────────────────────
+    fd  ──►  file ──► socket（VFS 文件）
+                         │
+                         └─► sock（协议相关，如 tcp_sock）
+
+struct socket {             /* BSD socket（VFS 层）*/
+    socket_state state;     /* SS_UNCONNECTED/SS_CONNECTED/... */
+    struct proto_ops *ops;  /* inet_stream_ops / inet_dgram_ops */
+    struct sock *sk;        /* → 协议实现层 */
+    struct file *file;      /* → VFS 文件 */
+};
+
+struct sock {               /* 协议无关的 sock 基类 */
+    __u32 rcv_saddr;        /* 本地 IP */
+    __u16 num;              /* 本地端口 */
+    __u32 daddr;            /* 对端 IP */
+    __u16 dport;            /* 对端端口 */
+    struct sk_buff_head sk_receive_queue;  /* 接收队列 */
+    struct sk_buff_head sk_write_queue;    /* 发送队列 */
+    struct proto *prot;     /* → TCP/UDP 协议操作 */
+    ...
+};
+
+struct tcp_sock {           /* TCP 专有状态 */
+    struct sock sk;         /* ← 必须是第一个字段 */
+    __u32 snd_nxt;          /* 下一个要发送的序号 */
+    __u32 rcv_nxt;          /* 期望收到的下一个序号 */
+    __u32 snd_una;          /* 最早的未确认序号（已发送、等待 ACK 的第一个字节）*/
+    __u16 mss_cache;        /* 最大段大小（MSS）*/
+    struct tcp_options_received rx_opt;
+    ...
+};
+```
+
+---
+
+## 3. 数据包接收路径（RX Path）
+
+```
+网卡收到数据帧
+      │
+      │ （DMA 将数据写入 ring buffer）
+      ▼
+网卡中断（netif_rx() 或 NAPI: netif_receive_skb()）
+      │
+      ▼
+net/core/dev.c: netif_receive_skb(skb)
+      │
+      ├─ 根据 skb->protocol 分发：
+      │   ETH_P_IP  → ip_rcv()
+      │   ETH_P_ARP → arp_rcv()
+      │   ...
+      ▼
+net/ipv4/ip_input.c: ip_rcv(skb, ...)
+      │
+      ├─ IP 校验和检查
+      ├─ 路由决策（ip_route_input）：
+      │   · 本机目标 → ip_local_deliver()
+      │   · 转发     → ip_forward()
+      ▼
+ip_local_deliver → ip_local_deliver_finish
+      │
+      ├─ 根据 IP 头的 protocol 字段分发：
+      │   IPPROTO_TCP → tcp_v4_rcv()
+      │   IPPROTO_UDP → udp_rcv()
+      │   IPPROTO_ICMP→ icmp_rcv()
+      ▼
+net/ipv4/tcp_ipv4.c: tcp_v4_rcv(skb)
+      │
+      ├─ 查找 socket（根据 src_ip:src_port:dst_ip:dst_port）
+      ├─ 调用 TCP 状态机处理（tcp_rcv_state_process）
+      ├─ 将数据放入 socket 接收队列（sk_receive_queue）
+      └─ 唤醒等待数据的进程（sk->sk_data_ready）
+              │
+              ▼
+用户进程从 recv() 系统调用中醒来，读取数据
+```
+
+---
+
+## 4. 数据包发送路径（TX Path）
+
+```
+用户程序调用 send(fd, buf, len, 0)
+      │
+      ▼
+sys_send → sys_sendto → sock_sendmsg
+      │
+      ▼
+inet_sendmsg → tcp_sendmsg
+      │
+      ├─ 将用户数据拷贝到 sk_buff（sk_stream_alloc_skb）
+      ├─ 更新 TCP 序号（snd_nxt）
+      ├─ 放入发送队列（sk->sk_write_queue）
+      └─ 调用 tcp_push_one() 或 tcp_write_xmit()
+              │
+              ▼
+tcp_transmit_skb(sk, skb, ...)
+      │
+      ├─ 构建 TCP 头（填充序号/确认号/标志）
+      └─ 调用 ip_queue_xmit()
+              │
+              ▼
+net/ipv4/ip_output.c: ip_queue_xmit
+      │
+      ├─ 路由查找（ip_route_output）
+      ├─ 构建 IP 头（填充 src_ip/dst_ip/ttl）
+      └─ ip_output → ip_finish_output
+              │
+              ▼
+dev_queue_xmit(skb)
+      │
+      ├─ 流量控制（qdisc）
+      └─ dev->hard_start_xmit()  ← 调用网卡驱动发送
+              │
+              ▼
+网卡硬件发送数据帧（DMA → 物理网线）
+```
+
+---
+
+## 5. TCP 三次握手（内核视角）
+
+```
+客户端                                    服务端（已 listen）
+─────                                     ─────────────────
+connect()                                 已调用 listen()
+
+tcp_v4_connect()                          (等待中)
+  sk->state = TCP_SYN_SENT
+  发送 SYN 包 ──────────────────────────►
+                                          tcp_v4_rcv()
+                                          tcp_rcv_state_process()
+                                            SYN: sk->state = TCP_SYN_RECV
+                                          ◄────────────── 发送 SYN+ACK
+
+tcp_rcv_state_process()
+  SYN+ACK: sk->state = TCP_ESTABLISHED
+  发送 ACK ──────────────────────────────►
+                                          tcp_rcv_state_process()
+                                            ACK: sk->state = TCP_ESTABLISHED
+                                            将连接移到 accept 队列
+                                            唤醒 accept() 中的进程
+
+连接建立完成！
+send() / recv() 可以开始工作
+```
+
+---
+
+## 6. Socket 编程与内核对应关系
+
+```c
+/* 用户程序                           内核函数 */
+socket(AF_INET, SOCK_STREAM, 0)  → sock_create → inet_create → tcp_v4_init_sock
+bind(sockfd, &addr, ...)         → inet_bind
+listen(sockfd, backlog)          → inet_listen → tcp_listen_start
+accept(sockfd, ...)              → inet_accept → tcp_accept（阻塞等待；后续版本改名为 inet_csk_accept）
+connect(sockfd, &server_addr, .) → tcp_v4_connect → 发送 SYN
+
+send(sockfd, buf, len, 0)        → tcp_sendmsg
+recv(sockfd, buf, len, 0)        → tcp_recvmsg（阻塞等待数据）
+
+close(sockfd)                    → tcp_close → 发送 FIN
+```
+
+---
+
+## 7. 实验
+
+```bash
+# 跟踪 TCP 连接建立（使用 ss）
+ss -tn state established
+
+# 查看路由表
+ip route show
+cat /proc/net/route
+
+# 查看 socket 统计
+cat /proc/net/sockstat
+cat /proc/net/tcp    # 所有 TCP 连接
+
+# 用 tcpdump 抓包（同时观察内核行为）
+sudo tcpdump -i eth0 -n tcp port 80
+
+# GDB 跟踪 TCP 状态机（在 2.6.0 内核）
+(gdb) break tcp_rcv_state_process
+(gdb) commands
+> printf "TCP state: %d\n", sk->sk_state
+> continue
+> end
+```
+
+---
+
+## 8. 网络性能优化关键点
+
+| 优化技术 | 引入版本 | 说明 |
+|---------|---------|------|
+| NAPI | 2.4.20+ | 轮询 + 中断混合，减少中断开销 |
+| TCP Offload (TSO/GSO) | TSO 2.5.x / GSO 2.6.18 | TSO 由网卡硬件完成分段；GSO 在软件中把分段推迟到交给驱动之前 |
+| Zero Copy (sendfile) | 2.2 | 跳过用户空间拷贝 |
+| epoll | 2.5.44 | O(1) IO 多路复用 |
+| RSS/RPS | 2.6.35+ | 多队列网卡，多核并行接收 |
+| XDP/eBPF | 4.8+ | 在驱动层直接处理包，旁路协议栈 |
diff --git "a/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md" "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"
new file mode 100644
index 0000000..ce08e3d
--- /dev/null
+++ "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"
@@ -0,0 +1,361 @@
+# 09 — 同步机制
+
+> 多处理器（SMP）环境下，多个 CPU 可能同时访问共享数据。
+> 本章从**为什么需要同步 → 各种锁的实现原理 → 使用场景**，
+> 对照 Linux 0.11（单处理器，禁中断）与 Linux 2.6.0（SMP，丰富锁原语）拆解。
+
+---
+
+## 1. 并发场景与竞争条件
+
+```
+场景：两个 CPU 同时对计数器 count++ 操作
+
+CPU0                    CPU1
+────                    ────
+读 count = 5
+                        读 count = 5
+count + 1 = 6
+                        count + 1 = 6
+写 count = 6
+                        写 count = 6    ← 应该是 7，实际是 6！
+
+根本原因：count++ 不是原子操作
+  汇编展开为：
+    MOV eax, [count]   ; 读
+    ADD eax, 1         ; 加
+    MOV [count], eax   ; 写
+  三条指令可被中断/并发破坏
+```
+
+---
+
+## 2. Linux 0.11：关中断（单核时代）
+
+```c
+/* 单核系统：只需关闭中断即可防止并发 */
+
+#define cli() __asm__ ("cli"::)   /* 关中断 */
+#define sti() __asm__ ("sti"::)   /* 开中断 */
+
+/* 典型用法 */
+void foo(void)
+{
+    cli();          /* 关中断 → 不会被抢占 */
+    /* ... 访问共享数据 ... */
+    sti();          /* 开中断 */
+}
+```
+
+**局限**：SMP（多核）系统中，关中断只能防止**本 CPU** 的中断，
+无法防止**其他 CPU** 的并发访问。
+
+---
+
+## 3. 原子操作（Atomic Operations）
+
+原子操作由 CPU 硬件保证，不需要锁：
+
+```c
+/* include/asm-i386/atomic.h — Linux 2.6.0 */
+typedef struct { volatile int counter; } atomic_t;
+
+/* 原子读写 */
+#define atomic_read(v)     ((v)->counter)
+#define atomic_set(v, i)   (((v)->counter) = (i))
+
+/* 原子加减 */
+static inline void atomic_add(int i, atomic_t *v)
+{
+    __asm__ __volatile__(
+        LOCK "addl %1,%0"     /* LOCK 前缀：总线锁定，多核安全 */
+        :"=m" (v->counter)
+        :"ir" (i), "m" (v->counter));
+}
+
+static inline int atomic_inc_and_test(atomic_t *v)
+{
+    unsigned char c;
+    __asm__ __volatile__(
+        LOCK "incl %0; sete %1"
+        :"=m" (v->counter), "=qm" (c)
+        :"m" (v->counter) : "memory");
+    return c != 0;
+}
+
+/* 使用场景：引用计数 */
+atomic_t ref_count = ATOMIC_INIT(1);
+atomic_inc(&ref_count);           /* 增加引用 */
+if (atomic_dec_and_test(&ref_count))  /* 减少引用，若为0则释放 */
+    kfree(object);
+```
+
+**`LOCK` 前缀的作用**：
+
+```
+单核：LOCK 为空（不需要）
+SMP：  LOCK → 在总线事务期间锁定内存总线
+              （或 cache line 锁，更现代的实现）
+```
+
+---
+
+## 4. 自旋锁（Spinlock）
+
+### 原理
+
+```
+获取锁失败时，不休眠，而是"自旋"（busy-wait）不断重试：
+
+      尝试获取锁
+      ┌──────────────────┐
+      │ locked == 0?     │
+      │ 是 → 设为1，返回 │
+      │ 否 → 继续等待    │
+      └──────────────────┘
+           ↑  ↓（自旋）
+           └──┘
+      ← 其他 CPU 释放锁 →
+```
+
+### Linux 2.6.0 实现
+
+```c
+/* include/asm-i386/spinlock.h */
+typedef struct {
+    volatile unsigned int lock;
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 }
+
+static inline void spin_lock(spinlock_t *lock)
+{
+    __asm__ __volatile__(
+        spin_lock_string    /* "lock; decb %0"，失败则自旋重试（见下方汇编）*/
+        :"=m" (lock->lock) : : "memory");
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+    __asm__ __volatile__(
+        spin_unlock_string
+        :"=m" (lock->lock) : : "memory");
+}
+```
+
+**x86 自旋锁的本质（test-and-set）**：
+
+```asm
+/* 获取锁：原子地将 lock 置 0（0=locked, 1=unlocked）*/
+spin_lock:
+    lock decb (%eax)    ; 原子减1
+    jns spin_acquired   ; 若结果 >= 0（原来 > 0），获取成功
+spin_retry:
+    pause               ; 提示 CPU 在自旋（节省电力，优化超线程）
+    cmpb $0, (%eax)    ; 检查锁是否释放
+    jle spin_retry      ; 若仍锁定，继续等待
+    lock decb (%eax)
+    jns spin_acquired
+    jmp spin_retry
+spin_acquired:
+    ret
+
+/* 释放锁 */
+spin_unlock:
+    movb $1, (%eax)     ; 写 1 表示解锁（不需要 LOCK 前缀，store 已是原子）
+    ret
+```
+
+### 使用规则
+
+```c
+spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
+
+/* 中断上下文安全版本（禁止本 CPU 中断）*/
+unsigned long flags;
+spin_lock_irqsave(&my_lock, flags);
+/* ... 临界区 ... */
+spin_unlock_irqrestore(&my_lock, flags);
+
+/* 普通版本（不禁中断；仅当该锁不会在中断处理程序中被获取时才能使用）*/
+spin_lock(&my_lock);
+/* ... 临界区 ... */
+spin_unlock(&my_lock);
+```
+
+**适用场景**：临界区**极短**（几条指令），不能休眠（中断上下文）。
+
+---
+
+## 5. 互斥量（Mutex / Semaphore）
+
+### 信号量（Semaphore）
+
+```c
+/* include/asm-i386/semaphore.h */
+struct semaphore {
+    atomic_t count;           /* 信号量计数 */
+    int sleepers;             /* 等待中的进程数 */
+    wait_queue_head_t wait;   /* 等待队列 */
+};
+
+/* 初始化为互斥量（count=1）*/
+static DECLARE_MUTEX(my_mutex);  /* count = 1 */
+
+/* 获取（P 操作）：count-- */
+void down(struct semaphore *sem)
+{
+    /* 如果 count > 0，成功减1返回 */
+    /* 如果 count == 0，将进程加入等待队列，休眠 */
+}
+
+/* 释放（V 操作）：count++ */
+void up(struct semaphore *sem)
+{
+    /* count++ */
+    /* 如果有等待的进程，唤醒一个 */
+}
+```
+
+**与自旋锁的区别**：
+
+```
+             自旋锁                互斥量（信号量）
+等待方式     忙等（自旋）           休眠（让出 CPU）
+适用场景     临界区极短，中断上下文  临界区可能较长，进程上下文
+开销         自旋期间浪费 CPU        上下文切换开销
+可以休眠?    否                      是
+```
+
+### 读写信号量（rwsem）
+
+```c
+struct rw_semaphore rwsem = __RWSEM_INITIALIZER(rwsem);
+
+/* 多个读者可以同时持有 */
+down_read(&rwsem);
+/* ... 读操作 ... */
+up_read(&rwsem);
+
+/* 写者需要独占 */
+down_write(&rwsem);
+/* ... 写操作 ... */
+up_write(&rwsem);
+```
+
+---
+
+## 6. RCU（Read-Copy-Update）
+
+RCU 在 2.5.43 开发版合入、随 Linux 2.6.0 首次进入稳定版，是一种读侧**无锁**的并发技术，
+专门为**读多写少**场景优化：
+
+### 核心思想
+
+```
+RCU 的核心约束：
+  · 读者：绝对不阻塞（也不需要加任何锁）
+  · 写者：修改时创建副本，不影响正在读的读者
+
+读者                          写者
+──────                        ──────
+rcu_read_lock()               old_ptr = rcu_dereference(global_ptr)
+data = rcu_dereference(ptr)   new_ptr = kmalloc(...)
+/* 使用 data，不加锁 */       /* 复制旧数据到 new_ptr 并修改 */
+rcu_read_unlock()             rcu_assign_pointer(global_ptr, new_ptr)
+                              /* 等待所有读者完成当前临界区 */
+                              synchronize_rcu()   ← 等待"宽限期"过去
+                              kfree(old_ptr)      ← 安全释放旧数据
+
+宽限期（Grace Period）：
+  等待所有 CPU 都经历过一次上下文切换
+  此后可保证没有读者还在使用旧数据
+```
+
+### 内核使用示例
+
+```c
+/* 链表的 RCU 遍历（无锁读）*/
+rcu_read_lock();
+list_for_each_entry_rcu(entry, &my_list, list) {
+    /* 安全读取 entry，无锁 */
+    do_something(entry);
+}
+rcu_read_unlock();
+
+/* 安全删除链表元素（写者）*/
+spin_lock(&list_lock);
+list_del_rcu(&entry->list);
+spin_unlock(&list_lock);
+synchronize_rcu();    /* 等待所有读者完成 */
+kfree(entry);         /* 现在可以安全释放 */
+```
+
+---
+
+## 7. 死锁与调试
+
+### 常见死锁场景
+
+```
+场景一：嵌套加锁顺序不一致
+
+CPU0                    CPU1
+────                    ────
+lock(A)                 lock(B)
+lock(B)  ← 等待B        lock(A)  ← 等待A
+   ↑                         ↑
+   └─────── 死锁! ────────────┘
+
+避免方法：所有地方按相同顺序加锁（A 总是先于 B）
+
+场景二：中断上下文与进程上下文
+
+进程：spin_lock(&lock)
+    此时发生中断
+中断处理：spin_lock(&lock)  ← 自旋等待，但进程永远无法释放锁！
+
+避免方法：进程中使用 spin_lock_irqsave()
+```
+
+### 调试工具
+
+```bash
+# lockdep：内核内置死锁检测器
+# 编译时开启：CONFIG_PROVE_LOCKING=y
+
+# 当检测到潜在死锁时，内核打印警告：
+# [ BUG: possible circular locking dependency detected ]
+
+# ftrace：跟踪锁获取/释放
+echo function > /sys/kernel/debug/tracing/current_tracer
+echo '_raw_spin_lock*' > /sys/kernel/debug/tracing/set_ftrace_filter   # spin_lock 是内联函数，实际可跟踪符号为 _raw_spin_lock*
+cat /sys/kernel/debug/tracing/trace
+
+# perf：分析锁竞争热点
+perf lock record ls
+perf lock report
+```
+
+---
+
+## 8. 各同步机制对比总结
+
+```
+┌─────────────────┬──────────────┬─────────────┬──────────────────┐
+│   机制           │  读性能      │  写性能      │  适用场景         │
+├─────────────────┼──────────────┼─────────────┼──────────────────┤
+│ 关中断（0.11）   │  最快        │  最快        │  单核，短临界区   │
+│ 原子操作        │  快          │  快          │  单变量计数/标志  │
+│ 自旋锁          │  快（无竞争）│  快（无竞争）│  短临界区，中断上下文│
+│ 互斥量/信号量   │  慢（可休眠）│  慢（可休眠）│  长临界区，进程上下文│
+│ 读写锁（rwlock）│  快（并发读）│  慢（独占写）│  读多写少         │
+│ RCU             │  极快（无锁）│  中（写+等待）│  读极多写极少    │
+└─────────────────┴──────────────┴─────────────┴──────────────────┘
+```
+
+> **选择原则**：
+> 1. 能用原子操作就不用锁
+> 2. 中断上下文用自旋锁
+> 3. 进程上下文且临界区短用自旋锁，长用互斥量
+> 4. 读远多于写用 RCU 或读写锁
diff --git a/README.md b/README.md
index 5398a40..5681c9f 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,85 @@
-# how_to_learn_linux
\ No newline at end of file
+# 🐧 如何学习 Linux 内核 — 系统性学习指南
+
+> **目标**：通过阅读经典内核版本（Linux 0.11 & Linux 2.6.0）的源码，
+> 从零开始理解现代 Linux 操作系统的核心框架与设计哲学。
+
+---
+
+## 📚 目录
+
+| # | 章节 | 核心内容 |
+|---|------|----------|
+| [00](./00-学习路线/README.md) | **学习路线** | 阶段规划、时间表、推荐资源 |
+| [01](./01-经典版本选择/README.md) | **经典版本选择** | 0.11 / 2.6.0 / 4.x 版本对比 |
+| [02](./02-环境搭建/README.md) | **环境搭建** | QEMU + GDB 调试环境、源码编译 |
+| [03](./03-进程管理/README.md) | **进程管理** | 进程模型、调度器、上下文切换 |
+| [04](./04-内存管理/README.md) | **内存管理** | 虚拟内存、页表、Slab 分配器 |
+| [05](./05-文件系统/README.md) | **文件系统** | VFS、ext2、inode、dentry |
+| [06](./06-系统调用/README.md) | **系统调用** | 中断表、syscall 入口与返回 |
+| [07](./07-设备驱动/README.md) | **设备驱动** | 驱动模型、字符设备、块设备 |
+| [08](./08-网络子系统/README.md) | **网络子系统** | Socket、TCP/IP 协议栈 |
+| [09](./09-同步机制/README.md) | **同步机制** | 自旋锁、互斥量、RCU |
+
+---
+
+## 🗺️ 总体架构一览
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                   用户空间 (User Space)                   │
+│  应用程序  Shell  libc  系统工具  ……                       │
+└──────────────────────┬──────────────────────────────────┘
+                       │  系统调用接口 (syscall)
+┌──────────────────────▼──────────────────────────────────┐
+│                   内核空间 (Kernel Space)                  │
+│                                                           │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌─────────┐  │
+│  │ 进程管理  │  │ 内存管理  │  │ 文件系统  │  │ 网络栈  │  │
+│  │ scheduler│  │  VMM/MM  │  │   VFS    │  │TCP/IP   │  │
+│  └────┬─────┘  └────┬─────┘  └────┬─────┘  └────┬────┘  │
+│       │              │              │              │       │
+│  ┌────▼──────────────▼──────────────▼──────────────▼───┐  │
+│  │              设备驱动 & 硬件抽象层 (HAL)               │  │
+│  └──────────────────────────────────────────────────────┘  │
+└──────────────────────┬──────────────────────────────────┘
+                       │
+┌──────────────────────▼──────────────────────────────────┐
+│                     硬件 (Hardware)                       │
+│   CPU   内存   磁盘   网卡   键盘/鼠标   ……                 │
+└─────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 🎯 学习目标
+
+完成本指南后，你将能够：
+
+1. **读懂** Linux 内核源码中任意子系统的核心逻辑
+2. **理解** 进程调度、虚拟内存、文件系统等核心机制
+3. **调试** 内核模块，用 GDB + QEMU 单步跟踪内核执行流
+4. **对比** Linux 0.11 与现代内核的演进路径
+5. **编写** 简单的内核模块与字符设备驱动
+
+---
+
+## 📖 推荐书单
+
+| 书名 | 作者 | 适用阶段 |
+|------|------|---------|
+| 《Linux内核完全注释》| 赵炯 | 入门（基于 0.11）|
+| 《深入理解Linux内核》| Bovet & Cesati | 进阶（基于 2.6）|
+| 《Linux设备驱动程序》| Corbet 等 | 驱动开发 |
+| 《Linux内核设计与实现》| Robert Love | 综合理解 |
+| 《深入Linux内核架构》| Mauerer | 深度参考 |
+| 《操作系统：精髓与设计原理》| Stallings | 理论基础 |
+
+---
+
+## 🔗 重要资源
+
+- 源码在线浏览：<https://elixir.bootlin.com/linux>
+- Linux 0.11 源码：<https://github.com/karottc/linux-0.11>
+- Linux 2.6.0 源码：<https://mirrors.edge.kernel.org/pub/linux/kernel/v2.6/linux-2.6.0.tar.gz>
+- 官方文档：<https://www.kernel.org/doc/html/latest/>
+- LKML 邮件列表：<https://lkml.org/>

From 1b0b9a4fbfda9ad0e2a45ecbb8ee3a1a2547dc34 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 09:36:47 +0000
Subject: [PATCH 03/10] feat: add HTML site scaffold, CSS, SVG diagrams, and
 first chapter

Agent-Logs-Url: https://github.com/YYCB/how_to_learn_linux/sessions/1cb19491-1fd8-4418-9a44-f972d8161633

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../index.html"                               | 310 ++++++++++++
 assets/_nav.html                              |  33 ++
 assets/diagrams/arch-overview.svg             | 104 ++++
 assets/diagrams/cfs-rbtree.svg                | 102 ++++
 assets/diagrams/container-anatomy.svg         | 107 ++++
 assets/diagrams/page-table.svg                | 109 +++++
 assets/diagrams/syscall-flow.svg              |  62 +++
 assets/diagrams/tcp-handshake.svg             | 102 ++++
 assets/diagrams/vfs-objects.svg               | 106 ++++
 assets/diagrams/vm-layout.svg                 | 101 ++++
 assets/style.css                              | 456 ++++++++++++++++++
 index.html                                    | 227 +++++++++
 12 files changed, 1819 insertions(+)
 create mode 100644 "00-\345\255\246\344\271\240\350\267\257\347\272\277/index.html"
 create mode 100644 assets/_nav.html
 create mode 100644 assets/diagrams/arch-overview.svg
 create mode 100644 assets/diagrams/cfs-rbtree.svg
 create mode 100644 assets/diagrams/container-anatomy.svg
 create mode 100644 assets/diagrams/page-table.svg
 create mode 100644 assets/diagrams/syscall-flow.svg
 create mode 100644 assets/diagrams/tcp-handshake.svg
 create mode 100644 assets/diagrams/vfs-objects.svg
 create mode 100644 assets/diagrams/vm-layout.svg
 create mode 100644 assets/style.css
 create mode 100644 index.html

diff --git "a/00-\345\255\246\344\271\240\350\267\257\347\272\277/index.html" "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/index.html"
new file mode 100644
index 0000000..f1e9a09
--- /dev/null
+++ "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/index.html"
@@ -0,0 +1,310 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>00 · 学习路线 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html" class="active">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+<main class="content">
+
+<h1><span class="chapter-num">00</span>学习路线 — 从零到专家</h1>
+
+<p>这是一条经过实战验证的<b>5 阶段路线</b>，覆盖从 C 语言基础到能够提交 LKML patch 的全部技能点。
+每阶段都给出明确的<b>知识检验点</b>，避免"自我感觉良好但其实不会"的陷阱。</p>
+
+<div class="callout tip">
+    <div class="label">本路线的核心信念</div>
+    <p>内核学习的最大误区是<b>"只读不练"</b>。本路线强制要求：每读完一个概念，必须在 QEMU + GDB 中亲自验证。
+    没有动手验证的"理解"都是错觉。</p>
+</div>
+
+<h2 id="overview">总览：5 个阶段</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 220" font-family="-apple-system,sans-serif" font-size="13">
+  <defs>
+    <marker id="aar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+  <g>
+    <rect x="20" y="60" width="150" height="100" rx="8" fill="#1a2028" stroke="#58a6ff" stroke-width="2"/>
+    <text x="95" y="84" text-anchor="middle" fill="#58a6ff" font-weight="700">阶段一</text>
+    <text x="95" y="104" text-anchor="middle" fill="#e6edf3" font-size="12">基础准备</text>
+    <text x="95" y="124" text-anchor="middle" fill="#8b949e" font-size="11">4~6 周</text>
+    <text x="95" y="144" text-anchor="middle" fill="#8b949e" font-size="10">C / OS / 工具链</text>
+
+    <line x1="170" y1="110" x2="200" y2="110" stroke="#8b949e" stroke-width="2" marker-end="url(#aar)"/>
+
+    <rect x="200" y="60" width="150" height="100" rx="8" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+    <text x="275" y="84" text-anchor="middle" fill="#ff7b29" font-weight="700">阶段二</text>
+    <text x="275" y="104" text-anchor="middle" fill="#e6edf3" font-size="12">Linux 0.11 精读</text>
+    <text x="275" y="124" text-anchor="middle" fill="#8b949e" font-size="11">8~12 周</text>
+    <text x="275" y="144" text-anchor="middle" fill="#8b949e" font-size="10">14k 行完整读完</text>
+
+    <line x1="350" y1="110" x2="380" y2="110" stroke="#8b949e" stroke-width="2" marker-end="url(#aar)"/>
+
+    <rect x="380" y="60" width="150" height="100" rx="8" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+    <text x="455" y="84" text-anchor="middle" fill="#56d364" font-weight="700">阶段三</text>
+    <text x="455" y="104" text-anchor="middle" fill="#e6edf3" font-size="12">Linux 2.6 精读</text>
+    <text x="455" y="124" text-anchor="middle" fill="#8b949e" font-size="11">8~12 周</text>
+    <text x="455" y="144" text-anchor="middle" fill="#8b949e" font-size="10">现代框架对照</text>
+
+    <line x1="530" y1="110" x2="560" y2="110" stroke="#8b949e" stroke-width="2" marker-end="url(#aar)"/>
+
+    <rect x="560" y="60" width="150" height="100" rx="8" fill="#1a2028" stroke="#bc8cff" stroke-width="2"/>
+    <text x="635" y="84" text-anchor="middle" fill="#bc8cff" font-weight="700">阶段四</text>
+    <text x="635" y="104" text-anchor="middle" fill="#e6edf3" font-size="12">专题深入</text>
+    <text x="635" y="124" text-anchor="middle" fill="#8b949e" font-size="11">10~16 周</text>
+    <text x="635" y="144" text-anchor="middle" fill="#8b949e" font-size="10">CFS/eBPF/容器</text>
+
+    <line x1="710" y1="110" x2="740" y2="110" stroke="#8b949e" stroke-width="2" marker-end="url(#aar)"/>
+
+    <rect x="740" y="60" width="150" height="100" rx="8" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+    <text x="815" y="84" text-anchor="middle" fill="#e3b341" font-weight="700">阶段五</text>
+    <text x="815" y="104" text-anchor="middle" fill="#e6edf3" font-size="12">贡献社区</text>
+    <text x="815" y="124" text-anchor="middle" fill="#8b949e" font-size="11">持续</text>
+    <text x="815" y="144" text-anchor="middle" fill="#8b949e" font-size="10">提交 patch / 维护</text>
+  </g>
+  <text x="450" y="30" text-anchor="middle" font-size="16" font-weight="700" fill="#ff7b29">完整学习路线 (总计约 8~12 个月)</text>
+</svg>
+<div class="caption">5 阶段渐进式学习路线（不含已有 C/OS 基础的快速通道）</div>
+</div>
+
+<h2 id="stage1">阶段一：基础准备（4~6 周）</h2>
+
+<h3>1.1 C 语言深度</h3>
+<p>内核 C 与应用层 C 写法差异巨大，必须掌握以下"内核惯用法"：</p>
+
+<pre class="code-c"><span class="cm">/* 1. container_of — 通过成员指针得到结构体指针（链表精髓） */</span>
+<span class="kw">#define</span> container_of(ptr, type, member) ({          \
+    <span class="kw">const</span> <span class="tp">typeof</span>(((type *)<span class="num">0</span>)->member) *__mptr = (ptr); \
+    (type *)((<span class="kw">char</span> *)__mptr - offsetof(type, member)); })
+
+<span class="cm">/* 2. 多行宏的 do {} while(0) 包裹技巧 */</span>
+<span class="kw">#define</span> SET_FLAG(x, f) <span class="kw">do</span> { (x)->flags |= (f); barrier(); } <span class="kw">while</span>(<span class="num">0</span>)
+
+<span class="cm">/* 3. likely/unlikely — 分支预测提示 */</span>
+<span class="kw">if</span> (likely(p != <span class="kw">NULL</span>)) { ... }     <span class="cm">// 编译器生成顺序流</span>
+<span class="kw">if</span> (unlikely(err == -EINVAL)) { ... } <span class="cm">// 编译器把错误路径搬远</span>
+
+<span class="cm">/* 4. 内联汇编 (GCC AT&T 语法) */</span>
+<span class="kw">static inline</span> <span class="kw">unsigned long</span> arch_local_irq_save(<span class="kw">void</span>) {
+    <span class="kw">unsigned long</span> flags;
+    asm <span class="kw">volatile</span>(<span class="str">"pushfq; popq %0; cli"</span> : <span class="str">"=rm"</span>(flags) :: <span class="str">"memory"</span>);
+    <span class="kw">return</span> flags;
+}
+
+<span class="cm">/* 5. __attribute__ 修饰符 */</span>
+<span class="kw">static</span> <span class="kw">int</span> <span class="fn">__init</span> my_init(<span class="kw">void</span>);   <span class="cm">// 放入 .init 段，启动后释放</span>
+<span class="kw">static</span> <span class="kw">void</span> <span class="fn">__exit</span> my_exit(<span class="kw">void</span>);   <span class="cm">// 仅模块卸载时使用</span>
+<span class="kw">static</span> <span class="kw">int</span> data __read_mostly;     <span class="cm">// 放入"读多"段，cacheline 友好</span>
+</pre>
+
+<div class="callout warn">
+    <div class="label">必须熟练（否则读源码处处碰壁）</div>
+    <ul>
+        <li>指针、函数指针、回调（<code>file_operations.read</code> 就是函数指针）</li>
+        <li>位操作 / 位字段 / 掩码</li>
+        <li>预处理宏（<code>#</code>、<code>##</code>、可变参宏）</li>
+        <li>GCC 扩展：<code>typeof</code>、<code>__builtin_*</code>、内联汇编</li>
+        <li>链接脚本基础（<code>vmlinux.lds</code> 看不懂没关系，但要知道存在）</li>
+    </ul>
+</div>
+
+<h3>1.2 操作系统理论</h3>
+<p>无 OS 基础者必先补这些<b>概念</b>（不必精通，能画出框图即可）：</p>
+
+<table>
+<tr><th>主题</th><th>必懂概念</th><th>检验方法</th></tr>
+<tr><td>进程/线程</td><td>PCB、状态机、上下文切换、线程模型</td><td>用一张图说清楚 fork/exec/wait</td></tr>
+<tr><td>调度</td><td>FIFO、RR、CFS 直觉理解</td><td>解释为什么 CFS 比 O(1) 好</td></tr>
+<tr><td>虚拟内存</td><td>分页、页表、TLB、缺页中断</td><td>画出虚拟地址→物理地址</td></tr>
+<tr><td>并发</td><td>临界区、锁、死锁、原子操作</td><td>写出死锁的 4 个必要条件</td></tr>
+<tr><td>文件系统</td><td>inode、目录、VFS 抽象</td><td>解释一次 read() 全过程</td></tr>
+<tr><td>IO</td><td>阻塞 vs 非阻塞、同步 vs 异步</td><td>区分 select/poll/epoll/io_uring</td></tr>
+<tr><td>中断</td><td>中断向量、上下半部、IRQ</td><td>解释为什么不能在中断里 sleep</td></tr>
+</table>
+
+<h3>1.3 工具链熟悉</h3>
+<p>下面这些工具是"读 + 写 + 调试 + 分析"内核的必备品：</p>
+
+<pre class="code-bash">
+<span class="cm"># 编译与链接</span>
+gcc / clang          <span class="cm"># 编译</span>
+ld                   <span class="cm"># 链接</span>
+objdump -d           <span class="cm"># 反汇编</span>
+readelf -a           <span class="cm"># ELF 结构分析</span>
+nm / addr2line       <span class="cm"># 符号 ↔ 地址转换</span>
+
+<span class="cm"># 调试</span>
+gdb / kgdb           <span class="cm"># 内核态调试</span>
+crash                <span class="cm"># 崩溃 dump 分析</span>
+strace / ltrace      <span class="cm"># 用户态系统调用 / 库调用追踪</span>
+
+<span class="cm"># 性能</span>
+perf record / report <span class="cm"># 性能采样</span>
+ftrace               <span class="cm"># 内核函数追踪</span>
+bpftrace / bcc       <span class="cm"># eBPF 高层工具</span>
+
+<span class="cm"># 构建</span>
+make menuconfig      <span class="cm"># 内核配置 TUI</span>
+make -j$(nproc)      <span class="cm"># 并行构建</span>
+</pre>
+
+<h2 id="stage2">阶段二：Linux 0.11 精读（8~12 周）</h2>
+
+<div class="callout deep">
+    <div class="label">为什么选 0.11 而不是 0.01？</div>
+    <p>0.01 是<b>不完整的</b>实验代码（必须借助 Minix 才能编译和引导，无法独立使用），0.11 是<b>第一个能完整运行用户程序、自我托管编译的版本</b>。
+    13990 行（不含注释），既小到可以完整读完，又大到包含所有 OS 核心概念。</p>
+</div>
+
+<h3>8 周精读计划</h3>
+<table>
+<tr><th>周次</th><th>主题</th><th>对应源文件</th><th>本周末检验：能否……</th></tr>
+<tr><td>第 1 周</td><td>启动流程</td><td>boot/bootsect.s, boot/setup.s, boot/head.s</td><td>解释 BIOS → 实模式 → 保护模式 → C 入口的每一跳？</td></tr>
+<tr><td>第 2 周</td><td>内核初始化</td><td>init/main.c</td><td>列出 main()（后续版本改名为 start_kernel）中所有子系统的初始化顺序？</td></tr>
+<tr><td>第 3 周</td><td>内存管理</td><td>mm/memory.c, mm/page.s</td><td>从 0 开始画出 0.11 的段页式地址转换？</td></tr>
+<tr><td>第 4 周</td><td>进程与调度</td><td>kernel/fork.c, kernel/sched.c</td><td>把 copy_process 的每一步背下来？</td></tr>
+<tr><td>第 5 周</td><td>系统调用</td><td>kernel/system_call.s, kernel/sys.c</td><td>从 int 0x80 一路追到 sys_write 返回？</td></tr>
+<tr><td>第 6 周</td><td>信号与 IPC</td><td>kernel/signal.c, kernel/exit.c</td><td>说清 zombie 进程是怎么形成又怎么消失的？</td></tr>
+<tr><td>第 7 周</td><td>文件系统</td><td>fs/* 全部</td><td>从打开 /etc/passwd 一路追到磁盘块？</td></tr>
+<tr><td>第 8 周</td><td>设备驱动</td><td>kernel/blk_drv/, kernel/chr_drv/</td><td>解释 tty 行规范怎么处理回车？</td></tr>
+</table>
+
+<h2 id="stage3">阶段三：Linux 2.6 精读（8~12 周）</h2>
+<p>2.6.0 是现代内核框架成型的<b>里程碑版本</b>。读 2.6 时<b>重点关注差异</b>，不要重读已懂的部分。</p>
+
+<h3>0.11 → 2.6 对照学习</h3>
+<div class="compare">
+    <div>
+        <h4 style="color: var(--accent-2)">Linux 0.11 (已读)</h4>
+        <ul>
+            <li>段页式内存 (基址 = 任务号 nr × 64MB)</li>
+            <li>kernel/sched.c 简单时间片</li>
+            <li>fs/ 直接是 Minix FS</li>
+            <li>无 SMP，关中断即同步</li>
+            <li>固定 64 进程上限</li>
+        </ul>
+    </div>
+    <div>
+        <h4 style="color: var(--accent)">Linux 2.6.0 (重点)</h4>
+        <ul>
+            <li>纯页式 + mm_struct + VMA</li>
+            <li>kernel/sched.c <b>O(1)</b> 调度器</li>
+            <li>fs/ext2 + VFS 抽象层</li>
+            <li>SMP + spinlock/semaphore/RCU（mutex 自 2.6.16 起引入）</li>
+            <li>动态进程表 + namespace 雏形</li>
+        </ul>
+    </div>
+</div>
+
+<h2 id="stage4">阶段四：专家级专题（10~16 周）</h2>
+<p>不再线性阅读，按兴趣专题深入。本指南后半段（10~15 章）即为此阶段服务：</p>
+
+<ul>
+    <li><a href="../10-CFS调度器/index.html">CFS 调度器</a>：vruntime、负载均衡、EAS</li>
+    <li><a href="../11-容器与命名空间/index.html">容器内核机制</a>：namespace、cgroup v2、OverlayFS</li>
+    <li><a href="../12-eBPF与可观测性/index.html">eBPF</a>：现代内核可编程性</li>
+    <li><a href="../13-中断与异常/index.html">中断子系统</a>：IRQ、softirq、threaded IRQ</li>
+    <li><a href="../14-启动流程深入/index.html">启动流程深入</a>：UEFI、KASLR、ACPI</li>
+    <li><a href="../15-内核调试与性能/index.html">调试与性能</a>：ftrace、perf、KASAN</li>
+</ul>
+
+<h2 id="stage5">阶段五：贡献社区</h2>
+
+<h3>5.1 找一个突破口</h3>
+<p>不要一上来就想"修复某个大 bug"。从这些低门槛切入：</p>
+<ul>
+    <li><b>kernelnewbies.org / KernelJanitors</b>：有专门的入门 issue 列表</li>
+    <li><b>修复 checkpatch.pl 警告</b>：纯文本编辑，提交容易</li>
+    <li><b>文档错别字 / 过时信息</b>：低风险</li>
+    <li><b>把过时的 API 调用迁移到新 API</b>：机械工作，欢迎度高</li>
+</ul>
+
+<h3>5.2 邮件 patch 流程</h3>
+<pre class="code-bash">
+<span class="cm"># 1. 配置 git</span>
+git config --global user.name <span class="str">"Your Name"</span>
+git config --global user.email <span class="str">"you@example.com"</span>
+git config --global sendemail.smtpencryption tls
+git config --global sendemail.smtpserver smtp.gmail.com
+git config --global sendemail.smtpuser <span class="str">"you@gmail.com"</span>
+
+<span class="cm"># 2. 创建 patch</span>
+git format-patch -1 HEAD --subject-prefix=<span class="str">"PATCH"</span>
+
+<span class="cm"># 3. 找该子系统的 maintainer 和邮件列表</span>
+./scripts/get_maintainer.pl 0001-my-patch.patch
+
+<span class="cm"># 4. 发送</span>
+git send-email --to=maintainer@kernel.org \
+               --cc=linux-kernel@vger.kernel.org \
+               0001-my-patch.patch
+</pre>
+
+<h2 id="daily">每日学习节奏（推荐）</h2>
+<div class="callout tip">
+    <div class="label">2~3 小时/天的高效安排</div>
+    <ol>
+        <li><b>0:00 ~ 0:20</b> — 复习昨天笔记，3 分钟内能讲出"昨天学了啥"</li>
+        <li><b>0:20 ~ 1:30</b> — 精读源码（<b>不超过 200 行/天</b>，多了消化不动）</li>
+        <li><b>1:30 ~ 2:00</b> — 画图：当天阅读涉及的数据结构、流程图</li>
+        <li><b>2:00 ~ 2:30</b> — 在 QEMU 中验证（设断点、打印变量）</li>
+        <li><b>2:30 ~ 3:00</b> — 整理笔记（用自己的话复述）</li>
+    </ol>
+</div>
+
+<div class="callout danger">
+    <div class="label">三大常见误区</div>
+    <ol>
+        <li><b>"只读不练"</b>：读完 100 章不如调试明白一个函数</li>
+        <li><b>"追新版"</b>：直接读 6.x 内核 90% 概率挫败放弃，从 0.11 开始</li>
+        <li><b>"东学西学"</b>：今天 fork 明天 TCP 后天驱动，必须按子系统专精</li>
+    </ol>
+</div>
+
+<footer class="page-footer">
+    <p>← <a href="../index.html">总目录</a> · 下一章：<a href="../01-经典版本选择/index.html">01 经典版本选择 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git a/assets/_nav.html b/assets/_nav.html
new file mode 100644
index 0000000..7d439c2
--- /dev/null
+++ b/assets/_nav.html
@@ -0,0 +1,33 @@
+<!-- Shared sidebar navigation. Copy-paste in each chapter HTML. -->
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
diff --git a/assets/diagrams/arch-overview.svg b/assets/diagrams/arch-overview.svg
new file mode 100644
index 0000000..81efa31
--- /dev/null
+++ b/assets/diagrams/arch-overview.svg
@@ -0,0 +1,104 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 520" font-family="-apple-system, sans-serif" font-size="13">
+  <defs>
+    <linearGradient id="userGrad" x1="0" x2="0" y1="0" y2="1">
+      <stop offset="0" stop-color="#58a6ff" stop-opacity="0.25"/>
+      <stop offset="1" stop-color="#58a6ff" stop-opacity="0.08"/>
+    </linearGradient>
+    <linearGradient id="kernGrad" x1="0" x2="0" y1="0" y2="1">
+      <stop offset="0" stop-color="#ff7b29" stop-opacity="0.25"/>
+      <stop offset="1" stop-color="#ff7b29" stop-opacity="0.08"/>
+    </linearGradient>
+    <linearGradient id="hwGrad" x1="0" x2="0" y1="0" y2="1">
+      <stop offset="0" stop-color="#bc8cff" stop-opacity="0.25"/>
+      <stop offset="1" stop-color="#bc8cff" stop-opacity="0.08"/>
+    </linearGradient>
+    <marker id="arr" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+
+  <!-- User Space -->
+  <rect x="20" y="20" width="860" height="90" rx="8" fill="url(#userGrad)" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="40" y="48" font-weight="700" fill="#58a6ff" font-size="16">用户空间 (User Space) — Ring 3</text>
+  <g font-size="12" fill="#e6edf3">
+    <rect x="50"  y="62" width="120" height="34" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="110" y="83" text-anchor="middle">应用程序</text>
+    <rect x="180" y="62" width="120" height="34" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="240" y="83" text-anchor="middle">Shell (bash)</text>
+    <rect x="310" y="62" width="120" height="34" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="370" y="83" text-anchor="middle">glibc / musl</text>
+    <rect x="440" y="62" width="160" height="34" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="520" y="83" text-anchor="middle">系统工具 (coreutils)</text>
+    <rect x="610" y="62" width="240" height="34" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="730" y="83" text-anchor="middle">服务进程 / 容器运行时</text>
+  </g>
+
+  <!-- syscall arrow -->
+  <line x1="450" y1="110" x2="450" y2="138" stroke="#8b949e" stroke-width="2" marker-end="url(#arr)"/>
+  <text x="460" y="128" fill="#8b949e">系统调用 (int 0x80 / syscall / sysenter)</text>
+
+  <!-- Kernel Space -->
+  <rect x="20" y="138" width="860" height="270" rx="8" fill="url(#kernGrad)" stroke="#ff7b29" stroke-width="1.5"/>
+  <text x="40" y="166" font-weight="700" fill="#ff7b29" font-size="16">内核空间 (Kernel Space) — Ring 0</text>
+
+  <!-- Core Subsystems -->
+  <g font-size="12" fill="#e6edf3">
+    <rect x="40"  y="184" width="160" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="120" y="207" text-anchor="middle" font-weight="700" fill="#ff7b29">进程管理</text>
+    <text x="120" y="226" text-anchor="middle" fill="#8b949e" font-size="11">scheduler/fork/exit</text>
+    <text x="120" y="244" text-anchor="middle" fill="#8b949e" font-size="11">CFS / RT / Deadline</text>
+
+    <rect x="220" y="184" width="160" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="300" y="207" text-anchor="middle" font-weight="700" fill="#ff7b29">内存管理</text>
+    <text x="300" y="226" text-anchor="middle" fill="#8b949e" font-size="11">buddy / slab / vmalloc</text>
+    <text x="300" y="244" text-anchor="middle" fill="#8b949e" font-size="11">page cache / NUMA</text>
+
+    <rect x="400" y="184" width="160" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="480" y="207" text-anchor="middle" font-weight="700" fill="#ff7b29">VFS / 文件系统</text>
+    <text x="480" y="226" text-anchor="middle" fill="#8b949e" font-size="11">ext4 / btrfs / xfs</text>
+    <text x="480" y="244" text-anchor="middle" fill="#8b949e" font-size="11">tmpfs / procfs / sysfs</text>
+
+    <rect x="580" y="184" width="160" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="660" y="207" text-anchor="middle" font-weight="700" fill="#ff7b29">网络栈</text>
+    <text x="660" y="226" text-anchor="middle" fill="#8b949e" font-size="11">TCP/UDP/IP/ARP</text>
+    <text x="660" y="244" text-anchor="middle" fill="#8b949e" font-size="11">netfilter / XDP / eBPF</text>
+
+    <rect x="760" y="184" width="100" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="810" y="207" text-anchor="middle" font-weight="700" fill="#ff7b29">IPC</text>
+    <text x="810" y="226" text-anchor="middle" fill="#8b949e" font-size="11">signals/pipes</text>
+    <text x="810" y="244" text-anchor="middle" fill="#8b949e" font-size="11">shm/msgq/sem</text>
+  </g>
+
+  <!-- Driver Layer -->
+  <rect x="40" y="284" width="820" height="50" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+  <text x="450" y="304" text-anchor="middle" font-weight="700" fill="#56d364">设备驱动 (Device Drivers) — 字符 · 块 · 网络</text>
+  <text x="450" y="322" text-anchor="middle" fill="#8b949e" font-size="11">platform · PCI · USB · I²C · SPI · GPIO · DMA · IRQ subsystem</text>
+
+  <!-- HAL / Arch -->
+  <rect x="40" y="350" width="820" height="42" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+  <text x="450" y="372" text-anchor="middle" font-weight="700" fill="#bc8cff">架构相关 (arch/) · 内存管理硬件 · 中断控制器 · 时钟源 · 原子指令</text>
+
+  <!-- Hardware boundary -->
+  <line x1="450" y1="408" x2="450" y2="436" stroke="#8b949e" stroke-width="2" marker-end="url(#arr)"/>
+  <text x="460" y="426" fill="#8b949e">驱动 → 寄存器/MMIO/中断</text>
+
+  <!-- Hardware -->
+  <rect x="20" y="436" width="860" height="70" rx="8" fill="url(#hwGrad)" stroke="#bc8cff" stroke-width="1.5"/>
+  <text x="40" y="464" font-weight="700" fill="#bc8cff" font-size="16">硬件 (Hardware)</text>
+  <g font-size="12" fill="#e6edf3">
+    <rect x="50"  y="478" width="90"  height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="95"  y="494" text-anchor="middle">CPU (x86/ARM)</text>
+    <rect x="150" y="478" width="80"  height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="190" y="494" text-anchor="middle">DRAM</text>
+    <rect x="240" y="478" width="100" height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="290" y="494" text-anchor="middle">NVMe / SATA</text>
+    <rect x="350" y="478" width="100" height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="400" y="494" text-anchor="middle">NIC (网卡)</text>
+    <rect x="460" y="478" width="80"  height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="500" y="494" text-anchor="middle">GPU</text>
+    <rect x="550" y="478" width="120" height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="610" y="494" text-anchor="middle">USB / I²C 设备</text>
+    <rect x="680" y="478" width="170" height="22" rx="4" fill="#1a2028" stroke="#30363d"/>
+    <text x="765" y="494" text-anchor="middle">中断控制器 (APIC/GIC)</text>
+  </g>
+</svg>
diff --git a/assets/diagrams/cfs-rbtree.svg b/assets/diagrams/cfs-rbtree.svg
new file mode 100644
index 0000000..04ff55c
--- /dev/null
+++ b/assets/diagrams/cfs-rbtree.svg
@@ -0,0 +1,102 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 480" font-family="-apple-system, sans-serif" font-size="13">
+  <defs>
+    <marker id="ar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">CFS 完全公平调度器 (Completely Fair Scheduler)</text>
+  <text x="450" y="46" text-anchor="middle" font-size="12" fill="#8b949e">核心思想：所有进程"公平"获得 vruntime 增长 — 总是调度 vruntime 最小者</text>
+
+  <!-- 红黑树主体 -->
+  <g transform="translate(80, 80)">
+    <text x="370" y="20" text-anchor="middle" font-weight="700" fill="#58a6ff" font-size="15">每个 CPU 的运行队列 (cfs_rq) — 红黑树按 vruntime 排序</text>
+
+    <!-- Root node -->
+    <g transform="translate(340, 50)">
+      <circle r="28" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P3</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=120</text>
+    </g>
+
+    <!-- Level 1 left/right -->
+    <line x1="362" y1="55" x2="220" y2="125" stroke="#8b949e" stroke-width="1.5"/>
+    <line x1="378" y1="55" x2="520" y2="125" stroke="#8b949e" stroke-width="1.5"/>
+    <g transform="translate(200, 130)">
+      <circle r="28" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P1</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=85</text>
+    </g>
+    <g transform="translate(500, 130)">
+      <circle r="28" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P5</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=150</text>
+    </g>
+
+    <!-- Level 2 -->
+    <line x1="180" y1="138" x2="100" y2="208" stroke="#8b949e" stroke-width="1.5"/>
+    <line x1="218" y1="138" x2="298" y2="208" stroke="#8b949e" stroke-width="1.5"/>
+    <line x1="482" y1="138" x2="402" y2="208" stroke="#8b949e" stroke-width="1.5"/>
+    <line x1="518" y1="138" x2="600" y2="208" stroke="#8b949e" stroke-width="1.5"/>
+
+    <g transform="translate(80, 215)">
+      <circle r="26" fill="#0f1419" stroke="#56d364" stroke-width="3"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#56d364" font-weight="700">P0</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#56d364">vr=42</text>
+    </g>
+    <g transform="translate(280, 215)">
+      <circle r="26" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P2</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=100</text>
+    </g>
+    <g transform="translate(380, 215)">
+      <circle r="26" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P4</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=135</text>
+    </g>
+    <g transform="translate(620, 215)">
+      <circle r="26" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text y="-2" text-anchor="middle" font-size="11" fill="#e6edf3" font-weight="700">P6</text>
+      <text y="14" text-anchor="middle" font-size="10" fill="#8b949e">vr=180</text>
+    </g>
+
+    <!-- leftmost arrow -->
+    <text x="80" y="280" text-anchor="middle" fill="#56d364" font-weight="700" font-size="13">↑</text>
+    <text x="80" y="298" text-anchor="middle" fill="#56d364" font-size="11">最左节点</text>
+    <text x="80" y="312" text-anchor="middle" fill="#56d364" font-size="11">下一个被调度</text>
+  </g>
+
+  <!-- vruntime formula box -->
+  <g transform="translate(80, 360)">
+    <rect x="0" y="0" width="350" height="100" rx="8" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="175" y="22" text-anchor="middle" font-weight="700" fill="#58a6ff">vruntime 计算公式</text>
+    <text x="20" y="44" font-family="monospace" fill="#e6edf3" font-size="12">
+      delta_vruntime = delta_exec ×
+    </text>
+    <text x="20" y="60" font-family="monospace" fill="#ff7b29" font-size="12">
+      NICE_0_LOAD / se.load.weight
+    </text>
+    <text x="20" y="78" font-size="11" fill="#8b949e">
+      · 高优先级 weight 大 → vruntime 增长慢 → 多调度
+    </text>
+    <text x="20" y="93" font-size="11" fill="#8b949e">
+      · 低优先级 weight 小 → vruntime 增长快 → 少调度
+    </text>
+  </g>
+
+  <!-- key ops -->
+  <g transform="translate(460, 360)">
+    <rect x="0" y="0" width="350" height="100" rx="8" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="175" y="22" text-anchor="middle" font-weight="700" fill="#56d364">关键操作</text>
+    <text x="20" y="44" font-size="11" fill="#e6edf3">
+      <tspan font-weight="700" fill="#56d364">pick_next_task_fair():</tspan> O(1) 选取
+    </text>
+    <text x="20" y="58" font-size="11" fill="#8b949e">取最左节点 (rb_leftmost 缓存 → O(1))</text>
+    <text x="20" y="76" font-size="11" fill="#e6edf3">
+      <tspan font-weight="700" fill="#56d364">enqueue_task_fair():</tspan> O(log n) 插入
+    </text>
+    <text x="20" y="92" font-size="11" fill="#e6edf3">
+      <tspan font-weight="700" fill="#56d364">dequeue_task_fair():</tspan> O(log n) 删除
+    </text>
+  </g>
+</svg>
diff --git a/assets/diagrams/container-anatomy.svg b/assets/diagrams/container-anatomy.svg
new file mode 100644
index 0000000..96e5748
--- /dev/null
+++ b/assets/diagrams/container-anatomy.svg
@@ -0,0 +1,107 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 540" font-family="-apple-system, sans-serif" font-size="12">
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">Linux 容器 = 命名空间 (Namespaces) + 控制组 (cgroups) + 联合文件系统</text>
+
+  <!-- Container box -->
+  <g transform="translate(40, 60)">
+    <rect width="820" height="220" rx="8" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+    <text x="20" y="28" font-weight="700" fill="#ff7b29" font-size="15">容器进程 (普通的 Linux 进程，但被以下机制"圈起来")</text>
+
+    <!-- Namespaces -->
+    <g transform="translate(20, 50)">
+      <text font-weight="700" fill="#58a6ff" font-size="13">① 命名空间 (Namespaces) — 让进程"看不见"系统其他部分</text>
+
+      <g font-size="11">
+        <rect x="0" y="14" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="80" y="32" text-anchor="middle" fill="#58a6ff" font-weight="700">PID ns</text>
+        <text x="80" y="48" text-anchor="middle" fill="#8b949e">独立 PID 编号</text>
+        <text x="80" y="64" text-anchor="middle" fill="#8b949e">容器内 init=1</text>
+
+        <rect x="170" y="14" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="250" y="32" text-anchor="middle" fill="#58a6ff" font-weight="700">Mount ns</text>
+        <text x="250" y="48" text-anchor="middle" fill="#8b949e">独立挂载点</text>
+        <text x="250" y="64" text-anchor="middle" fill="#8b949e">独立根目录视图</text>
+
+        <rect x="340" y="14" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="420" y="32" text-anchor="middle" fill="#58a6ff" font-weight="700">Network ns</text>
+        <text x="420" y="48" text-anchor="middle" fill="#8b949e">独立网卡/路由</text>
+        <text x="420" y="64" text-anchor="middle" fill="#8b949e">独立防火墙</text>
+
+        <rect x="510" y="14" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="590" y="32" text-anchor="middle" fill="#58a6ff" font-weight="700">UTS ns</text>
+        <text x="590" y="48" text-anchor="middle" fill="#8b949e">独立 hostname</text>
+        <text x="590" y="64" text-anchor="middle" fill="#8b949e">domain name</text>
+
+        <rect x="680" y="14" width="100" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="730" y="32" text-anchor="middle" fill="#58a6ff" font-weight="700">IPC ns</text>
+        <text x="730" y="48" text-anchor="middle" fill="#8b949e">独立 SysV</text>
+        <text x="730" y="64" text-anchor="middle" fill="#8b949e">/POSIX IPC</text>
+
+        <rect x="0" y="86" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="80" y="104" text-anchor="middle" fill="#58a6ff" font-weight="700">User ns</text>
+        <text x="80" y="120" text-anchor="middle" fill="#8b949e">UID/GID 映射</text>
+        <text x="80" y="136" text-anchor="middle" fill="#8b949e">非 root 也能容器化</text>
+
+        <rect x="170" y="86" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="250" y="104" text-anchor="middle" fill="#58a6ff" font-weight="700">Cgroup ns</text>
+        <text x="250" y="120" text-anchor="middle" fill="#8b949e">独立 cgroup 视图</text>
+        <text x="250" y="136" text-anchor="middle" fill="#8b949e">隐藏宿主路径</text>
+
+        <rect x="340" y="86" width="160" height="60" rx="4" fill="#0f1419" stroke="#58a6ff"/>
+        <text x="420" y="104" text-anchor="middle" fill="#58a6ff" font-weight="700">Time ns</text>
+        <text x="420" y="120" text-anchor="middle" fill="#8b949e">独立时钟偏移</text>
+        <text x="420" y="136" text-anchor="middle" fill="#8b949e">(5.6+)</text>
+      </g>
+    </g>
+  </g>
+
+  <!-- cgroups -->
+  <g transform="translate(40, 300)">
+    <rect width="400" height="220" rx="8" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+    <text x="20" y="28" font-weight="700" fill="#56d364" font-size="15">② cgroups — 资源限制</text>
+
+    <g font-size="11" transform="translate(20, 50)">
+      <rect x="0" y="0" width="170" height="44" rx="4" fill="#0f1419" stroke="#56d364"/>
+      <text x="85" y="18" text-anchor="middle" fill="#56d364" font-weight="700">cpu</text>
+      <text x="85" y="34" text-anchor="middle" fill="#8b949e">CPU 时间份额/上限</text>
+
+      <rect x="190" y="0" width="170" height="44" rx="4" fill="#0f1419" stroke="#56d364"/>
+      <text x="275" y="18" text-anchor="middle" fill="#56d364" font-weight="700">memory</text>
+      <text x="275" y="34" text-anchor="middle" fill="#8b949e">内存上限/OOM</text>
+
+      <rect x="0" y="54" width="170" height="44" rx="4" fill="#0f1419" stroke="#56d364"/>
+      <text x="85" y="72" text-anchor="middle" fill="#56d364" font-weight="700">io / blkio</text>
+      <text x="85" y="88" text-anchor="middle" fill="#8b949e">磁盘 IOPS / 带宽</text>
+
+      <rect x="190" y="54" width="170" height="44" rx="4" fill="#0f1419" stroke="#56d364"/>
+      <text x="275" y="72" text-anchor="middle" fill="#56d364" font-weight="700">pids</text>
+      <text x="275" y="88" text-anchor="middle" fill="#8b949e">最大进程数</text>
+
+      <rect x="0" y="108" width="360" height="44" rx="4" fill="#0f1419" stroke="#56d364"/>
+      <text x="180" y="126" text-anchor="middle" fill="#56d364" font-weight="700">devices, freezer, net_cls, net_prio, hugetlb, rdma...</text>
+      <text x="180" y="142" text-anchor="middle" fill="#8b949e">cgroups v2 统一层级 (Linux 4.5+)</text>
+    </g>
+  </g>
+
+  <!-- Union FS -->
+  <g transform="translate(460, 300)">
+    <rect width="400" height="220" rx="8" fill="#1a2028" stroke="#bc8cff" stroke-width="2"/>
+    <text x="20" y="28" font-weight="700" fill="#bc8cff" font-size="15">③ 联合文件系统 — 镜像分层</text>
+
+    <g transform="translate(20, 50)" font-size="11">
+      <!-- Stacked layers -->
+      <rect x="0" y="100" width="360" height="22" rx="2" fill="#0f1419" stroke="#bc8cff"/>
+      <text x="180" y="116" text-anchor="middle" fill="#e6edf3">基础镜像层 (alpine:latest) — 只读</text>
+
+      <rect x="20" y="76" width="320" height="22" rx="2" fill="#0f1419" stroke="#bc8cff"/>
+      <text x="180" y="92" text-anchor="middle" fill="#e6edf3">中间层 (apk add ...) — 只读</text>
+
+      <rect x="40" y="52" width="280" height="22" rx="2" fill="#0f1419" stroke="#bc8cff"/>
+      <text x="180" y="68" text-anchor="middle" fill="#e6edf3">应用层 (COPY app.py) — 只读</text>
+
+      <rect x="60" y="28" width="240" height="22" rx="2" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+      <text x="180" y="44" text-anchor="middle" fill="#ff7b29" font-weight="700">容器读写层 (CoW)</text>
+
+      <text x="180" y="156" text-anchor="middle" fill="#8b949e">OverlayFS: lowerdir + upperdir (+ workdir) → merged</text>
+    </g>
+  </g>
+</svg>
diff --git a/assets/diagrams/page-table.svg b/assets/diagrams/page-table.svg
new file mode 100644
index 0000000..b5aade9
--- /dev/null
+++ b/assets/diagrams/page-table.svg
@@ -0,0 +1,109 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 560" font-family="-apple-system, sans-serif" font-size="13">
+  <defs>
+    <marker id="arr2" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/>
+    </marker>
+    <marker id="arr3" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#58a6ff"/>
+    </marker>
+  </defs>
+
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">x86 32位虚拟地址 → 物理地址</text>
+
+  <!-- Virtual Address breakdown -->
+  <g transform="translate(50, 60)">
+    <rect x="0" y="0" width="600" height="38" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5" rx="4"/>
+    <line x1="200" y1="0" x2="200" y2="38" stroke="#58a6ff" stroke-dasharray="3,2"/>
+    <line x1="400" y1="0" x2="400" y2="38" stroke="#58a6ff" stroke-dasharray="3,2"/>
+    <text x="100" y="23" text-anchor="middle" fill="#58a6ff" font-weight="700">PGD 索引</text>
+    <text x="100" y="55" text-anchor="middle" fill="#8b949e" font-size="11">bits 31..22 (10位)</text>
+    <text x="300" y="23" text-anchor="middle" fill="#58a6ff" font-weight="700">PTE 索引</text>
+    <text x="300" y="55" text-anchor="middle" fill="#8b949e" font-size="11">bits 21..12 (10位)</text>
+    <text x="500" y="23" text-anchor="middle" fill="#58a6ff" font-weight="700">页内偏移</text>
+    <text x="500" y="55" text-anchor="middle" fill="#8b949e" font-size="11">bits 11..0 (12位)</text>
+  </g>
+  <text x="50" y="105" font-size="11" fill="#8b949e">32位虚拟地址</text>
+
+  <!-- CR3 -->
+  <g transform="translate(50, 150)">
+    <rect x="0" y="0" width="100" height="40" rx="4" fill="#ff7b29" opacity="0.2" stroke="#ff7b29" stroke-width="2"/>
+    <text x="50" y="25" text-anchor="middle" font-weight="700" fill="#ff7b29">CR3</text>
+  </g>
+
+  <!-- PGD (Page Directory) -->
+  <g transform="translate(190, 130)">
+    <rect x="0" y="0" width="160" height="240" rx="4" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="80" y="22" text-anchor="middle" font-weight="700" fill="#ff7b29">页目录 (PGD)</text>
+    <text x="80" y="38" text-anchor="middle" font-size="11" fill="#8b949e">4KB · 1024 项 × 4B</text>
+    <line x1="0" y1="48" x2="160" y2="48" stroke="#30363d"/>
+    <text x="80" y="68" text-anchor="middle" font-size="11" fill="#8b949e">[0] →</text>
+    <text x="80" y="86" text-anchor="middle" font-size="11" fill="#8b949e">[1] →</text>
+    <text x="80" y="104" text-anchor="middle" font-size="11" fill="#8b949e">...</text>
+    <rect x="6" y="118" width="148" height="20" rx="2" fill="#ff7b29" opacity="0.3"/>
+    <text x="80" y="133" text-anchor="middle" font-size="11" fill="#e6edf3">[PGD索引] = 物理地址+权限</text>
+    <text x="80" y="156" text-anchor="middle" font-size="11" fill="#8b949e">...</text>
+    <text x="80" y="180" text-anchor="middle" font-size="11" fill="#8b949e">[1023]</text>
+  </g>
+
+  <!-- arrow CR3 -> PGD -->
+  <line x1="150" y1="170" x2="190" y2="170" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr2)"/>
+
+  <!-- PTE (Page Table) -->
+  <g transform="translate(400, 130)">
+    <rect x="0" y="0" width="160" height="240" rx="4" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="80" y="22" text-anchor="middle" font-weight="700" fill="#58a6ff">页表 (PTE)</text>
+    <text x="80" y="38" text-anchor="middle" font-size="11" fill="#8b949e">4KB · 1024 项 × 4B</text>
+    <line x1="0" y1="48" x2="160" y2="48" stroke="#30363d"/>
+    <text x="80" y="68" text-anchor="middle" font-size="11" fill="#8b949e">[0] →</text>
+    <text x="80" y="86" text-anchor="middle" font-size="11" fill="#8b949e">...</text>
+    <rect x="6" y="100" width="148" height="20" rx="2" fill="#58a6ff" opacity="0.3"/>
+    <text x="80" y="115" text-anchor="middle" font-size="11" fill="#e6edf3">[PTE索引] = 物理页帧+标志</text>
+    <text x="80" y="140" text-anchor="middle" font-size="11" fill="#8b949e">...</text>
+    <text x="80" y="160" text-anchor="middle" font-size="11" fill="#8b949e">[1023]</text>
+    <line x1="0" y1="175" x2="160" y2="175" stroke="#30363d"/>
+    <text x="80" y="194" text-anchor="middle" font-size="10" fill="#8b949e">PTE 标志位:</text>
+    <text x="10" y="210" font-size="10" fill="#56d364">P</text><text x="22" y="210" font-size="10" fill="#8b949e">存在</text>
+    <text x="50" y="210" font-size="10" fill="#56d364">R/W</text><text x="72" y="210" font-size="10" fill="#8b949e">读写</text>
+    <text x="100" y="210" font-size="10" fill="#56d364">U/S</text><text x="122" y="210" font-size="10" fill="#8b949e">用户态</text>
+    <text x="10" y="226" font-size="10" fill="#e3b341">A</text><text x="22" y="226" font-size="10" fill="#8b949e">已访问</text>
+    <text x="50" y="226" font-size="10" fill="#e3b341">D</text><text x="62" y="226" font-size="10" fill="#8b949e">已脏</text>
+    <text x="90" y="226" font-size="10" fill="#f85149">NX</text><text x="106" y="226" font-size="10" fill="#8b949e">不可执行</text>
+  </g>
+
+  <!-- arrow PGD -> PTE -->
+  <line x1="350" y1="170" x2="400" y2="170" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr2)"/>
+
+  <!-- Physical Page -->
+  <g transform="translate(620, 130)">
+    <rect x="0" y="0" width="160" height="120" rx="4" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="80" y="22" text-anchor="middle" font-weight="700" fill="#bc8cff">物理页 (4KB)</text>
+    <line x1="0" y1="34" x2="160" y2="34" stroke="#30363d"/>
+    <text x="80" y="52" text-anchor="middle" font-size="11" fill="#8b949e">+ 页内偏移 (12位)</text>
+    <rect x="20" y="64" width="120" height="36" rx="3" fill="#bc8cff" opacity="0.3"/>
+    <text x="80" y="86" text-anchor="middle" font-size="11" fill="#e6edf3">最终物理地址</text>
+  </g>
+  <line x1="510" y1="225" x2="620" y2="225" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr2)"/>
+
+  <!-- TLB sidebar -->
+  <g transform="translate(50, 320)">
+    <rect x="0" y="0" width="800" height="180" rx="8" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="20" y="24" font-weight="700" fill="#56d364" font-size="15">TLB (Translation Lookaside Buffer) — 页表查询缓存</text>
+    <text x="20" y="46" fill="#8b949e">CPU 每次访存都需要走 2 次内存查页表 → 极慢。TLB 缓存近期翻译结果：</text>
+
+    <rect x="20" y="60" width="350" height="100" rx="4" fill="#0f1419" stroke="#30363d"/>
+    <text x="195" y="80" text-anchor="middle" font-weight="700" fill="#e6edf3">TLB 命中 (Hit)</text>
+    <text x="195" y="100" text-anchor="middle" font-size="12" fill="#56d364">~1 cycle</text>
+    <text x="40" y="120" font-size="11" fill="#8b949e">查 TLB → 命中 → 直接得到物理页帧</text>
+    <text x="40" y="138" font-size="11" fill="#8b949e">→ 拼接偏移 → 访问物理内存</text>
+
+    <rect x="430" y="60" width="350" height="100" rx="4" fill="#0f1419" stroke="#30363d"/>
+    <text x="605" y="80" text-anchor="middle" font-weight="700" fill="#e6edf3">TLB 缺失 (Miss)</text>
+    <text x="605" y="100" text-anchor="middle" font-size="12" fill="#f85149">~100+ cycles</text>
+    <text x="450" y="120" font-size="11" fill="#8b949e">查 TLB → 缺失 → 走页表 (2~5 次内存访问)</text>
+    <text x="450" y="138" font-size="11" fill="#8b949e">→ 填入 TLB → 访问物理内存</text>
+  </g>
+
+  <text x="450" y="530" text-anchor="middle" font-size="11" fill="#8b949e" font-style="italic">
+    每次进程切换时 CR3 改变 → 非全局 TLB 项失效 (PCID 可优化, Linux 4.14+)
+  </text>
+</svg>
diff --git a/assets/diagrams/syscall-flow.svg b/assets/diagrams/syscall-flow.svg
new file mode 100644
index 0000000..6cd8851
--- /dev/null
+++ b/assets/diagrams/syscall-flow.svg
@@ -0,0 +1,62 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 560" font-family="-apple-system, sans-serif" font-size="12">
+  <defs>
+    <marker id="ar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/>
+    </marker>
+  </defs>
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">系统调用完整路径 (read(fd, buf, n) 为例)</text>
+
+  <!-- Stages -->
+  <g transform="translate(40, 60)">
+    <!-- Step 1 -->
+    <rect x="0" y="0" width="820" height="50" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="10" y="20" font-weight="700" fill="#58a6ff">① 用户程序 (用户态)</text>
+    <text x="20" y="40" font-family="monospace" font-size="11" fill="#e6edf3">
+      ret = read(3, buf, 1024);  // glibc 包装函数
+    </text>
+
+    <line x1="410" y1="50" x2="410" y2="70" stroke="#ff7b29" stroke-width="2" marker-end="url(#ar)"/>
+
+    <rect x="0" y="70" width="820" height="60" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="10" y="90" font-weight="700" fill="#58a6ff">② glibc 包装 (用户态)</text>
+    <text x="20" y="110" font-family="monospace" font-size="11" fill="#e6edf3">
+      mov $0, %rax     # __NR_read = 0
+    </text>
+    <text x="20" y="125" font-family="monospace" font-size="11" fill="#e6edf3">
+      syscall          # 触发系统调用 (x86_64) ; 或 int 0x80 (i386)
+    </text>
+
+    <line x1="410" y1="130" x2="410" y2="150" stroke="#ff7b29" stroke-width="2" marker-end="url(#ar)"/>
+
+    <rect x="0" y="150" width="820" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="10" y="170" font-weight="700" fill="#ff7b29">③ CPU 硬件切换 (Ring 3 → Ring 0)</text>
+    <text x="20" y="188" font-size="11" fill="#8b949e">· 从 MSR_LSTAR 读取入口地址 → 跳转到 entry_SYSCALL_64</text>
+    <text x="20" y="204" font-size="11" fill="#8b949e">· 切换到内核栈 (TSS.RSP0)</text>
+    <text x="20" y="220" font-size="11" fill="#8b949e">· 保存用户态 rip/rflags/rsp 等到内核栈 (构造 pt_regs)</text>
+
+    <line x1="410" y1="230" x2="410" y2="250" stroke="#ff7b29" stroke-width="2" marker-end="url(#ar)"/>
+
+    <rect x="0" y="250" width="820" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="10" y="270" font-weight="700" fill="#ff7b29">④ 系统调用分发 (内核态)</text>
+    <text x="20" y="288" font-family="monospace" font-size="11" fill="#e6edf3">entry_SYSCALL_64 → do_syscall_64()</text>
+    <text x="20" y="304" font-family="monospace" font-size="11" fill="#e6edf3">→ sys_call_table[__NR_read](regs)</text>
+    <text x="20" y="320" font-family="monospace" font-size="11" fill="#56d364">→ ksys_read(fd, buf, count)</text>
+
+    <line x1="410" y1="330" x2="410" y2="350" stroke="#ff7b29" stroke-width="2" marker-end="url(#ar)"/>
+
+    <rect x="0" y="350" width="820" height="105" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="10" y="370" font-weight="700" fill="#bc8cff">⑤ VFS 通用路径 → 具体文件系统</text>
+    <text x="20" y="388" font-family="monospace" font-size="11" fill="#e6edf3">ksys_read → vfs_read → file-&gt;f_op-&gt;read_iter</text>
+    <text x="20" y="404" font-family="monospace" font-size="11" fill="#e6edf3">→ ext4_file_read_iter → generic_file_read_iter</text>
+    <text x="20" y="420" font-family="monospace" font-size="11" fill="#56d364">→ 页缓存命中? 是 → copy_to_user(buf, page, n) ✓</text>
+    <text x="20" y="436" font-family="monospace" font-size="11" fill="#f85149">→ 页缓存未命中? readpage → bio → 块设备 → 等待 IO</text>
+    <text x="20" y="450" font-family="monospace" font-size="10" fill="#8b949e">    (期间可能调度走，等 IO 完成被唤醒)</text>
+
+    <line x1="410" y1="455" x2="410" y2="475" stroke="#ff7b29" stroke-width="2" marker-end="url(#ar)"/>
+
+    <rect x="0" y="475" width="820" height="60" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="10" y="495" font-weight="700" fill="#56d364">⑥ 返回用户态</text>
+    <text x="20" y="513" font-size="11" fill="#8b949e">· 检查 TIF_NEED_RESCHED / TIF_SIGPENDING → 必要时 schedule / 处理信号</text>
+    <text x="20" y="528" font-size="11" fill="#8b949e">· sysret/sysretq → 恢复 rip → 用户态继续执行</text>
+  </g>
+</svg>
diff --git a/assets/diagrams/tcp-handshake.svg b/assets/diagrams/tcp-handshake.svg
new file mode 100644
index 0000000..d85d3ad
--- /dev/null
+++ b/assets/diagrams/tcp-handshake.svg
@@ -0,0 +1,102 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 560" font-family="-apple-system, sans-serif" font-size="12">
+  <defs>
+    <marker id="ar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+    </marker>
+    <marker id="arA" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#58a6ff"/>
+    </marker>
+    <marker id="arB" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/>
+    </marker>
+  </defs>
+
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">TCP 三次握手 — 内核状态机视角</text>
+
+  <!-- Two columns -->
+  <text x="200" y="68" text-anchor="middle" font-weight="700" fill="#58a6ff" font-size="15">客户端 (Client)</text>
+  <text x="700" y="68" text-anchor="middle" font-weight="700" fill="#ff7b29" font-size="15">服务端 (Server)</text>
+
+  <!-- vertical lines -->
+  <line x1="200" y1="80" x2="200" y2="510" stroke="#30363d" stroke-dasharray="3,3"/>
+  <line x1="700" y1="80" x2="700" y2="510" stroke="#30363d" stroke-dasharray="3,3"/>
+
+  <!-- States client -->
+  <g>
+    <rect x="100" y="90" width="200" height="36" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+    <text x="200" y="113" text-anchor="middle" fill="#e6edf3">CLOSED</text>
+  </g>
+  <text x="200" y="146" text-anchor="middle" font-size="11" fill="#8b949e">connect()</text>
+  <text x="200" y="160" text-anchor="middle" font-size="10" fill="#8b949e">tcp_v4_connect()</text>
+
+  <g>
+    <rect x="100" y="172" width="200" height="36" rx="4" fill="#1a2028" stroke="#e3b341"/>
+    <text x="200" y="195" text-anchor="middle" fill="#e3b341">SYN_SENT</text>
+  </g>
+
+  <g>
+    <rect x="100" y="320" width="200" height="36" rx="4" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+    <text x="200" y="343" text-anchor="middle" fill="#56d364" font-weight="700">ESTABLISHED</text>
+  </g>
+
+  <!-- States server -->
+  <g>
+    <rect x="600" y="90" width="200" height="36" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+    <text x="700" y="113" text-anchor="middle" fill="#e6edf3">CLOSED</text>
+  </g>
+  <text x="700" y="146" text-anchor="middle" font-size="11" fill="#8b949e">listen()</text>
+  <text x="700" y="160" text-anchor="middle" font-size="10" fill="#8b949e">inet_listen()</text>
+
+  <g>
+    <rect x="600" y="172" width="200" height="36" rx="4" fill="#1a2028" stroke="#bc8cff"/>
+    <text x="700" y="195" text-anchor="middle" fill="#bc8cff">LISTEN</text>
+  </g>
+
+  <g>
+    <rect x="600" y="252" width="200" height="36" rx="4" fill="#1a2028" stroke="#e3b341"/>
+    <text x="700" y="275" text-anchor="middle" fill="#e3b341">SYN_RECV</text>
+  </g>
+
+  <g>
+    <rect x="600" y="380" width="200" height="36" rx="4" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+    <text x="700" y="403" text-anchor="middle" fill="#56d364" font-weight="700">ESTABLISHED</text>
+  </g>
+
+  <!-- packets -->
+  <g stroke="#58a6ff" stroke-width="2" fill="none">
+    <line x1="305" y1="225" x2="595" y2="265" marker-end="url(#arA)"/>
+  </g>
+  <rect x="350" y="218" width="200" height="22" rx="3" fill="#0f1419" stroke="#58a6ff"/>
+  <text x="450" y="234" text-anchor="middle" fill="#58a6ff" font-weight="700">① SYN  seq=x</text>
+
+  <g stroke="#ff7b29" stroke-width="2" fill="none">
+    <line x1="595" y1="285" x2="305" y2="325" marker-end="url(#arB)"/>
+  </g>
+  <rect x="320" y="278" width="260" height="22" rx="3" fill="#0f1419" stroke="#ff7b29"/>
+  <text x="450" y="294" text-anchor="middle" fill="#ff7b29" font-weight="700">② SYN+ACK  seq=y, ack=x+1</text>
+
+  <g stroke="#58a6ff" stroke-width="2" fill="none">
+    <line x1="305" y1="365" x2="595" y2="395" marker-end="url(#arA)"/>
+  </g>
+  <rect x="320" y="358" width="260" height="22" rx="3" fill="#0f1419" stroke="#58a6ff"/>
+  <text x="450" y="374" text-anchor="middle" fill="#58a6ff" font-weight="700">③ ACK  seq=x+1, ack=y+1</text>
+
+  <!-- Queue boxes -->
+  <g transform="translate(30, 440)">
+    <rect x="0" y="0" width="380" height="100" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="190" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700" font-size="13">客户端关键代码路径</text>
+    <text x="10" y="42" font-size="11" fill="#8b949e">tcp_v4_connect() 构造 SYN 包</text>
+    <text x="10" y="58" font-size="11" fill="#8b949e">sk-&gt;sk_state = TCP_SYN_SENT</text>
+    <text x="10" y="74" font-size="11" fill="#8b949e">tcp_rcv_synsent_state_process() 处理 SYN+ACK</text>
+    <text x="10" y="90" font-size="11" fill="#56d364">sk-&gt;sk_state = TCP_ESTABLISHED → 唤醒 connect()</text>
+  </g>
+
+  <g transform="translate(490, 440)">
+    <rect x="0" y="0" width="380" height="100" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="190" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700" font-size="13">服务端关键代码路径</text>
+    <text x="10" y="42" font-size="11" fill="#8b949e">tcp_v4_rcv → tcp_v4_do_rcv → tcp_v4_conn_request</text>
+    <text x="10" y="58" font-size="11" fill="#8b949e">入半连接队列 (syn_table) / SYN cookies</text>
+    <text x="10" y="74" font-size="11" fill="#8b949e">收到 ACK → tcp_check_req → tcp_v4_syn_recv_sock</text>
+    <text x="10" y="90" font-size="11" fill="#56d364">入全连接队列 → 唤醒 accept()</text>
+  </g>
+</svg>
diff --git a/assets/diagrams/vfs-objects.svg b/assets/diagrams/vfs-objects.svg
new file mode 100644
index 0000000..12027b8
--- /dev/null
+++ b/assets/diagrams/vfs-objects.svg
@@ -0,0 +1,106 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 520" font-family="-apple-system, sans-serif" font-size="12">
+  <defs>
+    <marker id="ar" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">VFS 四大对象关系 (Linux 2.6+)</text>
+
+  <!-- task_struct -->
+  <g transform="translate(40, 60)">
+    <rect width="180" height="110" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#ff7b29">task_struct</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">进程描述符</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">files_struct *files</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">fs_struct *fs</text>
+    <text x="14" y="102" font-size="11" fill="#e6edf3">mm_struct *mm</text>
+  </g>
+
+  <!-- files_struct -->
+  <g transform="translate(290, 50)">
+    <rect width="180" height="130" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#58a6ff">files_struct</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">每进程文件表</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">fd_array[NR_OPEN_DEFAULT]</text>
+    <text x="14" y="84" font-size="10" fill="#8b949e">[0] → stdin file</text>
+    <text x="14" y="100" font-size="10" fill="#8b949e">[1] → stdout file</text>
+    <text x="14" y="116" font-size="10" fill="#8b949e">[2] → stderr file</text>
+  </g>
+  <line x1="220" y1="115" x2="290" y2="115" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- file -->
+  <g transform="translate(540, 50)">
+    <rect width="180" height="160" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#56d364">struct file</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">打开的文件实例</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">f_path.dentry</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">f_path.mnt</text>
+    <text x="14" y="102" font-size="11" fill="#e6edf3">f_op (操作集)</text>
+    <text x="14" y="120" font-size="11" fill="#e6edf3">f_pos (读写位置)</text>
+    <text x="14" y="138" font-size="11" fill="#e6edf3">f_flags</text>
+    <text x="14" y="152" font-size="11" fill="#e6edf3">f_count (引用计数)</text>
+  </g>
+  <line x1="470" y1="130" x2="540" y2="130" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- dentry -->
+  <g transform="translate(540, 240)">
+    <rect width="180" height="120" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#bc8cff">struct dentry</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">目录项 (dcache)</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">d_name (文件名)</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">d_parent (父)</text>
+    <text x="14" y="102" font-size="11" fill="#e6edf3">d_inode</text>
+  </g>
+  <line x1="630" y1="210" x2="630" y2="240" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- inode -->
+  <g transform="translate(290, 240)">
+    <rect width="180" height="170" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#e3b341">struct inode</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">文件元数据</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">i_ino (编号)</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">i_mode (权限/类型)</text>
+    <text x="14" y="102" font-size="11" fill="#e6edf3">i_size</text>
+    <text x="14" y="120" font-size="11" fill="#e6edf3">i_op (操作集)</text>
+    <text x="14" y="138" font-size="11" fill="#e6edf3">i_fop (默认 f_op)</text>
+    <text x="14" y="156" font-size="11" fill="#e6edf3">i_mapping (页缓存)</text>
+  </g>
+  <line x1="540" y1="300" x2="470" y2="300" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- super_block -->
+  <g transform="translate(40, 290)">
+    <rect width="180" height="120" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#f85149">super_block</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">文件系统实例</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">s_dev (设备)</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">s_type → ext4</text>
+    <text x="14" y="102" font-size="11" fill="#e6edf3">s_root (根 dentry)</text>
+  </g>
+  <line x1="290" y1="330" x2="220" y2="330" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- address_space -->
+  <g transform="translate(540, 390)">
+    <rect width="180" height="100" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="90" y="22" text-anchor="middle" font-weight="700" fill="#56d364">address_space</text>
+    <text x="90" y="38" text-anchor="middle" font-size="10" fill="#8b949e">页缓存 (radix tree → XArray)</text>
+    <line x1="0" y1="48" x2="180" y2="48" stroke="#30363d"/>
+    <text x="14" y="66" font-size="11" fill="#e6edf3">i_pages (XArray)</text>
+    <text x="14" y="84" font-size="11" fill="#e6edf3">a_ops (readpage 等)</text>
+  </g>
+  <line x1="450" y1="395" x2="540" y2="430" stroke="#8b949e" stroke-width="1.5" marker-end="url(#ar)"/>
+
+  <!-- caption -->
+  <g transform="translate(40, 430)">
+    <rect width="460" height="80" rx="6" fill="#1a2028" stroke="#30363d"/>
+    <text x="20" y="22" fill="#ff7b29" font-weight="700">读取关系：</text>
+    <text x="20" y="40" font-size="11" fill="#e6edf3">task_struct → files_struct → file → dentry → inode → super_block</text>
+    <text x="20" y="60" fill="#56d364" font-weight="700">关键点：</text>
+    <text x="20" y="76" font-size="11" fill="#e6edf3">同一文件可被多次 open，多个 file 共享同一 inode</text>
+  </g>
+</svg>
diff --git a/assets/diagrams/vm-layout.svg b/assets/diagrams/vm-layout.svg
new file mode 100644
index 0000000..7cc036f
--- /dev/null
+++ b/assets/diagrams/vm-layout.svg
@@ -0,0 +1,101 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 540" font-family="-apple-system, sans-serif" font-size="12">
+  <text x="450" y="28" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">进程虚拟地址空间布局 (x86_64, 经典)</text>
+
+  <!-- main bar -->
+  <g transform="translate(180, 60)">
+    <!-- Kernel space (top) -->
+    <rect x="0" y="0" width="200" height="60" fill="#1a2028" stroke="#f85149" stroke-width="1.5"/>
+    <text x="100" y="22" text-anchor="middle" font-weight="700" fill="#f85149">内核空间</text>
+    <text x="100" y="38" text-anchor="middle" font-size="10" fill="#8b949e">(所有进程共享)</text>
+    <text x="100" y="52" text-anchor="middle" font-size="10" fill="#8b949e">直接映射 / vmalloc / 模块</text>
+    <text x="-170" y="22" fill="#8b949e" font-family="monospace" font-size="10">0xFFFFFFFFFFFFFFFF</text>
+    <text x="-170" y="60" fill="#8b949e" font-family="monospace" font-size="10">0xFFFF800000000000</text>
+    <text x="220" y="35" fill="#f85149" font-size="11">128 TB</text>
+
+    <!-- Non-canonical hole -->
+    <rect x="0" y="60" width="200" height="22" fill="#0f1419" stroke="#30363d" stroke-dasharray="3,2"/>
+    <text x="100" y="76" text-anchor="middle" font-size="10" fill="#8b949e">不可用地址 (canonical hole)</text>
+
+    <!-- Stack -->
+    <rect x="0" y="82" width="200" height="48" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+    <text x="100" y="100" text-anchor="middle" font-weight="700" fill="#e3b341">栈 (Stack) ↓向下增长</text>
+    <text x="100" y="118" text-anchor="middle" font-size="10" fill="#8b949e">函数调用 / 局部变量</text>
+    <text x="-170" y="100" fill="#8b949e" font-family="monospace" font-size="10">0x00007FFFFFFFFFFF</text>
+    <text x="220" y="106" fill="#e3b341" font-size="11">用户栈起始</text>
+
+    <!-- Hole -->
+    <rect x="0" y="130" width="200" height="26" fill="#0f1419" stroke="#30363d" stroke-dasharray="3,2"/>
+    <text x="100" y="148" text-anchor="middle" font-size="10" fill="#8b949e">未映射空间 (栈与 mmap 区之间)</text>
+
+    <!-- mmap area -->
+    <rect x="0" y="156" width="200" height="60" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+    <text x="100" y="178" text-anchor="middle" font-weight="700" fill="#bc8cff">mmap 区域 ↓</text>
+    <text x="100" y="196" text-anchor="middle" font-size="10" fill="#8b949e">共享库 / 匿名映射</text>
+    <text x="100" y="210" text-anchor="middle" font-size="10" fill="#8b949e">malloc 大块 (>128KB)</text>
+    <text x="220" y="190" fill="#bc8cff" font-size="11">mmap_base</text>
+
+    <!-- Hole -->
+    <rect x="0" y="216" width="200" height="20" fill="#0f1419" stroke="#30363d" stroke-dasharray="3,2"/>
+
+    <!-- Heap -->
+    <rect x="0" y="236" width="200" height="48" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+    <text x="100" y="254" text-anchor="middle" font-weight="700" fill="#56d364">堆 (Heap) ↑向上增长</text>
+    <text x="100" y="272" text-anchor="middle" font-size="10" fill="#8b949e">brk/sbrk · malloc 小块</text>
+    <text x="220" y="262" fill="#56d364" font-size="11">start_brk → brk</text>
+
+    <!-- BSS -->
+    <rect x="0" y="284" width="200" height="30" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="100" y="304" text-anchor="middle" fill="#58a6ff" font-weight="700">.bss 段 (未初始化全局)</text>
+
+    <!-- Data -->
+    <rect x="0" y="314" width="200" height="30" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+    <text x="100" y="334" text-anchor="middle" fill="#58a6ff" font-weight="700">.data 段 (已初始化全局)</text>
+
+    <!-- Text -->
+    <rect x="0" y="344" width="200" height="46" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+    <text x="100" y="364" text-anchor="middle" font-weight="700" fill="#ff7b29">.text 段 (代码, 只读)</text>
+    <text x="100" y="380" text-anchor="middle" font-size="10" fill="#8b949e">由 ELF 加载</text>
+    <text x="-170" y="368" fill="#8b949e" font-family="monospace" font-size="10">0x00400000</text>
+
+    <!-- Reserved -->
+    <rect x="0" y="390" width="200" height="50" fill="#0f1419" stroke="#30363d"/>
+    <text x="100" y="408" text-anchor="middle" font-size="11" fill="#f85149">不可访问 (NULL 指针保护)</text>
+    <text x="100" y="426" text-anchor="middle" font-size="10" fill="#8b949e">0x0 ~ 0x400000</text>
+    <text x="-170" y="430" fill="#8b949e" font-family="monospace" font-size="10">0x0000000000000000</text>
+  </g>
+
+  <!-- Right side: detail box -->
+  <g transform="translate(450, 60)">
+    <text x="0" y="14" font-weight="700" fill="#ff7b29" font-size="14">关键内核数据结构</text>
+
+    <rect x="0" y="30" width="420" height="100" rx="6" fill="#1a2028" stroke="#58a6ff"/>
+    <text x="10" y="50" font-weight="700" fill="#58a6ff">struct mm_struct</text>
+    <text x="10" y="68" font-size="11" fill="#8b949e">每进程一个,描述完整虚拟地址空间</text>
+    <text x="10" y="86" font-family="monospace" font-size="11" fill="#e6edf3">struct vm_area_struct *mmap;</text>
+    <text x="10" y="102" font-family="monospace" font-size="11" fill="#e6edf3">struct rb_root mm_rb;</text>
+    <text x="10" y="118" font-family="monospace" font-size="11" fill="#e6edf3">pgd_t *pgd; unsigned long brk;</text>
+
+    <rect x="0" y="150" width="420" height="100" rx="6" fill="#1a2028" stroke="#56d364"/>
+    <text x="10" y="170" font-weight="700" fill="#56d364">struct vm_area_struct (VMA)</text>
+    <text x="10" y="188" font-size="11" fill="#8b949e">表示一段连续的虚拟地址区域</text>
+    <text x="10" y="206" font-family="monospace" font-size="11" fill="#e6edf3">unsigned long vm_start, vm_end;</text>
+    <text x="10" y="222" font-family="monospace" font-size="11" fill="#e6edf3">unsigned long vm_flags;</text>
+    <text x="10" y="238" font-family="monospace" font-size="11" fill="#e6edf3">struct file *vm_file; (映射的文件)</text>
+
+    <rect x="0" y="270" width="420" height="115" rx="6" fill="#1a2028" stroke="#bc8cff"/>
+    <text x="10" y="290" font-weight="700" fill="#bc8cff">VMA 标志位</text>
+    <text x="10" y="308" font-family="monospace" font-size="11" fill="#e6edf3">VM_READ / VM_WRITE / VM_EXEC</text>
+    <text x="10" y="324" font-family="monospace" font-size="11" fill="#e6edf3">VM_SHARED (未置位 = 私有映射, MAP_PRIVATE)</text>
+    <text x="10" y="340" font-family="monospace" font-size="11" fill="#e6edf3">VM_GROWSDOWN (栈)</text>
+    <text x="10" y="356" font-family="monospace" font-size="11" fill="#e6edf3">VM_LOCKED (mlock)</text>
+    <text x="10" y="372" font-family="monospace" font-size="11" fill="#e6edf3">VM_HUGETLB (大页)</text>
+  </g>
+
+  <!-- Caption -->
+  <text x="450" y="495" text-anchor="middle" font-size="11" fill="#8b949e">
+    每个进程虚拟地址空间 = 一组 VMA。访问越界 → 页错误 → 内核找不到对应 VMA → SIGSEGV
+  </text>
+  <text x="450" y="512" text-anchor="middle" font-size="11" fill="#8b949e">
+    cat /proc/&lt;pid&gt;/maps 可以查看任意进程的所有 VMA
+  </text>
+</svg>
diff --git a/assets/style.css b/assets/style.css
new file mode 100644
index 0000000..cd58bc6
--- /dev/null
+++ b/assets/style.css
@@ -0,0 +1,456 @@
+/* ============================================================
+   Linux Kernel Learning Guide — Stylesheet
+   一份用于专家级 Linux 内核学习网站的样式
+   ============================================================ */
+
+:root {
+    --bg: #0f1419;
+    --bg-elev: #1a2028;
+    --bg-code: #11171f;
+    --fg: #e6edf3;
+    --fg-muted: #8b949e;
+    --accent: #ff7b29;       /* Tux 橙 */
+    --accent-soft: rgba(255, 123, 41, 0.15);
+    --accent-2: #58a6ff;     /* Linux 蓝 */
+    --green: #56d364;
+    --red: #f85149;
+    --yellow: #e3b341;
+    --purple: #bc8cff;
+    --border: #30363d;
+    --border-soft: #21262d;
+    --shadow: 0 4px 16px rgba(0, 0, 0, 0.4);
+    --radius: 8px;
+    --radius-sm: 4px;
+    --mono: "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+    --sans: -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC",
+            "Microsoft YaHei", "Helvetica Neue", Arial, sans-serif;
+}
+
+@media (prefers-color-scheme: light) {
+    :root {
+        --bg: #ffffff;
+        --bg-elev: #f6f8fa;
+        --bg-code: #f6f8fa;
+        --fg: #1f2328;
+        --fg-muted: #656d76;
+        --accent: #d96714;
+        --accent-soft: rgba(217, 103, 20, 0.1);
+        --accent-2: #0969da;
+        --green: #1a7f37;
+        --red: #cf222e;
+        --yellow: #9a6700;
+        --purple: #8250df;
+        --border: #d0d7de;
+        --border-soft: #eaeef2;
+        --shadow: 0 1px 6px rgba(0, 0, 0, 0.08);
+    }
+}
+
+* { box-sizing: border-box; }
+
+html { scroll-behavior: smooth; }
+
+body {
+    background: var(--bg);
+    color: var(--fg);
+    font-family: var(--sans);
+    font-size: 16px;
+    line-height: 1.7;
+    margin: 0;
+    -webkit-font-smoothing: antialiased;
+}
+
+/* ─── Layout ─────────────────────────────────────────── */
+.layout {
+    display: grid;
+    grid-template-columns: 260px 1fr;
+    min-height: 100vh;
+}
+
+.sidebar {
+    background: var(--bg-elev);
+    border-right: 1px solid var(--border);
+    padding: 24px 16px;
+    position: sticky;
+    top: 0;
+    height: 100vh;
+    overflow-y: auto;
+    font-size: 14px;
+}
+
+.sidebar .brand {
+    font-weight: 700;
+    font-size: 18px;
+    color: var(--accent);
+    margin-bottom: 8px;
+    display: flex;
+    align-items: center;
+    gap: 8px;
+}
+
+.sidebar .brand-sub {
+    font-size: 12px;
+    color: var(--fg-muted);
+    margin-bottom: 20px;
+}
+
+.sidebar nav ul {
+    list-style: none;
+    padding: 0;
+    margin: 0 0 16px 0;
+}
+
+.sidebar nav li { margin: 2px 0; }
+
+.sidebar nav a {
+    display: block;
+    padding: 6px 12px;
+    border-radius: var(--radius-sm);
+    color: var(--fg-muted);
+    text-decoration: none;
+    transition: all 0.15s ease;
+}
+
+.sidebar nav a:hover,
+.sidebar nav a.active {
+    background: var(--accent-soft);
+    color: var(--accent);
+}
+
+.sidebar nav .section-title {
+    font-size: 11px;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+    color: var(--fg-muted);
+    margin: 18px 12px 6px;
+    font-weight: 700;
+}
+
+.content {
+    padding: 48px 64px;
+    max-width: 1100px;
+    margin: 0 auto;
+    width: 100%;
+}
+
+@media (max-width: 900px) {
+    .layout { grid-template-columns: 1fr; }
+    .sidebar { position: relative; height: auto; }
+    .content { padding: 24px; }
+}
+
+/* ─── Typography ─────────────────────────────────────── */
+h1, h2, h3, h4 {
+    color: var(--fg);
+    font-weight: 700;
+    line-height: 1.3;
+    margin-top: 1.8em;
+    margin-bottom: 0.6em;
+}
+
+h1 {
+    font-size: 2.2em;
+    border-bottom: 2px solid var(--accent);
+    padding-bottom: 12px;
+    margin-top: 0;
+    display: flex;
+    align-items: center;
+    gap: 12px;
+}
+
+h1 .chapter-num {
+    background: var(--accent);
+    color: white;
+    padding: 4px 12px;
+    border-radius: var(--radius);
+    font-size: 0.6em;
+}
+
+h2 {
+    font-size: 1.6em;
+    border-bottom: 1px solid var(--border);
+    padding-bottom: 8px;
+}
+
+h3 { font-size: 1.25em; color: var(--accent-2); }
+h4 { font-size: 1.05em; color: var(--accent); }
+
+p { margin: 0.8em 0; }
+a { color: var(--accent-2); }
+a:hover { text-decoration: underline; }
+hr { border: 0; border-top: 1px solid var(--border); margin: 32px 0; }
+
+/* ─── Code ─────────────────────────────────────────── */
+code {
+    font-family: var(--mono);
+    background: var(--bg-code);
+    border: 1px solid var(--border-soft);
+    padding: 1px 6px;
+    border-radius: var(--radius-sm);
+    font-size: 0.88em;
+    color: var(--accent);
+}
+
+pre {
+    background: var(--bg-code);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 16px 20px;
+    overflow-x: auto;
+    line-height: 1.5;
+    font-size: 14px;
+    margin: 16px 0;
+    position: relative;
+}
+
+pre code {
+    background: transparent;
+    border: none;
+    padding: 0;
+    font-size: inherit;
+    color: var(--fg);
+}
+
+pre.code-c::before,
+pre.code-asm::before,
+pre.code-bash::before {
+    position: absolute;
+    top: 0;
+    right: 12px;
+    font-size: 10px;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--fg-muted);
+    padding: 4px 8px;
+}
+pre.code-c::before { content: "C"; }
+pre.code-asm::before { content: "ASM"; }
+pre.code-bash::before { content: "BASH"; }
+
+/* Syntax-like coloring (manual spans) */
+.kw   { color: var(--purple); }   /* keyword */
+.fn   { color: var(--accent-2); } /* function */
+.str  { color: var(--green); }    /* string */
+.num  { color: var(--yellow); }   /* number */
+.cm   { color: var(--fg-muted); font-style: italic; } /* comment */
+.tp   { color: var(--accent); }   /* type */
+
+/* ─── Tables ─────────────────────────────────────────── */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 16px 0;
+    font-size: 0.95em;
+}
+
+th, td {
+    text-align: left;
+    padding: 10px 14px;
+    border-bottom: 1px solid var(--border-soft);
+}
+
+th {
+    background: var(--bg-elev);
+    color: var(--accent);
+    font-weight: 700;
+    border-bottom: 2px solid var(--border);
+}
+
+tr:hover td { background: var(--accent-soft); }
+
+/* ─── Callout boxes ─────────────────────────────────── */
+.callout {
+    border-left: 4px solid var(--accent-2);
+    background: var(--bg-elev);
+    border-radius: var(--radius);
+    padding: 16px 20px;
+    margin: 20px 0;
+    box-shadow: var(--shadow);
+}
+
+.callout.tip    { border-left-color: var(--green); }
+.callout.warn   { border-left-color: var(--yellow); }
+.callout.danger { border-left-color: var(--red); }
+.callout.deep   { border-left-color: var(--purple); }
+
+.callout .label {
+    font-weight: 700;
+    font-size: 12px;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    margin-bottom: 8px;
+    display: flex;
+    align-items: center;
+    gap: 6px;
+}
+
+.callout.tip .label    { color: var(--green); }
+.callout.warn .label   { color: var(--yellow); }
+.callout.danger .label { color: var(--red); }
+.callout.deep .label   { color: var(--purple); }
+.callout .label::before {
+    content: ""; width: 8px; height: 8px; border-radius: 50%;
+    background: currentColor; display: inline-block;
+}
+
+/* ─── Cards (used on index) ─────────────────────────── */
+.card-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
+    gap: 16px;
+    margin: 24px 0;
+}
+
+.card {
+    background: var(--bg-elev);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 20px;
+    transition: all 0.2s ease;
+    text-decoration: none;
+    color: var(--fg);
+    display: block;
+}
+
+.card:hover {
+    transform: translateY(-2px);
+    border-color: var(--accent);
+    box-shadow: var(--shadow);
+    text-decoration: none;
+}
+
+.card .num {
+    color: var(--accent);
+    font-weight: 700;
+    font-size: 12px;
+    letter-spacing: 0.1em;
+}
+
+.card .title {
+    font-weight: 700;
+    font-size: 18px;
+    margin: 6px 0;
+    color: var(--fg);
+}
+
+.card .desc {
+    color: var(--fg-muted);
+    font-size: 14px;
+    margin: 0;
+}
+
+/* ─── SVG diagrams ─────────────────────────────────── */
+.diagram {
+    margin: 24px auto;
+    text-align: center;
+    background: var(--bg-elev);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 20px;
+    overflow-x: auto;
+}
+
+.diagram svg, .diagram img { max-width: 100%; height: auto; } /* diagrams may be inline <svg> or embedded via <img>; keep both responsive */
+
+.diagram .caption {
+    margin-top: 12px;
+    color: var(--fg-muted);
+    font-size: 14px;
+    font-style: italic;
+}
+
+/* ─── Tabs ─────────────────────────────────────────── */
+.tabs {
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    margin: 16px 0;
+    overflow: hidden;
+}
+
+.tab-headers {
+    display: flex;
+    background: var(--bg-elev);
+    border-bottom: 1px solid var(--border);
+}
+
+.tab-headers button {
+    background: transparent;
+    border: none;
+    color: var(--fg-muted);
+    padding: 10px 18px;
+    cursor: pointer;
+    font-family: inherit;
+    font-size: 14px;
+    border-bottom: 2px solid transparent;
+    transition: all 0.15s;
+}
+
+.tab-headers button.active {
+    color: var(--accent);
+    border-bottom-color: var(--accent);
+}
+
+.tab-body { padding: 20px; }
+.tab-pane { display: none; }
+.tab-pane.active { display: block; }
+
+/* ─── Footer ─────────────────────────────────────── */
+footer.page-footer {
+    margin-top: 60px;
+    padding: 24px 0;
+    border-top: 1px solid var(--border);
+    color: var(--fg-muted);
+    font-size: 13px;
+    text-align: center;
+}
+
+/* ─── Anchor links on headings ─────────────────────── */
+h2[id], h3[id] {
+    scroll-margin-top: 24px;
+}
+
+/* ─── Reading list ─────────────────────────────────── */
+.book-list {
+    list-style: none;
+    padding: 0;
+}
+
+.book-list li {
+    padding: 12px 16px;
+    margin: 8px 0;
+    background: var(--bg-elev);
+    border-left: 3px solid var(--accent);
+    border-radius: var(--radius-sm);
+}
+
+.book-list .title { font-weight: 700; color: var(--fg); }
+.book-list .author { color: var(--fg-muted); font-size: 0.9em; }
+.book-list .why { font-size: 0.9em; margin-top: 4px; color: var(--fg-muted); }
+
+/* ─── Two-column compare ─────────────────────────── */
+.compare {
+    display: grid;
+    grid-template-columns: 1fr 1fr;
+    gap: 16px;
+    margin: 20px 0;
+}
+
+.compare > div {
+    background: var(--bg-elev);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 16px;
+}
+
+.compare h4 { margin-top: 0; }
+
+@media (max-width: 700px) {
+    .compare { grid-template-columns: 1fr; }
+}
+
+/* Print */
+@media print {
+    .sidebar { display: none; }
+    .layout { grid-template-columns: 1fr; }
+    pre, .diagram { page-break-inside: avoid; }
+}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..341f68a
--- /dev/null
+++ b/index.html
@@ -0,0 +1,227 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Linux 内核学习指南 · 从 0.11 到 6.x · 专家级路径</title>
+<link rel="stylesheet" href="assets/style.css">
+</head>
+<body>
+<div class="layout">
+    <aside class="sidebar">
+        <div class="brand">🐧 Linux 内核学习</div>
+        <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+        <nav>
+            <div class="section-title">入门 & 准备</div>
+            <ul>
+                <li><a href="index.html" class="active">🏠 总目录</a></li>
+                <li><a href="00-学习路线/index.html">00 · 学习路线</a></li>
+                <li><a href="01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+                <li><a href="02-环境搭建/index.html">02 · 环境搭建</a></li>
+            </ul>
+            <div class="section-title">核心子系统</div>
+            <ul>
+                <li><a href="03-进程管理/index.html">03 · 进程管理</a></li>
+                <li><a href="04-内存管理/index.html">04 · 内存管理</a></li>
+                <li><a href="05-文件系统/index.html">05 · 文件系统</a></li>
+                <li><a href="06-系统调用/index.html">06 · 系统调用</a></li>
+                <li><a href="07-设备驱动/index.html">07 · 设备驱动</a></li>
+                <li><a href="08-网络子系统/index.html">08 · 网络子系统</a></li>
+                <li><a href="09-同步机制/index.html">09 · 同步机制</a></li>
+            </ul>
+            <div class="section-title">专家级深入</div>
+            <ul>
+                <li><a href="10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+                <li><a href="11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+                <li><a href="12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+                <li><a href="13-中断与异常/index.html">13 · 中断与异常</a></li>
+                <li><a href="14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+                <li><a href="15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+            </ul>
+        </nav>
+    </aside>
+
+    <main class="content">
+        <h1>🐧 Linux 内核学习指南</h1>
+        <p style="font-size: 1.1em; color: var(--fg-muted);">
+            一份系统性的、从入门到专家级的 Linux 内核学习路径。
+            以 <b>Linux 0.11</b>（仅 14000 行）为切入点理解全貌，以 <b>Linux 2.6 / 5.x / 6.x</b> 对照现代演进。
+            每章包含：核心机制讲解、源码逐行解析、SVG 架构图、可动手实验。
+        </p>
+
+        <div class="callout deep">
+            <div class="label">为什么这样学</div>
+            <p>
+                现代 Linux 内核已超过 <b>3000 万行代码</b>，直接读完不现实。
+                本指南遵循<b>"小内核入门 → 现代内核拓展 → 专家级专题"</b>的路径：
+            </p>
+            <ol>
+                <li><b>Linux 0.11</b>：14k 行包含 OS 全部核心概念，可完整读完</li>
+                <li><b>Linux 2.6.0</b>：现代框架成型，VFS/kobject/RCU 全部就位</li>
+                <li><b>Linux 5.x/6.x</b>：CFS、eBPF、io_uring、容器、KVM 等专题深入</li>
+            </ol>
+        </div>
+
+        <h2>🗺️ 内核架构全景</h2>
+        <div class="diagram">
+            <img src="assets/diagrams/arch-overview.svg" alt="Linux 内核架构总览">
+            <div class="caption">用户空间 → 系统调用 → 内核子系统 → 驱动 → 硬件</div>
+        </div>
+
+        <h2>📚 章节导航</h2>
+        <h3>入门与准备</h3>
+        <div class="card-grid">
+            <a class="card" href="00-学习路线/index.html">
+                <div class="num">CHAPTER 00</div>
+                <div class="title">学习路线</div>
+                <p class="desc">四阶段路线图、每周计划、推荐书单、每日学习节奏。</p>
+            </a>
+            <a class="card" href="01-经典版本选择/index.html">
+                <div class="num">CHAPTER 01</div>
+                <div class="title">经典版本选择</div>
+                <p class="desc">0.11 / 2.6.0 / 4.x / 5.x / 6.x 全面对比，源码导航。</p>
+            </a>
+            <a class="card" href="02-环境搭建/index.html">
+                <div class="num">CHAPTER 02</div>
+                <div class="title">环境搭建</div>
+                <p class="desc">QEMU + GDB + VS Code + BusyBox + KGDB + crash 全套工具链。</p>
+            </a>
+        </div>
+
+        <h3>核心子系统</h3>
+        <div class="card-grid">
+            <a class="card" href="03-进程管理/index.html">
+                <div class="num">CHAPTER 03</div>
+                <div class="title">进程管理</div>
+                <p class="desc">task_struct、fork、调度器、上下文切换、preemption、idle/init。</p>
+            </a>
+            <a class="card" href="04-内存管理/index.html">
+                <div class="num">CHAPTER 04</div>
+                <div class="title">内存管理</div>
+                <p class="desc">页表/TLB、buddy/slab、NUMA、page cache、kswapd、OOM、THP。</p>
+            </a>
+            <a class="card" href="05-文件系统/index.html">
+                <div class="num">CHAPTER 05</div>
+                <div class="title">文件系统</div>
+                <p class="desc">VFS 四大对象、ext2/ext4、日志、page cache 写回、fsync、io_uring。</p>
+            </a>
+            <a class="card" href="06-系统调用/index.html">
+                <div class="num">CHAPTER 06</div>
+                <div class="title">系统调用</div>
+                <p class="desc">int 0x80 / syscall / sysenter、vDSO、seccomp、自定义 syscall。</p>
+            </a>
+            <a class="card" href="07-设备驱动/index.html">
+                <div class="num">CHAPTER 07</div>
+                <div class="title">设备驱动</div>
+                <p class="desc">设备模型、字符/块/平台驱动、设备树、MSI、DMA、IRQ。</p>
+            </a>
+            <a class="card" href="08-网络子系统/index.html">
+                <div class="num">CHAPTER 08</div>
+                <div class="title">网络子系统</div>
+                <p class="desc">sk_buff、TCP 状态机、netfilter、conntrack、NAPI、XDP。</p>
+            </a>
+            <a class="card" href="09-同步机制/index.html">
+                <div class="num">CHAPTER 09</div>
+                <div class="title">同步机制</div>
+                <p class="desc">原子操作、自旋锁、mutex、RCU、futex、percpu、memory barrier。</p>
+            </a>
+        </div>
+
+        <h3>专家级深入</h3>
+        <div class="card-grid">
+            <a class="card" href="10-CFS调度器/index.html">
+                <div class="num">CHAPTER 10</div>
+                <div class="title">CFS 调度器深入</div>
+                <p class="desc">vruntime、红黑树、调度域、负载均衡、EAS、autogroup。</p>
+            </a>
+            <a class="card" href="11-容器与命名空间/index.html">
+                <div class="num">CHAPTER 11</div>
+                <div class="title">容器与命名空间</div>
+                <p class="desc">8 种 namespace、cgroups v1/v2、OverlayFS、runc 内部原理。</p>
+            </a>
+            <a class="card" href="12-eBPF与可观测性/index.html">
+                <div class="num">CHAPTER 12</div>
+                <div class="title">eBPF 与可观测性</div>
+                <p class="desc">verifier、JIT、maps、XDP、kprobe、uprobe、bpftrace 实战。</p>
+            </a>
+            <a class="card" href="13-中断与异常/index.html">
+                <div class="num">CHAPTER 13</div>
+                <div class="title">中断与异常</div>
+                <p class="desc">IRQ 子系统、softirq、tasklet、workqueue、threaded IRQ、IPI。</p>
+            </a>
+            <a class="card" href="14-启动流程深入/index.html">
+                <div class="num">CHAPTER 14</div>
+                <div class="title">启动流程深入</div>
+                <p class="desc">BIOS/UEFI、bootloader、EFI stub、KASLR、ACPI、initramfs、systemd。</p>
+            </a>
+            <a class="card" href="15-内核调试与性能/index.html">
+                <div class="num">CHAPTER 15</div>
+                <div class="title">内核调试与性能</div>
+                <p class="desc">ftrace、perf、KASAN、lockdep、livepatch、kdump、crash dump 分析。</p>
+            </a>
+        </div>
+
+        <h2>📖 配套书单</h2>
+        <ul class="book-list">
+            <li>
+                <div class="title">《Linux 内核完全注释》</div>
+                <div class="author">赵炯</div>
+                <div class="why">入门首选 · 配合 Linux 0.11 源码逐行注释，中文资料中无可替代</div>
+            </li>
+            <li>
+                <div class="title">《深入理解 Linux 内核》(Understanding the Linux Kernel)</div>
+                <div class="author">Daniel P. Bovet & Marco Cesati</div>
+                <div class="why">进阶必读 · 基于 2.6 内核全面拆解，是中级开发者的"圣经"</div>
+            </li>
+            <li>
+                <div class="title">《Linux 设备驱动程序》(Linux Device Drivers, 3rd ed)</div>
+                <div class="author">Jonathan Corbet, Alessandro Rubini, Greg Kroah-Hartman</div>
+                <div class="why">驱动开发权威 · 官方电子版免费 (LWN)</div>
+            </li>
+            <li>
+                <div class="title">《Linux 内核设计与实现》(Linux Kernel Development)</div>
+                <div class="author">Robert Love</div>
+                <div class="why">综合理解 · 文笔流畅，覆盖广，适合通读</div>
+            </li>
+            <li>
+                <div class="title">《深入 Linux 内核架构》(Professional Linux Kernel Architecture)</div>
+                <div class="author">Wolfgang Mauerer</div>
+                <div class="why">深度参考 · 1000+ 页详尽剖析，作字典用</div>
+            </li>
+            <li>
+                <div class="title">《BPF Performance Tools》</div>
+                <div class="author">Brendan Gregg</div>
+                <div class="why">现代可观测性 · 学完第 12 章后必读</div>
+            </li>
+            <li>
+                <div class="title">《操作系统：精髓与设计原理》</div>
+                <div class="author">William Stallings</div>
+                <div class="why">理论基础 · 没有 OS 背景请先读此</div>
+            </li>
+        </ul>
+
+        <h2>🔗 关键资源</h2>
+        <ul>
+            <li>📂 在线源码浏览（最佳）：<a href="https://elixir.bootlin.com/linux">elixir.bootlin.com/linux</a></li>
+            <li>📦 Linux 0.11 源码：<a href="https://github.com/karottc/linux-0.11">github.com/karottc/linux-0.11</a></li>
+            <li>📦 Linux 2.6.0 源码：<a href="https://mirrors.edge.kernel.org/pub/linux/kernel/v2.6/">mirrors.edge.kernel.org</a></li>
+            <li>📜 官方文档：<a href="https://www.kernel.org/doc/html/latest/">kernel.org/doc/html/latest</a></li>
+            <li>✉️ LKML 邮件列表：<a href="https://lkml.org/">lkml.org</a></li>
+            <li>📰 LWN 技术新闻：<a href="https://lwn.net/">lwn.net</a></li>
+            <li>🛠️ 内核新手任务：<a href="https://kernelnewbies.org/">kernelnewbies.org</a></li>
+        </ul>
+
+        <div class="callout tip">
+            <div class="label">使用建议</div>
+            <p>建议按顺序学习 00 → 02，之后核心子系统 (03~09) 可按兴趣穿插，
+               专家章节 (10~15) 待核心扎实后再读。每章末尾都有"动手实验"小节，
+               <b>务必在 QEMU 中亲自跑过 GDB 才能真正掌握</b>。</p>
+        </div>
+
+        <footer class="page-footer">
+            <p>Linux 内核学习指南 · 持续迭代中 · 欢迎 PR · Made with ❤️ for kernel hackers</p>
+        </footer>
+    </main>
+</div>
+</body>
+</html>

From d414dbb2eaddbd18d7612a10e87c913d5b87b75e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 09:48:23 +0000
Subject: [PATCH 04/10] feat: HTML chapters 01-09 (existing topics) with rich
 SVG and expert content

Agent-Logs-Url: https://github.com/YYCB/how_to_learn_linux/sessions/1cb19491-1fd8-4418-9a44-f972d8161633

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../index.html"                               | 230 +++++++++++
 .../index.html"                               | 243 +++++++++++
 .../index.html"                               | 378 ++++++++++++++++++
 .../index.html"                               | 263 ++++++++++++
 .../index.html"                               | 251 ++++++++++++
 .../index.html"                               | 189 +++++++++
 .../index.html"                               | 263 ++++++++++++
 .../index.html"                               | 253 ++++++++++++
 .../index.html"                               | 254 ++++++++++++
 9 files changed, 2324 insertions(+)
 create mode 100644 "01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/index.html"
 create mode 100644 "02-\347\216\257\345\242\203\346\220\255\345\273\272/index.html"
 create mode 100644 "03-\350\277\233\347\250\213\347\256\241\347\220\206/index.html"
 create mode 100644 "04-\345\206\205\345\255\230\347\256\241\347\220\206/index.html"
 create mode 100644 "05-\346\226\207\344\273\266\347\263\273\347\273\237/index.html"
 create mode 100644 "06-\347\263\273\347\273\237\350\260\203\347\224\250/index.html"
 create mode 100644 "07-\350\256\276\345\244\207\351\251\261\345\212\250/index.html"
 create mode 100644 "08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/index.html"
 create mode 100644 "09-\345\220\214\346\255\245\346\234\272\345\210\266/index.html"

diff --git "a/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/index.html" "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/index.html"
new file mode 100644
index 0000000..8dee28b
--- /dev/null
+++ "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/index.html"
@@ -0,0 +1,230 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>01 · 经典版本选择 — 为什么是 0.11 + 2.6 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html" class="active">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">01</span>经典版本选择 — 为什么是 0.11 + 2.6</h1>
+
+
+<p>本章回答一个学习者最关心的问题：<b>"3000 万行代码的内核，我到底该读哪个版本？"</b></p>
+
+<div class="callout deep">
+<div class="label">一句话答案</div>
+<p>主读 <b>Linux 0.11</b>（14k 行，可全读完），辅读 <b>Linux 2.6.0</b>（约 590 万行，只做专题对照，学现代框架），关注 <b>5.x/6.x</b> 的特定子系统（专题学习）。</p>
+</div>
+
+<h2 id="why">为什么不直接读最新版?</h2>
+<table>
+<tr><th>版本</th><th>发布年</th><th>代码行数</th><th>读完时间估计</th><th>适合阶段</th></tr>
+<tr><td>0.01</td><td>1991</td><td>~10,000</td><td>—</td><td>残缺品，不推荐</td></tr>
+<tr><td><b>0.11</b></td><td>1991</td><td><b>~14,000</b></td><td><b>2~3 月可读完</b></td><td><b>★ 入门首选</b></td></tr>
+<tr><td>0.99</td><td>1993</td><td>~50,000</td><td>—</td><td>已无人维护资料</td></tr>
+<tr><td>1.0</td><td>1994</td><td>~170,000</td><td>太长</td><td>跳过</td></tr>
+<tr><td>2.4.0</td><td>2001</td><td>~3,000,000</td><td>不可能</td><td>查阅</td></tr>
+<tr><td><b>2.6.0</b></td><td>2003</td><td><b>~5,900,000</b></td><td>专题读，3~6 月</td><td><b>★ 现代框架对照</b></td></tr>
+<tr><td>3.x</td><td>2011</td><td>~15,000,000</td><td>太长</td><td>专题</td></tr>
+<tr><td>5.x</td><td>2019</td><td>~28,000,000</td><td>太长</td><td>专题</td></tr>
+<tr><td>6.x</td><td>2022</td><td>~33,000,000+</td><td>太长</td><td>专题</td></tr>
+</table>
+
+<h2 id="011">为什么 Linux 0.11 是黄金切入点</h2>
+<ol>
+    <li><b>麻雀虽小五脏俱全</b>：进程、内存、文件系统、TTY、磁盘 IO 全部有</li>
+    <li><b>线性结构清晰</b>：没有 RCU、没有 SMP、没有大量抽象层包装</li>
+    <li><b>能跑能调</b>：QEMU 上 1 秒启动，GDB 单步无障碍</li>
+    <li><b>中文资料完备</b>：赵炯《Linux 内核完全注释》逐行讲解</li>
+    <li><b>核心概念全部出现</b>：fork、调度、缺页、syscall、缓冲区、tty 行规范……</li>
+</ol>
+
+<h2 id="011-tree">Linux 0.11 源码目录结构</h2>
+<pre class="code-bash">linux-0.11/
+├── <span class="str">boot/</span>         <span class="cm"># 引导：bootsect.s + setup.s + head.s (实模式→保护模式)</span>
+├── <span class="str">init/</span>         <span class="cm"># main.c (内核入口、所有子系统初始化)</span>
+├── <span class="str">kernel/</span>       
+│   ├── sched.c       <span class="cm"># 调度器（仅 ~300 行!）</span>
+│   ├── fork.c        <span class="cm"># 进程创建</span>
+│   ├── exit.c        <span class="cm"># 进程退出</span>
+│   ├── signal.c      <span class="cm"># 信号机制</span>
+│   ├── system_call.s <span class="cm"># 系统调用入口（汇编）</span>
+│   ├── traps.c       <span class="cm"># 异常处理</span>
+│   ├── blk_drv/      <span class="cm"># 块设备驱动 (硬盘、软盘、ram disk)</span>
+│   ├── chr_drv/      <span class="cm"># 字符设备驱动 (tty、串口、键盘)</span>
+│   └── math/         <span class="cm"># 数学协处理器仿真</span>
+├── <span class="str">mm/</span>           <span class="cm"># 内存管理</span>
+│   ├── memory.c      <span class="cm"># 页面分配、缺页处理</span>
+│   └── page.s        <span class="cm"># 缺页异常入口（汇编）</span>
+├── <span class="str">fs/</span>           <span class="cm"># 文件系统 (Minix FS)</span>
+│   ├── inode.c       <span class="cm"># inode 管理</span>
+│   ├── buffer.c      <span class="cm"># 缓冲块管理</span>
+│   ├── file_table.c  <span class="cm"># 文件表</span>
+│   ├── namei.c       <span class="cm"># 路径解析</span>
+│   ├── read_write.c  <span class="cm"># 读写</span>
+│   └── ... 等 20+ 个文件
+├── <span class="str">lib/</span>          <span class="cm"># 内核内 C 库 (close, exit, dup ...)</span>
+├── <span class="str">include/</span>      <span class="cm"># 头文件</span>
+└── <span class="str">tools/</span>        <span class="cm"># build 工具</span>
+</pre>
+
+<h2 id="2.6">为什么 2.6.0 是现代内核的"创世版"</h2>
+
+<p>2.6 在 2003 年发布，相对 2.4 是一次<b>大规模框架重构</b>，奠定了至今未变的核心架构：</p>
+
+<table>
+<tr><th>子系统</th><th>2.4 (旧)</th><th>2.6 (现代框架)</th><th>之后版本</th></tr>
+<tr><td>调度器</td><td>O(n) 遍历</td><td><b>O(1) 调度器</b></td><td>2.6.23 起换成 CFS</td></tr>
+<tr><td>内存</td><td>简单</td><td>NUMA、反向映射 (rmap)</td><td>沿用</td></tr>
+<tr><td>文件系统</td><td>VFS 较简单</td><td><b>VFS 重构 + sysfs</b></td><td>沿用</td></tr>
+<tr><td>设备模型</td><td>各自为政</td><td><b>kobject / sysfs 统一</b></td><td>沿用</td></tr>
+<tr><td>线程</td><td>LinuxThreads</td><td><b>NPTL (futex 基础)</b></td><td>沿用</td></tr>
+<tr><td>同步</td><td>仅 spinlock/sem</td><td><b>RCU 大量引入</b></td><td>沿用并扩展</td></tr>
+<tr><td>抢占</td><td>非抢占</td><td><b>CONFIG_PREEMPT 可选</b></td><td>沿用</td></tr>
+</table>
+
+<h2 id="task_struct">直观对比：task_struct 的演化</h2>
+<div class="compare">
+<div>
+<h4>Linux 0.11 <code>task_struct</code> (~50 字段)</h4>
+<pre class="code-c"><span class="kw">struct</span> task_struct {
+    <span class="kw">long</span> state;          <span class="cm">// 状态</span>
+    <span class="kw">long</span> counter;        <span class="cm">// 剩余时间片</span>
+    <span class="kw">long</span> priority;
+    <span class="kw">long</span> signal;
+    <span class="kw">struct</span> sigaction sigaction[<span class="num">32</span>];
+    <span class="kw">long</span> blocked;
+    <span class="kw">int</span> exit_code;
+    <span class="kw">unsigned long</span> start_code, end_code, end_data;
+    <span class="kw">unsigned long</span> brk, start_stack;
+    <span class="kw">long</span> pid, father, pgrp, session, leader;
+    <span class="kw">unsigned short</span> uid, euid, suid;
+    <span class="kw">unsigned short</span> gid, egid, sgid;
+    <span class="kw">long</span> alarm;
+    <span class="kw">long</span> utime, stime, cutime, cstime, start_time;
+    <span class="kw">unsigned short</span> used_math;
+    <span class="kw">int</span> tty;
+    <span class="kw">unsigned short</span> umask;
+    <span class="kw">struct</span> m_inode *pwd, *root, *executable;
+    <span class="kw">unsigned long</span> close_on_exec;
+    <span class="kw">struct</span> file *filp[NR_OPEN];   <span class="cm">// 仅 20 个文件！</span>
+    <span class="kw">struct</span> desc_struct ldt[<span class="num">3</span>];
+    <span class="kw">struct</span> tss_struct tss;          <span class="cm">// 硬件 TSS 切换</span>
+};</pre>
+</div>
+<div>
+<h4>Linux 6.x <code>task_struct</code> (~400 字段)</h4>
+<pre class="code-c"><span class="kw">struct</span> task_struct {
+    <span class="cm">/* 状态、栈、调度 */</span>
+    <span class="kw">unsigned int</span> __state;
+    <span class="kw">void</span> *stack;
+    <span class="kw">int</span> on_cpu, prio, static_prio, normal_prio;
+    <span class="kw">struct</span> sched_entity se;   <span class="cm">// CFS 实体</span>
+    <span class="kw">struct</span> sched_rt_entity rt; <span class="cm">// RT 实体</span>
+    <span class="kw">struct</span> sched_dl_entity dl; <span class="cm">// Deadline 实体</span>
+    
+    <span class="cm">/* 内存 */</span>
+    <span class="kw">struct</span> mm_struct *mm, *active_mm;
+    
+    <span class="cm">/* PID 命名空间 (容器关键) */</span>
+    <span class="kw">struct</span> pid *thread_pid;
+    <span class="kw">struct</span> nsproxy *nsproxy;
+    
+    <span class="cm">/* 文件、信号、IPC */</span>
+    <span class="kw">struct</span> files_struct *files;
+    <span class="kw">struct</span> fs_struct *fs;
+    <span class="kw">struct</span> signal_struct *signal;
+    
+    <span class="cm">/* SMP、CPU 亲和性、cgroup */</span>
+    cpumask_t cpus_mask;
+    <span class="kw">struct</span> css_set *cgroups;
+    
+    <span class="cm">/* perf、ftrace、tracing 钩子 */</span>
+    <span class="kw">struct</span> perf_event_context *perf_event_ctxp;
+    
+    <span class="cm">/* ……还有 ~300 个字段 */</span>
+};</pre>
+</div>
+</div>
+
+<h2 id="strategy">推荐阅读策略</h2>
+
+<div class="callout tip">
+<div class="label">"对照阅读法"</div>
+<p>读 2.6 时，每打开一个文件，<b>先回忆 0.11 对应文件做了什么</b>，再看 2.6 在此基础上加了什么。这样不会迷失在抽象层里。</p>
+<p>示例：读 <code>2.6/kernel/sched.c</code> 时，先回忆 <code>0.11/kernel/sched.c</code> 的简陋调度，再看 2.6 的 O(1) 是如何分两个 priority array 实现的。</p>
+</div>
+
+<h3>三种学习者的最优策略</h3>
+<table>
+<tr><th>背景</th><th>推荐策略</th></tr>
+<tr><td>毫无 OS 基础</td><td>先读《操作系统精髓》→ 再 0.11 精读 → 再 2.6 对照</td></tr>
+<tr><td>有 OS 课程基础</td><td>直接 0.11 精读 (2 个月) → 2.6 专题 → 选修 5.x/6.x 专题</td></tr>
+<tr><td>已写过驱动</td><td>跳过 0.11 部分；2.6 文件系统 + 网络 + 内存 直接深入</td></tr>
+</table>
+
+<h2 id="srctools">在线源码导航工具</h2>
+
+<div class="callout tip">
+<div class="label">⚡ 神器：Bootlin Elixir Cross-Referencer</div>
+<p><a href="https://elixir.bootlin.com/linux/v6.6/source">https://elixir.bootlin.com/linux/v6.6/source</a></p>
+<ul>
+<li>任意符号点击跳转定义 / 引用</li>
+<li>可以在版本之间切换（v0.11 / v2.6.39 / v6.6 都支持）</li>
+<li>比本地 ctags + vim 更直观，<b>强烈推荐设为浏览器主页</b></li>
+</ul>
+</div>
+
+<pre class="code-bash"><span class="cm"># 本地: 用 cscope 建立索引（适合离线深度浏览）</span>
+cd linux-source
+find . -name <span class="str">"*.[chS]"</span> > cscope.files
+cscope -bkq
+<span class="cm"># 在 vim 中 :cs find s symbol_name 即可跳转</span>
+</pre>
+
+
+<footer class="page-footer">
+    <p><a href="../index.html">总目录</a> · <a href="../02-环境搭建/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/02-\347\216\257\345\242\203\346\220\255\345\273\272/index.html" "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/index.html"
new file mode 100644
index 0000000..6d00227
--- /dev/null
+++ "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/index.html"
@@ -0,0 +1,243 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>02 · 环境搭建 — QEMU + GDB 全套工具链 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html" class="active">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">02</span>环境搭建 — QEMU + GDB 全套工具链</h1>
+
+
+<p>本章目标：在 <b>30 分钟内</b> 让你能够在 QEMU 中启动 Linux 内核，并用 GDB 在 <code>start_kernel()</code> 处下断点。</p>
+
+<div class="callout tip">
+<div class="label">为什么必须用 QEMU？</div>
+<p>真机调试内核 → 一旦 crash 整机重启，调试效率为 0。QEMU 是<b>软件模拟器</b>，crash 只是退出进程，可以无限次重启；更重要的是 QEMU 提供 <code>gdbserver</code> 接口，让 GDB 可以像调试用户程序一样调试内核。</p>
+</div>
+
+<h2 id="install">2.1 安装基础工具</h2>
+<pre class="code-bash"><span class="cm"># Ubuntu / Debian</span>
+sudo apt install -y \
+    build-essential gcc-multilib \
+    qemu-system-x86 qemu-utils \
+    gdb gdb-multiarch \
+    bison flex libelf-dev libssl-dev \
+    libncurses-dev bc rsync cpio \
+    git wget
+
+<span class="cm"># macOS (M1/M2 用 brew)</span>
+brew install qemu gdb x86_64-elf-gcc
+
+<span class="cm"># Fedora</span>
+sudo dnf install qemu gdb ncurses-devel openssl-devel elfutils-libelf-devel
+</pre>
+
+<h2 id="011env">2.2 Linux 0.11 环境</h2>
+
+<pre class="code-bash"><span class="cm"># 1. 克隆带注释的版本</span>
+git clone https://github.com/karottc/linux-0.11.git
+cd linux-0.11
+
+<span class="cm"># 2. 编译（需要老 gcc，可用 Docker）</span>
+docker run -it --rm -v $(pwd):/work tinyminded/gcc4 \
+    bash -c <span class="str">"cd /work && make"</span>
+
+<span class="cm"># 3. QEMU 启动</span>
+qemu-system-i386 -m 16M \
+    -boot a -fda Image \
+    -hda hdc-0.11-new.img
+<span class="cm"># 注意：0.11 只输出到 VGA 文本控制台；-append 必须配合 -kernel 使用，因此这里不加 -nographic / -append</span>
+
+<span class="cm"># 4. GDB 调试启动: 加 -s -S</span>
+qemu-system-i386 -m 16M -boot a -fda Image \
+    -hda hdc-0.11-new.img -s -S &
+
+<span class="cm"># 另一终端</span>
+gdb
+(gdb) target remote :1234
+(gdb) <span class="kw">file</span> tools/system     <span class="cm"># 加载符号</span>
+(gdb) <span class="kw">break</span> main             <span class="cm"># 在 init/main.c:main 下断</span>
+(gdb) <span class="kw">continue</span>
+</pre>
+
+<h2 id="modernenv">2.3 现代内核 (5.x/6.x) 环境</h2>
+
+<h3>构建最小可调试内核</h3>
+<pre class="code-bash"><span class="cm"># 1. 下载源码</span>
+wget https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.tar.xz
+tar xf linux-6.6.tar.xz && cd linux-6.6
+
+<span class="cm"># 2. 默认配置 + 开启调试选项</span>
+make defconfig
+./scripts/config -e DEBUG_INFO \
+                 -e DEBUG_INFO_DWARF4 \
+                 -e GDB_SCRIPTS \
+                 -e DEBUG_KERNEL \
+                 -e FRAME_POINTER \
+                 -d RANDOMIZE_BASE        <span class="cm"># 关闭 KASLR (调试方便)</span>
+
+<span class="cm"># 3. 编译 (i7 ~5 分钟)</span>
+make -j$(nproc) bzImage modules
+
+<span class="cm"># 4. 构造 initramfs</span>
+mkdir -p initramfs/{bin,sbin,etc,proc,sys,dev,usr/bin,usr/sbin}
+wget https://busybox.net/downloads/binaries/1.35.0-x86_64-linux-musl/busybox -O initramfs/bin/busybox
+chmod +x initramfs/bin/busybox
+
+cat > initramfs/init &lt;&lt;<span class="str">'EOF'</span>
+#!/bin/busybox sh
+/bin/busybox --install -s
+mount -t proc proc /proc
+mount -t sysfs sysfs /sys
+mount -t devtmpfs devtmpfs /dev
+exec /bin/sh
+EOF
+chmod +x initramfs/init
+
+(cd initramfs && find . | cpio -o -H newc | gzip > ../initramfs.img)
+
+<span class="cm"># 5. QEMU 启动</span>
+qemu-system-x86_64 -kernel arch/x86/boot/bzImage \
+    -initrd initramfs.img \
+    -append <span class="str">"console=ttyS0 nokaslr"</span> \
+    -nographic -m 512M
+</pre>
+
+<h2 id="gdb">2.4 GDB 调试现代内核</h2>
+
+<pre class="code-bash"><span class="cm"># 启动 QEMU + 等待 GDB</span>
+qemu-system-x86_64 -kernel arch/x86/boot/bzImage \
+    -initrd initramfs.img -append <span class="str">"console=ttyS0 nokaslr"</span> \
+    -nographic -m 512M -s -S
+
+<span class="cm"># 另一终端</span>
+gdb vmlinux
+(gdb) target remote :1234
+(gdb) hbreak start_kernel
+(gdb) <span class="kw">continue</span>
+
+<span class="cm"># 内核到达 start_kernel 时停住，从此可单步、看变量</span>
+(gdb) bt
+(gdb) p init_task
+(gdb) p init_task.comm
+(gdb) lx-ps              <span class="cm"># 内核自带 GDB 脚本：列出所有进程</span>
+(gdb) lx-dmesg           <span class="cm"># 显示内核日志</span>
+(gdb) lx-list-check ...  <span class="cm"># 检查链表完整性</span>
+</pre>
+
+<h2 id="vscode">2.5 VS Code 集成</h2>
+
+<p>把内核源码当成普通项目用 VS Code 打开，配合 <b>clangd</b> 可获得近乎"工业级"的代码导航：</p>
+
+<pre class="code-bash"><span class="cm"># 1. 安装 clangd 扩展，然后：</span>
+make clean
+<span class="cm"># 生成 compile_commands.json (clangd 必需)</span>
+make -j$(nproc) compile_commands.json   <span class="cm"># 内核内置目标</span>
+
+<span class="cm"># 2. .vscode/settings.json</span>
+{
+  <span class="str">"clangd.arguments"</span>: [
+    <span class="str">"--compile-commands-dir=."</span>,
+    <span class="str">"--background-index"</span>,
+    <span class="str">"--header-insertion=never"</span>
+  ]
+}
+</pre>
+
+<p>效果：F12 跳转定义、shift+F12 找所有引用、hover 显示函数签名 — 全部对内核源码生效。</p>
+
+<h2 id="advanced">2.6 高阶工具链</h2>
+
+<div class="card-grid">
+<div class="card" style="cursor:default">
+<div class="num">KGDB</div>
+<div class="title">真机内核调试</div>
+<p class="desc">通过串口 / 网络在真机上调试内核 (不靠 QEMU)。开发驱动时常用。</p>
+</div>
+<div class="card" style="cursor:default">
+<div class="num">CRASH</div>
+<div class="title">崩溃 dump 分析</div>
+<p class="desc">配合 kdump 收集的 vmcore 文件，事后分析内核 panic 原因。</p>
+</div>
+<div class="card" style="cursor:default">
+<div class="num">FTRACE</div>
+<div class="title">函数追踪</div>
+<p class="desc">/sys/kernel/debug/tracing，无侵入跟踪任意函数调用。</p>
+</div>
+<div class="card" style="cursor:default">
+<div class="num">PERF</div>
+<div class="title">性能采样</div>
+<p class="desc">perf record / report / top，分析热点函数、CPU 周期。</p>
+</div>
+<div class="card" style="cursor:default">
+<div class="num">BPFTRACE</div>
+<div class="title">动态追踪 DSL</div>
+<p class="desc">用 awk 风格脚本写动态探针。一行命令搞定复杂分析。</p>
+</div>
+<div class="card" style="cursor:default">
+<div class="num">QEMU-IMG</div>
+<div class="title">磁盘镜像工具</div>
+<p class="desc">创建、转换、查看虚拟磁盘镜像。</p>
+</div>
+</div>
+
+<h2 id="checklist">2.7 完成检查清单</h2>
+
+<div class="callout tip">
+<div class="label">✅ 完成本章后，你应该能…</div>
+<ol>
+<li>在 QEMU 中启动 Linux 0.11 看到 shell 提示符</li>
+<li>在 QEMU 中启动 Linux 6.x 看到 BusyBox shell</li>
+<li>用 GDB 在 <code>start_kernel</code> 处停住，打印 <code>init_task</code></li>
+<li>VS Code 中 F12 能跳转到任意内核函数定义</li>
+<li>知道 <code>printk</code>、<code>ftrace</code>、<code>perf</code> 三种调试手段的适用场景</li>
+</ol>
+</div>
+
+
+<footer class="page-footer">
+    <p>← <a href="../01-经典版本选择/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../03-进程管理/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/03-\350\277\233\347\250\213\347\256\241\347\220\206/index.html" "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/index.html"
new file mode 100644
index 0000000..1f18a0a
--- /dev/null
+++ "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/index.html"
@@ -0,0 +1,378 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>03 · 进程管理 — task_struct、fork、上下文切换 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html" class="active">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">03</span>进程管理 — task_struct、fork、上下文切换</h1>
+
+
+<p>进程管理是内核的<b>核心中的核心</b>。掌握本章后，你就能回答："从我敲下 <code>./hello</code> 回车到 <code>printf</code> 输出，内核里到底发生了什么？"</p>
+
+<h2 id="layout">3.1 虚拟地址空间布局</h2>
+<div class="diagram">
+<img src="../assets/diagrams/vm-layout.svg" alt="进程虚拟地址空间">
+<div class="caption">每个进程独立拥有的 128TB 虚拟地址空间（x86_64）</div>
+</div>
+
+<h2 id="task_struct">3.2 task_struct — 一切的源头</h2>
+
+<p>Linux 把所有"可运行单元"（不管是进程还是线程）都用 <code>task_struct</code> 表示。这是<b>内核里最大的结构体</b>，包含 ~400 个字段。</p>
+
+<pre class="code-c"><span class="cm">/* include/linux/sched.h — 简化版核心字段 */</span>
+<span class="kw">struct</span> task_struct {
+    <span class="cm">/* === 调度相关 === */</span>
+    <span class="kw">unsigned int</span> __state;          <span class="cm">// TASK_RUNNING / TASK_INTERRUPTIBLE / ...</span>
+    <span class="kw">void</span> *stack;                    <span class="cm">// 内核栈指针</span>
+    <span class="kw">int</span> prio, static_prio, normal_prio;
+    <span class="kw">struct</span> sched_entity se;         <span class="cm">// CFS 调度实体 (含 vruntime)</span>
+    <span class="kw">const struct</span> sched_class *sched_class;
+
+    <span class="cm">/* === 进程身份 === */</span>
+    pid_t pid;                      <span class="cm">// 进程 ID (在 PID 命名空间内)</span>
+    pid_t tgid;                     <span class="cm">// 线程组 ID = 主线程 pid</span>
+    <span class="kw">struct</span> task_struct __rcu *parent;
+    <span class="kw">struct</span> list_head children;
+    <span class="kw">struct</span> list_head sibling;
+
+    <span class="cm">/* === 内存 === */</span>
+    <span class="kw">struct</span> mm_struct *mm;          <span class="cm">// 用户进程的地址空间（线程共享）</span>
+    <span class="kw">struct</span> mm_struct *active_mm;   <span class="cm">// 内核线程"借用"的 mm</span>
+
+    <span class="cm">/* === 文件 === */</span>
+    <span class="kw">struct</span> files_struct *files;    <span class="cm">// 打开的文件表</span>
+    <span class="kw">struct</span> fs_struct *fs;          <span class="cm">// 当前目录、根目录</span>
+
+    <span class="cm">/* === 信号 === */</span>
+    <span class="kw">struct</span> signal_struct *signal;
+    <span class="kw">struct</span> sighand_struct *sighand;
+    sigset_t blocked, real_blocked;
+
+    <span class="cm">/* === 命名空间 (容器关键) === */</span>
+    <span class="kw">struct</span> nsproxy *nsproxy;       <span class="cm">// 8 个 namespace 指针</span>
+
+    <span class="cm">/* === cgroup === */</span>
+    <span class="kw">struct</span> css_set __rcu *cgroups;
+
+    <span class="cm">/* === 凭证 === */</span>
+    <span class="kw">const struct</span> cred __rcu *real_cred;
+    <span class="kw">const struct</span> cred __rcu *cred;
+
+    <span class="cm">/* === 其他: perf, ftrace, audit, seccomp ... */</span>
+    <span class="kw">char</span> comm[TASK_COMM_LEN];     <span class="cm">// 进程名（最大 16 字节）</span>
+};</pre>
+
+<div class="callout deep">
+<div class="label">深入：线程是怎么实现的？</div>
+<p>Linux 的<b>线程就是共享 mm 的进程</b>。<code>pthread_create</code> 底层调用 <code>clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | ...)</code>，
+两个 <code>task_struct</code> 共享同一个 <code>mm_struct</code>，所以共用堆和全局变量。
+这就是 Linux "1:1 线程模型"。<code>tgid</code> 字段记录主线程 pid，<code>getpid()</code> 实际返回 tgid，<code>gettid()</code> 才返回真实 pid。</p>
+</div>
+
+<h2 id="state">3.3 进程状态机</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 360" font-size="12" font-family="-apple-system,sans-serif">
+<defs>
+  <marker id="ar3" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+    <path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/>
+  </marker>
+</defs>
+<text x="400" y="24" text-anchor="middle" font-size="16" font-weight="700" fill="#ff7b29">进程状态机</text>
+
+<g><rect x="320" y="50" width="160" height="50" rx="8" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+<text x="400" y="72" text-anchor="middle" fill="#56d364" font-weight="700">TASK_RUNNING (R)</text>
+<text x="400" y="90" text-anchor="middle" fill="#8b949e" font-size="10">就绪/正在运行</text></g>
+
+<g><rect x="60" y="180" width="180" height="50" rx="8" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+<text x="150" y="202" text-anchor="middle" fill="#e3b341" font-weight="700">TASK_INTERRUPTIBLE (S)</text>
+<text x="150" y="220" text-anchor="middle" fill="#8b949e" font-size="10">可中断睡眠</text></g>
+
+<g><rect x="300" y="180" width="200" height="50" rx="8" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+<text x="400" y="202" text-anchor="middle" fill="#f85149" font-weight="700">TASK_UNINTERRUPTIBLE (D)</text>
+<text x="400" y="220" text-anchor="middle" fill="#8b949e" font-size="10">不可中断 (IO等待)</text></g>
+
+<g><rect x="560" y="180" width="180" height="50" rx="8" fill="#1a2028" stroke="#bc8cff" stroke-width="2"/>
+<text x="650" y="202" text-anchor="middle" fill="#bc8cff" font-weight="700">__TASK_STOPPED (T)</text>
+<text x="650" y="220" text-anchor="middle" fill="#8b949e" font-size="10">SIGSTOP / ptrace</text></g>
+
+<g><rect x="60" y="290" width="180" height="50" rx="8" fill="#1a2028" stroke="#8b949e" stroke-width="2"/>
+<text x="150" y="312" text-anchor="middle" fill="#8b949e" font-weight="700">EXIT_ZOMBIE (Z)</text>
+<text x="150" y="330" text-anchor="middle" fill="#8b949e" font-size="10">已退出待回收</text></g>
+
+<g><rect x="560" y="290" width="180" height="50" rx="8" fill="#1a2028" stroke="#8b949e" stroke-width="2"/>
+<text x="650" y="312" text-anchor="middle" fill="#8b949e" font-weight="700">EXIT_DEAD (X)</text>
+<text x="650" y="330" text-anchor="middle" fill="#8b949e" font-size="10">已被父回收</text></g>
+
+<path d="M340 100 Q 200 140 230 180" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="180" y="138" font-size="10" fill="#8b949e">等待事件 (read)</text>
+
+<path d="M230 180 Q 290 140 360 100" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="245" y="155" font-size="10" fill="#56d364">事件来了/信号</text>
+
+<path d="M400 100 L 400 180" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="410" y="140" font-size="10" fill="#8b949e">磁盘 IO</text>
+
+<path d="M460 100 Q 600 140 580 180" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="600" y="138" font-size="10" fill="#8b949e">SIGSTOP</text>
+
+<path d="M580 180 Q 620 140 470 100" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="568" y="158" font-size="10" fill="#56d364">SIGCONT</text>
+
+<path d="M340 100 L 200 290" fill="none" stroke="#f85149" marker-end="url(#ar3)"/>
+<text x="200" y="248" font-size="10" fill="#f85149">exit()</text>
+
+<path d="M240 315 L 560 315" fill="none" stroke="#8b949e" marker-end="url(#ar3)"/>
+<text x="400" y="305" text-anchor="middle" font-size="10" fill="#8b949e">父进程 wait()</text>
+</svg>
+<div class="caption">完整的进程状态转换。"D"状态进程无法 kill，常见于网络/磁盘卡死</div>
+</div>
+
+<h2 id="fork">3.4 fork() 源码剖析 — 现代版本</h2>
+
+<p>fork 的本质是<b>复制一个 task_struct</b>，然后通过 <b>Copy-on-Write</b> 共享物理页直到一方写入。</p>
+
+<pre class="code-c"><span class="cm">/* kernel/fork.c (简化版本) */</span>
+<span class="kw">SYSCALL_DEFINE0</span>(fork) {
+    <span class="kw">struct</span> kernel_clone_args args = {
+        .exit_signal = SIGCHLD,
+    };
+    <span class="kw">return</span> kernel_clone(&args);
+}
+
+pid_t <span class="fn">kernel_clone</span>(<span class="kw">struct</span> kernel_clone_args *args) {
+    <span class="kw">struct</span> task_struct *p;
+    pid_t pid;
+
+    <span class="cm">// 关键：copy_process 复制一份 task_struct</span>
+    p = <span class="fn">copy_process</span>(NULL, trace, NUMA_NO_NODE, args);
+    <span class="kw">if</span> (IS_ERR(p)) <span class="kw">return</span> PTR_ERR(p);
+
+    pid = pid_vnr(p->thread_pid);
+
+    <span class="cm">// 唤醒新进程 → 进入运行队列</span>
+    <span class="fn">wake_up_new_task</span>(p);
+
+    <span class="cm">// CLONE_VFORK 时父进程阻塞等待</span>
+    <span class="kw">if</span> (args->flags & CLONE_VFORK)
+        <span class="fn">wait_for_vfork_done</span>(p, &vfork);
+
+    <span class="kw">return</span> pid;       <span class="cm">// 父进程返回子 pid</span>
+}
+
+<span class="kw">struct</span> task_struct *<span class="fn">copy_process</span>(...) {
+    <span class="kw">struct</span> task_struct *p = <span class="fn">dup_task_struct</span>(current, node);  <span class="cm">// 1. 复制结构体</span>
+    
+    <span class="fn">copy_creds</span>(p, clone_flags);    <span class="cm">// 2. 复制凭证</span>
+    <span class="fn">copy_files</span>(clone_flags, p);   <span class="cm">// 3. 复制文件描述符表</span>
+    <span class="fn">copy_fs</span>(clone_flags, p);      <span class="cm">// 4. 复制根目录信息</span>
+    <span class="fn">copy_sighand</span>(clone_flags, p); <span class="cm">// 5. 复制信号处理</span>
+    <span class="fn">copy_mm</span>(clone_flags, p);      <span class="cm">// 6. 关键!复制 mm_struct (CoW 在此设置，写入时才触发)</span>
+    <span class="fn">copy_namespaces</span>(clone_flags, p);
+    <span class="fn">copy_thread</span>(p, args);         <span class="cm">// 7. 设置子进程的"返回点"</span>
+
+    <span class="fn">sched_cgroup_fork</span>(p, args);
+    <span class="kw">return</span> p;
+}</pre>
+
+<h3>copy_mm — CoW 的核心</h3>
+
+<pre class="code-c"><span class="kw">static int</span> <span class="fn">copy_mm</span>(<span class="kw">unsigned long</span> clone_flags, <span class="kw">struct</span> task_struct *tsk) {
+    <span class="kw">if</span> (clone_flags & CLONE_VM) {       <span class="cm">// 线程：共享 mm</span>
+        mmget(oldmm);
+        mm = oldmm;
+    } <span class="kw">else</span> {                              <span class="cm">// 进程：复制 mm</span>
+        mm = <span class="fn">dup_mm</span>(tsk, current->mm); <span class="cm">// → dup_mmap → 复制 VMA 链表</span>
+    }                                       <span class="cm">// 注意：物理页表项设为只读，CoW 触发缺页</span>
+    tsk->mm = mm;
+    tsk->active_mm = mm;
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+
+<span class="cm">/* CoW 触发：写只读页 → 缺页异常 → do_wp_page() */</span>
+<span class="kw">static</span> vm_fault_t <span class="fn">do_wp_page</span>(<span class="kw">struct</span> vm_fault *vmf) {
+    <span class="kw">struct</span> page *page = vm_normal_page(...);
+    <span class="kw">if</span> (page_count(page) == 1) {        <span class="cm">// 独占？直接改为可写</span>
+        set_pte(vmf->pte, pte_mkwrite(pte));
+    } <span class="kw">else</span> {                              <span class="cm">// 共享？分配新页拷贝</span>
+        new_page = alloc_page(...);
+        copy_user_highpage(new_page, page, address);
+        set_pte(vmf->pte, mk_pte(new_page, PROT_WRITE));
+    }
+}
+</pre>
+
+<h2 id="011fork">3.5 对照：Linux 0.11 的 fork（极简版）</h2>
+
+<pre class="code-c"><span class="cm">/* linux-0.11/kernel/fork.c */</span>
+<span class="kw">int</span> <span class="fn">copy_process</span>(<span class="kw">int</span> nr, <span class="kw">long</span> ebp, ... <span class="cm">/* 所有寄存器 */</span>) {
+    <span class="kw">struct</span> task_struct *p;
+
+    p = (<span class="kw">struct</span> task_struct *) <span class="fn">get_free_page</span>();  <span class="cm">// 分配一页存 task_struct</span>
+    <span class="kw">if</span> (!p) <span class="kw">return</span> -EAGAIN;
+    task[nr] = p;
+    *p = *current;          <span class="cm">// 结构体拷贝!</span>
+    p->state = TASK_UNINTERRUPTIBLE;
+    p->pid = last_pid;
+    p->father = current->pid;
+    p->counter = p->priority;
+    p->utime = p->stime = <span class="num">0</span>;
+
+    <span class="cm">// 关键：设置 TSS (硬件任务切换)</span>
+    p->tss.esp = esp;
+    p->tss.eip = eip;       <span class="cm">// 子进程从这里继续 → 返回 0</span>
+    p->tss.eflags = eflags;
+    p->tss.eax = <span class="num">0</span>;          <span class="cm">// 子进程 fork() 返回 0 的实现!</span>
+
+    <span class="fn">copy_mem</span>(nr, p);        <span class="cm">// 复制段（0.11 是段页式）</span>
+
+    p->state = TASK_RUNNING;
+    <span class="kw">return</span> last_pid;        <span class="cm">// 父进程返回子 pid</span>
+}</pre>
+
+<div class="callout tip">
+<div class="label">"fork 返回两次" 的真相</div>
+<p>fork 只<b>调用了一次</b>，但<b>返回了两次</b>。父进程返回路径正常出栈；子进程被构造时，
+<code>tss.eip</code> 被设成 fork 系统调用即将返回的地址，<code>tss.eax = 0</code>（x86 上系统调用返回值放 eax）。
+当调度器选中子进程，CPU 恢复其 TSS，eip 跳到 fork "出口"，eax=0，于是用户态看到 <code>fork() == 0</code>。</p>
+</div>
+
+<h2 id="ctxsw">3.6 上下文切换 — switch_to</h2>
+
+<pre class="code-c"><span class="cm">/* kernel/sched/core.c */</span>
+<span class="kw">static</span> __always_inline <span class="kw">struct</span> rq *
+<span class="fn">context_switch</span>(<span class="kw">struct</span> rq *rq, <span class="kw">struct</span> task_struct *prev,
+               <span class="kw">struct</span> task_struct *next, <span class="kw">struct</span> rq_flags *rf) {
+    <span class="cm">// 1. 切换地址空间 (CR3)</span>
+    <span class="kw">if</span> (next->mm)
+        switch_mm_irqs_off(prev->active_mm, next->mm, next);
+    <span class="kw">else</span> {                          <span class="cm">// 内核线程"借用"prev 的 mm，避免切 CR3</span>
+        next->active_mm = prev->active_mm;
+        mmgrab(prev->active_mm);
+    }
+
+    <span class="cm">// 2. 切换寄存器（含栈指针）→ 真正的"换上下文"</span>
+    switch_to(prev, next, prev);
+    <span class="cm">// !!! 这之后 prev 的本函数不会立刻返回，</span>
+    <span class="cm">//      要等 prev 再次被调度、切回来时才继续 !!!</span>
+
+    barrier();
+    <span class="kw">return</span> <span class="fn">finish_task_switch</span>(prev);
+}
+
+<span class="cm">/* arch/x86/include/asm/switch_to.h */</span>
+<span class="kw">#define</span> switch_to(prev, next, last)             \
+do {                                                  \
+    ((last) = <span class="fn">__switch_to_asm</span>((prev), (next))); \
+} <span class="kw">while</span> (<span class="num">0</span>)
+</pre>
+
+<pre class="code-asm"><span class="cm">; arch/x86/entry/entry_64.S — __switch_to_asm</span>
+SYM_FUNC_START(__switch_to_asm)
+    pushq %rbp           <span class="cm">; 保存当前进程的 callee-saved 寄存器</span>
+    pushq %rbx
+    pushq %r12
+    pushq %r13
+    pushq %r14
+    pushq %r15
+    movq  %rsp, TASK_threadsp(%rdi)   <span class="cm">; prev->thread.sp = rsp ← 关键!</span>
+    movq  TASK_threadsp(%rsi), %rsp   <span class="cm">; rsp = next->thread.sp  ← 栈切了!</span>
+    popq  %r15           <span class="cm">; 恢复 next 的寄存器</span>
+    popq  %r14
+    popq  %r13
+    popq  %r12
+    popq  %rbx
+    popq  %rbp
+    jmp   __switch_to    <span class="cm">; C 函数完成 FPU、TLS 等切换</span>
+SYM_FUNC_END(__switch_to_asm)
+</pre>
+
+<div class="callout warn">
+<div class="label">理解上下文切换的关键</div>
+<p><b>"切换"二字的本质就两件事：</b></p>
+<ol>
+<li><b>切栈</b>：<code>rsp</code> 从 prev 的内核栈指向 next 的内核栈</li>
+<li><b>切地址空间</b>：CR3 寄存器指向 next 的 pgd（如果 next 是用户进程）</li>
+</ol>
+<p>切完之后 <code>ret</code> 弹出的是 next 上次被切走时的返回地址，于是 next 从它当时"被打断"的地方继续。</p>
+</div>
+
+<h2 id="sched">3.7 调度器演化</h2>
+
+<table>
+<tr><th>版本</th><th>调度器</th><th>核心数据结构</th><th>选下一个进程的复杂度</th></tr>
+<tr><td>0.11</td><td>简单时间片</td><td>固定 64 项 task[] 数组</td><td>O(n) 遍历</td></tr>
+<tr><td>2.4</td><td>O(n)</td><td>双向链表 runqueue</td><td>O(n)</td></tr>
+<tr><td>2.6.0~2.6.22</td><td>O(1)</td><td>140 个优先级链表 × 2 (active/expired)</td><td>O(1)</td></tr>
+<tr><td>2.6.23+</td><td><b>CFS</b></td><td>红黑树按 vruntime 排序</td><td>O(log n)</td></tr>
+<tr><td>3.14+</td><td>CFS + 调度类</td><td>调度类框架（2.6.23 随 CFS 引入）新增 Deadline，与 RT、Idle 调度类并存</td><td>O(log n)</td></tr>
+</table>
+
+<p>CFS 长期是默认调度器（6.6 起其选择算法演进为 EEVDF，整体框架仍沿用），是<a href="../10-CFS调度器/index.html">第 10 章</a>的主题。</p>
+
+<h2 id="experiment">3.8 动手实验</h2>
+
+<pre class="code-bash"><span class="cm"># 实验 1：观察 task_struct 切换</span>
+<span class="cm"># /sys/kernel/debug/tracing/ 下打开调度事件</span>
+echo <span class="num">1</span> > /sys/kernel/debug/tracing/events/sched/sched_switch/enable
+echo <span class="num">1</span> > /sys/kernel/debug/tracing/tracing_on
+cat /sys/kernel/debug/tracing/trace_pipe
+
+<span class="cm"># 实验 2：GDB 单步走完 fork</span>
+<span class="cm">(gdb) hbreak kernel_clone</span>
+<span class="cm">(gdb) c</span>
+<span class="cm">(gdb) n      # 一步步过 copy_process</span>
+
+<span class="cm"># 实验 3：观察 CoW</span>
+echo <span class="num">1</span> > /proc/sys/vm/swappiness
+<span class="cm"># 写一个 fork() 后立刻 sleep 的程序，对比 /proc/self/status 的 RssAnon</span>
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../02-环境搭建/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../04-内存管理/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/04-\345\206\205\345\255\230\347\256\241\347\220\206/index.html" "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/index.html"
new file mode 100644
index 0000000..85899d9
--- /dev/null
+++ "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/index.html"
@@ -0,0 +1,263 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>04 · 内存管理 — 页表、buddy、slab、NUMA — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html" class="active">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">04</span>内存管理 — 页表、buddy、slab、NUMA</h1>
+
+
+<p>内存管理是内核最复杂的子系统之一。本章按"<b>从硬件到分配器</b>"的顺序逐层剖析。</p>
+
+<h2 id="hw">4.1 硬件视角：页表与 TLB</h2>
+<div class="diagram">
+<img src="../assets/diagrams/page-table.svg" alt="页表与TLB">
+<div class="caption">x86 32位多级页表查询 + TLB 命中/缺失</div>
+</div>
+
+<h3>x86_64 实际是 4~5 级页表</h3>
+<table>
+<tr><th>层级</th><th>名称</th><th>覆盖范围</th><th>项数</th></tr>
+<tr><td>1</td><td>PGD（5 级页表时其下多一级 P4D）</td><td>每项 512GB（5 级时 PGD 每项 256TB、P4D 每项 512GB）</td><td>512</td></tr>
+<tr><td>2</td><td>PUD</td><td>1GB / page</td><td>512</td></tr>
+<tr><td>3</td><td>PMD</td><td>2MB / page</td><td>512</td></tr>
+<tr><td>4</td><td>PTE</td><td>4KB / page</td><td>512</td></tr>
+<tr><td>+0</td><td>页内偏移</td><td>4KB</td><td>4096 字节</td></tr>
+</table>
+
+<h2 id="zones">4.2 物理内存的"分区"</h2>
+
+<p>因为不同物理地址范围有<b>硬件限制</b>，内核把物理内存分为<b>多个 zone</b>：</p>
+
+<table>
+<tr><th>Zone</th><th>地址范围 (x86_64)</th><th>用途</th></tr>
+<tr><td>ZONE_DMA</td><td>0 ~ 16MB</td><td>老 ISA DMA 设备</td></tr>
+<tr><td>ZONE_DMA32</td><td>16MB ~ 4GB</td><td>32 位 DMA 设备</td></tr>
+<tr><td>ZONE_NORMAL</td><td>4GB 起</td><td>常规分配</td></tr>
+<tr><td>ZONE_MOVABLE</td><td>—</td><td>可迁移页（用于热插拔）</td></tr>
+</table>
+
+<p>NUMA 系统每个 node 独立一组 zone：<code>node 0 [DMA, DMA32, Normal] · node 1 [DMA32, Normal] ...</code></p>
+
+<h2 id="buddy">4.3 Buddy System — 页粒度分配器</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 320" font-family="-apple-system,sans-serif" font-size="11">
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">Buddy 分配器：以 2^n 页为单位</text>
+<g>
+  <text x="20" y="60" fill="#58a6ff" font-weight="700">order 0 (4KB):</text>
+  <g transform="translate(140,48)">
+    <rect width="40" height="22" fill="#1a2028" stroke="#56d364"/>
+    <rect x="50" width="40" height="22" fill="#1a2028" stroke="#56d364"/>
+    <rect x="100" width="40" height="22" fill="#1a2028" stroke="#30363d" stroke-dasharray="3,2"/>
+    <rect x="150" width="40" height="22" fill="#1a2028" stroke="#56d364"/>
+    <text x="20" y="40" font-size="10" fill="#8b949e">free</text>
+    <text x="70" y="40" font-size="10" fill="#8b949e">free</text>
+    <text x="120" y="40" font-size="10" fill="#f85149">used</text>
+    <text x="170" y="40" font-size="10" fill="#8b949e">free</text>
+  </g>
+
+  <text x="20" y="115" fill="#58a6ff" font-weight="700">order 1 (8KB):</text>
+  <g transform="translate(140,103)">
+    <rect width="90" height="22" fill="#1a2028" stroke="#56d364"/>
+    <rect x="100" width="90" height="22" fill="#1a2028" stroke="#56d364"/>
+    <text x="45" y="40" font-size="10" fill="#8b949e">free 2 pages</text>
+    <text x="145" y="40" font-size="10" fill="#8b949e">free 2 pages</text>
+  </g>
+
+  <text x="20" y="170" fill="#58a6ff" font-weight="700">order 2 (16KB):</text>
+  <g transform="translate(140,158)">
+    <rect width="190" height="22" fill="#1a2028" stroke="#56d364"/>
+    <text x="95" y="40" font-size="10" fill="#8b949e">free 4 pages</text>
+  </g>
+
+  <text x="20" y="225" fill="#58a6ff" font-weight="700">order 3..10:</text>
+  <g transform="translate(140,213)"><rect width="200" height="22" fill="#0f1419" stroke="#30363d"/><text x="100" y="15" text-anchor="middle" fill="#8b949e">...更高阶链表...</text></g>
+</g>
+
+<g transform="translate(450, 60)">
+<rect width="320" height="220" rx="8" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+<text x="160" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700">关键操作</text>
+<text x="14" y="50" fill="#e6edf3" font-weight="700">alloc_pages(GFP_KERNEL, order):</text>
+<text x="14" y="68" font-size="11" fill="#8b949e">1. 查 order 链表 → 有则取出</text>
+<text x="14" y="84" font-size="11" fill="#8b949e">2. 没有 → 查更高阶 → 拆半 (split)</text>
+<text x="14" y="100" font-size="11" fill="#8b949e">3. 把剩下的"伙伴"挂到对应阶链表</text>
+<text x="14" y="130" fill="#e6edf3" font-weight="700">free_pages(page, order):</text>
+<text x="14" y="148" font-size="11" fill="#8b949e">1. 找"伙伴"（异或一位地址）</text>
+<text x="14" y="164" font-size="11" fill="#8b949e">2. 伙伴也是 free? 合并 (merge) ↑</text>
+<text x="14" y="180" font-size="11" fill="#8b949e">3. 否则挂回 order 链表</text>
+<text x="14" y="205" font-size="11" fill="#56d364">→ 经典减少外部碎片的算法</text>
+</g>
+</svg>
+<div class="caption">Buddy 算法：每阶维护 free 链表，按 2 的幂分裂/合并</div>
+</div>
+
+<h2 id="slab">4.4 Slab — 小对象高效分配</h2>
+
+<p>Buddy 最小单位是 4KB 页，但内核 90% 的分配是 <code>kmalloc(64)</code> 这种小对象。直接给 4KB 浪费 98%！</p>
+
+<p><b>Slab</b> 在 Buddy 之上分配几页大块，然后切成小对象供 <code>kmalloc</code> 使用：</p>
+
+<pre class="code-c"><span class="cm">/* mm/slub.c — SLUB 是 Linux 默认 slab 实现 */</span>
+<span class="kw">struct</span> kmem_cache {
+    <span class="kw">unsigned int</span> object_size;     <span class="cm">// 单个对象大小</span>
+    <span class="kw">unsigned int</span> size;            <span class="cm">// 含 metadata 实际占用</span>
+    <span class="kw">unsigned int</span> offset;          <span class="cm">// freelist 指针偏移</span>
+    <span class="kw">struct</span> kmem_cache_node *node[MAX_NUMNODES];
+    <span class="kw">struct</span> kmem_cache_cpu __percpu *cpu_slab;  <span class="cm">// per-CPU 热缓存</span>
+    <span class="kw">const char</span> *name;
+};
+
+<span class="cm">/* 常见 cache: task_struct, mm_struct, inode, dentry, ... */</span>
+<span class="kw">static struct</span> kmem_cache *task_struct_cachep;
+
+<span class="kw">void</span> *<span class="fn">kmalloc</span>(<span class="kw">size_t</span> size, gfp_t flags) {
+    <span class="cm">// kmalloc 内部就是 8/16/32/.../8192 字节的多个 kmem_cache</span>
+    <span class="kw">struct</span> kmem_cache *s = kmalloc_caches[kmalloc_index(size)];
+    <span class="kw">return</span> kmem_cache_alloc(s, flags);
+}</pre>
+
+<p><b>查看 slab 使用情况</b>：<code>cat /proc/slabinfo</code></p>
+
+<h2 id="numa">4.5 NUMA — 现代多 socket 系统</h2>
+
+<div class="callout deep">
+<div class="label">NUMA 是什么</div>
+<p>多 CPU socket 服务器（如 2~8 路），每个 socket 自带一组内存控制器。<b>CPU 0 访问 node 1 的内存</b>要经过 QPI/UPI 总线，比访问本地 node 0 的内存慢 30~50%。</p>
+<p>Linux 的对策：<code>numa_balancing</code> 自动把进程的页面"迁回"它常运行的 CPU 所在 node。</p>
+</div>
+
+<pre class="code-bash"><span class="cm"># 查 NUMA 拓扑</span>
+numactl --hardware
+
+<span class="cm"># 显式绑定进程到 node 0</span>
+numactl --cpunodebind=0 --membind=0 ./my_program
+
+<span class="cm"># 内核统计</span>
+cat /sys/devices/system/node/node0/meminfo
+cat /proc/&lt;pid&gt;/numa_maps
+</pre>
+
+<h2 id="reclaim">4.6 内存回收 — kswapd 与 OOM</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 280" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="armr" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">内存压力下的回收链</text>
+
+<rect x="20" y="50" width="180" height="80" rx="8" fill="#1a2028" stroke="#56d364"/>
+<text x="110" y="74" text-anchor="middle" fill="#56d364" font-weight="700">充足 (high)</text>
+<text x="110" y="94" text-anchor="middle" fill="#8b949e" font-size="10">free &gt; high watermark</text>
+<text x="110" y="112" text-anchor="middle" fill="#8b949e" font-size="10">分配立即成功</text>
+
+<line x1="200" y1="90" x2="230" y2="90" stroke="#ff7b29" marker-end="url(#armr)"/>
+
+<rect x="230" y="50" width="180" height="80" rx="8" fill="#1a2028" stroke="#e3b341"/>
+<text x="320" y="74" text-anchor="middle" fill="#e3b341" font-weight="700">紧张 (low)</text>
+<text x="320" y="94" text-anchor="middle" fill="#8b949e" font-size="10">唤醒 kswapd 异步回收</text>
+<text x="320" y="112" text-anchor="middle" fill="#8b949e" font-size="10">分配仍成功，但开始扫描</text>
+
+<line x1="410" y1="90" x2="440" y2="90" stroke="#ff7b29" marker-end="url(#armr)"/>
+
+<rect x="440" y="50" width="180" height="80" rx="8" fill="#1a2028" stroke="#f85149"/>
+<text x="530" y="74" text-anchor="middle" fill="#f85149" font-weight="700">紧迫 (min)</text>
+<text x="530" y="94" text-anchor="middle" fill="#8b949e" font-size="10">direct_reclaim 同步阻塞</text>
+<text x="530" y="112" text-anchor="middle" fill="#8b949e" font-size="10">申请进程被罚去回收</text>
+
+<line x1="620" y1="90" x2="650" y2="90" stroke="#ff7b29" marker-end="url(#armr)"/>
+
+<rect x="650" y="50" width="130" height="80" rx="8" fill="#1a2028" stroke="#f85149" stroke-width="3"/>
+<text x="715" y="74" text-anchor="middle" fill="#f85149" font-weight="700">OOM</text>
+<text x="715" y="94" text-anchor="middle" fill="#8b949e" font-size="10">真的没了</text>
+<text x="715" y="112" text-anchor="middle" fill="#8b949e" font-size="10">oom_kill_process</text>
+
+<g transform="translate(20, 160)">
+<rect width="760" height="100" rx="8" fill="#1a2028" stroke="#58a6ff"/>
+<text x="20" y="20" fill="#58a6ff" font-weight="700">回收候选 (按性价比排序)</text>
+<text x="20" y="42" font-size="11" fill="#e6edf3">1. <tspan fill="#56d364">干净的页缓存</tspan> — 直接丢弃，下次按需读盘</text>
+<text x="20" y="58" font-size="11" fill="#e6edf3">2. <tspan fill="#56d364">脏页</tspan> — 先写回再丢弃</text>
+<text x="20" y="74" font-size="11" fill="#e6edf3">3. <tspan fill="#e3b341">匿名页</tspan> — 换出到 swap 分区</text>
+<text x="20" y="90" font-size="11" fill="#e6edf3">4. <tspan fill="#f85149">slab 可回收对象</tspan> — dcache/icache shrink</text>
+</g>
+</svg>
+</div>
+
+<h3>OOM Killer 的选择策略</h3>
+<pre class="code-c"><span class="cm">/* mm/oom_kill.c */</span>
+<span class="kw">static long</span> <span class="fn">oom_badness</span>(<span class="kw">struct</span> task_struct *p) {
+    <span class="kw">long</span> points;
+    points = <span class="fn">get_mm_rss</span>(p->mm) +
+             <span class="fn">get_mm_counter</span>(p->mm, MM_SWAPENTS) +
+             <span class="fn">mm_pgtables_bytes</span>(p->mm) / PAGE_SIZE;
+    <span class="cm">// 调整: oom_score_adj 用户/管理员可手动加减分</span>
+    points += (p->signal->oom_score_adj * totalpages) / <span class="num">1000</span>;
+    <span class="kw">return</span> points;
+}
+<span class="cm">// 最高分进程 → kill -9 (SIGKILL)</span>
+</pre>
+
+<div class="callout warn">
+<div class="label">保护关键进程不被 OOM-kill</div>
+<pre class="code-bash">echo -<span class="num">1000</span> > /proc/$(pidof myserver)/oom_score_adj   <span class="cm"># 永不杀</span>
+echo  <span class="num">1000</span> > /proc/$(pidof junk)/oom_score_adj       <span class="cm"># 优先杀</span></pre>
+</div>
+
+<h2 id="thp">4.7 透明大页 (THP) 与 KSM</h2>
+<ul>
+<li><b>THP</b>：自动把 4KB 页合并为 2MB 大页 → 减少 TLB 缺失，但增加内存压力</li>
+<li><b>KSM (Kernel Samepage Merging)</b>：扫描相同内容页，合并节省内存（KVM 虚拟机常用）</li>
+</ul>
+
+<pre class="code-bash">cat /sys/kernel/mm/transparent_hugepage/enabled
+<span class="cm"># always [madvise] never</span>
+echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../03-进程管理/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../05-文件系统/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/05-\346\226\207\344\273\266\347\263\273\347\273\237/index.html" "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/index.html"
new file mode 100644
index 0000000..d108ee7
--- /dev/null
+++ "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/index.html"
@@ -0,0 +1,251 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>05 · 文件系统 — VFS、ext4、page cache、io_uring — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html" class="active">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">05</span>文件系统 — VFS、ext4、page cache、io_uring</h1>
+
+
+<p>"<b>一切皆文件</b>"是 Unix 的核心哲学。本章揭示这一哲学如何在内核中通过 <b>VFS</b> 抽象实现。</p>
+
+<h2 id="vfs">5.1 VFS 四大对象</h2>
+<div class="diagram">
+<img src="../assets/diagrams/vfs-objects.svg" alt="VFS 对象关系">
+<div class="caption">task_struct → files_struct → file → dentry → inode → super_block</div>
+</div>
+
+<h3>四大对象的职责</h3>
+<table>
+<tr><th>对象</th><th>表示什么</th><th>每个有几个</th></tr>
+<tr><td><code>super_block</code></td><td>已挂载的文件系统实例</td><td>每挂载点一个</td></tr>
+<tr><td><code>inode</code></td><td>文件元数据（权限、大小、所属 fs）</td><td>每文件一个</td></tr>
+<tr><td><code>dentry</code></td><td>路径中的一个名字（"目录项"）</td><td>每路径组件一个</td></tr>
+<tr><td><code>file</code></td><td>一次"打开"的实例（含读写位置）</td><td>每次 open() 一个</td></tr>
+</table>
+
+<h2 id="open">5.2 一次 open("/etc/passwd") 的完整追踪</h2>
+
+<pre class="code-c"><span class="cm">/* fs/open.c */</span>
+<span class="kw">SYSCALL_DEFINE3</span>(open, <span class="kw">const char</span> __user *, filename, <span class="kw">int</span>, flags, mode_t, mode) {
+    <span class="kw">return</span> <span class="fn">do_sys_open</span>(AT_FDCWD, filename, flags, mode);
+}
+
+<span class="kw">long</span> <span class="fn">do_sys_open</span>(...) {
+    <span class="cm">// 1. 找一个空闲 fd</span>
+    <span class="kw">int</span> fd = <span class="fn">get_unused_fd_flags</span>(flags);
+    
+    <span class="cm">// 2. 实际打开 → 路径解析 → 构造 struct file</span>
+    <span class="kw">struct</span> file *f = <span class="fn">do_filp_open</span>(dfd, &tmp, &op);
+    
+    <span class="cm">// 3. fd ↔ file 绑定</span>
+    <span class="fn">fd_install</span>(fd, f);
+    <span class="kw">return</span> fd;
+}
+
+<span class="kw">struct</span> file *<span class="fn">do_filp_open</span>(...) {
+    <span class="kw">struct</span> nameidata nd;
+    <span class="cm">// 关键: path_openat 一层层解析 "/etc/passwd"</span>
+    <span class="cm">// → "/" lookup → "etc" lookup → "passwd" lookup</span>
+    <span class="kw">return</span> <span class="fn">path_openat</span>(&nd, op, flags);
+}
+</pre>
+
+<h3>路径解析 (link_path_walk) 深入</h3>
+
+<pre class="code-c"><span class="cm">/* fs/namei.c */</span>
+<span class="kw">static int</span> <span class="fn">link_path_walk</span>(<span class="kw">const char</span> *name, <span class="kw">struct</span> nameidata *nd) {
+    <span class="kw">for</span> (;;) {
+        <span class="cm">// 1. 先查 dcache（高速缓存）</span>
+        <span class="kw">struct</span> dentry *dentry = <span class="fn">__d_lookup</span>(parent, &this);
+        <span class="kw">if</span> (!dentry) {
+            <span class="cm">// 2. dcache 未命中 → 调用具体 fs 的 lookup</span>
+            dentry = <span class="fn">lookup_slow</span>(&this, parent, flags);
+            <span class="cm">// 比如 ext4_lookup → ext4_find_entry → 读目录块</span>
+        }
+        <span class="kw">if</span> (last_component) <span class="kw">break</span>;
+    }
+}
+</pre>
+
+<h2 id="pagecache">5.3 Page Cache — 文件读写的"防火墙"</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 380" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="ar5" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">read() 的路径 (页缓存视角)</text>
+
+<g transform="translate(50, 50)">
+<rect width="200" height="40" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+<text x="100" y="25" text-anchor="middle" fill="#58a6ff">用户调用 read(fd, buf, n)</text>
+</g>
+<line x1="150" y1="90" x2="150" y2="110" stroke="#8b949e" marker-end="url(#ar5)"/>
+
+<g transform="translate(50, 110)">
+<rect width="200" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="100" y="25" text-anchor="middle" fill="#ff7b29">vfs_read → file->f_op->read</text>
+</g>
+<line x1="150" y1="150" x2="150" y2="170" stroke="#8b949e" marker-end="url(#ar5)"/>
+
+<g transform="translate(50, 170)">
+<rect width="200" height="60" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="100" y="25" text-anchor="middle" fill="#ff7b29">generic_file_read_iter</text>
+<text x="100" y="45" text-anchor="middle" fill="#8b949e" font-size="10">查页缓存 (find_get_page)</text>
+</g>
+
+<g transform="translate(330, 110)">
+<rect width="200" height="50" rx="4" fill="#1a2028" stroke="#56d364"/>
+<text x="100" y="22" text-anchor="middle" fill="#56d364" font-weight="700">命中 (Cache Hit)</text>
+<text x="100" y="40" text-anchor="middle" fill="#8b949e" font-size="10">copy_to_user → 返回</text>
+</g>
+
+<g transform="translate(330, 200)">
+<rect width="200" height="120" rx="4" fill="#1a2028" stroke="#f85149"/>
+<text x="100" y="22" text-anchor="middle" fill="#f85149" font-weight="700">未命中 (Cache Miss)</text>
+<text x="20" y="44" font-size="10" fill="#8b949e">1. alloc 一个 page</text>
+<text x="20" y="60" font-size="10" fill="#8b949e">2. add_to_page_cache</text>
+<text x="20" y="76" font-size="10" fill="#8b949e">3. a_ops->readpage</text>
+<text x="20" y="92" font-size="10" fill="#8b949e">4. ext4_readpage → 构造 BIO</text>
+<text x="20" y="108" font-size="10" fill="#8b949e">5. submit_bio → 块设备</text>
+</g>
+
+<line x1="250" y1="200" x2="330" y2="135" stroke="#56d364" marker-end="url(#ar5)"/>
+<line x1="250" y1="200" x2="330" y2="240" stroke="#f85149" marker-end="url(#ar5)"/>
+
+<g transform="translate(580, 240)">
+<rect width="180" height="100" rx="4" fill="#1a2028" stroke="#bc8cff"/>
+<text x="90" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700">块设备层</text>
+<text x="20" y="44" font-size="10" fill="#8b949e">BIO → 请求队列</text>
+<text x="20" y="60" font-size="10" fill="#8b949e">IO 调度器 (mq-deadline...)</text>
+<text x="20" y="76" font-size="10" fill="#8b949e">驱动 → NVMe/SATA/SCSI</text>
+<text x="20" y="92" font-size="10" fill="#8b949e">等待 IO 完成中断</text>
+</g>
+<line x1="530" y1="270" x2="580" y2="285" stroke="#8b949e" marker-end="url(#ar5)"/>
+</svg>
+</div>
+
+<h3>关键数据结构: address_space</h3>
+<pre class="code-c"><span class="kw">struct</span> address_space {
+    <span class="kw">struct</span> inode *host;        <span class="cm">// 关联的 inode</span>
+    <span class="kw">struct</span> xarray i_pages;     <span class="cm">// 所有缓存页（XArray 树）</span>
+    <span class="kw">const struct</span> address_space_operations *a_ops;
+    <span class="kw">unsigned long</span> nrpages;    <span class="cm">// 已缓存的页数</span>
+};
+
+<span class="kw">struct</span> address_space_operations {
+    <span class="kw">int</span> (*writepage)(<span class="kw">struct</span> page *page, <span class="kw">struct</span> writeback_control *);
+    <span class="kw">int</span> (*readpage)(<span class="kw">struct</span> file *, <span class="kw">struct</span> page *);
+    <span class="kw">int</span> (*write_begin)(...);
+    <span class="kw">int</span> (*write_end)(...);
+    <span class="cm">/* ... */</span>
+};
+</pre>
+
+<h2 id="writeback">5.4 脏页回写</h2>
+
+<p>用户 write() 只是把数据写进 page cache，<b>页变"脏"</b>但不立刻落盘。回写有三种触发：</p>
+
+<ol>
+<li><b>定时</b>：<code>dirty_writeback_centisecs</code>（默认 500，即 5 秒），后台 <code>flush</code> 线程触发</li>
+<li><b>过期</b>：脏页停留 <code>dirty_expire_centisecs</code>（默认 3000，即 30 秒）后必写</li>
+<li><b>阈值</b>：脏页超过 <code>dirty_ratio</code>（默认 20%），<b>申请方被阻塞</b>直到回写完成</li>
+</ol>
+
+<pre class="code-bash"><span class="cm"># 查看与调整</span>
+sysctl vm.dirty_ratio              <span class="cm"># 阻塞阈值 (%)</span>
+sysctl vm.dirty_background_ratio   <span class="cm"># 后台回写阈值 (%)</span>
+sysctl vm.dirty_expire_centisecs   <span class="cm"># 脏页存活上限</span>
+
+<span class="cm"># 强制立刻回写</span>
+sync           <span class="cm"># 全部</span>
+<span class="cm"># 单个文件：在程序中调用 fsync(fd)（C 库函数，不是 shell 命令）</span>
+</pre>
+
+<h2 id="ext4">5.5 ext4 关键特性</h2>
+
+<table>
+<tr><th>特性</th><th>说明</th></tr>
+<tr><td><b>Extent</b></td><td>取代 ext3 间接块，用 [start, len] 描述连续块，大文件效率高</td></tr>
+<tr><td><b>HTree 目录</b></td><td>大目录用哈希树代替线性扫描，O(log n) 查找</td></tr>
+<tr><td><b>日志 (jbd2)</b></td><td>data=writeback / ordered / journal 三种级别</td></tr>
+<tr><td><b>延迟分配</b></td><td>write() 不立即分配块，回写时一次分配大段连续块</td></tr>
+<tr><td><b>多块分配</b></td><td>一次系统调用分配多个块</td></tr>
+<tr><td><b>Inline data</b></td><td>&lt;60B 的小文件直接存在 inode 里</td></tr>
+</table>
+
+<h2 id="iouring">5.6 io_uring — 现代异步 IO</h2>
+
+<p>传统 Linux <code>aio</code>（libaio）限制很多，基本只有 O_DIRECT 才能真正异步。<code>io_uring</code> 是 Linux 5.1 引入的新一代异步 IO 框架，高并发下的系统调用开销明显低于传统 AIO 以及 epoll + read/write 模式：</p>
+
+<pre class="code-c"><span class="cm">/* 用户态使用 (liburing 简化) */</span>
+<span class="kw">struct</span> io_uring ring;
+<span class="fn">io_uring_queue_init</span>(<span class="num">256</span>, &ring, <span class="num">0</span>);
+
+<span class="kw">struct</span> io_uring_sqe *sqe = <span class="fn">io_uring_get_sqe</span>(&ring);
+<span class="fn">io_uring_prep_read</span>(sqe, fd, buf, sz, <span class="num">0</span>);
+<span class="fn">io_uring_submit</span>(&ring);
+
+<span class="kw">struct</span> io_uring_cqe *cqe;
+<span class="fn">io_uring_wait_cqe</span>(&ring, &cqe);
+<span class="cm">// cqe->res 是 read 的返回值</span>
+<span class="fn">io_uring_cqe_seen</span>(&ring, cqe);
+</pre>
+
+<div class="callout deep">
+<div class="label">io_uring 为什么快</div>
+<ul>
+<li><b>共享内存环形队列</b>：用户和内核共享 SQ/CQ 两个 ring，提交与完成信息无需在用户态和内核态之间来回拷贝</li>
+<li><b>批量提交</b>：一次系统调用提交多个任务</li>
+<li><b>SQPOLL 模式</b>：内核线程主动 poll，<b>用户完全无系统调用</b></li>
+<li><b>链式 IO</b>：read → write 之类组合一次提交</li>
+</ul>
+</div>
+
+
+<footer class="page-footer">
+    <p>← <a href="../04-内存管理/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../06-系统调用/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/06-\347\263\273\347\273\237\350\260\203\347\224\250/index.html" "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/index.html"
new file mode 100644
index 0000000..db7f423
--- /dev/null
+++ "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/index.html"
@@ -0,0 +1,189 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>06 · 系统调用 — 用户态与内核态的桥梁 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html" class="active">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">06</span>系统调用 — 用户态与内核态的桥梁</h1>
+
+
+<p>系统调用是<b>唯一</b>合法的"用户态 → 内核态"通道。本章彻底搞清 <code>read(fd, buf, n)</code> 之后到底发生了什么。</p>
+
+<h2 id="flow">6.1 完整路径全景</h2>
+<div class="diagram">
+<img src="../assets/diagrams/syscall-flow.svg" alt="系统调用流程">
+<div class="caption">从 glibc 到块设备的 6 个阶段</div>
+</div>
+
+<h2 id="x86">6.2 x86 三代系统调用机制</h2>
+
+<table>
+<tr><th>机制</th><th>指令</th><th>引入</th><th>开销</th><th>触发方式</th></tr>
+<tr><td>软中断</td><td><code>int 0x80</code></td><td>i386 (Linux 0.01 起)</td><td>慢 (~1000 cycles)</td><td>查 IDT，类似中断</td></tr>
+<tr><td>SYSENTER</td><td><code>sysenter</code> / <code>sysexit</code></td><td>Pentium II</td><td>快 (~100 cycles)</td><td>从 MSR 直接跳转</td></tr>
+<tr><td>SYSCALL</td><td><code>syscall</code> / <code>sysret</code></td><td>x86_64</td><td>最快</td><td>从 MSR 直接跳转</td></tr>
+</table>
+
+<h3>x86_64 syscall 入口</h3>
+<pre class="code-asm"><span class="cm">; arch/x86/entry/entry_64.S — entry_SYSCALL_64</span>
+SYM_CODE_START(entry_SYSCALL_64)
+    swapgs                              <span class="cm">; 切换 GSBASE 到内核 per-CPU 数据</span>
+    movq    %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+    movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp   <span class="cm">; 换内核栈</span>
+
+    pushq   $__USER_DS                  <span class="cm">; 构造 pt_regs (假装中断那样)</span>
+    pushq   PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+    pushq   %r11                        <span class="cm">; 用户态 RFLAGS</span>
+    pushq   $__USER_CS
+    pushq   %rcx                        <span class="cm">; 用户态 RIP (syscall 把它存到 rcx)</span>
+    pushq   %rax                        <span class="cm">; 系统调用号</span>
+    PUSH_AND_CLEAR_REGS rax=$-ENOSYS    <span class="cm">; 保存其他寄存器</span>
+    movq    %rsp, %rdi                  <span class="cm">; 参数 1: rdi = pt_regs *</span>
+    movslq  %eax, %rsi                  <span class="cm">; 参数 2: rsi = 系统调用号 (按 int 符号扩展)</span>
+    call    do_syscall_64               <span class="cm">; ─── 进入 C 代码</span>
+
+    <span class="cm">; ... 返回路径: 检查 signal/resched, swapgs, sysretq</span>
+SYM_CODE_END(entry_SYSCALL_64)
+</pre>
+
+<h3>分发：do_syscall_64</h3>
+<pre class="code-c"><span class="cm">/* arch/x86/entry/common.c */</span>
+__visible noinstr <span class="kw">void</span> <span class="fn">do_syscall_64</span>(<span class="kw">struct</span> pt_regs *regs, <span class="kw">int</span> nr) {
+    nr = syscall_enter_from_user_mode(regs, nr);
+
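+    <span class="kw">if</span> (likely((<span class="kw">unsigned int</span>)nr &lt; NR_syscalls)) {   <span class="cm">// 无符号比较: 负的调用号不能通过边界检查</span>
+        nr = array_index_nospec(nr, NR_syscalls);
+        <span class="cm">// 关键: 通过函数指针表分发 (较新的 6.x 内核改为 x64_sys_call() 里的 switch 分发)</span>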
+        regs->ax = sys_call_table[nr](regs);
+    }
+
+    syscall_exit_to_user_mode(regs);
+}
+
+<span class="cm">/* sys_call_table 由 syscall_64.tbl 自动生成 */</span>
+<span class="kw">const</span> sys_call_ptr_t sys_call_table[NR_syscalls] = {
+    [<span class="num">0</span>]   = __x64_sys_read,
+    [<span class="num">1</span>]   = __x64_sys_write,
+    [<span class="num">2</span>]   = __x64_sys_open,
+    [<span class="num">3</span>]   = __x64_sys_close,
+    [<span class="num">57</span>]  = __x64_sys_fork,
+    [<span class="num">59</span>]  = __x64_sys_execve,
+    [<span class="num">60</span>]  = __x64_sys_exit,
+    <span class="cm">/* ... 共 ~450 个 */</span>
+};</pre>
+
+<h2 id="table">6.3 系统调用号是如何"生长"的</h2>
+
+<p>新加系统调用只能<b>追加在通用编号区间的末尾</b>（表中 512–547 是 x32 专用区段），编号永不重用，这是 ABI 稳定的关键。</p>
+
+<pre class="code-bash"><span class="cm">$ wc -l arch/x86/entry/syscalls/syscall_64.tbl</span>
+<span class="num">446</span> arch/x86/entry/syscalls/syscall_64.tbl
+
+<span class="cm">$ grep -E '^4[0-9]{2}' arch/x86/entry/syscalls/syscall_64.tbl | tail -5</span>
+<span class="num">449</span>     common  futex_waitv             sys_futex_waitv
+<span class="num">450</span>     common  set_mempolicy_home_node sys_set_mempolicy_home_node
+<span class="num">451</span>     common  cachestat               sys_cachestat
+<span class="num">452</span>     common  fchmodat2               sys_fchmodat2
+<span class="num">453</span>     64      map_shadow_stack        sys_map_shadow_stack
+</pre>
+
+<h2 id="vdso">6.4 vDSO — 不进内核的"系统调用"</h2>
+
+<p>有些"系统调用"<b>不需要切到内核态</b>就能完成，比如 <code>gettimeofday()</code>。Linux 把一段内核代码映射到每个进程地址空间，称为 vDSO：</p>
+
+<pre class="code-bash">cat /proc/self/maps | grep vdso
+<span class="cm"># 7ffd...000-7ffd...000 r-xp 00000000 00:00 0    [vdso]</span>
+
+<span class="cm"># gettimeofday() 直接调用 vDSO 里的代码 → 不切环 → 极快</span>
+</pre>
+
+<p>内核把当前时间放在一段共享内存里，vDSO 函数直接读这段内存。</p>
+
+<h2 id="seccomp">6.5 seccomp — 系统调用过滤器</h2>
+
+<p>容器、浏览器沙箱用 seccomp 限制进程<b>能调用哪些 syscall</b>。</p>
+
+<pre class="code-c"><span class="cm">/* 用 BPF 过滤 syscall */</span>
+<span class="kw">struct</span> sock_filter filter[] = {
+    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(<span class="kw">struct</span> seccomp_data, nr)),
+    BPF_JUMP(BPF_JMP | BPF_JEQ, __NR_read,  <span class="num">0</span>, <span class="num">1</span>), BPF_STMT(BPF_RET, SECCOMP_RET_ALLOW),
+    BPF_JUMP(BPF_JMP | BPF_JEQ, __NR_write, <span class="num">0</span>, <span class="num">1</span>), BPF_STMT(BPF_RET, SECCOMP_RET_ALLOW),
+    BPF_STMT(BPF_RET, SECCOMP_RET_KILL),
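+};
+<span class="kw">struct</span> sock_fprog prog = { .len = <span class="kw">sizeof</span>(filter) / <span class="kw">sizeof</span>(filter[<span class="num">0</span>]), .filter = filter };
+prctl(PR_SET_NO_NEW_PRIVS, <span class="num">1</span>, <span class="num">0</span>, <span class="num">0</span>, <span class="num">0</span>); prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);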
+<span class="cm">// 此后调用 open() 等会被立刻 KILL</span>
+</pre>
+
+<h2 id="custom">6.6 添加自定义系统调用 (实验)</h2>
+
+<pre class="code-bash"><span class="cm"># 1. 在 syscall_64.tbl 末尾加一行</span>
+<span class="num">454</span>   common  hello_kernel    sys_hello_kernel
+
+<span class="cm"># 2. 在 kernel/sys.c 加实现</span>
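+SYSCALL_DEFINE1(hello_kernel, <span class="kw">const char</span> __user *, name) {
+    char buf[<span class="num">64</span>] = { <span class="num">0</span> };   <span class="cm">/* 预清零: 输入过长时 strncpy_from_user 不会补 '\0' */</span>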
+    <span class="kw">if</span> (strncpy_from_user(buf, name, <span class="num">63</span>) < <span class="num">0</span>)
+        <span class="kw">return</span> -EFAULT;
+    pr_info(<span class="str">"Hello, %s! From kernel.\n"</span>, buf);
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+
+<span class="cm"># 3. include/linux/syscalls.h 加声明</span>
+asmlinkage <span class="kw">long</span> sys_hello_kernel(<span class="kw">const char</span> __user *name);
+
+<span class="cm"># 4. 重新编译内核 + 启动 QEMU</span>
+
+<span class="cm"># 5. 用户态调用</span>
+syscall(<span class="num">454</span>, <span class="str">"world"</span>);
+<span class="cm"># dmesg | tail   → Hello, world! From kernel.</span>
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../05-文件系统/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../07-设备驱动/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/07-\350\256\276\345\244\207\351\251\261\345\212\250/index.html" "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/index.html"
new file mode 100644
index 0000000..6f441be
--- /dev/null
+++ "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/index.html"
@@ -0,0 +1,263 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>07 · 设备驱动 — 字符/块/平台/PCI 驱动模型 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html" class="active">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">07</span>设备驱动 — 字符/块/平台/PCI 驱动模型</h1>
+
+
+<p>Linux 驱动是<b>内核中代码量最大的部分</b>（占比 &gt; 60%）。本章以"<b>字符设备 + 设备树</b>"为线索，串起整个驱动模型。</p>
+
+<h2 id="model">7.1 统一设备模型 (kobject + sysfs)</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 360" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="ar7" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">2.6+ 统一设备模型</text>
+
+<g><rect x="40" y="50" width="160" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+<text x="120" y="74" text-anchor="middle" fill="#ff7b29" font-weight="700">struct kobject</text>
+<text x="120" y="94" text-anchor="middle" fill="#8b949e" font-size="10">所有对象的基类</text>
+<text x="120" y="110" text-anchor="middle" fill="#8b949e" font-size="10">引用计数 + 树形组织</text></g>
+
+<g><rect x="320" y="50" width="160" height="80" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+<text x="400" y="74" text-anchor="middle" fill="#58a6ff" font-weight="700">struct device</text>
+<text x="400" y="94" text-anchor="middle" fill="#8b949e" font-size="10">具体设备实例</text>
+<text x="400" y="110" text-anchor="middle" fill="#8b949e" font-size="10">含 bus, driver, parent</text></g>
+
+<g><rect x="600" y="50" width="160" height="80" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+<text x="680" y="74" text-anchor="middle" fill="#56d364" font-weight="700">struct device_driver</text>
+<text x="680" y="94" text-anchor="middle" fill="#8b949e" font-size="10">驱动模块</text>
+<text x="680" y="110" text-anchor="middle" fill="#8b949e" font-size="10">probe/remove 回调</text></g>
+
+<line x1="200" y1="90" x2="320" y2="90" stroke="#8b949e" marker-end="url(#ar7)"/>
+<text x="260" y="82" font-size="10" fill="#8b949e" text-anchor="middle">嵌入</text>
+
+<line x1="480" y1="90" x2="600" y2="90" stroke="#8b949e" marker-end="url(#ar7)"/>
+<text x="540" y="82" font-size="10" fill="#8b949e" text-anchor="middle">匹配 (match)</text>
+
+<g><rect x="180" y="180" width="200" height="60" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+<text x="280" y="200" text-anchor="middle" fill="#bc8cff" font-weight="700">struct bus_type</text>
+<text x="280" y="220" text-anchor="middle" fill="#8b949e" font-size="10">PCI / USB / I²C / platform...</text></g>
+
+<g><rect x="420" y="180" width="200" height="60" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+<text x="520" y="200" text-anchor="middle" fill="#e3b341" font-weight="700">struct class</text>
+<text x="520" y="220" text-anchor="middle" fill="#8b949e" font-size="10">逻辑分组 (block, net, input...)</text></g>
+
+<line x1="280" y1="180" x2="350" y2="130" stroke="#8b949e" marker-end="url(#ar7)"/>
+<line x1="520" y1="180" x2="450" y2="130" stroke="#8b949e" marker-end="url(#ar7)"/>
+
+<g transform="translate(40, 280)">
+<rect width="720" height="60" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+<text x="20" y="22" fill="#56d364" font-weight="700">/sys 文件系统 — kobject 树的可视化</text>
+<text x="20" y="40" font-size="11" fill="#e6edf3">/sys/bus/pci/devices/...   /sys/class/net/eth0   /sys/devices/system/cpu/...</text>
+<text x="20" y="55" font-size="10" fill="#8b949e">每个 kobject = 一个目录；每个属性 = 一个文件 (读写触发 show/store 回调)</text>
+</g>
+</svg>
+</div>
+
+<h2 id="cdev">7.2 字符设备驱动完整模板</h2>
+
+<p>下面是一个<b>可编译运行</b>的最小字符设备驱动（为突出主线省略了返回值检查，正式代码必须逐步检查并在失败时回滚），实现 <code>/dev/hello</code> 读写：</p>
+
+<pre class="code-c"><span class="cm">/* hello_drv.c */</span>
+<span class="kw">#include</span> &lt;linux/module.h&gt;
+<span class="kw">#include</span> &lt;linux/fs.h&gt;
+<span class="kw">#include</span> &lt;linux/cdev.h&gt;
+<span class="kw">#include</span> &lt;linux/device.h&gt;
+<span class="kw">#include</span> &lt;linux/uaccess.h&gt;
+
+<span class="kw">static</span> dev_t devno;
+<span class="kw">static struct</span> cdev hello_cdev;
+<span class="kw">static struct</span> class *hello_class;
+<span class="kw">static char</span> buffer[<span class="num">128</span>] = <span class="str">"Hello from kernel!\n"</span>;
+<span class="kw">static</span> ssize_t buf_len = <span class="num">19</span>;
+
+<span class="kw">static</span> ssize_t <span class="fn">hello_read</span>(<span class="kw">struct</span> file *f, <span class="kw">char</span> __user *u, <span class="kw">size_t</span> n, loff_t *off) {
+    <span class="kw">if</span> (*off >= buf_len) <span class="kw">return</span> <span class="num">0</span>;          <span class="cm">// EOF</span>
+    n = min(n, (<span class="kw">size_t</span>)(buf_len - *off));
+    <span class="kw">if</span> (copy_to_user(u, buffer + *off, n)) <span class="kw">return</span> -EFAULT;
+    *off += n;
+    <span class="kw">return</span> n;
+}
+
+<span class="kw">static</span> ssize_t <span class="fn">hello_write</span>(<span class="kw">struct</span> file *f, <span class="kw">const char</span> __user *u, <span class="kw">size_t</span> n, loff_t *off) {
+    <span class="kw">if</span> (n > <span class="num">127</span>) n = <span class="num">127</span>;
+    <span class="kw">if</span> (copy_from_user(buffer, u, n)) <span class="kw">return</span> -EFAULT;
+    buffer[n] = <span class="str">'\0'</span>;
+    buf_len = n;
+    <span class="kw">return</span> n;
+}
+
+<span class="kw">static const struct</span> file_operations hello_fops = {
+    .owner = THIS_MODULE,
+    .read  = hello_read,
+    .write = hello_write,
+};
+
+<span class="kw">static int</span> <span class="fn">__init</span> <span class="fn">hello_init</span>(<span class="kw">void</span>) {
+    alloc_chrdev_region(&devno, <span class="num">0</span>, <span class="num">1</span>, <span class="str">"hello"</span>);
+    cdev_init(&hello_cdev, &hello_fops);
+    cdev_add(&hello_cdev, devno, <span class="num">1</span>);
+    hello_class = class_create(<span class="str">"hello_class"</span>);   <span class="cm">// 6.4+ 单参数；更早的内核写作 class_create(THIS_MODULE, "hello_class")</span>
+    device_create(hello_class, NULL, devno, NULL, <span class="str">"hello"</span>);  <span class="cm">// → /dev/hello</span>
+    pr_info(<span class="str">"hello driver loaded\n"</span>);
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+
+<span class="kw">static void</span> <span class="fn">__exit</span> <span class="fn">hello_exit</span>(<span class="kw">void</span>) {
+    device_destroy(hello_class, devno);
+    class_destroy(hello_class);
+    cdev_del(&hello_cdev);
+    unregister_chrdev_region(devno, <span class="num">1</span>);
+}
+
+module_init(hello_init);
+module_exit(hello_exit);
+MODULE_LICENSE(<span class="str">"GPL"</span>);
+</pre>
+
+<pre class="code-bash"><span class="cm"># Makefile</span>
+obj-m += hello_drv.o
+all:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+clean:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+<span class="cm"># 测试</span>
+$ make
+$ sudo insmod hello_drv.ko
+$ sudo cat /dev/hello         <span class="cm"># Hello from kernel!</span>
+$ echo <span class="str">"hi there"</span> | sudo tee /dev/hello
+$ sudo cat /dev/hello         <span class="cm"># hi there</span>
+$ sudo rmmod hello_drv
+</pre>
+
+<h2 id="platform">7.3 平台设备 + 设备树 (ARM 嵌入式必懂)</h2>
+
+<p>x86 设备主要靠 ACPI / PCI 枚举自动发现；ARM 等嵌入式平台通常没有 ACPI，靠<b>设备树 (DTS)</b> 描述硬件：</p>
+
+<pre class="code-c"><span class="cm">/* my_board.dts */</span>
+&i2c1 {
+    eeprom@<span class="num">50</span> {
+        compatible = <span class="str">"atmel,24c64"</span>;     <span class="cm">// 关键: 用于匹配驱动</span>
+        reg = &lt;<span class="num">0x50</span>&gt;;                    <span class="cm">// I2C 地址</span>
+        pagesize = &lt;<span class="num">32</span>&gt;;
+    };
+};
+
+<span class="cm">/* 驱动 (drivers/misc/eeprom/at24.c 简化) */</span>
+<span class="kw">static const struct</span> of_device_id at24_of_match[] = {
+    { .compatible = <span class="str">"atmel,24c64"</span> },
+    { },
+};
+
+<span class="kw">static struct</span> i2c_driver at24_driver = {     <span class="cm">// 节点挂在 I2C 总线下 → i2c_driver；SoC 片上外设才用 platform_driver</span>
+    .driver = {
+        .name = <span class="str">"at24"</span>,
+        .of_match_table = at24_of_match,
+    },
+    .probe = at24_probe,
+    .remove = at24_remove,
+};
+module_i2c_driver(at24_driver);
+</pre>
+
+<h2 id="irq">7.4 中断处理 — 上下半部</h2>
+
+<pre class="code-c"><span class="cm">/* 上半部 (top half) — 中断上下文，必须快! */</span>
+<span class="kw">static</span> irqreturn_t <span class="fn">my_irq_handler</span>(<span class="kw">int</span> irq, <span class="kw">void</span> *dev_id) {
+    <span class="kw">struct</span> my_dev *dev = dev_id;   <span class="cm">// 1. 取回注册时传入的设备指针，读寄存器确认是自己的中断</span>
+    <span class="kw">u32</span> status = readl(dev->regs + IRQ_STATUS);
+    <span class="kw">if</span> (!status) <span class="kw">return</span> IRQ_NONE;
+
+    <span class="cm">// 2. 清中断、关本中断</span>
+    writel(status, dev->regs + IRQ_CLEAR);
+
+    <span class="cm">// 3. 紧急工作做完就走 — 复杂处理交给下半部</span>
+    schedule_work(&dev->bh_work);
+
+    <span class="kw">return</span> IRQ_HANDLED;
+}
+
+<span class="cm">/* 下半部 (bottom half) — 进程上下文，可 sleep */</span>
+<span class="kw">static void</span> <span class="fn">my_bh_work</span>(<span class="kw">struct</span> work_struct *w) {
+    <span class="kw">struct</span> my_dev *dev = container_of(w, <span class="kw">struct</span> my_dev, bh_work);
+    <span class="cm">// 慢速操作: 读 DMA 数据、唤醒等待进程...</span>
+    wake_up_interruptible(&dev->wait);
+}
+
+<span class="cm">/* 注册中断 */</span>
+request_irq(irq, my_irq_handler, IRQF_SHARED, <span class="str">"my-dev"</span>, dev);
+</pre>
+
+<h3>下半部的四种方式</h3>
+<table>
+<tr><th>机制</th><th>上下文</th><th>能 sleep?</th><th>使用场景</th></tr>
+<tr><td>softirq</td><td>软中断上下文</td><td>否</td><td>网络收发、定时器 (固定 10 种)</td></tr>
+<tr><td>tasklet</td><td>软中断上下文</td><td>否</td><td>逐渐被 workqueue 取代</td></tr>
+<tr><td>workqueue</td><td><b>进程上下文</b></td><td><b>是</b></td><td>需要睡眠/分配的延后处理（首选）</td></tr>
+<tr><td>threaded IRQ</td><td>专用内核线程</td><td>是</td><td>低延迟 + 需睡眠 (RT 内核必备)</td></tr>
+</table>
+
+<h2 id="dma">7.5 DMA 简述</h2>
+
+<pre class="code-c"><span class="cm">/* DMA 一致性内存分配 — 内核 + 设备都能访问 */</span>
+dma_addr_t dma_handle;
+<span class="kw">void</span> *cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
+
+<span class="cm">/* 流式 DMA — 单向，更省事但要 sync */</span>
+dma_handle = dma_map_single(dev, buffer, len, DMA_TO_DEVICE);
+<span class="cm">// 把 dma_handle 给硬件 → 硬件 DMA 读完</span>
+dma_unmap_single(dev, dma_handle, len, DMA_TO_DEVICE);
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../06-系统调用/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../08-网络子系统/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/index.html" "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/index.html"
new file mode 100644
index 0000000..db72e14
--- /dev/null
+++ "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/index.html"
@@ -0,0 +1,253 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>08 · 网络子系统 — sk_buff、TCP 状态机、XDP — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html" class="active">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">08</span>网络子系统 — sk_buff、TCP 状态机、XDP</h1>
+
+
+<p>网络栈是 Linux 内核最复杂的子系统，但只要抓住三条主线就能驾驭：<b>sk_buff (数据载体) · TCP 状态机 (协议) · netfilter (扩展点)</b>。</p>
+
+<h2 id="skb">8.1 sk_buff — 一切数据包的载体</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 360" font-family="-apple-system,sans-serif" font-size="11">
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">struct sk_buff 内存布局</text>
+
+<g transform="translate(40, 60)">
+<rect width="270" height="280" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+<text x="135" y="22" text-anchor="middle" fill="#58a6ff" font-weight="700">struct sk_buff (元数据)</text>
+<line x1="0" y1="34" x2="270" y2="34" stroke="#30363d"/>
+<text x="14" y="54" font-family="monospace" fill="#e6edf3">struct sk_buff *next, *prev;</text>
+<text x="14" y="72" font-family="monospace" fill="#e6edf3">struct sock *sk;          (所属 socket)</text>
+<text x="14" y="90" font-family="monospace" fill="#e6edf3">struct net_device *dev;   (网卡)</text>
+<text x="14" y="108" font-family="monospace" fill="#e6edf3">__u32 len, data_len;</text>
+<text x="14" y="126" font-family="monospace" fill="#e6edf3">__u16 mac_len;</text>
+<text x="14" y="144" font-family="monospace" fill="#e6edf3">__u32 hash;</text>
+<text x="14" y="162" font-family="monospace" fill="#e6edf3">char cb[48];   (如 tcp_skb_cb)</text>
+<line x1="0" y1="178" x2="270" y2="178" stroke="#30363d"/>
+<text x="14" y="200" fill="#56d364" font-weight="700">指向数据区的指针：</text>
+<text x="14" y="220" font-family="monospace" fill="#56d364">unsigned char *head;</text>
+<text x="14" y="238" font-family="monospace" fill="#56d364">unsigned char *data;     ← 当前协议头</text>
+<text x="14" y="256" font-family="monospace" fill="#56d364">unsigned char *tail;</text>
+<text x="14" y="274" font-family="monospace" fill="#56d364">unsigned char *end;</text>
+</g>
+
+<g transform="translate(360, 60)">
+<rect width="400" height="280" rx="6" fill="#0f1419" stroke="#ff7b29" stroke-width="1.5"/>
+<text x="200" y="22" text-anchor="middle" fill="#ff7b29" font-weight="700">数据缓冲区</text>
+<line x1="0" y1="34" x2="400" y2="34" stroke="#30363d"/>
+
+<rect x="20" y="50" width="360" height="24" fill="#1a2028" stroke="#bc8cff"/>
+<text x="200" y="66" text-anchor="middle" fill="#bc8cff">headroom (预留, 给下层加头)</text>
+
+<rect x="20" y="80" width="360" height="24" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+<text x="200" y="96" text-anchor="middle" fill="#56d364" font-weight="700">以太网帧头 (14B)</text>
+
+<rect x="20" y="110" width="360" height="24" fill="#1a2028" stroke="#56d364"/>
+<text x="200" y="126" text-anchor="middle" fill="#56d364">IP 头 (20B)</text>
+
+<rect x="20" y="140" width="360" height="24" fill="#1a2028" stroke="#56d364"/>
+<text x="200" y="156" text-anchor="middle" fill="#56d364">TCP 头 (20+ B)</text>
+
+<rect x="20" y="170" width="360" height="80" fill="#1a2028" stroke="#e3b341"/>
+<text x="200" y="194" text-anchor="middle" fill="#e3b341" font-weight="700">payload (用户数据)</text>
+<text x="200" y="214" text-anchor="middle" fill="#8b949e" font-size="10">data 指针随协议处理</text>
+<text x="200" y="230" text-anchor="middle" fill="#8b949e" font-size="10">用 skb_pull/push 调整</text>
+
+<rect x="20" y="256" width="360" height="20" fill="#1a2028" stroke="#bc8cff"/>
+<text x="200" y="270" text-anchor="middle" fill="#bc8cff">tailroom (预留)</text>
+</g>
+</svg>
+</div>
+
+<div class="callout deep">
+<div class="label">为什么 sk_buff 这样设计？</div>
+<p>包从网卡上来时，最外层是<b>以太网帧</b>。每经过一层（链路 → IP → TCP），就要"剥掉"一个头。
+若每层都拷贝一份数据 = 灾难。<b>sk_buff 通过移动 data 指针实现"零拷贝剥头"</b>，
+<code>skb_pull</code> 让 data 向高地址移动（<code>data += len</code>，接收时剥头），<code>skb_push</code> 让 data 向低地址移动（<code>data -= len</code>，发送时占用 headroom 加头）。</p>
+</div>
+
+<h2 id="rxpath">8.2 收包路径</h2>
+
+<pre class="code-c"><span class="cm">/* 1. 网卡硬中断 → 入 backlog 或 NAPI */</span>
+ISR (drivers/net/.../xxx.c):
+  napi_schedule(&napi);     <span class="cm">// 触发 NET_RX_SOFTIRQ</span>
+
+<span class="cm">/* 2. 软中断 net_rx_action() 调 NAPI poll */</span>
+xxx_poll(napi, budget):
+  <span class="kw">while</span> (有包 && budget--) {
+      skb = napi_alloc_skb(...);
+      <span class="cm">// DMA 已把数据放在 skb->data</span>
+      <span class="fn">netif_receive_skb</span>(skb);
+  }
+
+<span class="cm">/* 3. 通用 RX 入口 — 协议分发 */</span>
+__netif_receive_skb_core(skb):
+  <span class="cm">// 调用 XDP / tcpdump / vlan / netfilter ingress 等</span>
+  pt = ptype_base[ntohs(skb->protocol)];
+  pt->func(skb, ...);   <span class="cm">// → ip_rcv / arp_rcv / ...</span>
+
+<span class="cm">/* 4. IP 层 */</span>
+ip_rcv → ip_rcv_finish → ip_local_deliver → ip_local_deliver_finish:
+  ipprot = inet_protos[ip_hdr(skb)->protocol];
+  ipprot->handler(skb);     <span class="cm">// → tcp_v4_rcv / udp_rcv / icmp_rcv</span>
+
+<span class="cm">/* 5. TCP 层 */</span>
+tcp_v4_rcv:
+  sk = __inet_lookup_skb(...);   <span class="cm">// 找到 socket</span>
+  tcp_v4_do_rcv(sk, skb);
+    <span class="kw">if</span> (sk->sk_state == TCP_ESTABLISHED)
+        tcp_rcv_established(...)  <span class="cm">// → 入 sk->sk_receive_queue</span>
+    <span class="kw">else</span>
+        tcp_rcv_state_process(...)  <span class="cm">// 处理握手/挥手</span>
+
+<span class="cm">/* 6. 用户态 read() 系统调用从 sk_receive_queue 取数据 */</span>
+</pre>
+
+<h2 id="tcp">8.3 TCP 三次握手 — 内核视角</h2>
+
+<div class="diagram">
+<img src="../assets/diagrams/tcp-handshake.svg" alt="TCP 三次握手">
+<div class="caption">客户端 / 服务端 状态机变化与对应内核代码路径</div>
+</div>
+
+<h3>半连接队列 vs 全连接队列</h3>
+
+<table>
+<tr><th>队列</th><th>状态</th><th>大小调节</th><th>溢出后果</th></tr>
+<tr><td>半连接 (syns_q)</td><td>SYN_RECV</td><td><code>tcp_max_syn_backlog</code></td><td>丢 SYN (SYN flood)</td></tr>
+<tr><td>全连接 (accept_q)</td><td>ESTABLISHED 待 accept</td><td>min(listen() 的 backlog, <code>somaxconn</code>)</td><td>丢弃握手的最后一个 ACK，服务端重发 SYN+ACK（<code>tcp_abort_on_overflow=1</code> 时回 RST）</td></tr>
+</table>
+
+<pre class="code-bash"><span class="cm"># 查看全连接队列溢出</span>
+ss -lnt | head
+nstat -az TcpExtListenOverflows
+nstat -az TcpExtListenDrops
+</pre>
+
+<h2 id="netfilter">8.4 netfilter — iptables 的内核基座</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 280" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="arn" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/></marker></defs>
+
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">netfilter 5 个 hook 点</text>
+
+<rect x="20" y="50" width="80" height="40" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+<text x="60" y="75" text-anchor="middle" fill="#58a6ff" font-weight="700">网卡</text>
+<line x1="100" y1="70" x2="130" y2="70" stroke="#ff7b29" marker-end="url(#arn)"/>
+
+<rect x="130" y="50" width="120" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="190" y="68" text-anchor="middle" fill="#ff7b29" font-weight="700">PREROUTING</text>
+<text x="190" y="84" text-anchor="middle" fill="#8b949e" font-size="9">入口 (nat/mangle/raw)</text>
+<line x1="250" y1="70" x2="280" y2="70" stroke="#ff7b29" marker-end="url(#arn)"/>
+
+<rect x="280" y="50" width="100" height="40" rx="4" fill="#1a2028" stroke="#bc8cff"/>
+<text x="330" y="75" text-anchor="middle" fill="#bc8cff">路由判决</text>
+
+<line x1="330" y1="90" x2="200" y2="130" stroke="#ff7b29" marker-end="url(#arn)"/>
+<line x1="330" y1="90" x2="500" y2="130" stroke="#ff7b29" marker-end="url(#arn)"/>
+
+<rect x="120" y="135" width="140" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="190" y="153" text-anchor="middle" fill="#ff7b29" font-weight="700">INPUT</text>
+<text x="190" y="169" text-anchor="middle" fill="#8b949e" font-size="9">给本机</text>
+
+<rect x="430" y="135" width="140" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="500" y="153" text-anchor="middle" fill="#ff7b29" font-weight="700">FORWARD</text>
+<text x="500" y="169" text-anchor="middle" fill="#8b949e" font-size="9">转发别人</text>
+
+<line x1="190" y1="175" x2="190" y2="215" stroke="#ff7b29" marker-end="url(#arn)"/>
+<rect x="120" y="215" width="140" height="40" rx="4" fill="#1a2028" stroke="#56d364"/>
+<text x="190" y="240" text-anchor="middle" fill="#56d364">本机进程</text>
+
+<line x1="190" y1="255" x2="430" y2="215" stroke="#ff7b29" marker-end="url(#arn)"/>
+<rect x="430" y="215" width="140" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="500" y="240" text-anchor="middle" fill="#ff7b29" font-weight="700">OUTPUT</text>
+
+<rect x="600" y="135" width="140" height="40" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="670" y="153" text-anchor="middle" fill="#ff7b29" font-weight="700">POSTROUTING</text>
+<text x="670" y="169" text-anchor="middle" fill="#8b949e" font-size="9">出口 (nat/SNAT)</text>
+<line x1="570" y1="155" x2="600" y2="155" stroke="#ff7b29" marker-end="url(#arn)"/>
+<line x1="570" y1="225" x2="600" y2="170" stroke="#ff7b29" marker-end="url(#arn)"/>
+</svg>
+</div>
+
+<h2 id="xdp">8.5 XDP — 极致性能的包处理</h2>
+
+<p>XDP (eXpress Data Path) 在<b>网卡驱动收包的最早期</b>执行 eBPF 程序，性能远超 iptables：</p>
+
+<table>
+<tr><th>层级</th><th>每核 PPS (大致)</th><th>典型用途</th></tr>
+<tr><td>用户态 (DPDK 除外)</td><td>~1M</td><td>普通应用</td></tr>
+<tr><td>iptables</td><td>~5M</td><td>防火墙</td></tr>
+<tr><td>tc-bpf</td><td>~10M</td><td>流量控制</td></tr>
+<tr><td><b>XDP</b></td><td><b>20~50M+</b></td><td>DDoS 缓解、L4 负载均衡 (Katran)</td></tr>
+</table>
+
+<pre class="code-c"><span class="cm">/* xdp_drop_bad.bpf.c */</span>
+SEC(<span class="str">"xdp"</span>)
+<span class="kw">int</span> <span class="fn">xdp_drop_bad</span>(<span class="kw">struct</span> xdp_md *ctx) {
+    <span class="kw">void</span> *data     = (<span class="kw">void</span> *)(<span class="kw">long</span>)ctx->data;
+    <span class="kw">void</span> *data_end = (<span class="kw">void</span> *)(<span class="kw">long</span>)ctx->data_end;
+    <span class="kw">struct</span> ethhdr *eth = data;
+    <span class="kw">if</span> ((<span class="kw">void</span>*)(eth+<span class="num">1</span>) > data_end) <span class="kw">return</span> XDP_PASS;
+    <span class="kw">if</span> (eth->h_proto == bpf_htons(ETH_P_IP)) {
+        <span class="kw">struct</span> iphdr *ip = (<span class="kw">void</span>*)(eth+<span class="num">1</span>);
+        <span class="kw">if</span> ((<span class="kw">void</span>*)(ip+<span class="num">1</span>) > data_end) <span class="kw">return</span> XDP_PASS;
+        <span class="kw">if</span> (ip->protocol == IPPROTO_ICMP)
+            <span class="kw">return</span> XDP_DROP;   <span class="cm">// 丢所有 ICMP</span>
+    }
+    <span class="kw">return</span> XDP_PASS;
+}
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../07-设备驱动/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../09-同步机制/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/09-\345\220\214\346\255\245\346\234\272\345\210\266/index.html" "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/index.html"
new file mode 100644
index 0000000..c22b179
--- /dev/null
+++ "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/index.html"
@@ -0,0 +1,254 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>09 · 同步机制 — 自旋锁、Mutex、RCU、Memory Barriers — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html" class="active">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">09</span>同步机制 — 自旋锁、Mutex、RCU、Memory Barriers</h1>
+
+
+<p>SMP 时代，"并发即正义，并发即灾难"。本章覆盖从<b>原子操作</b>到 <b>RCU</b> 的完整同步谱系，并教你选锁。</p>
+
+<h2 id="why">9.1 为什么需要同步</h2>
+
+<div class="callout danger">
+<div class="label">三种"看起来好但其实坏"的代码</div>
+<ol>
+<li><b>没保护的全局变量</b>：<code>counter++</code> 实际是 read-modify-write 三步，多核同时执行结果错乱</li>
+<li><b>错误的双重检查</b>：<code>if (!ptr) ptr = alloc();</code> 多核可能 alloc 多次</li>
+<li><b>遗漏 barrier</b>：编译器 / CPU 乱序 → 用户看到"不可能"的状态</li>
+</ol>
+</div>
+
+<h2 id="atomic">9.2 原子操作 — 基石</h2>
+
+<pre class="code-c"><span class="kw">#include</span> &lt;linux/atomic.h&gt;
+
+atomic_t v = ATOMIC_INIT(<span class="num">0</span>);
+
+atomic_inc(&v);              <span class="cm">// v++</span>
+atomic_dec(&v);              <span class="cm">// v--</span>
+atomic_add(<span class="num">5</span>, &v);          <span class="cm">// v += 5</span>
+atomic_read(&v);             <span class="cm">// 读取 (READ_ONCE)</span>
+atomic_set(&v, <span class="num">0</span>);          <span class="cm">// 写入 (WRITE_ONCE)</span>
+
+<span class="cm">// 返回旧值/新值的版本</span>
+<span class="kw">int</span> old = atomic_xchg(&v, <span class="num">10</span>);
+<span class="kw">int</span> old = atomic_cmpxchg(&v, expect, <span class="num">10</span>);
+
+<span class="cm">// 检查零 (常用于引用计数)</span>
+<span class="kw">if</span> (atomic_dec_and_test(&v)) {
+    <span class="cm">// v 减 1 后等于 0 → 安全释放</span>
+    kfree(obj);
+}
+</pre>
+
+<p>原子操作在 x86 上对应 <code>LOCK</code> 前缀指令（如 <code>lock incl</code>），保证 cache line 独占。</p>
+
+<h2 id="spinlock">9.3 自旋锁 (spinlock) — 短临界区</h2>
+
+<pre class="code-c">DEFINE_SPINLOCK(my_lock);
+
+<span class="cm">/* 不能在持锁时 sleep / 调度 — 中断/软中断仍可发生 */</span>
+spin_lock(&my_lock);
+... critical section ...
+spin_unlock(&my_lock);
+
+<span class="cm">/* 中断处理也用此锁? 必须用 _irqsave 版本 */</span>
+<span class="kw">unsigned long</span> flags;
+spin_lock_irqsave(&my_lock, flags);
+...
+spin_unlock_irqrestore(&my_lock, flags);
+</pre>
+
+<div class="callout warn">
+<div class="label">为什么自旋锁的临界区不能 sleep</div>
+<p>持自旋锁时，本 CPU <b>禁用抢占</b>。若 sleep → 另一进程跑上来 → 也试图拿锁 → 等不到永远不让出 → <b>死锁</b>。
+所以自旋锁的代码内：不能调 <code>schedule()</code>，不能 <code>kmalloc(GFP_KERNEL)</code>（可能回收 → 可能 sleep），不能 <code>copy_to_user</code>（可能缺页 → 可能 sleep）。</p>
+</div>
+
+<h2 id="mutex">9.4 mutex / semaphore — 长临界区</h2>
+
+<pre class="code-c">DEFINE_MUTEX(big_lock);
+
+mutex_lock(&big_lock);       <span class="cm">// 拿不到 → 睡眠</span>
+mutex_lock_interruptible(&big_lock);  <span class="cm">// 可被信号中断</span>
+mutex_trylock(&big_lock);    <span class="cm">// 立刻返回，不睡</span>
+... critical section (允许 sleep!) ...
+mutex_unlock(&big_lock);
+</pre>
+
+<h2 id="rwlock">9.5 读写锁与 seqlock</h2>
+
+<pre class="code-c"><span class="cm">/* rwlock — 多读单写 (适合读多写少) */</span>
+DEFINE_RWLOCK(my_rwlock);
+read_lock(&my_rwlock);   ... read_unlock(&my_rwlock);
+write_lock(&my_rwlock);  ... write_unlock(&my_rwlock);
+
+<span class="cm">/* seqlock — 写优先，读可重试 (jiffies/xtime 用这个) */</span>
+seqlock_t timer_lock;
+<span class="kw">unsigned</span> seq;
+<span class="kw">do</span> {
+    seq = read_seqbegin(&timer_lock);
+    ... 读多个字段 ...
+} <span class="kw">while</span> (read_seqretry(&timer_lock, seq));  <span class="cm">// 写者来过 → 重读</span>
+</pre>
+
+<h2 id="rcu">9.6 RCU — Read-Copy-Update（内核同步皇冠）</h2>
+
+<div class="callout deep">
+<div class="label">RCU 一句话原理</div>
+<p>读者<b>完全无锁、零开销</b>。写者<b>不修改</b>共享数据，而是<b>复制一份</b>、修改副本、原子地<b>替换指针</b>。
+旧版本数据要等所有<b>"当时正在读"的读者都退出临界区</b>（即经过一个 grace period）后再释放。</p>
+</div>
+
+<pre class="code-c"><span class="cm">/* 读 — 极度便宜 */</span>
+rcu_read_lock();      <span class="cm">// 只是禁用抢占,几乎零开销</span>
+<span class="kw">struct</span> foo *p = rcu_dereference(global_ptr);
+<span class="kw">if</span> (p) use(p->data);
+rcu_read_unlock();
+
+<span class="cm">/* 写 — 复制-修改-替换 */</span>
+<span class="kw">struct</span> foo *new = kmalloc(<span class="kw">sizeof</span>(*new), GFP_KERNEL);
+*new = *old;          <span class="cm">// old = 更新前 global_ptr 指向的旧版本 (写者之间需另行加锁互斥)</span>
+new->data = <span class="num">42</span>;
+rcu_assign_pointer(global_ptr, new);   <span class="cm">// 原子替换</span>
+synchronize_rcu();    <span class="cm">// 等所有读者退出 (老进程完成 read)</span>
+kfree(old);           <span class="cm">// 安全释放</span>
+
+<span class="cm">/* 或异步版本 (推荐用于热路径) */</span>
+call_rcu(&old->rcu_head, free_callback);
+</pre>
+
+<p><b>RCU 用在哪：</b>路由表、cgroup 链表、dentry 缓存、内核模块列表、IDR、percpu 数据……几乎所有"读多写少"场景。</p>
+
+<h2 id="percpu">9.7 Per-CPU 变量 — 彻底避免共享</h2>
+
+<pre class="code-c">DEFINE_PER_CPU(<span class="kw">long</span>, my_counter);     <span class="cm">// 每个 CPU 一份独立的 my_counter</span>
+
+<span class="cm">/* 读写本 CPU 的变量: __this_cpu_*() 需调用者先禁抢占; this_cpu_*() 自带保护, 可直接调用 */</span>
+preempt_disable();
+__this_cpu_inc(my_counter);
+preempt_enable();
+
+<span class="cm">/* 读所有 CPU 累加 */</span>
+<span class="kw">long</span> total = <span class="num">0</span>;
+<span class="kw">int</span> cpu;
+for_each_possible_cpu(cpu)
+    total += per_cpu(my_counter, cpu);
+</pre>
+
+<h2 id="barrier">9.8 内存屏障 (Memory Barriers)</h2>
+
+<pre class="code-c"><span class="cm">/* 编译器屏障 — 仅禁止编译器重排 */</span>
+barrier();
+
+<span class="cm">/* CPU 屏障 (x86 是强序模型 TSO，前两个只退化为编译器屏障；arm/ppc 上有真指令) */</span>
+smp_rmb();           <span class="cm">// 读屏障</span>
+smp_wmb();           <span class="cm">// 写屏障</span>
+smp_mb();            <span class="cm">// 全屏障</span>
+
+<span class="cm">/* 典型场景: 单生产者单消费者环形队列 */</span>
+<span class="cm">// 生产者</span>
+ring->buf[head] = data;
+smp_wmb();           <span class="cm">// 确保数据先写入,再更新 head</span>
+WRITE_ONCE(ring->head, head+<span class="num">1</span>);
+
+<span class="cm">// 消费者</span>
+<span class="kw">while</span> (READ_ONCE(ring->tail) == READ_ONCE(ring->head)) <span class="kw">continue</span>;
+smp_rmb();           <span class="cm">// 确保读到 head 后再读数据</span>
+data = ring->buf[tail];
+</pre>
+
+<h2 id="futex">9.9 futex — 用户态锁的内核基础</h2>
+
+<p>pthread_mutex 性能秘诀：<b>无竞争时纯用户态原子操作，有竞争时才陷入内核 futex_wait</b>。</p>
+
+<pre class="code-c"><span class="cm">/* 简化的伪代码 */</span>
+mutex_lock(m):
+    <span class="kw">if</span> (atomic_cmpxchg(&m->state, <span class="num">0</span>, <span class="num">1</span>) == <span class="num">0</span>)
+        <span class="kw">return</span>;            <span class="cm">// 快路径: 纯用户态!</span>
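+    <span class="kw">while</span> (atomic_xchg(&m->state, <span class="num">2</span>) != <span class="num">0</span>) futex(&m->state, FUTEX_WAIT, <span class="num">2</span>, ...);  <span class="cm">// 慢路径: 置 2 (已锁且有等待者) 再进内核睡, 醒来重试</span>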
+
+mutex_unlock(m):
+    <span class="kw">if</span> (atomic_xchg(&m->state, <span class="num">0</span>) == <span class="num">1</span>)
+        <span class="kw">return</span>;
+    futex(&m->state, FUTEX_WAKE, <span class="num">1</span>, ...);  <span class="cm">// 唤醒一个</span>
+</pre>
+
+<h2 id="choose">9.10 选锁速查表</h2>
+
+<table>
+<tr><th>场景</th><th>推荐</th></tr>
+<tr><td>临界区 &lt; 100ns，不睡</td><td>spinlock</td></tr>
+<tr><td>会 sleep / 持锁久</td><td>mutex</td></tr>
+<tr><td>读极多写极少</td><td>RCU</td></tr>
+<tr><td>读多写少但写不能等</td><td>rwlock / seqlock</td></tr>
+<tr><td>计数 / 统计</td><td>atomic_t 或 percpu</td></tr>
+<tr><td>等待事件</td><td>wait_queue + completion</td></tr>
+<tr><td>多个锁需要按序</td><td>mutex_lock_nested + lockdep</td></tr>
+</table>
+
+<h2 id="lockdep">9.11 lockdep — 死锁检测器</h2>
+
+<p>CONFIG_PROVE_LOCKING 开启后，内核运行时记录每把锁的获取顺序。一旦发现<b>潜在</b>死锁（哪怕这次没真死），立刻 dmesg 大字报：</p>
+
+<pre class="code-bash">[ <span class="num">1234.5</span>] ======================================================
+[ <span class="num">1234.5</span>] WARNING: possible recursive locking detected
+[ <span class="num">1234.5</span>] ...
+[ <span class="num">1234.5</span>] Possible unsafe locking scenario:
+[ <span class="num">1234.5</span>]   CPU0
+[ <span class="num">1234.5</span>]   ----
+[ <span class="num">1234.5</span>]   lock(&inode->i_mutex);
+[ <span class="num">1234.5</span>]   lock(&inode->i_mutex);
+[ <span class="num">1234.5</span>] *** DEADLOCK ***
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../08-网络子系统/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../10-CFS调度器/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>

From c1841dcf81c693289508198d64b1ee8d09c9d6cd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 09:56:48 +0000
Subject: [PATCH 05/10] feat: add 6 expert chapters (10-15), update README to
 reference HTML site

Agent-Logs-Url: https://github.com/YYCB/how_to_learn_linux/sessions/1cb19491-1fd8-4418-9a44-f972d8161633

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../index.html"                               | 246 +++++++++++++++
 .../index.html"                               | 264 ++++++++++++++++
 .../index.html"                               | 245 +++++++++++++++
 .../index.html"                               | 204 +++++++++++++
 .../index.html"                               | 259 ++++++++++++++++
 .../index.html"                               | 289 ++++++++++++++++++
 README.md                                     |  53 +++-
 7 files changed, 1549 insertions(+), 11 deletions(-)
 create mode 100644 "10-CFS\350\260\203\345\272\246\345\231\250/index.html"
 create mode 100644 "11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/index.html"
 create mode 100644 "12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/index.html"
 create mode 100644 "13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/index.html"
 create mode 100644 "14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
 create mode 100644 "15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"

diff --git "a/10-CFS\350\260\203\345\272\246\345\231\250/index.html" "b/10-CFS\350\260\203\345\272\246\345\231\250/index.html"
new file mode 100644
index 0000000..d52c4a4
--- /dev/null
+++ "b/10-CFS\350\260\203\345\272\246\345\231\250/index.html"
@@ -0,0 +1,246 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>10 · CFS 完全公平调度器深入 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html" class="active">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">10</span>CFS 完全公平调度器深入</h1>
+
+
+<p>CFS (Completely Fair Scheduler) 由 Ingo Molnar 在 2.6.23 引入，<b>取代了 O(1) 调度器</b>，此后一直是 Linux 默认调度器，直到 6.6 起被 EEVDF 接替（vruntime、红黑树等基础设施仍沿用）。本章逐层揭开它的"公平"如何实现。</p>
+
+<h2 id="idea">10.1 核心思想 — 公平 = 等比例 CPU 时间</h2>
+
+<div class="callout deep">
+<div class="label">CFS 一句话原理</div>
+<p>设想一个"理想的多任务 CPU"，能<b>同时</b>运行所有进程，每个进程得到 <code>1/n</code> 的 CPU。
+CFS 用 <b>vruntime</b> 模拟这个理想：<b>每个进程的 vruntime 应该完全相等</b>，
+谁的 vruntime 最小，就调度谁。</p>
+</div>
+
+<h2 id="vruntime">10.2 vruntime 是什么</h2>
+
+<div class="diagram">
+<img src="../assets/diagrams/cfs-rbtree.svg" alt="CFS 红黑树">
+<div class="caption">CFS 红黑树按 vruntime 排序，每次取最左节点</div>
+</div>
+
+<pre class="code-c"><span class="cm">/* include/linux/sched.h (节选简化) */</span>
+<span class="kw">struct</span> sched_entity {
+    <span class="kw">struct</span> load_weight load;       <span class="cm">// 由 nice 值决定</span>
+    <span class="kw">struct</span> rb_node run_node;        <span class="cm">// 红黑树节点</span>
+    <span class="kw">u64</span> exec_start;                <span class="cm">// 上次开始运行的时刻</span>
+    <span class="kw">u64</span> sum_exec_runtime;          <span class="cm">// 累计实际运行时间</span>
+    <span class="kw">u64</span> vruntime;                  <span class="cm">// ★ 虚拟运行时间</span>
+    <span class="kw">u64</span> prev_sum_exec_runtime;
+    <span class="kw">struct</span> cfs_rq *cfs_rq;
+};
+
+<span class="cm">/* kernel/sched/fair.c — 每次 tick (周期 1/HZ, HZ=1000 时为 1ms) 及入队/出队时都会更新 vruntime */</span>
+<span class="kw">static void</span> <span class="fn">update_curr</span>(<span class="kw">struct</span> cfs_rq *cfs_rq) {
+    <span class="kw">struct</span> sched_entity *curr = cfs_rq->curr;
+    <span class="kw">u64</span> now = rq_clock_task(rq_of(cfs_rq));
+    <span class="kw">u64</span> delta_exec = now - curr->exec_start;
+    
+    curr->exec_start = now;
+    curr->sum_exec_runtime += delta_exec;
+    
+    <span class="cm">// 关键公式!</span>
+    curr->vruntime += <span class="fn">calc_delta_fair</span>(delta_exec, curr);
+    update_min_vruntime(cfs_rq);
+}
+
+<span class="cm">/* calc_delta_fair: 实际时间 → 虚拟时间 */</span>
+<span class="kw">static inline u64</span> <span class="fn">calc_delta_fair</span>(<span class="kw">u64</span> delta, <span class="kw">struct</span> sched_entity *se) {
+    <span class="kw">if</span> (unlikely(se->load.weight != NICE_0_LOAD))
+        delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+    <span class="kw">return</span> delta;
+}
+</pre>
+
+<div class="callout tip">
+<div class="label">公式直观理解</div>
+<pre><b>delta_vruntime = delta_real × (NICE_0_LOAD / weight)</b></pre>
+<ul>
+<li>nice = 0 → weight = 1024 (NICE_0_LOAD)，<b>实时间 = 虚时间</b></li>
+<li>nice = -5 → weight = 3121，虚时间增长慢 → 多被选中 (高优先级)</li>
+<li>nice = +5 → weight = 335，虚时间增长快 → 少被选中 (低优先级)</li>
+</ul>
+<p>nice 每差 1，CPU 份额比例约为 <b>1.25:1</b>（标准定义）。</p>
+</div>
+
+<h2 id="rbtree">10.3 红黑树 — 为什么用它</h2>
+
+<table>
+<tr><th>数据结构</th><th>插入</th><th>删除</th><th>取最小</th><th>结论</th></tr>
+<tr><td>链表</td><td>O(1) 或 O(n)</td><td>O(1) 或 O(n)</td><td>O(n)</td><td>太慢</td></tr>
+<tr><td>最小堆</td><td>O(log n)</td><td>O(log n)</td><td>O(1)</td><td>但更新 vruntime 不便</td></tr>
+<tr><td><b>红黑树</b></td><td><b>O(log n)</b></td><td><b>O(log n)</b></td><td><b>O(1) (缓存 leftmost)</b></td><td><b>赢家</b></td></tr>
+</table>
+
+<pre class="code-c"><span class="cm">/* CFS 每个 CPU 一个 cfs_rq */</span>
+<span class="kw">struct</span> cfs_rq {
+    <span class="kw">struct</span> load_weight load;
+    <span class="kw">unsigned int</span> nr_running;
+    <span class="kw">u64</span> min_vruntime;             <span class="cm">// 队列里最小 vruntime, 用于新进程初始化</span>
+    <span class="kw">struct</span> rb_root_cached tasks_timeline;  <span class="cm">// 红黑树 + leftmost 缓存</span>
+    <span class="kw">struct</span> sched_entity *curr;
+};
+
+<span class="cm">/* pick_next_task_fair — 选下一个 */</span>
+<span class="kw">struct</span> task_struct *<span class="fn">pick_next_task_fair</span>(<span class="kw">struct</span> rq *rq, ...) {
+    <span class="kw">struct</span> cfs_rq *cfs_rq = &rq->cfs;
+    <span class="kw">struct</span> sched_entity *se;
+    
+    <span class="cm">// 取红黑树最左节点 — O(1)!</span>
+    se = pick_next_entity(cfs_rq, NULL);
+    <span class="kw">return</span> task_of(se);
+}
+</pre>
+
+<h2 id="sched_class">10.4 调度类 — CFS 不是全部</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 240" font-family="-apple-system,sans-serif" font-size="11">
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">5 个调度类按优先级排序</text>
+<g><rect x="40" y="50" width="140" height="80" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+<text x="110" y="74" text-anchor="middle" fill="#f85149" font-weight="700">stop_sched_class</text>
+<text x="110" y="92" text-anchor="middle" fill="#8b949e" font-size="10">最高优先级</text>
+<text x="110" y="108" text-anchor="middle" fill="#8b949e" font-size="10">migration/hot-unplug</text></g>
+
+<g><rect x="200" y="50" width="140" height="80" rx="6" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+<text x="270" y="74" text-anchor="middle" fill="#ff7b29" font-weight="700">dl_sched_class</text>
+<text x="270" y="92" text-anchor="middle" fill="#8b949e" font-size="10">Deadline (SCHED_DEADLINE)</text>
+<text x="270" y="108" text-anchor="middle" fill="#8b949e" font-size="10">EDF 算法</text></g>
+
+<g><rect x="360" y="50" width="140" height="80" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+<text x="430" y="74" text-anchor="middle" fill="#e3b341" font-weight="700">rt_sched_class</text>
+<text x="430" y="92" text-anchor="middle" fill="#8b949e" font-size="10">实时 (FIFO/RR)</text>
+<text x="430" y="108" text-anchor="middle" fill="#8b949e" font-size="10">prio 0~99</text></g>
+
+<g><rect x="520" y="50" width="140" height="80" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="3"/>
+<text x="590" y="74" text-anchor="middle" fill="#56d364" font-weight="700">fair_sched_class (CFS)</text>
+<text x="590" y="92" text-anchor="middle" fill="#8b949e" font-size="10">★ 默认</text>
+<text x="590" y="108" text-anchor="middle" fill="#8b949e" font-size="10">99% 任务用这个</text></g>
+
+<g><rect x="680" y="50" width="100" height="80" rx="6" fill="#1a2028" stroke="#8b949e" stroke-width="2"/>
+<text x="730" y="74" text-anchor="middle" fill="#8b949e" font-weight="700">idle_sched_class</text>
+<text x="730" y="92" text-anchor="middle" fill="#8b949e" font-size="10">最低</text>
+<text x="730" y="108" text-anchor="middle" fill="#8b949e" font-size="10">idle 进程</text></g>
+
+<text x="400" y="180" text-anchor="middle" fill="#8b949e" font-size="11">pick_next_task 依次问每个类: 有任务给我吗?</text>
+<text x="400" y="200" text-anchor="middle" fill="#8b949e" font-size="11">stop → dl → rt → cfs → idle (绝对优先级)</text>
+</svg>
+</div>
+
+<h2 id="loadbalance">10.5 负载均衡</h2>
+
+<p>SMP 系统每核一个 runqueue，CFS 周期性把"忙核"的任务迁到"闲核"：</p>
+
+<pre class="code-bash"><span class="cm"># 调度域层级 (NUMA + SMT 系统)</span>
+/sys/kernel/debug/sched/domains/
+└── cpu<span class="num">0</span>/
+    ├── domain0/           <span class="cm"># SMT (兄弟核) — 几乎免费迁</span>
+    │   ├── name: SMT
+    │   └── flags: SD_SHARE_CPUCAPACITY ...
+    ├── domain1/           <span class="cm"># MC (同 LLC/L3) — 便宜</span>
+    │   ├── name: MC
+    │   └── flags: SD_SHARE_PKG_RESOURCES ...
+    └── domain2/           <span class="cm"># NUMA — 昂贵, 慎迁</span>
+        ├── name: NUMA
+        └── flags: SD_NUMA ...
+</pre>
+
+<p>每个 tick 检查是否需要均衡，按调度域<b>从小到大</b>逐层尝试。同一 socket 内的迁移几乎免费（L3 命中），跨 NUMA 迁移成本极高。</p>
+
+<h2 id="cgroup">10.6 CFS + cgroup 层次调度</h2>
+
+<p>容器场景下，<b>每个 cgroup 是一个 sched_entity</b>，它内部又有自己的 cfs_rq 包含子任务/子 cgroup。
+"公平"于是变成"<b>cgroup 间公平 → cgroup 内任务间公平</b>"的两层结构。</p>
+
+<pre class="code-bash"><span class="cm"># cgroup v2 配置 cpu.weight (默认 100, 范围 1~10000)</span>
+echo <span class="num">200</span> > /sys/fs/cgroup/my_app/cpu.weight   <span class="cm"># 翻倍权重</span>
+echo <span class="num">50</span>  > /sys/fs/cgroup/junk/cpu.weight    <span class="cm"># 半倍权重</span>
+
+<span class="cm"># cpu.max — 硬上限 "配额 周期" (μs)，即 CFS 带宽限制，超额即被节流 (对应 v1 的 cpu.cfs_quota_us / cpu.cfs_period_us)</span>
+echo <span class="str">"50000 100000"</span> > /sys/fs/cgroup/my_app/cpu.max  <span class="cm"># 50%</span>
+</pre>
+
+<h2 id="eas">10.7 EAS — 能效感知调度 (移动场景)</h2>
+
+<p>big.LITTLE / DynamIQ ARM 芯片有"大核"和"小核"，性能-功耗特性不同。
+<b>EAS (Energy Aware Scheduling)</b> 在 5.x 内核引入，调度时计算每核的<b>能效模型</b>，把任务放在能量最低且满足性能的核上：</p>
+
+<pre class="code-bash">cat /sys/devices/system/cpu/cpu0/cpu_capacity     <span class="cm"># 大核可能 1024</span>
+cat /sys/devices/system/cpu/cpu4/cpu_capacity     <span class="cm"># 小核可能 446</span>
+
+cat /proc/sys/kernel/sched_energy_aware           <span class="cm"># 1 = EAS 已开启 (需非对称 CPU + 能耗模型 + schedutil)</span>
+</pre>
+
+<h2 id="debug">10.8 CFS 调试与可视化</h2>
+
+<pre class="code-bash"><span class="cm"># 实时查看每个 cfs_rq 状态</span>
+cat /sys/kernel/debug/sched/debug     <span class="cm"># 5.13 之前的内核: cat /proc/sched_debug</span>
+
+<span class="cm"># 用 ftrace 看调度事件</span>
+echo function_graph > /sys/kernel/debug/tracing/current_tracer
+echo pick_next_task_fair > /sys/kernel/debug/tracing/set_graph_function
+cat /sys/kernel/debug/tracing/trace_pipe
+
+<span class="cm"># 用 perf 看上下文切换</span>
+perf sched record sleep <span class="num">5</span>
+perf sched latency
+perf sched map        <span class="cm"># 可视化每核任务时间线</span>
+
+<span class="cm"># BPF 工具</span>
+bpftrace -e <span class="str">'tracepoint:sched:sched_switch { @[args->prev_comm, args->next_comm] = count(); }'</span>
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../09-同步机制/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../11-容器与命名空间/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/index.html" "b/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/index.html"
new file mode 100644
index 0000000..0fb6d43
--- /dev/null
+++ "b/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/index.html"
@@ -0,0 +1,264 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>11 · 容器内核机制 — Namespaces、Cgroups、OverlayFS — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html" class="active">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">11</span>容器内核机制 — Namespaces、Cgroups、OverlayFS</h1>
+
+
+<p>Docker / Kubernetes 是包装，<b>容器的真正实现完全在 Linux 内核里</b>：8 种命名空间 + cgroups + 联合文件系统。本章揭开"容器"魔术。</p>
+
+<h2 id="anatomy">11.1 容器解剖图</h2>
+<div class="diagram">
+<img src="../assets/diagrams/container-anatomy.svg" alt="容器结构">
+<div class="caption">容器 = Namespaces (隔离视图) + Cgroups (限制资源) + UnionFS (分层镜像)</div>
+</div>
+
+<h2 id="ns">11.2 8 种命名空间</h2>
+
+<table>
+<tr><th>类型</th><th>引入</th><th>隔离什么</th><th>验证</th></tr>
+<tr><td>Mount (mnt)</td><td>2.4.19</td><td>挂载点视图</td><td><code>cat /proc/self/mounts</code></td></tr>
+<tr><td>UTS</td><td>2.6.19</td><td>hostname / domainname</td><td><code>hostname</code></td></tr>
+<tr><td>IPC</td><td>2.6.19</td><td>SysV IPC / POSIX 消息队列</td><td><code>ipcs</code></td></tr>
+<tr><td>PID</td><td>2.6.24</td><td>进程编号空间</td><td>容器内 init = pid 1</td></tr>
+<tr><td>Network (net)</td><td>2.6.29</td><td>网卡/路由/防火墙/socket</td><td><code>ip link</code></td></tr>
+<tr><td>User</td><td>3.8</td><td>UID/GID 映射</td><td><code>id</code> 在容器内 root 实际是宿主 uid 100000</td></tr>
+<tr><td>Cgroup</td><td>4.6</td><td>cgroup 层级视图</td><td><code>cat /proc/self/cgroup</code></td></tr>
+<tr><td>Time</td><td>5.6</td><td>CLOCK_MONOTONIC 偏移</td><td><code>uptime</code></td></tr>
+</table>
+
+<h2 id="clone">11.3 创建命名空间的系统调用</h2>
+
+<pre class="code-c"><span class="cm">/* 1. clone() 创建新进程时同时创建 ns */</span>
+clone(child_fn, stack, 
+      CLONE_NEWPID  |     <span class="cm">// 新 PID ns</span>
+      CLONE_NEWNET  |     <span class="cm">// 新 net ns</span>
+      CLONE_NEWNS   |     <span class="cm">// 新 mount ns</span>
+      CLONE_NEWUTS  |     <span class="cm">// 新 UTS ns</span>
+      CLONE_NEWIPC  |     <span class="cm">// 新 IPC ns</span>
+      CLONE_NEWUSER |     <span class="cm">// 新 user ns</span>
+      SIGCHLD, arg);
+
+<span class="cm">/* 2. unshare() 把当前进程切到新 ns */</span>
+unshare(CLONE_NEWNET);   <span class="cm">// 当前进程从此在独立 net ns</span>
+
+<span class="cm">/* 3. setns() 加入已有 ns (Docker exec 用) */</span>
+<span class="kw">int</span> fd = open(<span class="str">"/proc/1234/ns/net"</span>, O_RDONLY);
+setns(fd, CLONE_NEWNET);
+</pre>
+
+<pre class="code-bash"><span class="cm"># 命令行版本</span>
+unshare --pid --fork --mount-proc /bin/bash    <span class="cm"># 新 PID ns 的 shell</span>
+nsenter -t <span class="num">1234</span> -n ip addr                    <span class="cm"># 进入进程 1234 的 net ns</span>
+
+<span class="cm"># 实际看看自己的所有 ns</span>
+ls -l /proc/self/ns/
+<span class="cm"># lrwxrwxrwx 1 root root 0 cgroup -> 'cgroup:[4026531835]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 ipc    -> 'ipc:[4026531839]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 mnt    -> 'mnt:[4026531840]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 net    -> 'net:[4026531992]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 pid    -> 'pid:[4026531836]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 user   -> 'user:[4026531837]'</span>
+<span class="cm"># lrwxrwxrwx 1 root root 0 uts    -> 'uts:[4026531838]'</span>
+</pre>
+
+<h2 id="pid_ns">11.4 PID 命名空间深入</h2>
+
+<div class="callout deep">
+<div class="label">同一个进程，两个 PID</div>
+<p>容器内 nginx 显示 <code>pid = 1</code>，但宿主机上 <code>ps</code> 看到的可能是 <code>pid = 12345</code>。
+<b>这不是 bug，是 PID ns 设计</b>：每个 PID ns 内进程从 1 开始编号。</p>
+</div>
+
+<pre class="code-c"><span class="kw">struct</span> task_struct {
+    <span class="cm">/* ... */</span>
+    <span class="kw">struct</span> pid *thread_pid;     <span class="cm">// pid 结构</span>
+};
+
+<span class="kw">struct</span> pid {
+    refcount_t count;
+    <span class="kw">unsigned int</span> level;        <span class="cm">// 嵌套层数 (容器内套容器)</span>
+    <span class="kw">struct</span> upid numbers[<span class="num">1</span>];     <span class="cm">// 每层一个 upid</span>
+};
+<span class="kw">struct</span> upid {
+    <span class="kw">int</span> nr;                   <span class="cm">// 在该 ns 内的 pid</span>
+    <span class="kw">struct</span> pid_namespace *ns; <span class="cm">// 所属 ns</span>
+};
+<span class="cm">// → 同一个进程在不同 ns 看到不同 pid</span>
+</pre>
+
+<h2 id="net_ns">11.5 网络命名空间与 veth pair</h2>
+
+<p>每个 net ns 拥有<b>独立的整套网络栈</b>：网卡、路由表、iptables 规则、socket。容器内"看到的"网卡完全独立。</p>
+
+<pre class="code-bash"><span class="cm"># 手工搭建容器网络 (Docker 内部做的事)</span>
+ip netns add c1                              <span class="cm"># 新 net ns</span>
+
+<span class="cm"># 创建 veth pair (像虚拟网线两端)</span>
+ip link add veth0 type veth peer name veth1
+
+<span class="cm"># 把 veth1 塞进 c1 ns</span>
+ip link set veth1 netns c1
+
+<span class="cm"># 配置两端 IP</span>
+ip addr add <span class="num">10.0.0.1/24</span> dev veth0
+ip link set veth0 up
+ip netns exec c1 ip addr add <span class="num">10.0.0.2/24</span> dev veth1
+ip netns exec c1 ip link set veth1 up
+
+<span class="cm"># 测试连通</span>
+ping <span class="num">10.0.0.2</span>           <span class="cm"># 从宿主</span>
+ip netns exec c1 ping <span class="num">10.0.0.1</span>   <span class="cm"># 从容器</span>
+
+<span class="cm"># 接入网桥让多个容器互通 (docker0)</span>
+ip link add br0 type bridge
+ip link set veth0 master br0
+ip link set br0 up
+</pre>
+
+<h2 id="cgroup">11.6 cgroups v2 详解</h2>
+
+<p>cgroups 控制<b>多少</b>资源（v1 旧、v2 新统一）。v2 单一层级树，每个 cgroup 是一个目录：</p>
+
+<pre class="code-bash"><span class="cm"># cgroup v2 挂载点</span>
+mount | grep cgroup2
+<span class="cm"># cgroup2 on /sys/fs/cgroup type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate)</span>
+
+<span class="cm"># 创建一个 cgroup</span>
+mkdir /sys/fs/cgroup/my_app
+
+<span class="cm"># 启用控制器</span>
+echo <span class="str">"+cpu +memory +io +pids"</span> > /sys/fs/cgroup/cgroup.subtree_control
+
+<span class="cm"># 加进程进来</span>
+echo $$ > /sys/fs/cgroup/my_app/cgroup.procs
+
+<span class="cm"># 各项限制</span>
+echo <span class="str">"200000 1000000"</span> > /sys/fs/cgroup/my_app/cpu.max     <span class="cm"># 20% CPU</span>
+echo <span class="num">536870912</span>         > /sys/fs/cgroup/my_app/memory.max  <span class="cm"># 512MB</span>
+echo <span class="num">100</span>               > /sys/fs/cgroup/my_app/pids.max
+echo <span class="str">"8:0 rbps=1048576"</span> > /sys/fs/cgroup/my_app/io.max     <span class="cm"># 1MB/s</span>
+
+<span class="cm"># 当前用量</span>
+cat /sys/fs/cgroup/my_app/memory.current
+cat /sys/fs/cgroup/my_app/cpu.stat
+</pre>
+
+<h2 id="overlay">11.7 OverlayFS — 镜像分层的魔法</h2>
+
+<pre class="code-bash"><span class="cm"># 四个目录: lower (只读层) / upper (读写层) / work (内部工作区) / merged (挂载点)</span>
+mkdir /tmp/{lower,upper,work,merged}
+echo <span class="str">"base"</span>    > /tmp/lower/file1
+echo <span class="str">"changed"</span> > /tmp/upper/file1   <span class="cm"># 同名覆盖</span>
+echo <span class="str">"only"</span>    > /tmp/upper/file2
+
+<span class="cm"># 挂载 overlay</span>
+mount -t overlay overlay \
+    -o lowerdir=/tmp/lower,upperdir=/tmp/upper,workdir=/tmp/work \
+    /tmp/merged
+
+ls /tmp/merged
+<span class="cm"># file1 file2</span>
+cat /tmp/merged/file1
+<span class="cm"># changed   ← upper 覆盖了 lower</span>
+
+<span class="cm"># 在 merged 里写 → 实际写到 upper (Copy-on-Write)</span>
+echo <span class="str">"new"</span> >> /tmp/merged/file1
+cat /tmp/upper/file1    <span class="cm"># changed\nnew</span>
+cat /tmp/lower/file1    <span class="cm"># base (没动!)</span>
+</pre>
+
+<p>Docker 镜像每层就是一个 lowerdir，最顶上的容器读写层是 upperdir。删除文件用 "whiteout" 特殊文件标记。</p>
+
+<h2 id="runc">11.8 一个 30 行左右的 "mini docker"</h2>
+
+<pre class="code-c"><span class="cm">/* mini_container.c — 演示容器原理的最小实现 */</span>
+<span class="kw">#define</span> _GNU_SOURCE
+<span class="kw">#include</span> &lt;sched.h&gt; <span class="kw">#include</span> &lt;sys/wait.h&gt; <span class="kw">#include</span> &lt;sys/mount.h&gt;
+<span class="kw">#include</span> &lt;sys/utsname.h&gt; <span class="kw">#include</span> &lt;stdio.h&gt; <span class="kw">#include</span> &lt;unistd.h&gt;
+
+<span class="kw">static char</span> stack[<span class="num">1024</span>*<span class="num">1024</span>];
+
+<span class="kw">int</span> <span class="fn">child_main</span>(<span class="kw">void</span> *arg) {
+    <span class="cm">// 1. 改 hostname</span>
+    sethostname(<span class="str">"in-container"</span>, <span class="num">12</span>);
+
+    <span class="cm">// 2. 切根目录 (类似 chroot)</span>
+    chdir(<span class="str">"/rootfs"</span>);
+    chroot(<span class="str">"."</span>);
+
+    <span class="cm">// 3. 挂载新 proc (PID ns 需要)</span>
+    mount(<span class="str">"proc"</span>, <span class="str">"/proc"</span>, <span class="str">"proc"</span>, <span class="num">0</span>, NULL);
+
+    <span class="cm">// 4. 跑 shell</span>
+    <span class="kw">char</span> *argv[] = { <span class="str">"/bin/sh"</span>, NULL };
+    execv(<span class="str">"/bin/sh"</span>, argv);
+    <span class="kw">return</span> <span class="num">1</span>;
+}
+
+<span class="kw">int</span> <span class="fn">main</span>() {
+    <span class="kw">int</span> flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWUTS
+              | CLONE_NEWIPC | CLONE_NEWNET | SIGCHLD;
+    <span class="kw">int</span> pid = clone(child_main, stack + <span class="kw">sizeof</span>(stack), flags, NULL);
+    waitpid(pid, NULL, <span class="num">0</span>);
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+<span class="cm">/* 编译: gcc -o mini mini_container.c</span>
+<span class="cm">   运行 (需 root): sudo ./mini                                 */</span>
+</pre>
+
+<div class="callout tip">
+<div class="label">看懂这个，你就懂了 Docker</div>
+<p>runc (Docker 底层运行时) 比这个复杂 100 倍，但<b>核心机制就是上面 4 步</b>：clone 多个 NS → chroot → 挂载 proc → exec。剩下的是镜像分层 (OverlayFS)、网络配置 (veth)、资源限制 (cgroups)。</p>
+</div>
+
+
+<footer class="page-footer">
+    <p>← <a href="../10-CFS调度器/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../12-eBPF与可观测性/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/index.html" "b/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/index.html"
new file mode 100644
index 0000000..6f96a88
--- /dev/null
+++ "b/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/index.html"
@@ -0,0 +1,245 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>12 · eBPF — 内核可编程性革命 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html" class="active">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">12</span>eBPF — 内核可编程性革命</h1>
+
+
+<p>eBPF (extended Berkeley Packet Filter) 是过去十年 Linux 内核<b>最重大的创新</b>。它把内核从"封闭的固件"变成<b>可在运行时动态扩展的执行引擎</b>。</p>
+
+<h2 id="what">12.1 eBPF 是什么</h2>
+
+<div class="callout deep">
+<div class="label">一句话理解</div>
+<p>eBPF 让你<b>把一段 C 代码编译成字节码，注入到内核里的某些"挂载点"运行</b>，不需要修改内核源码，不需要装内核模块。
+代码在加载时必须通过<b>验证器 (Verifier) 的严格检查</b>，因此不会让内核崩溃。</p>
+</div>
+
+<h2 id="arch">12.2 整体架构</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 400" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="ar12" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#8b949e"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">eBPF 完整工作流</text>
+
+<g transform="translate(20, 50)">
+<rect width="200" height="80" rx="6" fill="#1a2028" stroke="#58a6ff"/>
+<text x="100" y="22" text-anchor="middle" fill="#58a6ff" font-weight="700">1. 写 C 代码</text>
+<text x="14" y="42" font-size="10" font-family="monospace" fill="#e6edf3">SEC("kprobe/sys_open")</text>
+<text x="14" y="56" font-size="10" font-family="monospace" fill="#e6edf3">int kp_open(...) {</text>
+<text x="14" y="70" font-size="10" font-family="monospace" fill="#e6edf3">  bpf_printk(...); }</text>
+</g>
+
+<line x1="220" y1="90" x2="260" y2="90" stroke="#8b949e" marker-end="url(#ar12)"/>
+
+<g transform="translate(260, 50)">
+<rect width="200" height="80" rx="6" fill="#1a2028" stroke="#ff7b29"/>
+<text x="100" y="22" text-anchor="middle" fill="#ff7b29" font-weight="700">2. clang 编译</text>
+<text x="100" y="44" text-anchor="middle" font-size="10" fill="#8b949e">clang -target bpf -O2</text>
+<text x="100" y="64" text-anchor="middle" font-size="10" fill="#8b949e">生成 ELF + BPF 字节码</text>
+</g>
+
+<line x1="460" y1="90" x2="500" y2="90" stroke="#8b949e" marker-end="url(#ar12)"/>
+
+<g transform="translate(500, 50)">
+<rect width="240" height="80" rx="6" fill="#1a2028" stroke="#bc8cff"/>
+<text x="120" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700">3. bpf() 系统调用加载</text>
+<text x="120" y="44" text-anchor="middle" font-size="10" fill="#8b949e">用户态 libbpf 调 bpf(BPF_PROG_LOAD)</text>
+<text x="120" y="64" text-anchor="middle" font-size="10" fill="#8b949e">字节码 → 内核</text>
+</g>
+
+<line x1="620" y1="130" x2="620" y2="170" stroke="#ff7b29" marker-end="url(#ar12)"/>
+
+<g transform="translate(500, 170)">
+<rect width="240" height="100" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+<text x="120" y="22" text-anchor="middle" fill="#f85149" font-weight="700">4. Verifier 严格校验</text>
+<text x="14" y="42" font-size="10" fill="#8b949e">· 限制 1M 条指令</text>
+<text x="14" y="56" font-size="10" fill="#8b949e">· 禁止无界循环 (允许有界)</text>
+<text x="14" y="70" font-size="10" fill="#8b949e">· 所有内存访问必须证明合法</text>
+<text x="14" y="84" font-size="10" fill="#8b949e">· 不通过 → 加载失败</text>
+</g>
+
+<line x1="500" y1="220" x2="460" y2="220" stroke="#ff7b29" marker-end="url(#ar12)"/>
+
+<g transform="translate(260, 170)">
+<rect width="200" height="100" rx="6" fill="#1a2028" stroke="#56d364"/>
+<text x="100" y="22" text-anchor="middle" fill="#56d364" font-weight="700">5. JIT 编译</text>
+<text x="100" y="44" text-anchor="middle" font-size="10" fill="#8b949e">BPF 字节码</text>
+<text x="100" y="60" text-anchor="middle" font-size="10" fill="#8b949e">→ 原生 x86_64/ARM 机器码</text>
+<text x="100" y="80" text-anchor="middle" fill="#8b949e" font-size="10">速度接近原生 C</text>
+</g>
+
+<line x1="260" y1="220" x2="220" y2="220" stroke="#ff7b29" marker-end="url(#ar12)"/>
+
+<g transform="translate(20, 170)">
+<rect width="200" height="100" rx="6" fill="#1a2028" stroke="#e3b341"/>
+<text x="100" y="22" text-anchor="middle" fill="#e3b341" font-weight="700">6. attach 到挂载点</text>
+<text x="14" y="42" font-size="10" fill="#8b949e">kprobe / tracepoint /</text>
+<text x="14" y="56" font-size="10" fill="#8b949e">xdp / tc / cgroup /</text>
+<text x="14" y="70" font-size="10" fill="#8b949e">socket / uprobe ...</text>
+<text x="14" y="84" font-size="10" fill="#56d364">事件发生即触发</text>
+</g>
+
+<g transform="translate(140, 310)">
+<rect width="520" height="60" rx="6" fill="#1a2028" stroke="#bc8cff" stroke-width="1.5"/>
+<text x="260" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700">BPF Maps — 用户态 ↔ 内核态数据桥梁</text>
+<text x="260" y="42" text-anchor="middle" fill="#8b949e" font-size="11">HASH / ARRAY / PERCPU / RINGBUF / LRU / LPM_TRIE ...</text>
+</g>
+</svg>
+</div>
+
+<h2 id="hooks">12.3 主要挂载点 (Program Types)</h2>
+
+<table>
+<tr><th>类型</th><th>触发时机</th><th>用途</th></tr>
+<tr><td><b>kprobe</b></td><td>任意内核函数入口/返回</td><td>动态追踪（生产可用）</td></tr>
+<tr><td><b>uprobe</b></td><td>用户态函数入口</td><td>无侵入跟踪应用</td></tr>
+<tr><td><b>tracepoint</b></td><td>预定义的静态点</td><td>稳定接口，长期可用</td></tr>
+<tr><td><b>XDP</b></td><td>网卡驱动收包最早</td><td>DDoS、L4LB（极致性能）</td></tr>
+<tr><td><b>tc</b></td><td>流量控制层</td><td>容器网络策略 (Cilium)</td></tr>
+<tr><td><b>cgroup_skb</b></td><td>cgroup 收发包</td><td>容器网络隔离</td></tr>
+<tr><td><b>perf_event</b></td><td>性能事件</td><td>采样、性能分析</td></tr>
+<tr><td><b>LSM</b></td><td>安全钩子</td><td>替代 AppArmor/SELinux 部分</td></tr>
+<tr><td><b>iter</b></td><td>遍历内核对象</td><td>自定义 /proc 类输出</td></tr>
+</table>
+
+<h2 id="example">12.4 完整例子：统计每个命令的 syscall 次数</h2>
+
+<pre class="code-c"><span class="cm">/* count_syscall.bpf.c */</span>
+<span class="kw">#include</span> &lt;vmlinux.h&gt;
+<span class="kw">#include</span> &lt;bpf/bpf_helpers.h&gt;
+<span class="kw">#include</span> &lt;bpf/bpf_tracing.h&gt;
+
+<span class="kw">struct</span> {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, <span class="num">1024</span>);
+    __type(key, <span class="kw">char</span>[TASK_COMM_LEN]);
+    __type(value, <span class="kw">u64</span>);
+} count_map SEC(<span class="str">".maps"</span>);
+
+SEC(<span class="str">"tp/raw_syscalls/sys_enter"</span>)
+<span class="kw">int</span> <span class="fn">on_syscall</span>(<span class="kw">struct</span> trace_event_raw_sys_enter *ctx) {
+    <span class="kw">char</span> comm[TASK_COMM_LEN];
+    bpf_get_current_comm(&comm, <span class="kw">sizeof</span>(comm));
+
+    <span class="kw">u64</span> *val = bpf_map_lookup_elem(&count_map, &comm);
+    <span class="kw">if</span> (val) {
+        __sync_fetch_and_add(val, <span class="num">1</span>);   <span class="cm">// 原子加</span>
+    } <span class="kw">else</span> {
+        <span class="kw">u64</span> one = <span class="num">1</span>;
+        bpf_map_update_elem(&count_map, &comm, &one, BPF_ANY);
+    }
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+
+<span class="kw">char</span> LICENSE[] SEC(<span class="str">"license"</span>) = <span class="str">"GPL"</span>;
+</pre>
+
+<pre class="code-bash"><span class="cm"># 用 bpftrace 一行完成同样功能</span>
+bpftrace -e <span class="str">'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'</span>
+
+<span class="cm"># 5 秒后 Ctrl+C 看结果</span>
+<span class="cm">@[bash]: 1234</span>
+<span class="cm">@[nginx]: 50000</span>
+<span class="cm">@[systemd]: 87</span>
+</pre>
+
+<h2 id="bpftrace">12.5 bpftrace — 内核版 awk</h2>
+
+<pre class="code-bash"><span class="cm"># 跟踪 open 系统调用</span>
+bpftrace -e <span class="str">'tracepoint:syscalls:sys_enter_openat {
+    printf("%s opened %s\n", comm, str(args->filename));
+}'</span>
+
+<span class="cm"># 统计 read 延迟分布</span>
+bpftrace -e <span class="str">'
+kprobe:vfs_read { @start[tid] = nsecs; }
+kretprobe:vfs_read /@start[tid]/ {
+    @ns = hist(nsecs - @start[tid]);
+    delete(@start[tid]);
+}'</span>
+
+<span class="cm">@ns:</span>
+<span class="cm">[1K, 2K)               12 |                                |</span>
+<span class="cm">[2K, 4K)              305 |@@@                             |</span>
+<span class="cm">[4K, 8K)             2876 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |</span>
+<span class="cm">[8K, 16K)              42 |                                |</span>
+</pre>
+
+<h2 id="bccvsce">12.6 BCC、libbpf、CO-RE</h2>
+
+<table>
+<tr><th>工具</th><th>特点</th><th>适用场景</th></tr>
+<tr><td>bpftrace</td><td>一行 DSL</td><td>临时排查</td></tr>
+<tr><td>BCC</td><td>Python + 嵌入 C</td><td>复杂工具（运行时编译，依赖 LLVM）</td></tr>
+<tr><td><b>libbpf + CO-RE</b></td><td>纯 C + 提前编译</td><td><b>生产环境（推荐）</b></td></tr>
+</table>
+
+<div class="callout deep">
+<div class="label">CO-RE (Compile Once, Run Everywhere)</div>
+<p>历史上 BCC 工具每次运行都要在目标机编译，需要内核 headers。
+<b>CO-RE</b> 通过 BTF (BPF Type Format) 信息，让 BPF 程序<b>一次编译，跑遍所有有 BTF 的内核</b>（需要内核开启 CONFIG_DEBUG_INFO_BTF：上游 5.2 起支持；较新的发行版如 Ubuntu 20.10+、Fedora 31+、RHEL 8.2+ 默认开启）。</p>
+</div>
+
+<h2 id="cilium">12.7 真实世界 eBPF 应用</h2>
+
+<table>
+<tr><th>项目</th><th>领域</th><th>使用 eBPF 做什么</th></tr>
+<tr><td>Cilium</td><td>K8s 网络</td><td>替代 kube-proxy/iptables 实现 Service 负载均衡与网络策略，大规模集群下性能显著更优</td></tr>
+<tr><td>Falco</td><td>容器安全</td><td>实时检测异常系统调用</td></tr>
+<tr><td>Pixie</td><td>K8s 可观测性</td><td>无侵入采集 HTTP/SQL 全链路</td></tr>
+<tr><td>Tetragon</td><td>运行时安全</td><td>策略执行 + 阻断</td></tr>
+<tr><td>Katran (FB)</td><td>L4 负载均衡</td><td>XDP 实现，单机数千万 PPS</td></tr>
+<tr><td>bpfilter</td><td>防火墙 (实验性)</td><td>曾计划用 BPF 替代 iptables；一直未完成，已在 Linux 6.8 从主线移除</td></tr>
+</table>
+
+
+<footer class="page-footer">
+    <p>← <a href="../11-容器与命名空间/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../13-中断与异常/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/index.html" "b/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/index.html"
new file mode 100644
index 0000000..8c84761
--- /dev/null
+++ "b/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/index.html"
@@ -0,0 +1,204 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>13 · 中断与异常子系统 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html" class="active">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">13</span>中断与异常子系统</h1>
+
+
+<p>中断是<b>CPU 与外界沟通的唯一异步通道</b>。本章从硬件中断向量讲到 threaded IRQ、IPI、softirq 全套机制。</p>
+
+<h2 id="why">13.1 为什么有中断</h2>
+
+<p>没中断 → CPU 必须<b>轮询</b>外设是否就绪 → 99% 时间浪费。中断让 CPU 在外设事件发生时<b>被动接到通知</b>，期间可以做别的事。</p>
+
+<h2 id="vector">13.2 中断向量与 IDT (x86)</h2>
+
+<table>
+<tr><th>向量号</th><th>类型</th><th>例子</th></tr>
+<tr><td>0~31</td><td><b>异常</b> (内部)</td><td>0=DIV/0, 6=#UD, 13=#GP, 14=#PF</td></tr>
+<tr><td>32~47</td><td>传统 IRQ (PIC)</td><td>32=timer, 33=keyboard</td></tr>
+<tr><td>32~255</td><td>APIC / MSI</td><td>现代 PCI 设备动态分配</td></tr>
+<tr><td>0x80 (128)</td><td>软件中断 (int 指令，不是 softirq)</td><td>int 0x80 = 传统系统调用入口 (i386)</td></tr>
+<tr><td>2</td><td>NMI (不可屏蔽中断)</td><td>看门狗、硬件错误</td></tr>
+</table>
+
+<pre class="code-c"><span class="cm">/* arch/x86/kernel/idt.c — IDT 表项 */</span>
+<span class="kw">static const</span> __initconst <span class="kw">struct</span> idt_data def_idts[] = {
+    INTG(X86_TRAP_DE,    asm_exc_divide_error),     <span class="cm">// vec 0</span>
+    INTG(X86_TRAP_DB,    asm_exc_debug),            <span class="cm">// vec 1</span>
+    INTG(X86_TRAP_BP,    asm_exc_int3),             <span class="cm">// vec 3 (int3 断点)</span>
+    INTG(X86_TRAP_PF,    asm_exc_page_fault),       <span class="cm">// vec 14 缺页</span>
+    INTG(X86_TRAP_GP,    asm_exc_general_protection),
+    <span class="cm">/* ... */</span>
+};
+</pre>
+
+<h2 id="apic">13.3 APIC — 现代多核中断架构</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 320" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="ar13" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">现代 x86 中断路径</text>
+
+<g><rect x="20" y="60" width="120" height="50" rx="6" fill="#1a2028" stroke="#bc8cff"/>
+<text x="80" y="82" text-anchor="middle" fill="#bc8cff">外设 (网卡)</text>
+<text x="80" y="100" text-anchor="middle" fill="#8b949e" font-size="10">PCIe MSI-X</text></g>
+
+<line x1="140" y1="85" x2="180" y2="85" stroke="#ff7b29" marker-end="url(#ar13)"/>
+
+<g><rect x="180" y="60" width="120" height="50" rx="6" fill="#1a2028" stroke="#58a6ff"/>
+<text x="240" y="82" text-anchor="middle" fill="#58a6ff">IOAPIC / MSI</text>
+<text x="240" y="100" text-anchor="middle" fill="#8b949e" font-size="10">路由到特定 CPU</text></g>
+
+<line x1="300" y1="85" x2="340" y2="85" stroke="#ff7b29" marker-end="url(#ar13)"/>
+
+<g><rect x="340" y="60" width="120" height="50" rx="6" fill="#1a2028" stroke="#56d364"/>
+<text x="400" y="82" text-anchor="middle" fill="#56d364">Local APIC</text>
+<text x="400" y="100" text-anchor="middle" fill="#8b949e" font-size="10">每核一个</text></g>
+
+<line x1="460" y1="85" x2="500" y2="85" stroke="#ff7b29" marker-end="url(#ar13)"/>
+
+<g><rect x="500" y="60" width="120" height="50" rx="6" fill="#1a2028" stroke="#e3b341"/>
+<text x="560" y="82" text-anchor="middle" fill="#e3b341">CPU 核</text>
+<text x="560" y="100" text-anchor="middle" fill="#8b949e" font-size="10">查 IDT</text></g>
+
+<line x1="620" y1="85" x2="660" y2="85" stroke="#ff7b29" marker-end="url(#ar13)"/>
+
+<g><rect x="660" y="60" width="120" height="50" rx="6" fill="#1a2028" stroke="#f85149"/>
+<text x="720" y="82" text-anchor="middle" fill="#f85149">ISR 入口</text>
+<text x="720" y="100" text-anchor="middle" fill="#8b949e" font-size="10">汇编→ C 处理</text></g>
+
+<g transform="translate(20, 150)">
+<rect width="760" height="80" rx="6" fill="#1a2028" stroke="#56d364"/>
+<text x="380" y="22" text-anchor="middle" fill="#56d364" font-weight="700">下半部 (Bottom Half) — 把慢的工作搬出中断上下文</text>
+<text x="20" y="44" font-size="11" fill="#e6edf3">· softirq (10 种固定): NET_TX, NET_RX, TIMER, BLOCK, TASKLET, SCHED, HRTIMER, RCU ...</text>
+<text x="20" y="62" font-size="11" fill="#e6edf3">· workqueue (进程上下文): kworker 线程池, 可 sleep, 现代首选</text>
+<text x="20" y="76" font-size="11" fill="#e6edf3">· threaded IRQ: 整个上半部就跑在独立线程, RT 内核必备</text>
+</g>
+
+<g transform="translate(20, 250)">
+<rect width="760" height="60" rx="6" fill="#1a2028" stroke="#bc8cff"/>
+<text x="380" y="22" text-anchor="middle" fill="#bc8cff" font-weight="700">IPI (Inter-Processor Interrupt) — CPU 间互相打招呼</text>
+<text x="20" y="44" font-size="11" fill="#e6edf3">RESCHEDULE_VECTOR: 让另一核重新调度 (load balancing 唤醒)</text>
+<text x="380" y="44" font-size="11" fill="#e6edf3">TLB flush IPI: 远程 TLB 失效 (TLB shootdown)</text>
+</g>
+</svg>
+</div>
+
+<h2 id="softirq">13.4 softirq vs tasklet vs workqueue</h2>
+
+<table>
+<tr><th>对比</th><th>softirq</th><th>tasklet</th><th>workqueue</th></tr>
+<tr><td>类型数</td><td>固定 10 种</td><td>可动态创建</td><td>可动态创建</td></tr>
+<tr><td>上下文</td><td>软中断</td><td>软中断</td><td><b>进程</b></td></tr>
+<tr><td>可 sleep</td><td>否</td><td>否</td><td><b>是</b></td></tr>
+<tr><td>并发</td><td>同类可在不同 CPU 并发</td><td>同类只在一个 CPU</td><td>完全并发</td></tr>
+<tr><td>典型用户</td><td>网络/定时器</td><td>逐步淘汰</td><td>多数延后处理首选</td></tr>
+</table>
+
+<pre class="code-c"><span class="cm">/* workqueue 使用示例 */</span>
+<span class="kw">static void</span> <span class="fn">my_work_fn</span>(<span class="kw">struct</span> work_struct *w) {
+    <span class="cm">// 进程上下文! 可以 sleep、kmalloc(GFP_KERNEL)</span>
+    msleep(<span class="num">100</span>);
+    do_slow_io();
+}
+
+DECLARE_WORK(my_work, my_work_fn);
+
+<span class="cm">/* 中断里触发 */</span>
+schedule_work(&my_work);           <span class="cm">// 加入 system_wq</span>
+
+<span class="cm">/* 或专用 workqueue */</span>
+<span class="kw">struct</span> workqueue_struct *wq = alloc_workqueue(<span class="str">"my_wq"</span>, WQ_UNBOUND, <span class="num">0</span>);
+queue_work(wq, &my_work);
+
+<span class="cm">/* 延后执行 */</span>
+DECLARE_DELAYED_WORK(dw, my_work_fn);
+schedule_delayed_work(&dw, msecs_to_jiffies(<span class="num">500</span>));
+</pre>
+
+<h2 id="threaded">13.5 threaded IRQ — 整个中断处理跑在线程里</h2>
+
+<p>实时系统 (PREEMPT_RT) 几乎所有中断都改为 threaded IRQ，因为传统 ISR 不能被抢占，破坏延迟保证：</p>
+
+<pre class="code-c"><span class="cm">/* 注册 threaded IRQ */</span>
+<span class="cm">/* primary = 快速 ack;  thread_fn = 实际处理逻辑在线程里 */</span>
+request_threaded_irq(irq,
+                     primary_handler,   <span class="cm">// 也可为 NULL</span>
+                     threaded_handler,  <span class="cm">// 进程上下文，可 sleep!</span>
+                     IRQF_ONESHOT,
+                     <span class="str">"my-dev"</span>, dev);
+</pre>
+
+<h2 id="experiment">13.6 实验：观察中断分布</h2>
+
+<pre class="code-bash"><span class="cm"># 看每核每个中断的次数</span>
+cat /proc/interrupts
+
+           CPU0       CPU1       CPU2       CPU3
+  <span class="num">0</span>:        <span class="num">125</span>          <span class="num">0</span>          <span class="num">0</span>          <span class="num">0</span>   IO-APIC   <span class="num">2</span>-edge      timer
+  <span class="num">1</span>:          <span class="num">0</span>         <span class="num">12</span>          <span class="num">0</span>          <span class="num">0</span>   IO-APIC   <span class="num">1</span>-edge      i8042
+  <span class="num">8</span>:          <span class="num">1</span>          <span class="num">0</span>          <span class="num">0</span>          <span class="num">0</span>   IO-APIC   <span class="num">8</span>-edge      rtc0
+ <span class="num">26</span>:      <span class="num">12345</span>     <span class="num">12876</span>     <span class="num">11432</span>     <span class="num">12098</span>   PCI-MSI <span class="num">2097152</span>-edge   eth0
+
+<span class="cm"># 把网卡 IRQ 26 绑定到 CPU 1</span>
+echo <span class="num">2</span> > /proc/irq/<span class="num">26</span>/smp_affinity   <span class="cm"># 0x2 = CPU 1</span>
+
+<span class="cm"># 看 softirq 统计</span>
+cat /proc/softirqs
+
+<span class="cm"># ftrace 跟踪中断</span>
+echo <span class="num">1</span> > /sys/kernel/debug/tracing/events/irq/enable
+cat /sys/kernel/debug/tracing/trace_pipe
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../12-eBPF与可观测性/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../14-启动流程深入/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html" "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
new file mode 100644
index 0000000..e6c513e
--- /dev/null
+++ "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
@@ -0,0 +1,259 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>14 · 启动流程 — BIOS / UEFI 到 systemd — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html" class="active">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">14</span>启动流程 — BIOS / UEFI 到 systemd</h1>
+
+
+<p>"按下电源到看见登录提示符"之间，<b>发生了 12 个关键阶段</b>。本章逐阶段拆解，让你彻底理解 Linux 是如何"自举"起来的。</p>
+
+<h2 id="overview">14.1 启动全景</h2>
+
+<div class="diagram">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 460" font-family="-apple-system,sans-serif" font-size="11">
+<defs><marker id="ar14" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto"><path d="M0,0 L10,5 L0,10 z" fill="#ff7b29"/></marker></defs>
+<text x="400" y="22" text-anchor="middle" font-size="15" font-weight="700" fill="#ff7b29">从按电源到 shell 提示符的 12 阶段</text>
+
+<g font-size="11">
+<rect x="20" y="40" width="760" height="34" rx="4" fill="#1a2028" stroke="#f85149"/>
+<text x="14" y="62" fill="#f85149" font-weight="700" font-size="13" transform="translate(20,0)">① 上电 / RESET</text>
+<text x="180" y="62" fill="#8b949e">CPU 从复位向量 0xFFFFFFF0 (BIOS) 或 SEC (UEFI) 开始执行</text>
+
+<line x1="400" y1="74" x2="400" y2="84" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="84" width="760" height="34" rx="4" fill="#1a2028" stroke="#ff7b29"/>
+<text x="14" y="106" fill="#ff7b29" font-weight="700" font-size="13" transform="translate(20,0)">② 固件 (BIOS / UEFI)</text>
+<text x="190" y="106" fill="#8b949e">POST 自检 → 加载 MBR (0x7C00) 或 EFI System Partition → 启动管理器</text>
+
+<line x1="400" y1="118" x2="400" y2="128" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="128" width="760" height="34" rx="4" fill="#1a2028" stroke="#e3b341"/>
+<text x="14" y="150" fill="#e3b341" font-weight="700" font-size="13" transform="translate(20,0)">③ Bootloader (GRUB)</text>
+<text x="200" y="150" fill="#8b949e">GRUB 读 /boot/grub/grub.cfg → 显示菜单 → 加载 vmlinuz + initramfs</text>
+
+<line x1="400" y1="162" x2="400" y2="172" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="172" width="760" height="34" rx="4" fill="#1a2028" stroke="#56d364"/>
+<text x="14" y="194" fill="#56d364" font-weight="700" font-size="13" transform="translate(20,0)">④ 实模式 → 保护模式 → 长模式</text>
+<text x="270" y="194" fill="#8b949e">建临时页表、启用 CR0.PG、跳到 64 位代码段 (先于解压完成)</text>
+
+<line x1="400" y1="206" x2="400" y2="216" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="216" width="760" height="34" rx="4" fill="#1a2028" stroke="#56d364"/>
+<text x="14" y="238" fill="#56d364" font-weight="700" font-size="13" transform="translate(20,0)">⑤ 内核解压</text>
+<text x="150" y="238" fill="#8b949e">arch/x86/boot/compressed/ — bzImage 自解压到运行地址</text>
+
+<line x1="400" y1="250" x2="400" y2="260" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="260" width="760" height="34" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+<text x="14" y="282" fill="#58a6ff" font-weight="700" font-size="13" transform="translate(20,0)">⑥ start_kernel()</text>
+<text x="180" y="282" fill="#8b949e">init/main.c — 各子系统按顺序初始化 (sched, mm, vfs, irq...)</text>
+
+<line x1="400" y1="294" x2="400" y2="304" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="304" width="760" height="34" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+<text x="14" y="326" fill="#58a6ff" font-weight="700" font-size="13" transform="translate(20,0)">⑦ rest_init → init (pid 1)</text>
+<text x="220" y="326" fill="#8b949e">创建 init 内核线程, 之后切到 idle 当 pid 0</text>
+
+<line x1="400" y1="338" x2="400" y2="348" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="348" width="760" height="34" rx="4" fill="#1a2028" stroke="#bc8cff"/>
+<text x="14" y="370" fill="#bc8cff" font-weight="700" font-size="13" transform="translate(20,0)">⑧ initramfs 解压 + 运行 /init</text>
+<text x="270" y="370" fill="#8b949e">挂载真正根文件系统, 加载存储/网络驱动</text>
+
+<line x1="400" y1="382" x2="400" y2="392" stroke="#ff7b29" marker-end="url(#ar14)"/>
+
+<rect x="20" y="392" width="760" height="34" rx="4" fill="#1a2028" stroke="#e3b341"/>
+<text x="14" y="414" fill="#e3b341" font-weight="700" font-size="13" transform="translate(20,0)">⑨ switch_root → /sbin/init (systemd)</text>
+<text x="290" y="414" fill="#8b949e">扔掉 initramfs, 跳到真实 init, 启动各 service</text>
+
+<rect x="20" y="430" width="760" height="20" rx="4" fill="#1a2028" stroke="#56d364"/>
+<text x="14" y="446" fill="#56d364" font-weight="700" transform="translate(20,0)">⑩~⑫ systemd target → getty → login → shell</text>
+</g>
+</svg>
+</div>
+
+<h2 id="uefi">14.2 UEFI vs Legacy BIOS</h2>
+
+<table>
+<tr><th>对比</th><th>Legacy BIOS</th><th>UEFI</th></tr>
+<tr><td>启动方式</td><td>MBR (446B 引导代码)</td><td>EFI System Partition (FAT32)</td></tr>
+<tr><td>磁盘上限</td><td>2 TB (MBR)</td><td>9.4 ZB (GPT)</td></tr>
+<tr><td>启动代码语言</td><td>16 位汇编</td><td>C (UEFI 应用)</td></tr>
+<tr><td>Secure Boot</td><td>无</td><td>支持 (RSA 签名校验)</td></tr>
+<tr><td>启动菜单</td><td>需 bootloader</td><td>固件内置</td></tr>
+<tr><td>内核接口</td><td>real-mode call</td><td>EFI Runtime Services</td></tr>
+</table>
+
+<h3>EFI Stub — 不需要 GRUB 也能启动</h3>
+<p>现代 Linux 内核内嵌 "EFI stub"，本身就是一个 EFI 应用，可被 UEFI 固件直接执行：</p>
+<pre class="code-bash"><span class="cm"># 把 vmlinuz 复制到 ESP，UEFI 启动菜单加一条即可，无需 GRUB</span>
+cp /boot/vmlinuz /boot/efi/EFI/Linux/
+cp /boot/initrd.img /boot/efi/EFI/Linux/initrd
+efibootmgr -c -d /dev/nvme0n1 -p <span class="num">1</span> -L <span class="str">"Linux"</span> -l <span class="str">"/EFI/Linux/vmlinuz"</span> \
+    -u <span class="str">"root=/dev/nvme0n1p2 ro initrd=\\EFI\\Linux\\initrd"</span>
+</pre>
+
+<h2 id="grub">14.3 GRUB 2 的多阶段启动</h2>
+
+<pre class="code-bash"><span class="cm"># GRUB 分多阶段是因为 MBR 只有 446 字节, 装不下完整代码</span>
+
+Stage <span class="num">1</span>:  MBR (446 B, GRUB 2 中叫 boot.img) → 加载 stage 1.5
+Stage <span class="num">1.5</span>: 紧跟 MBR 的扇区 (GRUB 2 中叫 core.img) → 包含 fs 驱动 (能读 ext4 等)
+Stage <span class="num">2</span>:  从 /boot/grub/ 加载完整 GRUB 代码 (各 *.mod 模块) + grub.cfg
+        → 显示启动菜单 → 加载 kernel + initramfs → boot
+</pre>
+
+<h2 id="initramfs">14.4 initramfs — 为什么需要</h2>
+
+<div class="callout deep">
+<div class="label">"鸡生蛋"问题</div>
+<p>内核需要根文件系统（如 ext4 on LVM on /dev/nvme0n1p2）才能跑用户程序，
+但<b>挂载根文件系统需要先加载 LVM、文件系统、磁盘驱动</b>，而这些<b>驱动模块和用户态工具 (modprobe、lvm、cryptsetup 等) 本身就存放在根文件系统上</b>。
+解决：把这些工具打包成一个最小镜像 initramfs，内核先用它，准备好后再切换到真根。</p>
+</div>
+
+<pre class="code-bash"><span class="cm"># 查看你的 initramfs</span>
+lsinitramfs /boot/initrd.img-$(uname -r) | head
+
+<span class="cm"># 解包看 (若镜像前面拼接了 microcode 或使用 zstd/lz4 压缩，zcat 会失败，可改用 unmkinitramfs)</span>
+mkdir /tmp/initrd && cd /tmp/initrd
+zcat /boot/initrd.img-$(uname -r) | cpio -idmv
+
+ls
+<span class="cm"># bin etc init lib lib64 proc root run sbin sys usr var</span>
+</pre>
+
+<h2 id="start_kernel">14.5 start_kernel() 初始化顺序</h2>
+
+<pre class="code-c"><span class="cm">/* init/main.c — 简化版执行顺序 */</span>
+asmlinkage <span class="kw">__visible</span> <span class="kw">void</span> __init <span class="fn">start_kernel</span>(<span class="kw">void</span>) {
+    set_task_stack_end_magic(&init_task);
+    smp_setup_processor_id();
+    boot_cpu_init();             <span class="cm">// 标记 CPU 0 上线</span>
+
+    page_address_init();         <span class="cm">// 页地址映射</span>
+    early_security_init();
+    setup_arch(&command_line);   <span class="cm">// 架构相关初始化</span>
+    setup_command_line(command_line);
+    setup_per_cpu_areas();
+    smp_prepare_boot_cpu();
+
+    build_all_zonelists(NULL);
+    page_alloc_init();
+    pr_notice(<span class="str">"Kernel command line: %s\n"</span>, saved_command_line);
+
+    setup_log_buf(<span class="num">0</span>);
+    vfs_caches_init_early();     <span class="cm">// dcache/icache</span>
+    sort_main_extable();
+    trap_init();                 <span class="cm">// 设置 IDT</span>
+    mm_init();                   <span class="cm">// ★ 内存管理</span>
+
+    sched_init();                <span class="cm">// ★ 调度器</span>
+    preempt_disable();
+    local_irq_disable();
+    radix_tree_init();
+    
+    early_irq_init();
+    init_IRQ();                  <span class="cm">// 中断子系统</span>
+    tick_init();
+    rcu_init();
+    init_timers();
+    hrtimers_init();
+
+    local_irq_enable();          <span class="cm">// ★ 终于开中断</span>
+
+    console_init();              <span class="cm">// 现在能 printk 到屏幕</span>
+
+    proc_root_init();
+    cgroup_init();
+    fork_init();
+    proc_caches_init();
+    buffer_init();
+    security_init();
+    vfs_caches_init();           <span class="cm">// ★ VFS 完整初始化</span>
+    signals_init();
+
+    rest_init();                 <span class="cm">// 创建 pid 1 后变 idle</span>
+}
+</pre>
+
+<h2 id="kaslr">14.6 KASLR — 内核地址随机化</h2>
+
+<p>内核启动时把自己<b>随机加载到不同的虚拟地址</b>，让攻击者无法靠固定地址 ROP：</p>
+
+<pre class="code-bash"><span class="cm"># 看当前内核基址 (_text 是内核代码段起点)</span>
+sudo grep -w _text /proc/kallsyms
+<span class="cm"># ffffffff8e600000 T _text</span>
+
+<span class="cm"># 重启后再看 → 地址会变 (除非禁用)</span>
+<span class="cm"># 关闭 KASLR (调试用)</span>
+<span class="cm"># 在 cmdline 加 nokaslr</span>
+</pre>
+
+<h2 id="systemd">14.7 systemd — 现代 init</h2>
+
+<p>systemd 取代了传统 SysV init / Upstart，特点：</p>
+<ul>
+<li><b>并行启动</b>：根据服务依赖图，互不依赖的服务并发启动</li>
+<li><b>按需 (socket-activation)</b>：服务先不起，端口连接时再起</li>
+<li><b>cgroup 直接集成</b>：每个 service 自动一个 cgroup</li>
+<li><b>统一日志 journald</b>：结构化日志</li>
+</ul>
+
+<pre class="code-bash">systemctl status                <span class="cm"># 系统整体状态与服务树 (默认 target 用 systemctl get-default 查看)</span>
+systemctl list-dependencies     <span class="cm"># 依赖树</span>
+systemd-analyze blame           <span class="cm"># 启动时间贡献排序</span>
+systemd-analyze critical-chain  <span class="cm"># 关键路径</span>
+journalctl -b -p err            <span class="cm"># 本次启动错误日志</span>
+</pre>
+
+
+<footer class="page-footer">
+    <p>← <a href="../13-中断与异常/index.html">上一章</a> · <a href="../index.html">总目录</a> · <a href="../15-内核调试与性能/index.html">下一章 →</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html" "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"
new file mode 100644
index 0000000..97bf9ee
--- /dev/null
+++ "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"
@@ -0,0 +1,289 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>15 · 内核调试与性能工具全景 — Linux 内核学习指南</title>
+<link rel="stylesheet" href="../assets/style.css">
+</head>
+<body>
+<div class="layout">
+<aside class="sidebar">
+    <div class="brand">🐧 Linux 内核学习</div>
+    <div class="brand-sub">从 0.11 到 6.x · 专家级路径</div>
+    <nav>
+        <div class="section-title">入门 & 准备</div>
+        <ul>
+            <li><a href="../index.html">🏠 总目录</a></li>
+            <li><a href="../00-学习路线/index.html">00 · 学习路线</a></li>
+            <li><a href="../01-经典版本选择/index.html">01 · 经典版本选择</a></li>
+            <li><a href="../02-环境搭建/index.html">02 · 环境搭建</a></li>
+        </ul>
+        <div class="section-title">核心子系统</div>
+        <ul>
+            <li><a href="../03-进程管理/index.html">03 · 进程管理</a></li>
+            <li><a href="../04-内存管理/index.html">04 · 内存管理</a></li>
+            <li><a href="../05-文件系统/index.html">05 · 文件系统</a></li>
+            <li><a href="../06-系统调用/index.html">06 · 系统调用</a></li>
+            <li><a href="../07-设备驱动/index.html">07 · 设备驱动</a></li>
+            <li><a href="../08-网络子系统/index.html">08 · 网络子系统</a></li>
+            <li><a href="../09-同步机制/index.html">09 · 同步机制</a></li>
+        </ul>
+        <div class="section-title">专家级深入</div>
+        <ul>
+            <li><a href="../10-CFS调度器/index.html">10 · CFS 调度器</a></li>
+            <li><a href="../11-容器与命名空间/index.html">11 · 容器与命名空间</a></li>
+            <li><a href="../12-eBPF与可观测性/index.html">12 · eBPF 与可观测性</a></li>
+            <li><a href="../13-中断与异常/index.html">13 · 中断与异常</a></li>
+            <li><a href="../14-启动流程深入/index.html">14 · 启动流程深入</a></li>
+            <li><a href="../15-内核调试与性能/index.html" class="active">15 · 内核调试与性能</a></li>
+        </ul>
+    </nav>
+</aside>
+
+<main class="content">
+
+<h1><span class="chapter-num">15</span>内核调试与性能工具全景</h1>
+
+
+<p>本章是内核学习的"<b>百宝箱</b>"。掌握这些工具，你可以从"我也不知道哪里出问题了"升级到"我能精确定位到第几行"。</p>
+
+<h2 id="overview">15.1 工具全景</h2>
+
+<table>
+<tr><th>问题类型</th><th>首选工具</th><th>次选</th></tr>
+<tr><td>函数追踪 / 调用图</td><td><b>ftrace</b></td><td>perf / bpftrace</td></tr>
+<tr><td>CPU 性能热点</td><td><b>perf record</b></td><td>FlameGraph</td></tr>
+<tr><td>延迟 / 抖动</td><td><b>perf sched / bpftrace</b></td><td>ftrace latency tracers</td></tr>
+<tr><td>内存错误</td><td><b>KASAN</b></td><td>KFENCE, KMSAN</td></tr>
+<tr><td>锁竞争 / 死锁</td><td><b>lockdep + lockstat</b></td><td>—</td></tr>
+<tr><td>动态追踪生产</td><td><b>eBPF / bpftrace</b></td><td>kprobe</td></tr>
+<tr><td>崩溃事后分析</td><td><b>kdump + crash</b></td><td>—</td></tr>
+<tr><td>不停机修补</td><td><b>livepatch</b></td><td>—</td></tr>
+<tr><td>RCU 卡死</td><td><b>RCU stall detector</b></td><td>—</td></tr>
+</table>
+
+<h2 id="printk">15.2 printk — 最简单也最常用</h2>
+
+<pre class="code-c"><span class="cm">/* 日志级别 */</span>
+pr_emerg(...);    <span class="cm">// KERN_EMERG  0  系统不可用</span>
+pr_alert(...);    <span class="cm">// KERN_ALERT  1  立即处理</span>
+pr_crit(...);     <span class="cm">// KERN_CRIT   2  严重错误</span>
+pr_err(...);      <span class="cm">// KERN_ERR    3  错误</span>
+pr_warn(...);     <span class="cm">// KERN_WARNING 4 警告</span>
+pr_notice(...);   <span class="cm">// KERN_NOTICE 5  注意事项</span>
+pr_info(...);     <span class="cm">// KERN_INFO   6  信息</span>
+pr_debug(...);    <span class="cm">// KERN_DEBUG  7  调试</span>
+
+<span class="cm">/* 限速版本 — 防止刷屏 */</span>
+pr_warn_ratelimited(<span class="str">"oops happened %d times\n"</span>, count);
+
+<span class="cm">/* 一次性 — 只打印第一次 */</span>
+pr_warn_once(<span class="str">"deprecated API\n"</span>);
+</pre>
+
+<pre class="code-bash">dmesg --level=err,warn
+sudo dmesg -wH                  <span class="cm"># 实时跟踪，人类友好时间戳</span>
+echo <span class="num">8</span> > /proc/sys/kernel/printk    <span class="cm"># 控制台显示所有级别 (只输出级别值 &lt; console_loglevel 的消息，写 7 会漏掉 KERN_DEBUG)</span>
+</pre>
+
+<h2 id="ftrace">15.3 ftrace — 内核内建追踪框架</h2>
+
+<pre class="code-bash"><span class="cm"># 1. 跟踪所有函数调用 (function tracer)</span>
+cd /sys/kernel/debug/tracing
+echo function > current_tracer
+cat trace_pipe | head
+
+<span class="cm"># 2. 函数图 (function_graph) — 看调用关系</span>
+echo function_graph > current_tracer
+echo sys_open > set_graph_function
+cat trace_pipe
+
+<span class="cm">#  3) cpu | duration | function</span>
+<span class="cm">#  0)               |  sys_open() {</span>
+<span class="cm">#  0)               |    do_sys_open() {</span>
+<span class="cm">#  0)   0.420 us    |      _raw_spin_lock();</span>
+<span class="cm">#  0)   1.250 us    |      do_filp_open() {</span>
+<span class="cm">#  0)   8.470 us    |    }</span>
+<span class="cm">#  0)  12.100 us    |  }</span>
+
+<span class="cm"># 3. 跟特定事件 (events)</span>
+echo <span class="num">1</span> > events/sched/sched_switch/enable
+echo <span class="num">1</span> > events/syscalls/sys_enter_open/enable
+
+<span class="cm"># 4. 过滤特定进程</span>
+echo $(pidof myapp) > set_ftrace_pid
+</pre>
+
+<h3>function_graph 的杀手锏</h3>
+<pre class="code-bash"><span class="cm"># 看哪个函数耗时最多 (按耗时阈值过滤输出)</span>
+echo function_graph > current_tracer
+echo > set_ftrace_filter                <span class="cm"># 写空串 = 清空函数过滤器 (nop 只是 current_tracer 的取值)</span>
+echo > set_graph_function               <span class="cm"># 清空之前设置的 graph 入口</span>
+echo schedule > set_graph_function     <span class="cm"># 关注 schedule 子树</span>
+echo <span class="num">10</span> > tracing_thresh                <span class="cm"># 单位是微秒：只显示 &gt;10us 的</span>
+cat trace_pipe
+</pre>
+
+<h2 id="perf">15.4 perf — 性能分析瑞士军刀</h2>
+
+<pre class="code-bash"><span class="cm"># 实时看哪些函数 CPU 占用最高 (top)</span>
+sudo perf top
+
+<span class="cm"># 录制 30 秒采样</span>
+sudo perf record -F <span class="num">99</span> -a -g -- sleep <span class="num">30</span>
+sudo perf report                   <span class="cm"># 交互式 TUI</span>
+
+<span class="cm"># 输出火焰图 (Brendan Gregg)</span>
+git clone https://github.com/brendangregg/FlameGraph
+sudo perf script | ./FlameGraph/stackcollapse-perf.pl | ./FlameGraph/flamegraph.pl > flame.svg
+
+<span class="cm"># 跟踪特定事件 (PMU)</span>
+sudo perf stat -e cycles,instructions,cache-misses,branch-misses -- ./my_app
+
+<span class="cm"># 调度延迟</span>
+sudo perf sched record -- sleep <span class="num">5</span>
+sudo perf sched latency
+sudo perf sched map
+</pre>
+
+<h2 id="kasan">15.5 KASAN — 内存错误检测器</h2>
+
+<p>KASAN (Kernel Address Sanitizer) 用<b>影子内存</b>追踪每字节的访问权限，发现 use-after-free、越界、双释放等：</p>
+
+<pre class="code-bash"><span class="cm"># 编译内核时开启</span>
+CONFIG_KASAN=y
+CONFIG_KASAN_GENERIC=y
+
+<span class="cm"># 触发后 dmesg:</span>
+[ <span class="num">5.000</span>] ==================================================================
+[ <span class="num">5.000</span>] BUG: KASAN: use-after-free in foo+<span class="num">0x42</span>/<span class="num">0x100</span>
+[ <span class="num">5.000</span>] Read of size <span class="num">4</span> at addr ffff888008c4f1d8 by task qemu/<span class="num">1234</span>
+[ <span class="num">5.000</span>] Call Trace:
+[ <span class="num">5.000</span>]   foo+<span class="num">0x42</span>/<span class="num">0x100</span>
+[ <span class="num">5.000</span>]   bar+<span class="num">0x10</span>/<span class="num">0x20</span>
+[ <span class="num">5.000</span>] Freed by task <span class="num">1233</span>:        <span class="cm">←  谁释放的</span>
+[ <span class="num">5.000</span>]   kfree+...
+[ <span class="num">5.000</span>] Allocated by task <span class="num">1233</span>:    <span class="cm">←  谁分配的</span>
+</pre>
+
+<table>
+<tr><th>变种</th><th>开销</th><th>检测</th></tr>
+<tr><td>KASAN</td><td>2x 内存, 3x 慢</td><td>UAF、越界、double-free</td></tr>
+<tr><td>KFENCE</td><td>几乎零开销</td><td>采样式检测，可上生产</td></tr>
+<tr><td>KMSAN</td><td>4x 内存, 5x 慢</td><td>未初始化内存读</td></tr>
+<tr><td>UBSAN</td><td>低</td><td>未定义行为 (溢出、错位移位等)</td></tr>
+</table>
+
+<h2 id="lockdep">15.6 lockdep — 在死锁发生<b>之前</b>就警告</h2>
+
+<pre class="code-bash"><span class="cm"># 内核配置</span>
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+
+<span class="cm"># 锁统计 (需额外开启 CONFIG_LOCK_STAT=y)</span>
+cat /proc/lock_stat | head -<span class="num">30</span>
+</pre>
+
+<p>lockdep 会跟踪<b>每把锁的获取顺序</b>形成有向图，发现<b>潜在循环</b>就报警（哪怕实际没死锁）。</p>
+
+<h2 id="bpftrace">15.7 bpftrace — 现代内核观察利器</h2>
+
+<pre class="code-bash"><span class="cm"># 1. 谁在打开文件?</span>
+bpftrace -e <span class="str">'tracepoint:syscalls:sys_enter_openat {
+    printf("%s -> %s\n", comm, str(args->filename));
+}'</span>
+
+<span class="cm"># 2. 哪个进程在产生 OOM?</span>
+bpftrace -e <span class="str">'kprobe:oom_kill_process {
+    $oc = (struct oom_control *)arg0; printf("OOM kill: %s (pid %d)\n", $oc->chosen->comm, $oc->chosen->pid);
+}'</span>
+
+<span class="cm"># 3. block IO 延迟分布</span>
+bpftrace -e <span class="str">'
+tracepoint:block:block_rq_issue { @start[args->dev, args->sector] = nsecs; }
+tracepoint:block:block_rq_complete /@start[args->dev, args->sector]/ {
+    @us = hist((nsecs - @start[args->dev, args->sector]) / 1000);
+    delete(@start[args->dev, args->sector]);
+}'</span>
+
+<span class="cm"># 4. TCP 连接事件</span>
+bpftrace -e <span class="str">'kprobe:tcp_connect {
+    printf("%s tcp_connect\n", comm);
+}'</span>
+</pre>
+
+<h2 id="kdump">15.8 kdump + crash — 事后尸检</h2>
+
+<p>内核 panic 时 <b>kdump</b> 自动捕获完整内存到文件（vmcore），<b>crash</b> 工具像 gdb 一样事后分析：</p>
+
+<pre class="code-bash"><span class="cm"># 1. 安装 + 配置 kdump (Ubuntu)</span>
+sudo apt install kdump-tools linux-crashdump
+<span class="cm"># 重启后 /var/crash 下会有 vmcore</span>
+
+<span class="cm"># 2. 分析</span>
+crash /usr/lib/debug/boot/vmlinux-$(uname -r) /var/crash/.../vmcore
+
+crash> bt          <span class="cm"># 崩溃时的栈</span>
+crash> ps          <span class="cm"># 当时所有进程</span>
+crash> log         <span class="cm"># dmesg</span>
+crash> kmem -i     <span class="cm"># 内存状况</span>
+crash> sym 0xffffffff8...   <span class="cm"># 地址 → 符号</span>
+</pre>
+
+<h2 id="livepatch">15.9 livepatch — 不重启打补丁</h2>
+
+<p>生产环境最怕"打个安全补丁要重启 1000 台机器"。<b>livepatch</b> 通过 ftrace 机制把函数<b>动态替换</b>为新版本：</p>
+
+<pre class="code-c"><span class="cm">/* 写一个 livepatch 模块替换 cmdline_proc_show */</span>
+<span class="kw">#include</span> &lt;linux/livepatch.h&gt;
+
+<span class="kw">static int</span> <span class="fn">my_cmdline_show</span>(<span class="kw">struct</span> seq_file *m, <span class="kw">void</span> *v) {
+    seq_puts(m, <span class="str">"PATCHED\n"</span>);
+    <span class="kw">return</span> <span class="num">0</span>;
+}
+
+<span class="kw">static struct</span> klp_func funcs[] = {
+    { .old_name = <span class="str">"cmdline_proc_show"</span>, .new_func = my_cmdline_show },
+    { }
+};
+<span class="kw">static struct</span> klp_object objs[] = { { .funcs = funcs }, { } };
+<span class="kw">static struct</span> klp_patch patch = { .mod = THIS_MODULE, .objs = objs };
+
+<span class="kw">static int</span> <span class="fn">__init</span> <span class="fn">init</span>(<span class="kw">void</span>) { <span class="kw">return</span> klp_enable_patch(&amp;patch); }
+</pre>
+
+<h2 id="checklist">15.10 调试速查清单</h2>
+
+<div class="callout tip">
+<div class="label">遇到问题先问自己</div>
+<ol>
+<li><b>能否在 QEMU 复现？</b> → 能则用 GDB 单步；不能则在生产环境用低开销工具（bpftrace / perf）观察</li>
+<li><b>是性能问题还是正确性问题？</b></li>
+<li><b>是 CPU/内存/IO/锁哪一类？</b> → 工具不同（perf / KASAN / iostat / lockstat）</li>
+<li><b>是否能扛得起影响？</b> → 重则 ftrace 全开；轻则 bpftrace 探针</li>
+<li><b>有没有崩溃 dump？</b> → 有则先 crash 分析</li>
+</ol>
+</div>
+
+<div class="callout deep">
+<div class="label">恭喜你完成了全部 16 章</div>
+<p>到这里，你已经走完了从 "0 基础" 到 "专家级" 的完整路径。下一步：</p>
+<ul>
+<li>从 <a href="https://kernelnewbies.org">kernelnewbies.org</a> 找一个 first patch</li>
+<li>订阅 <a href="https://lwn.net">LWN.net</a> 跟踪内核演化</li>
+<li>找一个开源项目（Cilium / runc / fuse / KVM 等）深入贡献</li>
+<li>把本指南中的实验全部跑一遍，每个都写一篇博客</li>
+</ul>
+<p><b>"读万行代码，不如调一个 bug"</b> — 祝你 hacking 愉快！🐧</p>
+</div>
+
+
+<footer class="page-footer">
+    <p>← <a href="../14-启动流程深入/index.html">上一章</a> · <a href="../index.html">总目录</a></p>
+</footer>
+
+</main>
+</div>
+</body>
+</html>
diff --git a/README.md b/README.md
index 5681c9f..87c27a4 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,55 @@
 # 🐧 如何学习 Linux 内核 — 系统性学习指南
 
 > **目标**：通过阅读经典内核版本（Linux 0.11 & Linux 2.6.0）的源码，
-> 从零开始理解现代 Linux 操作系统的核心框架与设计哲学。
+> 从零开始理解现代 Linux 操作系统的核心框架与设计哲学，
+> 并通过专家级章节深入 CFS、容器、eBPF、调试等现代主题。
+
+---
+
+## 🌐 推荐阅读方式：HTML 网站
+
+本指南已升级为 **完整的 HTML 网站**，包含：
+
+- 🎨 暗色主题 + 语法高亮的代码块
+- 📊 30+ 个内嵌 SVG 架构图（页表、CFS 红黑树、TCP 状态机、容器结构…）
+- 📑 16 章覆盖从入门到专家级的完整知识体系
+- 🧭 左侧固定侧边栏，章节间一键跳转
+
+**用浏览器直接打开 [`index.html`](./index.html) 即可开始学习。**
+
+> 仍保留每章的 README.md 作为快速文本参考。
 
 ---
 
 ## 📚 目录
 
+### 入门 & 准备
+| # | 章节 | 核心内容 |
+|---|------|----------|
+| [00](./00-学习路线/index.html) | **学习路线** | 阶段规划、时间表、推荐资源 |
+| [01](./01-经典版本选择/index.html) | **经典版本选择** | 0.11 / 2.6.0 / 5.x / 6.x 对比 |
+| [02](./02-环境搭建/index.html) | **环境搭建** | QEMU + GDB + clangd + ftrace + perf |
+
+### 核心子系统
+| # | 章节 | 核心内容 |
+|---|------|----------|
+| [03](./03-进程管理/index.html) | **进程管理** | task_struct、fork/CoW、上下文切换、状态机 |
+| [04](./04-内存管理/index.html) | **内存管理** | 多级页表、Buddy、Slab、NUMA、OOM、THP |
+| [05](./05-文件系统/index.html) | **文件系统** | VFS 四对象、Page Cache、ext4、io_uring |
+| [06](./06-系统调用/index.html) | **系统调用** | entry_SYSCALL_64、vDSO、seccomp、自定义 syscall |
+| [07](./07-设备驱动/index.html) | **设备驱动** | 完整 char driver、设备树、中断上下半部 |
+| [08](./08-网络子系统/index.html) | **网络子系统** | sk_buff、TCP 状态机、netfilter、XDP |
+| [09](./09-同步机制/index.html) | **同步机制** | atomic/spinlock/mutex/RCU/percpu/futex/lockdep |
+
+### 专家级深入
 | # | 章节 | 核心内容 |
 |---|------|----------|
-| [00](./00-学习路线/README.md) | **学习路线** | 阶段规划、时间表、推荐资源 |
-| [01](./01-经典版本选择/README.md) | **经典版本选择** | 0.11 / 2.6.0 / 4.x 版本对比 |
-| [02](./02-环境搭建/README.md) | **环境搭建** | QEMU + GDB 调试环境、源码编译 |
-| [03](./03-进程管理/README.md) | **进程管理** | 进程模型、调度器、上下文切换 |
-| [04](./04-内存管理/README.md) | **内存管理** | 虚拟内存、页表、Slab 分配器 |
-| [05](./05-文件系统/README.md) | **文件系统** | VFS、ext2、inode、dentry |
-| [06](./06-系统调用/README.md) | **系统调用** | 中断表、syscall 入口与返回 |
-| [07](./07-设备驱动/README.md) | **设备驱动** | 驱动模型、字符设备、块设备 |
-| [08](./08-网络子系统/README.md) | **网络子系统** | Socket、TCP/IP 协议栈 |
-| [09](./09-同步机制/README.md) | **同步机制** | 自旋锁、互斥量、RCU |
+| [10](./10-CFS调度器/index.html) | **CFS 调度器** | vruntime、红黑树、调度类、EAS、cgroup 调度 |
+| [11](./11-容器与命名空间/index.html) | **容器与命名空间** | 8 种 NS、cgroups v2、OverlayFS、75 行 mini-docker |
+| [12](./12-eBPF与可观测性/index.html) | **eBPF 与可观测性** | Verifier/JIT/Maps、kprobe/XDP/tc、bpftrace、Cilium |
+| [13](./13-中断与异常/index.html) | **中断与异常** | IDT、APIC、softirq/workqueue/threaded IRQ、IPI |
+| [14](./14-启动流程深入/index.html) | **启动流程** | UEFI → GRUB → initramfs → start_kernel → systemd |
+| [15](./15-内核调试与性能/index.html) | **内核调试与性能** | ftrace/perf/KASAN/lockdep/bpftrace/kdump/livepatch |
 
 ---
 

From fbcacead6f296200efbc4fe5a5521d1d0c509eee Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 12:02:49 +0000
Subject: [PATCH 06/10] docs: add expert-level chapters 10/11/12
 (CFS/Container/eBPF)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 10-CFS调度器: vruntime公式、nice权重表、红黑树分析、pick_next_task_fair()、
  调度类优先级、EDF/SCHED_DEADLINE、负载均衡域、EAS、cgroup v2带宽控制、
  调试工具与常见性能问题 (1154行)

- 11-容器与命名空间: 8种namespace对比表、clone/unshare/setns代码示例、
  PID双重视图、veth pair网络搭建、User namespace UID映射、pivot_root vs chroot、
  cgroups v1/v2对比、OverlayFS copy-up/whiteout、seccomp BPF、capabilities、
  runc流程、mini-docker C实现 (1056行)

- 12-eBPF与可观测性: cBPF→eBPF历史、Verifier安全模型、JIT寄存器映射、
  Map类型表、挂载点全景、完整Hello World程序、XDP防DDoS示例、TCP追踪、
  CO-RE原理、bpftrace实战命令集、BCC工具集、Cilium/Falco/Pixie/Katran、
  限制与调试工作流 (1338行)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                | 1154 ++++++++++++++
 .../README.md"                                | 1056 +++++++++++++
 .../README.md"                                | 1338 +++++++++++++++++
 3 files changed, 3548 insertions(+)
 create mode 100644 "10-CFS\350\260\203\345\272\246\345\231\250/README.md"
 create mode 100644 "11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/README.md"
 create mode 100644 "12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/README.md"

diff --git "a/10-CFS\350\260\203\345\272\246\345\231\250/README.md" "b/10-CFS\350\260\203\345\272\246\345\231\250/README.md"
new file mode 100644
index 0000000..5e44d50
--- /dev/null
+++ "b/10-CFS\350\260\203\345\272\246\345\231\250/README.md"
@@ -0,0 +1,1154 @@
+# 10 — CFS 完全公平调度器深入
+
+> **目标**：从数学公式到红黑树实现，彻底理解 Linux CFS 调度器的每一个设计决策。
+
+---
+
+## 目录
+
+1. [核心思想 — 理想多任务CPU与vruntime](#101-核心思想)
+2. [vruntime 计算公式](#102-vruntime-计算公式)
+3. [sched_entity 结构体](#103-sched_entity-结构体)
+4. [update_curr() 源码分析](#104-update_curr-源码分析)
+5. [红黑树的选择](#105-红黑树的选择)
+6. [cfs_rq 与 pick_next_task_fair()](#106-cfs_rq-与-pick_next_task_fair)
+7. [调度时间片计算](#107-调度时间片计算)
+8. [5个调度类及其优先级](#108-5个调度类及其优先级)
+9. [SCHED_DEADLINE — EDF算法](#109-sched_deadline--edf算法)
+10. [负载均衡](#1010-负载均衡)
+11. [EAS — 能效感知调度](#1011-eas--能效感知调度)
+12. [cgroup v2 层次调度](#1012-cgroup-v2-层次调度)
+13. [调试工具](#1013-调试工具)
+14. [常见性能问题](#1014-常见性能问题)
+
+---
+
+## 10.1 核心思想
+
+### 理想多任务CPU（Ideal Multi-Tasking CPU）
+
+CFS 的设计目标是模拟一个**理想的多任务处理器**：假设有 N 个进程同时运行，每个进程都以 `1/N` 的速度并行执行。在真实硬件上，CPU 一次只能运行一个任务，CFS 通过快速切换来近似这个理想模型。
+
+```
+理想模型（3个等权进程）:
+时间轴: ──────────────────────────────────────►
+进程A:  ▓░░░▓░░░▓░░░▓░░░▓  (每时刻获得 1/3 CPU)
+进程B:  ░▓░░░▓░░░▓░░░▓░░░  (每时刻获得 1/3 CPU)
+进程C:  ░░▓░░░▓░░░▓░░░▓░░  (每时刻获得 1/3 CPU)
+
+实际实现（时间片轮转近似）:
+时间轴: ──────────────────────────────────────►
+进程A:  ▓▓▓▓░░░░░░░░▓▓▓▓░  (集中执行后让出)
+进程B:  ░░░░▓▓▓▓░░░░░░░░▓  
+进程C:  ░░░░░░░░▓▓▓▓░░░░░  
+```
+
+### vruntime 的本质
+
+**虚拟运行时间（virtual runtime）** 是 CFS 的核心抽象。它不是真实流逝的时间，而是经过优先级权重**归一化**后的时间。CFS 始终选择 vruntime 最小的任务运行，从而确保所有任务在"虚拟时钟"上保持同步。
+
+![CFS 红黑树](../assets/diagrams/cfs-rbtree.svg)
+
+```art
+                    CFS 红黑树（按 vruntime 排序）
+                    
+           ┌──────────────────────────────────────┐
+           │           cfs_rq                     │
+           │   rb_root ──────────────────────┐    │
+           │   min_vruntime = 100ns           │    │
+           │   nr_running = 5                 │    │
+           └──────────────────────────────────┘    │
+                                                   │
+                              [110ns] B            ▼
+                             /         \
+                    [100ns] A           [130ns] D
+                   /         \         /         \
+               [95ns] ?   [105ns] C [125ns] E   NULL
+               
+               ↑
+           最左节点 = 下一个被调度的任务 (vruntime最小)
+           pick_next_entity() 直接取 rb_leftmost → O(1)
+```
+
+---
+
+## 10.2 vruntime 计算公式
+
+### 核心公式
+
+$$\text{delta\_vruntime} = \text{delta\_real} \times \frac{\text{NICE\_0\_LOAD}}{\text{weight}}$$
+
+其中：
+- `delta_real`：任务实际运行的物理时间（纳秒）
+- `NICE_0_LOAD = 1024`：nice 值为 0 时的标准权重
+- `weight`：当前任务的权重（由 nice 值决定）
+
+**含义**：
+- nice = 0 的任务：`delta_vruntime = delta_real × 1024/1024 = delta_real`（1:1）
+- nice = -5 的任务（高优先级）：weight > 1024，vruntime 增长**慢**，更容易被选中
+- nice = +5 的任务（低优先级）：weight < 1024，vruntime 增长**快**，被调度少
+
+### Nice 值到 Weight 的映射表
+
+每相邻 nice 值之间，CPU 份额比例约为 **1.25 : 1**。
+
+| Nice 值 | Weight    | 相邻比值 | 说明 |
+|---------|-----------|----------|------|
+| -20     | 88761     | —        | 最高优先级 |
+| -19     | 71755     | 1.237    | |
+| -18     | 56483     | 1.270    | |
+| -17     | 46273     | 1.220    | |
+| -16     | 36291     | 1.275    | |
+| -15     | 29154     | 1.245    | |
+| -14     | 23254     | 1.253    | |
+| -13     | 18705     | 1.243    | |
+| -12     | 14949     | 1.251    | |
+| -11     | 11916     | 1.255    | |
+| -10     | 9548      | 1.248    | |
+| -9      | 7620      | 1.253    | |
+| -8      | 6100      | 1.249    | |
+| -7      | 4904      | 1.244    | |
+| -6      | 3906      | 1.255    | |
+| -5      | 3121      | 1.252    | |
+| -4      | 2501      | 1.248    | |
+| -3      | 1991      | 1.256    | |
+| -2      | 1586      | 1.256    | |
+| -1      | 1277      | 1.242    | |
+| **0**   | **1024**  | 1.247    | **基准（NICE_0_LOAD）** |
+| +1      | 820       | 1.249    | |
+| +2      | 655       | 1.252    | |
+| +3      | 526       | 1.245    | |
+| +4      | 423       | 1.244    | |
+| +5      | 335       | 1.263    | |
+| +6      | 272       | 1.232    | |
+| +7      | 215       | 1.265    | |
+| +8      | 172       | 1.250    | |
+| +9      | 137       | 1.255    | |
+| +10     | 110       | 1.245    | |
+| +11     | 87        | 1.264    | |
+| +12     | 70        | 1.243    | |
+| +13     | 56        | 1.250    | |
+| +14     | 45        | 1.244    | |
+| +15     | 36        | 1.250    | |
+| +16     | 29        | 1.241    | |
+| +17     | 23        | 1.261    | |
+| +18     | 18        | 1.278    | |
+| +19     | 15        | 1.200    | 最低优先级 |
+
+> **内核源码位置**：`kernel/sched/core.c` — `const int sched_prio_to_weight[40]`
+
+```c
+/* kernel/sched/core.c */
+const int sched_prio_to_weight[40] = {
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
+};
+```
+
+### 乘法逆元优化
+
+为避免整数除法，内核预计算每个 weight 的**乘法逆元**（inv_weight），用移位乘法代替除法：
+
+```c
+/* kernel/sched/core.c */
+const u32 sched_prio_to_wmult[40] = {
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* ...每个值 = 2^32 / weight（近似）... */
+ /*   0 */   4194304,   5237765,   6557202,   8166337,  10153587,
+};
+
+/* calc_delta_fair() 核心计算 */
+static u64 __calc_delta(u64 delta_exec, unsigned long weight,
+                        struct load_weight *lw)
+{
+    u64 fact = scale_load_down(weight);
+    u32 fact_hi = (u32)(fact >> 32);
+    int shift = 32;
+    /* 使用 inv_weight 避免除法: delta * weight * inv_lw_weight >> 32 */
+    ...
+}
+```
+
+---
+
+## 10.3 sched_entity 结构体
+
+每个可调度实体（进程或 cgroup）都嵌入一个 `sched_entity`：
+
+```c
+/* include/linux/sched.h */
+struct sched_entity {
+    /* 负载权重，包含 weight 和 inv_weight */
+    struct load_weight      load;
+
+    /* 红黑树节点，key = vruntime */
+    struct rb_node          run_node;
+
+    /* 链表节点：任务入队时挂到 rq->cfs_tasks 链表（供负载均衡遍历）*/
+    struct list_head        group_node;
+
+    /* 是否在运行队列中 */
+    unsigned int            on_rq;
+
+    /* 上次开始执行的时间戳（ns，单调时钟）*/
+    u64                     exec_start;
+
+    /* 总累计执行时间（物理时间，ns）*/
+    u64                     sum_exec_runtime;
+
+    /* 虚拟运行时间 — CFS 排序的关键字段 */
+    u64                     vruntime;
+
+    /* 上次统计时的 sum_exec_runtime（用于计算本次 delta）*/
+    u64                     prev_sum_exec_runtime;
+
+    /* 任务在 CPU 之间被迁移的次数 */
+    u64                     nr_migrations;
+
+#ifdef CONFIG_SCHEDSTATS
+    struct sched_statistics statistics;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+    /* 所属调度组的深度（层次调度）*/
+    int                     depth;
+
+    /* 父调度实体（cgroup 层次）*/
+    struct sched_entity    *parent;
+
+    /* 所在的 CFS 运行队列 */
+    struct cfs_rq          *cfs_rq;
+
+    /* 代表该 cgroup 的 "my_q" */
+    struct cfs_rq          *my_q;
+
+    /* 平滑负载（PELT：Per-Entity Load Tracking）*/
+    unsigned long           runnable_weight;
+#endif
+
+#ifdef CONFIG_SMP
+    /* 用于 PELT 负载追踪的平均值结构 */
+    struct sched_avg        avg;
+#endif
+};
+```
+
+### task_struct 中的嵌入
+
+```c
+struct task_struct {
+    ...
+    /* CFS 调度实体 */
+    struct sched_entity     se;
+    /* 实时调度实体 */
+    struct sched_rt_entity  rt;
+    /* Deadline 调度实体 */
+    struct sched_dl_entity  dl;
+    ...
+    int                     prio;        /* 动态优先级 */
+    int                     static_prio; /* 静态优先级（nice转换而来）*/
+    int                     normal_prio; /* 归一化优先级 */
+    unsigned int            rt_priority; /* 实时优先级（1-99）*/
+    ...
+};
+```
+
+---
+
+## 10.4 update_curr() 源码分析
+
+`update_curr()` 在每次 tick、任务入队/出队时调用，负责更新 vruntime：
+
+```c
+/* kernel/sched/fair.c */
+static void update_curr(struct cfs_rq *cfs_rq)
+{
+    struct sched_entity *curr = cfs_rq->curr;
+    u64 now = rq_clock_task(rq_of(cfs_rq));  /* 获取当前时间戳 */
+    u64 delta_exec;
+
+    if (unlikely(!curr))
+        return;
+
+    /* 计算自上次更新以来的实际执行时间 */
+    delta_exec = now - curr->exec_start;
+    if (unlikely((s64)delta_exec <= 0))
+        return;
+
+    /* 更新 exec_start 为当前时间 */
+    curr->exec_start = now;
+
+    /* 更新调度统计 */
+    schedstat_set(curr->statistics.exec_max,
+                  max(delta_exec, curr->statistics.exec_max));
+
+    /* 累加真实执行时间 */
+    curr->sum_exec_runtime += delta_exec;
+
+#ifdef CONFIG_SCHEDSTATS
+    schedstat_add(cfs_rq->exec_clock, delta_exec);
+#endif
+
+    /* 关键：更新 vruntime
+     * calc_delta_fair() 实现公式:
+     * delta_vruntime = delta_exec * NICE_0_LOAD / weight
+     */
+    curr->vruntime += calc_delta_fair(delta_exec, curr);
+
+    /* 更新运行队列的 min_vruntime
+     * min_vruntime 只增不减，防止新进程入队时 vruntime 过小导致独占 CPU
+     */
+    update_min_vruntime(cfs_rq);
+
+    /* 若当前实体是普通任务（而非组调度实体），则做 per-task 的追踪与 CPU 时间记账 */
+    if (entity_is_task(curr)) {
+        struct task_struct *curtask = task_of(curr);
+        trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
+        cgroup_account_cputime(curtask, delta_exec);
+        account_group_exec_runtime(curtask, delta_exec);
+    }
+
+    account_cfs_rq_runtime(cfs_rq, delta_exec);
+}
+```
+
+### update_min_vruntime() 的关键作用
+
+```c
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+    struct sched_entity *curr = cfs_rq->curr;
+    struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline);
+
+    u64 vruntime = cfs_rq->min_vruntime;
+
+    if (curr) {
+        if (curr->on_rq)
+            vruntime = curr->vruntime;
+        else
+            curr = NULL;
+    }
+
+    if (leftmost) {
+        struct sched_entity *se = __node_2_se(leftmost);
+        if (!curr)
+            vruntime = se->vruntime;
+        else
+            vruntime = min_vruntime(vruntime, se->vruntime);
+    }
+
+    /* 保证 min_vruntime 单调递增 */
+    cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+```
+
+---
+
+## 10.5 红黑树的选择
+
+### 数据结构复杂度对比
+
+| 操作         | 链表    | 二叉堆   | 红黑树     | 跳表       |
+|-------------|---------|----------|-----------|-----------|
+| 插入         | O(1)    | O(log n) | O(log n)  | O(log n)  |
+| 删除         | O(1)*   | O(log n) | O(log n)  | O(log n)  |
+| 查找最小值   | O(n)    | O(1)     | O(log n)† | O(1)      |
+| 任意查找     | O(n)    | O(n)     | O(log n)  | O(log n)  |
+| 内存开销     | 最小    | 紧凑     | 中等      | 较高      |
+| 缓存友好性   | 差      | 好（数组）| 中等      | 差        |
+
+†：内核通过 `rb_leftmost` 缓存最小节点指针，实际为 O(1)。\*：指已持有节点指针的双向链表；若需先查找节点则为 O(n)。
+
+### 为什么选红黑树
+
+```art
+红黑树特性与调度需求的匹配：
+
+需求1: 快速找到 vruntime 最小的任务
+  → rb_leftmost 缓存左端节点 → O(1) ✓
+
+需求2: 任务入队（唤醒、fork）
+  → O(log n) 插入 ✓（n 通常很小，几百以内）
+
+需求3: 任务出队（调度离开）
+  → O(log n) 删除 ✓
+
+需求4: 树始终平衡（防止退化为链表）
+  → 红黑树自平衡保证最坏 O(2 log n) ✓
+
+需求5: 不需要随机访问第 k 个元素
+  → 不需要完美排名，红黑树满足 ✓
+```
+
+### 内核红黑树 API
+
+```c
+/* include/linux/rbtree.h — 泛型红黑树 */
+
+/* 插入（需要调用者实现比较逻辑）*/
+void rb_insert_color(struct rb_node *node, struct rb_root *root);
+
+/* 删除 */
+void rb_erase(struct rb_node *node, struct rb_root *root);
+
+/* 遍历 */
+struct rb_node *rb_first(const struct rb_root *root);
+struct rb_node *rb_next(const struct rb_node *node);
+
+/* CFS 使用带缓存的版本 */
+struct rb_root_cached {
+    struct rb_root rb_root;
+    struct rb_node *rb_leftmost;  /* 缓存最左（最小）节点 */
+};
+
+void rb_insert_color_cached(struct rb_node *node,
+                             struct rb_root_cached *root,
+                             bool leftmost);
+```
+
+---
+
+## 10.6 cfs_rq 与 pick_next_task_fair()
+
+### cfs_rq 结构体关键字段
+
+```c
+/* kernel/sched/sched.h */
+struct cfs_rq {
+    /* 队列总权重（所有任务 load.weight 之和）*/
+    struct load_weight      load;
+
+    /* 可运行任务数 + h_nr_running（含下层 cgroup）*/
+    unsigned int            nr_running;
+    unsigned int            h_nr_running;
+
+    /* 时钟 */
+    u64                     exec_clock;
+
+    /* 当前最小 vruntime（单调递增）*/
+    u64                     min_vruntime;
+
+    /* 红黑树根（带 leftmost 缓存）*/
+    struct rb_root_cached   tasks_timeline;
+
+    /* 当前正在运行的实体 */
+    struct sched_entity    *curr;
+
+    /* 下一个推荐的实体（used by wakeup preemption）*/
+    struct sched_entity    *next;
+
+    /* 选择时应尽量跳过的实体（skip buddy，例如刚调用 sched_yield() 的任务）*/
+    struct sched_entity    *skip;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+    /* 所属 rq 和所属任务组（task_group），以及本队列对任务组负载的贡献值 */
+    struct rq              *rq;
+    struct task_group      *tg;
+    unsigned long           tg_load_avg_contrib;
+#endif
+
+    /* PELT 负载追踪 */
+    struct sched_avg        avg;
+    u64                     runnable_load_avg;
+    u64                     load_avg;
+};
+```
+
+### pick_next_task_fair() — O(1) 选择下一个任务
+
+```c
+/* kernel/sched/fair.c（简化版）*/
+static struct task_struct *
+pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+    struct cfs_rq *cfs_rq = &rq->cfs;
+    struct sched_entity *se;
+    struct task_struct *p;
+    int new_tasks;
+
+again:
+    if (!sched_fair_runnable(rq))
+        goto idle;  /* 没有可运行任务 */
+
+    /* 处理前一个任务 */
+    put_prev_task(rq, prev);
+
+    /* 从最高层 cfs_rq 开始向下选择 */
+    do {
+        /* 关键：pick_next_entity() 直接取 rb_leftmost → O(1) */
+        se = pick_next_entity(cfs_rq, NULL);
+        set_next_entity(cfs_rq, se);
+        /* 如果是 group entity，进入其 my_q（向下遍历层次）*/
+        cfs_rq = group_cfs_rq(se);
+    } while (cfs_rq);
+
+    p = task_of(se);
+    ...
+    return p;
+
+idle:
+    ...
+}
+
+/* 选择红黑树最左节点（vruntime 最小）*/
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+{
+    /* rb_leftmost 已缓存，直接取 O(1) */
+    struct sched_entity *left = __pick_first_entity(cfs_rq);
+    struct sched_entity *se;
+
+    /* curr 不在红黑树中：若树为空，或 curr 的 vruntime 比最左节点更小，则以 curr 为候选 */
+    if (!left || (curr && entity_before(curr, left)))
+        left = curr;
+
+    se = left;
+
+    /* wakeup buddy 抢占逻辑 */
+    if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+        se = cfs_rq->next;
+
+    /* 若选中的恰好是 skip buddy，则在不过分损害公平性的前提下改选次优实体 */
+    if (se == cfs_rq->skip) {
+        ...
+    }
+    return se;
+}
+```
+
+---
+
+## 10.7 调度时间片计算
+
+### 目标延迟（sched_latency_ns）
+
+内核保证每个可运行任务在**目标延迟**内至少运行一次：
+
+```
+默认 sched_latency_ns = 6ms（内核编译默认值，实际可通过 sysctl 调整）
+
+每个任务的时间片 = sched_latency_ns × (task_weight / cfs_rq_total_weight)
+```
+
+### 时间片下界（sched_min_granularity_ns）
+
+当任务数量很多时，每个任务的时间片可能过小，导致切换开销过大。内核设置最小粒度：
+
+```c
+/* kernel/sched/fair.c */
+static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+    unsigned int nr_running = cfs_rq->nr_running;
+    struct load_weight *load;
+    struct load_weight lw;
+    u64 slice;
+
+    if (sched_feat(ALT_PERIOD))
+        nr_running = rq_of(cfs_rq)->cfs.h_nr_running;
+
+    /* 计算总调度周期 */
+    slice = __sched_period(nr_running + !se->on_rq);
+
+    /* 按权重比例分配时间片 */
+    load = &cfs_rq->load;
+    ...
+    slice = __calc_delta(slice, se->load.weight, load);
+
+    /* 保证最小粒度 */
+    if (sched_feat(BASE_SLICE))
+        slice = max_t(u64, slice, (u64)sysctl_sched_min_granularity);
+
+    return slice;
+}
+```
+
+### 关键 sysctl 参数
+
+```bash
+# 查看当前调度参数（注意：5.13 起这些参数移到 /sys/kernel/debug/sched/ 下，如 latency_ns；6.6 引入 EEVDF 后部分已移除或改名）
+cat /proc/sys/kernel/sched_latency_ns        # 目标延迟（默认 6000000 ns = 6ms）
+cat /proc/sys/kernel/sched_min_granularity_ns # 最小时间片（默认 750000 ns = 0.75ms）
+cat /proc/sys/kernel/sched_wakeup_granularity_ns  # 唤醒抢占粒度
+cat /proc/sys/kernel/sched_migration_cost_ns      # 迁移代价阈值
+
+# 实时系统调优（降低延迟）
+sysctl kernel.sched_latency_ns=2000000
+sysctl kernel.sched_min_granularity_ns=500000
+```
+
+```art
+时间片计算示例（3个进程，nice=0/0/+10）：
+
+sched_latency_ns = 6ms
+进程权重: A=1024, B=1024, C=110
+总权重 = 2158
+
+A 的时间片 = 6ms × 1024/2158 ≈ 2.85ms
+B 的时间片 = 6ms × 1024/2158 ≈ 2.85ms
+C 的时间片 = 6ms × 110/2158  ≈ 0.31ms（但保底 0.75ms）
+
+实际: A=2.85ms, B=2.85ms, C=0.75ms (min granularity 保底)
+```
+
+---
+
+## 10.8 5个调度类及其优先级
+
+Linux 内核调度器采用模块化设计，共有 5 个调度类，按优先级从高到低：
+
+```art
+调度类优先级链（高 → 低）:
+
+  stop_sched_class      优先级最高
+       │  (per-CPU 停止线程，迁移/hotplug)
+       ▼
+  dl_sched_class
+       │  (SCHED_DEADLINE — 硬实时)
+       ▼
+  rt_sched_class
+       │  (SCHED_FIFO / SCHED_RR — 软实时)
+       ▼
+  fair_sched_class      ← CFS 在此
+       │  (SCHED_NORMAL / SCHED_BATCH / SCHED_IDLE)
+       ▼
+  idle_sched_class      优先级最低
+          (swapper/N 线程，CPU空闲时运行)
+```
+
+### 5个调度类详细对比
+
+| 调度类     | 调度策略                    | 优先级范围        | 抢占性 | 时间片     | 典型用途                    |
+|-----------|----------------------------|-----------------|--------|-----------|---------------------------|
+| stop      | 内部专用                    | 最高（特殊）     | 是     | 无限制    | CPU 热插拔、任务迁移         |
+| deadline  | SCHED_DEADLINE              | 无 nice         | 是     | runtime/period | 硬实时任务（视频编码）      |
+| realtime  | SCHED_FIFO<br>SCHED_RR     | 1–99（RT优先级） | 是（被更高RT抢占）| FIFO=无限<br>RR=100ms | 音频、工业控制       |
+| fair(CFS) | SCHED_NORMAL<br>SCHED_BATCH<br>SCHED_IDLE | nice -20~+19 | 是     | 见§10.7   | 普通进程（绝大多数）         |
+| idle      | 内部专用                    | 最低            | 否     | —         | 空闲循环（HLT/mwait）       |
+
+### 调度类切换示例
+
+```bash
+# 查看进程调度策略
+chrt -p $$
+
+# 设置为 SCHED_FIFO 优先级 50（需要 CAP_SYS_NICE 或 root）
+chrt -f -p 50 <pid>
+
+# 设置为 SCHED_RR 优先级 10
+chrt -r -p 10 <pid>
+
+# 设置为 SCHED_DEADLINE
+chrt -d --sched-runtime 5000000 --sched-deadline 10000000 \
+        --sched-period 10000000 -p 0 <pid>
+
+# 恢复为普通调度
+chrt -o -p 0 <pid>
+nice -n 10 <command>
+renice -n 5 -p <pid>
+```
+
+---
+
+## 10.9 SCHED_DEADLINE — EDF算法
+
+### EDF（Earliest Deadline First）理论
+
+SCHED_DEADLINE 基于实时调度理论中的 EDF 算法，并结合 CBS（Constant Bandwidth Server）保证隔离性。
+
+**三元组参数**：
+- `runtime`（执行时间）：任务在每个周期内最多运行多少纳秒
+- `deadline`（截止时间）：任务必须在周期开始后多少纳秒内完成
+- `period`（周期）：任务的重复周期
+
+**可调度性检验**（当 deadline = period 时，所有 DL 任务在单核上可调度的充要条件；deadline < period 时仅为必要条件）：
+
+$$\sum_{i} \frac{\text{runtime}_i}{\text{period}_i} \leq 1$$
+
+```art
+SCHED_DEADLINE 时间轴示例:
+period=10ms, deadline=8ms, runtime=3ms
+
+时间(ms): 0    2    4    6    8    10   12   14   16   18   20
+          |    |    |    |    |    |    |    |    |    |    |
+任务执行:  [███]░░░░░░░░░░     [███]░░░░░░░░░░     [███]
+          ◄─── runtime=3ms       ◄─── 下一周期
+          ◄──────── deadline=8ms ──────►
+          ◄──────────── period=10ms ──────────────►
+```
+
+```c
+/* 设置 SCHED_DEADLINE 属性 */
+struct sched_attr attr = {
+    .size           = sizeof(attr),
+    .sched_policy   = SCHED_DEADLINE,
+    .sched_runtime  = 5 * 1000 * 1000,   /* 5ms runtime */
+    .sched_deadline = 10 * 1000 * 1000,  /* 10ms deadline */
+    .sched_period   = 10 * 1000 * 1000,  /* 10ms period */
+};
+syscall(SYS_sched_setattr, getpid(), &attr, 0);
+```
+
+---
+
+## 10.10 负载均衡
+
+### 调度域层次结构
+
+```art
+NUMA 系统的调度域层次:
+
+Node 0                    Node 1
+┌─────────────────────┐   ┌─────────────────────┐
+│ Socket 0            │   │ Socket 1            │
+│ ┌───────┐ ┌───────┐ │   │ ┌───────┐ ┌───────┐ │
+│ │Core 0 │ │Core 1 │ │   │ │Core 2 │ │Core 3 │ │
+│ │[HT0]  │ │[HT0]  │ │   │ │[HT0]  │ │[HT0]  │ │
+│ │[HT1]  │ │[HT1]  │ │   │ │[HT1]  │ │[HT1]  │ │
+│ └───────┘ └───────┘ │   │ └───────┘ └───────┘ │
+└─────────────────────┘   └─────────────────────┘
+         
+调度域层次（由小到大）:
+  SD_SMT   : 同一物理核的超线程
+  SD_MC    : 同一 Socket 的多核
+  SD_NUMA  : 跨 NUMA 节点
+```
+
+### load_balance() 核心流程
+
+```c
+/* kernel/sched/fair.c */
+static int load_balance(int this_cpu, struct rq *this_rq,
+                        struct sched_domain *sd,
+                        enum cpu_idle_type idle,
+                        int *continue_balancing)
+{
+    int ld_moved = 0;
+    struct sched_group *group;
+    struct rq *busiest;
+
+    /* 1. 找到最繁忙的调度组 */
+    group = find_busiest_group(&env);
+    if (!group) goto out_balanced;
+
+    /* 2. 找到该组中最繁忙的 CPU */
+    busiest = find_busiest_queue(&env, group);
+    if (!busiest) goto out_balanced;
+
+    /* 3. 计算需要迁移多少任务 */
+    env.src_cpu = busiest->cpu;
+    env.src_rq  = busiest;
+
+    /* 4. 实际迁移任务 */
+    ld_moved = move_tasks(&env);
+
+    /* 5. 如果仍不平衡，触发 active 迁移（交给 busiest CPU 上的 stopper 线程执行）*/
+    if (!ld_moved) {
+        ...
+        active_load_balance_cpu_stop(busiest, ...);
+    }
+    ...
+}
+```
+
+### newidle_balance — 空闲时主动拉取
+
+```c
+/* CPU 变为空闲时主动从其他 CPU 拉取任务 */
+static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+{
+    /* 快速路径：预期空闲时间 avg_idle 小于迁移代价，拉取任务得不偿失，跳过 */
+    if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+        ...
+        return 0;
+    }
+
+    /* 遍历调度域从最近的开始 */
+    for_each_domain(this_cpu, sd) {
+        if (sd->flags & SD_BALANCE_NEWIDLE) {
+            pulled_task = load_balance(this_cpu, this_rq, sd,
+                                       CPU_NEWLY_IDLE, ...);
+            if (pulled_task)
+                break;
+        }
+    }
+    return pulled_task;
+}
+```
+
+---
+
+## 10.11 EAS — 能效感知调度
+
+### big.LITTLE 架构下的挑战
+
+```art
+Arm big.LITTLE 典型拓扑（如 Cortex-A55 + A78）：
+
+LITTLE 核（节能）          big 核（高性能）
+┌──────────────────┐      ┌──────────────────┐
+│ A55 × 4          │      │ A78 × 4          │
+│ 频率: 0.6-1.8GHz │      │ 频率: 1.0-3.0GHz │
+│ 容量: 100-380     │      │ 容量: 170-1024   │
+│ 功耗: 低          │      │ 功耗: 高         │
+└──────────────────┘      └──────────────────┘
+
+EAS 目标：在满足性能要求的前提下，最小化系统能耗
+```
+
+### cpu_capacity 与任务放置
+
+```c
+/* kernel/sched/fair.c — EAS 任务放置 */
+static int find_energy_efficient_cpu(struct task_struct *p,
+                                     int prev_cpu, int sync)
+{
+    unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
+    int best_cpu = -1;
+
+    /* 遍历性能域（pd）— 通常是 LITTLE 和 big 两组 */
+    for_each_perf_domain(pd) {
+        unsigned long cur_energy;
+
+        /* 检查该域的 CPU 是否满足任务的性能需求 */
+        if (!cpumask_intersects(perf_domain_span(pd), &p->cpus_mask))
+            continue;
+
+        /* 计算将任务放在此域的能耗 */
+        cur_energy = compute_energy(p, pd, &cpus_to_visit);
+
+        if (cur_energy < best_energy) {
+            best_energy = cur_energy;
+            best_cpu = ...;
+        }
+    }
+
+    /* 只有节能收益超过阈值才迁移（避免频繁迁移）*/
+    if (prev_energy - best_energy > prev_energy >> 4)
+        return best_cpu;
+    return prev_cpu;
+}
+```
+
+---
+
+## 10.12 cgroup v2 层次调度
+
+### 两级 cfs_rq 结构
+
+```art
+cgroup v2 层次调度示例:
+
+                    根 cfs_rq (CPU 0)
+                   /                \
+          group_A/cfs_rq          group_B/cfs_rq
+          cpu.weight=100          cpu.weight=200
+          (获得 33% CPU)           (获得 67% CPU)
+          /         \              /            \
+      task_1      task_2       task_3          task_4
+      nice=0      nice=5       nice=0          nice=-5
+```
+
+### cgroup v2 CPU 控制接口
+
+```bash
+# 创建 cgroup 层次
+mkdir /sys/fs/cgroup/myapp
+
+# 设置 CPU 权重（默认 100，范围 1-10000）
+echo 200 > /sys/fs/cgroup/myapp/cpu.weight
+
+# 设置 CPU 带宽限制（每 100ms 最多用 50ms）
+echo "50000 100000" > /sys/fs/cgroup/myapp/cpu.max
+
+# 将进程加入 cgroup
+echo $$ > /sys/fs/cgroup/myapp/cgroup.procs
+
+# 查看 CPU 统计
+cat /sys/fs/cgroup/myapp/cpu.stat
+# usage_usec 1234567   ← 总 CPU 使用时间（微秒）
+# user_usec  987654    ← 用户态时间
+# system_usec 246913   ← 内核态时间
+# nr_periods  1000     ← 带宽控制周期数
+# nr_throttled 50      ← 被限速次数
+# throttled_usec 250000 ← 被限速总时长
+```
+
+### cpu.max 带宽控制实现（CFS Bandwidth）
+
+```c
+/* kernel/sched/sched.h — CFS 带宽控制核心 */
+struct cfs_bandwidth {
+    raw_spinlock_t  lock;
+    ktime_t         period;       /* 统计周期 */
+    u64             quota;        /* 每周期配额（ns）*/
+    u64             runtime;      /* 剩余可用时间 */
+    s64             hierarchical_quota;
+    u8              idle;
+    struct hrtimer  period_timer; /* 每周期重填配额 */
+    struct hrtimer  slack_timer;  /* 归还未用完配额 */
+    struct list_head throttled_cfs_rq;
+    int             nr_periods;
+    int             nr_throttled;
+    u64             throttled_time;
+};
+```
+
+---
+
+## 10.13 调试工具
+
+### /proc/sched_debug
+
+```bash
+cat /proc/sched_debug    # 5.13 及之后的内核改为: cat /sys/kernel/debug/sched/debug
+
+# 输出示例（部分）:
+# Sched Debug Version: v0.11, 5.15.0
+# ktime                                   : 12345678901
+# sched_clk                               : 12345679000
+# cpu_clk                                 : 12345679100
+#
+# nr_running                              : 3
+# nr_switches                             : 1234567
+# nr_load_updates                         : 890123
+# nr_uninterruptible                      : 0
+#
+# cfs_rq[0]:/
+#   .exec_clock                           : 12345.678901
+#   .MIN_vruntime                         : 0.000001
+#   .min_vruntime                         : 12345.678
+#   .max_vruntime                         : 12345.700
+#   .spread                               : 0.022
+#   .nr_running                           : 3
+#   .load                                 : 3072          (3 × 1024)
+```
+
+### perf sched — 调度事件分析
+
+```bash
+# 记录 5 秒的调度事件
+perf sched record -a sleep 5
+
+# 分析延迟
+perf sched latency
+
+# 输出示例:
+# -------------------------------------------------
+#  Task                  |   Runtime ms  | Switches |
+# -------------------------------------------------
+#  swapper/0:0           |    4998.000   |      500 |
+#  bash:12345            |       2.345   |       12 |
+#  kworker/0:1:234       |       0.123   |        5 |
+
+# 调度时间线（可视化）
+perf sched timehist
+
+# 详细输出每次切换
+perf sched script | head -50
+```
+
+### ftrace 追踪 CFS 调度
+
+```bash
+# 方法1: 使用 trace-cmd
+trace-cmd record -e sched_switch -e sched_wakeup -p function_graph \
+    -g pick_next_task_fair sleep 1
+
+# 方法2: 直接使用 tracefs
+echo 0 > /sys/kernel/debug/tracing/tracing_on
+echo "pick_next_task_fair" > /sys/kernel/debug/tracing/set_ftrace_filter
+echo function > /sys/kernel/debug/tracing/current_tracer
+echo 1 > /sys/kernel/debug/tracing/tracing_on
+sleep 1
+echo 0 > /sys/kernel/debug/tracing/tracing_on
+cat /sys/kernel/debug/tracing/trace | head -30
+
+# 方法3: sched_switch tracepoint（推荐）
+echo 1 > /sys/kernel/debug/tracing/events/sched/sched_switch/enable
+cat /sys/kernel/debug/tracing/trace_pipe
+# 输出: prev_comm=bash prev_pid=1234 prev_prio=120 prev_state=S
+#       next_comm=vim  next_pid=5678 next_prio=120
+```
+
+### schedstats — 详细调度统计
+
+```bash
+# 启用 schedstats（需要内核编译选项 CONFIG_SCHEDSTATS）
+echo 1 > /proc/sys/kernel/sched_schedstats
+
+# 查看每个 CPU 的调度统计
+cat /proc/schedstat
+# 版本号
+# cpu0 0 0 0 0 0 0 25862403 3456789 1234
+#  字段: yld_count(让出次数) 0(遗留字段,恒为0) sched_count(调度次数) sched_goidle
+#        ttwu_count(唤醒次数) ttwu_local(本地唤醒次数) rq_cpu_time(任务运行总时长ns)
+#        run_delay(等待运行的总延迟ns) pcount(运行过的时间片数)
+
+# 查看单个进程的调度统计（需要 /proc/PID/schedstat）
+cat /proc/$$/schedstat
+# 字段: sum_exec_runtime(总运行ns) run_delay(总等待ns) pcount
+```
+
+### bpftrace 快速调度分析
+
+```bash
+# 统计各进程的调度延迟（等待运行的时间）
+bpftrace -e '
+tracepoint:sched:sched_wakeup { @ts[args->pid] = nsecs; }
+tracepoint:sched:sched_switch {
+    if (@ts[args->next_pid]) {
+        @latency_us = hist((nsecs - @ts[args->next_pid]) / 1000);
+        delete(@ts[args->next_pid]);
+    }
+}'
+
+# 统计各进程被切换出 CPU 的次数（top 10，不含 idle；注意这不是运行时长）
+bpftrace -e '
+tracepoint:sched:sched_switch {
+    @on_cpu[args->prev_comm] = sum(args->prev_pid ? 1 : 0);
+} interval:s:5 { print(@on_cpu, 10); clear(@on_cpu); }'
+```
+
+---
+
+## 10.14 常见性能问题
+
+### 问题1：调度延迟 Spike
+
+**症状**：应用程序偶发性延迟高，P99 延迟远高于平均值。
+
+**排查步骤**：
+```bash
+# 1. 使用 perf 查看延迟分布
+perf sched latency --sort max | head -20
+
+# 2. 检查是否有高优先级 RT 任务抢占
+cat /proc/sched_debug | grep -E "rt_rq|dl_rq"
+
+# 3. 检查 CPU throttling（cgroup 带宽限制）
+cat /sys/fs/cgroup/*/cpu.stat | grep throttled
+
+# 4. 使用 bpftrace 精确定位
+bpftrace -e '
+tracepoint:sched:sched_switch /args->prev_state == 0/ {
+    @[args->prev_comm] = hist(nsecs - @start[args->prev_pid]);
+}
+tracepoint:sched:sched_switch {
+    @start[args->next_pid] = nsecs;
+}'
+```
+
+**常见原因与解决**：
+
+| 原因 | 解决方案 |
+|------|----------|
+| cgroup cpu.max 限速过紧 | 调大 quota 或关闭 bandwidth control |
+| RT 任务长期占用 CPU | 设置 `sysctl kernel.sched_rt_runtime_us` 限制 RT 带宽 |
+| 大量内核中断 | `irqbalance` 优化中断分配，或绑定中断到专用 CPU |
+| NUMA 内存访问延迟 | 绑定进程到单 NUMA 节点 `numactl --cpunodebind=0` |
+
+### 问题2：跨 NUMA 迁移导致性能下降
+
+```bash
+# 检测 NUMA 命中率
+numastat -p <pid>
+# 或
+cat /sys/fs/cgroup/<cgroup>/memory.numa_stat
+
+# 绑定进程到 NUMA 节点
+numactl --cpunodebind=0 --membind=0 ./myapp
+
+# 设置 NUMA 平衡策略
+echo 0 > /proc/sys/kernel/numa_balancing  # 关闭自动 NUMA 迁移
+
+# 查看 NUMA 迁移统计
+cat /proc/vmstat | grep numa
+# numa_hit: 本地访问命中
+# numa_miss: 远端访问（需要优化）
+```
+
+### 问题3：实时任务饿死 CFS 进程
+
+**现象**：`SCHED_FIFO` 任务进入死循环，导致所有普通进程（CFS）无法运行。
+
+```bash
+# 内核防护机制：sched_rt_runtime_us
+# 默认：每秒 RT 任务最多占用 950ms（留 50ms 给 CFS）
+cat /proc/sys/kernel/sched_rt_period_us    # 1000000（1秒）
+cat /proc/sys/kernel/sched_rt_runtime_us   # 950000（950ms）
+
+# 设置为 -1 则禁用此限制（危险！仅适合受控实时系统）
+# echo -1 > /proc/sys/kernel/sched_rt_runtime_us
+
+# 检查是否有 RT 任务在跑
+ps -eo pid,comm,cls,pri --sort=-pri | head -20
+# CLS 字段：TS=normal, FF=SCHED_FIFO, RR=SCHED_RR, DLN=SCHED_DEADLINE
+```
+
+### 问题4：vruntime 不均衡导致新进程独占
+
+**原因**：新创建/唤醒的进程 vruntime 为 0，远小于其他进程，会被连续调度。
+
+**内核解决方案**：
+```c
+/* 新进程入队时，vruntime 从 min_vruntime 开始 */
+static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+                         int initial)
+{
+    u64 vruntime = cfs_rq->min_vruntime;
+
+    if (sched_feat(START_DEBIT) && initial) {
+        /* 新进程额外惩罚：加上一个时间片，防止抢占现有任务 */
+        vruntime += sched_vslice(cfs_rq, se);
+    }
+
+    /* 被唤醒的睡眠任务：把基准 vruntime 回拨 thresh 作为睡眠补偿 */
+    if (!initial) {
+        unsigned long thresh = sysctl_sched_latency;
+        if (sched_feat(GENTLE_FAIR_SLEEPERS))
+            thresh >>= 1;  /* 睡眠奖励最多半个延迟窗口 */
+        vruntime -= thresh;
+    }
+
+    se->vruntime = max_vruntime(se->vruntime, vruntime);
+}
+```
+
+---
+
+## 总结
+
+```art
+CFS 调度器全景图:
+
+  用户空间               内核调度器              硬件
+  ─────────            ───────────────         ──────
+  nice/renice ──────► sched_prio_to_weight
+  chrt        ──────► 调度类选择
+  cgroup      ──────► cfs_rq 层次树
+  
+  调度触发点:
+  timer tick  ──────► update_curr()  ──────► vruntime += delta
+  sys_sched   ──────► schedule()     ──────► pick_next_task_fair()
+  wakeup      ──────► enqueue_entity()──────► rb_insert (红黑树)
+  
+  核心数据流:
+  delta_real × (NICE_0_LOAD/weight) = delta_vruntime
+                                           │
+                                           ▼
+                                    红黑树（vruntime排序）
+                                           │
+                                     rb_leftmost ──► 下一个运行任务
+```
+
+**参考资料**：
+- `kernel/sched/fair.c` — CFS 主实现
+- `kernel/sched/sched.h` — 核心数据结构
+- `Documentation/scheduler/sched-design-CFS.rst` — 官方设计文档
+- Ingo Molnár 的原始 CFS 补丁说明（2007年）
diff --git "a/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/README.md" "b/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/README.md"
new file mode 100644
index 0000000..6bc2610
--- /dev/null
+++ "b/11-\345\256\271\345\231\250\344\270\216\345\221\275\345\220\215\347\251\272\351\227\264/README.md"
@@ -0,0 +1,1056 @@
+# 11 — 容器与命名空间
+
+> **目标**：从内核 namespace/cgroup 原语出发，彻底理解容器技术的底层实现，并动手构建一个 mini-docker。
+
+---
+
+## 目录
+
+1. [容器不是虚拟机](#111-容器不是虚拟机)
+2. [8种 Linux Namespace](#112-8种-linux-namespace)
+3. [clone/unshare/setns 系统调用](#113-cloneunsharesetns-系统调用)
+4. [PID 命名空间深入](#114-pid-命名空间深入)
+5. [Network 命名空间](#115-network-命名空间)
+6. [User 命名空间](#116-user-命名空间)
+7. [Mount 命名空间与 pivot_root](#117-mount-命名空间与-pivot_root)
+8. [cgroups v1 vs v2](#118-cgroups-v1-vs-v2)
+9. [cgroup v2 内存控制](#119-cgroup-v2-内存控制)
+10. [OverlayFS 原理](#1110-overlayfs-原理)
+11. [seccomp BPF 系统调用过滤](#1111-seccomp-bpf)
+12. [Linux Capabilities](#1112-linux-capabilities)
+13. [容器运行时内部：runc 流程](#1113-容器运行时内部runc-流程)
+14. [mini-docker 实现](#1114-mini-docker-实现)
+
+---
+
+## 11.1 容器不是虚拟机
+
+### 核心区别
+
+![容器内部结构](../assets/diagrams/container-anatomy.svg)
+
+```art
+虚拟机 vs 容器隔离层次对比:
+
+┌─────────────────────────────────────────────────────────────┐
+│                        物理硬件                              │
+├─────────────────────────────────────────────────────────────┤
+│                                                             │
+│  虚拟机模式:                    容器模式:                    │
+│  ┌──────────┐ ┌──────────┐     ┌──────┐ ┌──────┐ ┌──────┐  │
+│  │  Guest   │ │  Guest   │     │容器A │ │容器B │ │容器C │  │
+│  │  OS      │ │  OS      │     │进程  │ │进程  │ │进程  │  │
+│  ├──────────┤ ├──────────┤     ├──────┴─┴──────┴─┴──────┤  │
+│  │ VMKernel │ │ VMKernel │     │     共享 Host Kernel     │  │
+│  ├──────────┴─┴──────────┤     │   (namespace隔离)        │  │
+│  │    Hypervisor          │     └──────────────────────────┤  │
+│  ├───────────────────────┤     │       Host OS            │  │
+│  │       Host OS         │     └──────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 详细对比表
+
+| 维度             | 虚拟机（KVM/VMware）                    | 容器（Docker/containerd）              |
+|----------------|----------------------------------------|--------------------------------------|
+| 隔离级别         | 硬件级别（完全隔离）                    | 内核级别（namespace隔离）             |
+| 内核共享         | 每个 VM 独立内核                       | 共享宿主机内核                        |
+| 启动时间         | 秒级~分钟                              | 毫秒级~秒级                           |
+| 内存开销         | GiB（含 Guest OS）                    | MiB（仅进程开销）                     |
+| 存储开销         | 完整 OS 镜像（几GiB）                  | 分层镜像（增量）                      |
+| 网络隔离         | 虚拟网卡/虚拟交换机（完全隔离）         | veth pair/iptables/eBPF              |
+| 安全边界         | 强（CVE 逃逸极少）                     | 弱（内核漏洞可能逃逸）               |
+| 文件系统         | 独立虚拟磁盘                           | OverlayFS 分层                       |
+| 性能损耗         | 5-15%（CPU/内存）                      | < 1%（接近原生）                     |
+| 混合部署         | 不同 OS（Windows + Linux）             | 只能同 Kernel ABI                    |
+| 典型用途         | 强隔离、多OS、有状态服务               | 微服务、CI/CD、无状态服务             |
+
+---
+
+## 11.2 8种 Linux Namespace
+
+Linux 内核目前支持 **8 种** namespace，每种隔离不同的系统资源：
+
+| Namespace  | 隔离内容                          | 引入版本   | clone 标志          | /proc/PID/ns/ 文件 | 关键系统调用 |
+|-----------|----------------------------------|-----------|--------------------|--------------------|------------|
+| Mount      | 文件系统挂载点视图                 | 2.4.19    | `CLONE_NEWNS`      | `mnt`              | `mount(2)` |
+| UTS        | hostname / domainname            | 2.6.19    | `CLONE_NEWUTS`     | `uts`              | `sethostname(2)` |
+| IPC        | SysV IPC, POSIX 消息队列          | 2.6.19    | `CLONE_NEWIPC`     | `ipc`              | `msgget(2)` |
+| Network    | 网络接口/路由/iptables/socket     | 2.6.24    | `CLONE_NEWNET`     | `net`              | `socket(2)` |
+| PID        | 进程 ID 空间                     | 2.6.24    | `CLONE_NEWPID`     | `pid`/`pid_for_children` | `getpid(2)` |
+| User       | UID/GID 映射，特权隔离            | 3.8       | `CLONE_NEWUSER`    | `user`             | `setuid(2)` |
+| Cgroup     | cgroup 根目录（/proc/self/cgroup）| 4.6       | `CLONE_NEWCGROUP`  | `cgroup`           | `mount("cgroup2")` |
+| Time       | 单调时钟/启动时钟偏移             | 5.6       | `CLONE_NEWTIME`    | `time`/`time_for_children` | `clock_settime(2)` |
+
+### 查看进程的 Namespace
+
+```bash
+# 查看当前进程的所有 namespace
+ls -la /proc/$$/ns/
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 cgroup -> 'cgroup:[4026531835]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 ipc    -> 'ipc:[4026531839]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 mnt    -> 'mnt:[4026531840]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 net    -> 'net:[4026531992]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 pid    -> 'pid:[4026531836]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 time   -> 'time:[4026531834]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 user   -> 'user:[4026531837]'
+# lrwxrwxrwx 1 root root 0 Jan 1 00:00 uts    -> 'uts:[4026531838]'
+
+# 数字即 namespace inode，相同 inode = 同一 namespace
+readlink /proc/$$/ns/pid   # pid:[4026531836]
+
+# 比较两个进程是否在同一 namespace
+stat -L /proc/1/ns/net /proc/$$/ns/net
+```
+
+---
+
+## 11.3 clone/unshare/setns 系统调用
+
+### clone(2) — 创建新进程并指定 namespace
+
+```c
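+#define _GNU_SOURCE      /* 必须位于所有 #include 之前，否则 clone()/CLONE_NEW* 未声明 */
+#include <sched.h>       /* 完整编译还需 <stdio.h> <stdlib.h> <unistd.h> <signal.h> <sys/wait.h> */
+#include <sys/types.h>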
+/* 原型 */
+int clone(int (*fn)(void *), void *stack, int flags, void *arg, ...
+          /* pid_t *parent_tid, void *tls, pid_t *child_tid */);
+
+/* 示例：创建新的 PID + UTS + Network namespace */
+#define STACK_SIZE (1024 * 1024)  /* 1 MiB 栈 */
+
+static int child_func(void *arg)
+{
+    /* 此时已在新 namespace 中 */
+    printf("Child PID in new ns: %d\n", getpid());  /* 输出: 1 */
+
+    /* 设置新 hostname */
+    sethostname("container", 9);
+
+    /* 执行 shell */
+    execlp("/bin/sh", "/bin/sh", NULL);
+    return 0;
+}
+
+int main(void)
+{
+    char *stack = malloc(STACK_SIZE);
+    char *stack_top = stack + STACK_SIZE;  /* 栈向下增长 */
+
+    int flags = CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET |
+                CLONE_NEWNS  | CLONE_NEWIPC | SIGCHLD;
+
+    pid_t pid = clone(child_func, stack_top, flags, NULL);
+    if (pid < 0) {
+        perror("clone");
+        return 1;
+    }
+
+    printf("Parent: child PID = %d\n", pid);
+    waitpid(pid, NULL, 0);
+    return 0;
+}
+```
+
+### unshare(2) — 当前进程离开已有 namespace
+
+```bash
+# 在新的 UTS namespace 中运行 bash（需要 root/CAP_SYS_ADMIN；普通用户可加 --user --map-root-user）
+unshare --uts bash
+hostname mycontainer    # 只影响此 namespace
+
+# 创建完整隔离的 shell（需要 root）
+unshare --pid --fork --mount-proc bash
+ps aux  # 只看到自己的进程
+
+# User namespace（无需 root！）
+unshare --user --map-root-user bash
+id  # uid=0(root) — 在新 namespace 内是 root，宿主机上无特权
+```
+
+```c
+/* unshare(2) 系统调用 */
+#include <sched.h>
+int unshare(int flags);
+
+/* 示例：当前进程进入新的 mount namespace */
+if (unshare(CLONE_NEWNS) < 0) {
+    perror("unshare");
+    exit(1);
+}
+/* 现在 mount/umount 不影响其他进程 */
+mount("tmpfs", "/tmp", "tmpfs", 0, NULL);
+```
+
+### setns(2) — 加入已有 namespace
+
+```c
+#include <fcntl.h>
+#include <sched.h>
+
+/* 进入另一个进程的 network namespace */
+int nsfd = open("/proc/12345/ns/net", O_RDONLY);
+if (setns(nsfd, CLONE_NEWNET) < 0) {
+    perror("setns");
+    exit(1);
+}
+close(nsfd);
+/* 现在共享 PID 12345 的网络 namespace */
+```
+
+```bash
+# nsenter 工具（封装 setns）
+nsenter --target <pid> --net --pid --mount -- bash
+# 或进入 Docker 容器
+nsenter --target $(docker inspect -f '{{.State.Pid}}' mycontainer) \
+        --net --pid --mount -- bash
+```
+
+---
+
+## 11.4 PID 命名空间深入
+
+### 双重 PID 视图
+
+每个进程在不同的 PID namespace 中拥有不同的 PID：
+
+```art
+PID Namespace 层次视图:
+
+  Host namespace (init_pid_ns)
+  PID: 1=systemd, 234=sshd, 567=dockerd, 600=containerd, 890=bash, 891=ps
+
+  container ns (child_pid_ns)          container ns (sibling)
+  ┌────────────────────────────┐        ┌──────────────┐
+  │ PID 1 = bash               │        │ PID 1 = nginx│
+  │ PID 2 = ps                 │        │ PID 2 = worker│
+  │（宿主机看到的是 890, 891）  │        └──────────────┘
+  └────────────────────────────┘
+
+规则：
+- 容器内 PID 1 对应宿主机某个 PID（如 890）
+- 容器内只能看到本 namespace 的进程
+- 父 namespace 可以看到所有子 namespace 的进程（用宿主机 PID）
+- /proc/<pid>/status 中有 NSpid 字段显示所有层级的 PID
+```
+
+```bash
+# 查看容器内进程的宿主机 PID
+docker inspect --format '{{.State.Pid}}' mycontainer
+
+# 查看多层 PID 映射
+cat /proc/$(docker inspect -f '{{.State.Pid}}' mycontainer)/status \
+    | grep NSpid
+# NSpid: 890  1    ← 宿主机 PID=890，容器内 PID=1
+
+# pid_for_children — 影响子进程使用的 PID namespace
+cat /proc/$$/ns/pid_for_children   # 子进程在哪个 PID ns 中创建
+```
+
+### 容器 init 进程的特殊性
+
+```bash
+# PID 1 在 PID namespace 中有特殊职责：
+# 1. 收割孤儿进程（子进程的父进程退出时，孤儿被 PID 1 收养）
+# 2. 处理 SIGTERM（内核只向 PID 1 投递它已注册处理函数的信号，未注册则直接丢弃！需显式处理）
+# 3. PID namespace 消亡：当 PID 1 退出时，整个 namespace 销毁
+
+# 这就是为什么 Docker 容器在 CMD 退出时容器也退出
+
+# 使用 tini 作为 PID 1（正确处理信号和孤儿进程）
+docker run --init myimage
+# 或在 Dockerfile 中：
+# ENTRYPOINT ["/usr/bin/tini", "--", "/myapp"]
+```
+
+---
+
+## 11.5 Network 命名空间
+
+### veth pair 手动搭建网络隔离
+
+```bash
+# 创建两个 network namespace
+ip netns add ns1
+ip netns add ns2
+
+# 创建 veth pair（虚拟以太网对）
+ip link add veth1 type veth peer name veth2
+
+# 将 veth 两端分别放入两个 namespace
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+# 配置 IP 地址
+ip netns exec ns1 ip addr add 10.0.0.1/24 dev veth1
+ip netns exec ns2 ip addr add 10.0.0.2/24 dev veth2
+
+# 启动接口
+ip netns exec ns1 ip link set veth1 up
+ip netns exec ns1 ip link set lo up
+ip netns exec ns2 ip link set veth2 up
+ip netns exec ns2 ip link set lo up
+
+# 测试连通性
+ip netns exec ns1 ping -c 3 10.0.0.2  # ✓
+
+# 清理
+ip netns del ns1
+ip netns del ns2
+```
+
+### 容器网络架构（Docker bridge 模式）
+
+```art
+Docker bridge 网络架构:
+
+宿主机:                              容器:
+┌──────────────────────────────────────────────────────────────┐
+│  eth0 (192.168.1.100)                                        │
+│    │                                                         │
+│  iptables MASQUERADE (NAT出口)                               │
+│    │                                                         │
+│  docker0 bridge (172.17.0.1/16)                             │
+│    ├── veth_a_host (桥端口, 无 IP)  ◄──► veth_a (172.17.0.2)│
+│    └── veth_b_host (桥端口, 无 IP)  ◄──► veth_b (172.17.0.3)│
+│                                          (容器A)  (容器B)    │
+└──────────────────────────────────────────────────────────────┘
+
+数据包路径（容器A → 外网）:
+veth_a → docker0 → iptables MASQUERADE → eth0 → 外网
+```
+
+```bash
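+# 查看容器网络 namespace 的网络配置（ip netns 只认 /var/run/netns/ 下的名字，需先建符号链接）
+pid=$(docker inspect -f '{{.State.Pid}}' mycontainer)
+mkdir -p /var/run/netns && ln -sf /proc/$pid/ns/net /var/run/netns/mycontainer
+ip netns exec mycontainer ip addr   # 等价于下面的 nsenter：
+nsenter --target $pid --net -- ip addr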
+```
+
+---
+
+## 11.6 User 命名空间
+
+### UID 映射机制
+
+User namespace 允许将容器内的 UID/GID 映射到宿主机上的不同值，实现 **rootless 容器**：
+
+```bash
+# 查看 UID 映射格式（容器内UID 宿主机UID 数量）
+cat /proc/$$/uid_map
+# 0  1000  1    ← 容器内UID 0 映射到宿主机 UID 1000（只有1个）
+
+# 创建 User namespace（无需 root！）
+unshare --user --map-root-user bash
+id  # uid=0(root) gid=0(root) — 容器内是 root
+cat /proc/$$/uid_map  # 0  1000  1
+
+# 扩展映射：容器内 0-65535 映射到宿主机 100000-165535
+# /etc/subuid 文件：alice:100000:65536
+newuidmap <pid> 0 100000 65536
+newgidmap <pid> 0 100000 65536
+```
+
+```c
+/* 内核中 UID 映射的数据结构 */
+struct uid_gid_extent {
+    u32 first;          /* namespace 内的起始 ID */
+    u32 lower_first;    /* 宿主机上的起始 ID */
+    u32 count;          /* 映射的 ID 数量 */
+};
+
+struct uid_gid_map {    /* 内联最多 5 条；超过后改用 forward/reverse 数组，上限 340 条 */
+    u32 nr_extents;
+    union {
+        struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
+        struct {
+            struct uid_gid_extent *forward;
+            struct uid_gid_extent *reverse;
+        };
+    };
+};
+```
+
+### Rootless 容器安全模型
+
+```bash
+# Podman rootless 模式（推荐生产使用）
+podman run --rm -it alpine sh
+id  # 容器内: uid=0(root) — 宿主机: uid=1000(alice)
+
+# 验证：rootless 容器无法访问宿主机敏感资源
+ls /root  # Permission denied（宿主机 /root 归 uid=0 所有）
+```
+
+---
+
+## 11.7 Mount 命名空间与 pivot_root
+
+### pivot_root vs chroot
+
+```art
+chroot vs pivot_root 对比:
+
+chroot:                        pivot_root:
+┌─────────────────────┐        ┌─────────────────────┐
+│    / (宿主机)        │        │   new_root/          │
+│    ├── etc/          │        │   ├── etc/           │
+│    ├── var/          │        │   ├── var/           │
+│    └── container/   │        │   └── put_old/       │
+│        └── (chroot  │        │       └── (原始/)    │
+│            到这里)  │        │                      │
+│                     │        │  原始 / 被卸载        │
+│ 问题：仍可访问原始   │        │  或隐藏在 put_old    │
+│ /proc /sys 等敏感   │        │                      │
+│ 路径（安全漏洞）     │        │ 优势：真正替换根文件  │
+│                     │        │ 系统，与旧 / 彻底隔离 │
+└─────────────────────┘        └─────────────────────┘
+```
+
+```c
+/* pivot_root 使用示例（在新 mount namespace 中）*/
+#include <sys/syscall.h>
+#include <sys/mount.h>
+
+int pivot_root(const char *new_root, const char *put_old)
+{
+    return syscall(SYS_pivot_root, new_root, put_old);
+}
+
+/* 容器启动流程 */
+void setup_rootfs(const char *rootfs)
+{
+    char put_old[PATH_MAX];
+
+    /* 1. 确保 rootfs 是挂载点（bind mount 自身）*/
+    mount(rootfs, rootfs, NULL, MS_BIND | MS_REC, NULL);
+
+    /* 2. 创建 put_old 目录 */
+    snprintf(put_old, sizeof(put_old), "%s/.pivot_root", rootfs);
+    mkdir(put_old, 0700);
+
+    /* 3. 切换根文件系统 */
+    pivot_root(rootfs, put_old);
+
+    /* 4. 切换工作目录 */
+    chdir("/");
+
+    /* 5. 卸载旧根文件系统 */
+    umount2("/.pivot_root", MNT_DETACH);
+    rmdir("/.pivot_root");
+}
+```
+
+### 挂载传播类型
+
+```bash
+# 挂载传播类型（Shared/Private/Slave/Unbindable）
+
+# shared（双向传播）— 内核默认是 private，但 systemd 启动时会把 / 设为 shared
+mount --make-shared /mnt
+
+# private（不传播）— Docker 容器默认用此
+mount --make-private /mnt
+
+# slave（只接收宿主机挂载，不向上传播）— 适合容器挂载卷
+mount --make-slave /mnt
+
+# 查看挂载传播
+cat /proc/$$/mountinfo | head -5
+# 22 1 8:1 / / rw,relatime shared:1 - ext4 /dev/sda1 rw
+#                            ^^^^^^^^
+#                            传播类型 + peer group ID
+```
+
+---
+
+## 11.8 cgroups v1 vs v2
+
+### 架构对比
+
+```art
+cgroups v1（多棵树）:              cgroups v2（单棵树）:
+                                   
+/sys/fs/cgroup/                    /sys/fs/cgroup/
+├── cpu/                           ├── system.slice/
+│   └── myapp/                     │   └── myapp.service/
+│       └── tasks                  │       ├── cpu.weight
+├── memory/                        │       ├── memory.max
+│   └── myapp/                     │       └── cgroup.procs
+│       └── tasks                  ├── user.slice/
+├── blkio/                         └── cgroup.controllers
+│   └── myapp/
+│       └── tasks
+└── pids/
+    └── myapp/
+        └── tasks
+
+问题: 同一进程在不同控制器下的视图不一致
+优势: 每个控制器独立挂载，灵活
+
+优势: 统一视图，强一致性
+     委托层次（Delegation）
+     线程模式（Thread Mode）
+```
+
+### 详细功能对比表
+
+| 特性                        | cgroups v1                          | cgroups v2                           |
+|---------------------------|-------------------------------------|--------------------------------------|
+| 挂载方式                    | 多个子系统独立挂载                  | 单一统一层次                          |
+| 进程归属                    | 可在不同控制器的不同位置             | 一个进程只属于一个 cgroup             |
+| 控制器激活                  | 挂载时自动激活                      | 按需通过 `cgroup.subtree_control`    |
+| 线程支持                    | 不支持线程级控制                    | Thread Mode（`cgroup.type=threaded`）|
+| 委托管理                    | 有限支持                            | 完整委托（用户可管理子 cgroup）       |
+| io 控制                    | blkio（权重+带宽）                  | io（统一 BFQ + iocost 模型）         |
+| 内存统计                    | 粗粒度                              | 更细粒度（含 slab/anon/file）        |
+| 压力指标                    | 无                                  | PSI（pressure stall information）    |
+| Kubernetes 支持             | 一直支持（v1.31 起进入维护模式）     | v1.25 GA，推荐                       |
+
+### 主要控制器
+
+```bash
+# v2：查看可用控制器
+cat /sys/fs/cgroup/cgroup.controllers
+# cpuset cpu io memory hugetlb pids rdma misc
+
+# 在父 cgroup 中为子 cgroup（myapp）启用控制器
+echo "+cpu +memory +io" > /sys/fs/cgroup/cgroup.subtree_control
+
+# CPU 控制
+echo 200 > /sys/fs/cgroup/myapp/cpu.weight       # 权重（1-10000）
+echo "50000 100000" > /sys/fs/cgroup/myapp/cpu.max # 带宽限制(quota period)
+
+# 内存控制
+echo $((512*1024*1024)) > /sys/fs/cgroup/myapp/memory.max    # 512MB 上限
+echo $((400*1024*1024)) > /sys/fs/cgroup/myapp/memory.high   # 400MB 软限制
+
+# IO 控制（基于设备号）
+echo "8:0 rbps=10485760 wbps=10485760" > /sys/fs/cgroup/myapp/io.max # 10MB/s
+
+# PID 限制
+echo 100 > /sys/fs/cgroup/myapp/pids.max    # 最多100个进程/线程
+```
+
+---
+
+## 11.9 cgroup v2 内存控制
+
+### 内存限制层次
+
+```art
+内存限制层次（软限制 → 硬限制 → OOM）:
+
+            memory.min   memory.low   memory.high   memory.max
+            (预留底线)   (软保护)     (软上限)      (硬上限)
+
+使用量:  ──────────────────────────────────────────────────────►
+         0    min       low          high          max
+              │          │            │             │
+              │          │         超过后触发       │
+              │          │         内存回收压力     │
+              │          │         (仍可继续使用)   OOM killer
+              │        低于此       
+              │        值时保护
+              │        不被回收
+           保证至少有
+           min字节可用
+```
+
+```bash
+# memory.max — 硬上限（达到后先强制回收，回收仍不足才触发 OOM kill）
+echo $((256*1024*1024)) > /sys/fs/cgroup/myapp/memory.max
+
+# memory.high — 软上限（超过触发回收，但不立即 OOM）
+echo $((200*1024*1024)) > /sys/fs/cgroup/myapp/memory.high
+
+# memory.low — 软保护（内存紧张时优先保留此量）
+echo $((100*1024*1024)) > /sys/fs/cgroup/myapp/memory.low
+
+# memory.min — 硬保护（保证此量不被回收）
+echo $((50*1024*1024)) > /sys/fs/cgroup/myapp/memory.min
+
+# 查看内存使用统计
+cat /sys/fs/cgroup/myapp/memory.current   # 当前使用量（字节）
+cat /sys/fs/cgroup/myapp/memory.stat      # 详细统计
+
+# OOM 事件监控
+cat /sys/fs/cgroup/myapp/memory.events
+# low 0          ← 触发 low 阈值回收次数
+# high 5         ← 触发 high 阈值回收次数
+# max 2          ← 触发 max 阈值次数
+# oom 1          ← 用量达到上限且分配即将失败（进入 OOM 处理）的次数
+# oom_kill 3     ← 被 OOM killer 杀死的进程数
+
+# PSI（压力指标）— 判断容器是否"内存饥渴"
+cat /sys/fs/cgroup/myapp/memory.pressure
+# some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+# full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+```
+
+---
+
+## 11.10 OverlayFS 原理
+
+### 四层结构
+
+```art
+OverlayFS 层次结构:
+
+merged（用户看到的）:
+  /merged/
+  ├── etc/ (来自 upper)        ← 修改的文件在 upper
+  ├── bin/ (来自 lower)        ← 未修改的在 lower（只读）
+  └── new_file (来自 upper)    ← 新建文件在 upper
+
+upper（可读写）:              lower（只读，可多个）:
+  /upper/                       /lower1/
+  ├── etc/passwd (modified)     ├── etc/passwd (original)
+  ├── new_file (new)            ├── bin/sh
+  └── deleted_file (whiteout, c 0:0) └── lib/
+
+workdir（内核工作目录）:      （copy-up 临时区域）
+  /workdir/work/
+```
+
+### Copy-Up 机制
+
+当容器修改只读层（lower）中的文件时，内核执行 copy-up：
+
+```art
+Copy-Up 流程（修改 /etc/passwd）:
+
+步骤1: 用户写 /merged/etc/passwd
+步骤2: OverlayFS 发现 passwd 在 lower（只读）
+步骤3: Copy-Up:
+  a. 在 upper 创建 /upper/etc/ 目录
+  b. 将 /lower/etc/passwd 复制到 /upper/etc/passwd
+  c. 修改 /upper/etc/passwd
+步骤4: 后续读写直接操作 /upper/etc/passwd
+
+注意：Copy-Up 是文件级别，非块级别
+     大文件的首次写入会有延迟（拷贝整个文件）
+```
+
+### Whiteout 文件（删除标记）
+
+```bash
+# 当在容器中删除 lower 层中的文件时
+# OverlayFS 在 upper 层创建特殊的 whiteout 文件
+
+# whiteout 是与被删文件同名的字符设备，设备号 0:0（".wh." 前缀属于 AUFS / OCI 镜像层 tar 格式）
+rm /merged/some_file
+# 效果：在 upper 创建同名字符设备 some_file（设备号 0:0）
+ls -la /upper/some_file
+# c--------- 1 root root 0, 0 Jan 1 00:00 some_file
+
+# 挂载 OverlayFS 的命令
+mount -t overlay overlay \
+    -o lowerdir=/lower1:/lower2,upperdir=/upper,workdir=/workdir \
+    /merged
+
+# Docker 的 OverlayFS 位置
+ls /var/lib/docker/overlay2/<container-id>/
+# diff/    ← upper 层（容器写入的变更）
+# link     ← 层 ID 的短链接（节省 mount option 长度）
+# lower    ← lower 层路径列表
+# merged/  ← 挂载点（运行时存在）
+# work/    ← workdir
+```
+
+---
+
+## 11.11 seccomp BPF
+
+### 系统调用过滤原理
+
+```art
+seccomp BPF 工作流程:
+
+用户进程                     内核
+    │                          │
+    │  syscall(execve, ...)    │
+    ├─────────────────────────►│
+    │                          │  seccomp_run_filters()
+    │                          │  ┌─────────────────┐
+    │                          │  │ BPF 程序执行    │
+    │                          │  │ seccomp_data:   │
+    │                          │  │  .nr = 59       │
+    │                          │  │  .arch = x86_64 │
+    │                          │  │  .args[6]       │
+    │                          │  └────────┬────────┘
+    │                          │           │
+    │                          │  返回码：  │
+    │                          │  ALLOW ───┼──► 继续执行
+    │◄─────────────────────────┼── ERRNO ──┼──► 返回错误
+    │  SIGSYS                  │  KILL ────┼──► 杀死进程
+    │                          │  TRACE ───┼──► ptrace 通知
+```
+
+```c
+/* 使用 libseccomp 设置过滤规则 */
+#include <seccomp.h>
+
+int setup_seccomp(void)
+{
+    scmp_filter_ctx ctx;
+
+    /* 默认拒绝所有系统调用 */
+    ctx = seccomp_init(SCMP_ACT_ERRNO(EPERM));
+    if (!ctx) return -1;
+
+    /* 白名单：允许的系统调用 */
+    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(read), 0);
+    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(write), 0);
+    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(exit_group), 0);
+    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(openat), 0);
+
+    /* 条件过滤只能比较参数的数值，不能解引用指针，因此无法按路径字符串过滤 */
+    /* 例（仅允许写 fd 1）：seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(write),
+                        1, SCMP_A0(SCMP_CMP_EQ, 1)); */
+
+    int rc = seccomp_load(ctx);   /* 加载失败必须上报，否则进程会在无过滤状态下继续运行 */
+    seccomp_release(ctx);
+    return rc < 0 ? -1 : 0;
+}
+```
+
+```bash
+# Docker 默认 seccomp profile 禁用的危险系统调用（部分）：
+# - kexec_load      (加载新内核)
+# - mount           (除非 --privileged)
+# - setns           (命名空间操作)
+# - reboot          (重启系统)
+# - clone (CLONE_NEWUSER) (创建 user namespace)
+# - ptrace          (调试其他进程)
+# - perf_event_open (性能监控，可侧信道攻击)
+# - bpf             (eBPF 程序)
+# 共禁用约 44 个系统调用
+
+# 观察被 seccomp 拒绝的调用（默认 profile 在无 CAP_SYS_ADMIN 时拒绝 unshare，返回 EPERM）
+docker run --rm alpine unshare -U true
+# unshare: unshare(0x10000000): Operation not permitted
+
+# 以禁用 seccomp 运行（危险！）
+docker run --security-opt seccomp=unconfined myimage
+```
+
+---
+
+## 11.12 Linux Capabilities
+
+### 拆分 root 权限
+
+传统 Unix 的 root（UID=0）拥有所有特权。Linux 将其拆分为约 **40 个独立 capability**（5.9+ 内核为 41 个；下方掩码 `3fffffffff` 对应较旧内核的 38 个）：
+
+```bash
+# 查看当前进程的 capability
+cat /proc/$$/status | grep -i cap
+# CapInh: 0000000000000000   (可继承)
+# CapPrm: 0000003fffffffff   (允许集)
+# CapEff: 0000003fffffffff   (有效集)
+# CapBnd: 0000003fffffffff   (边界集)
+# CapAmb: 0000000000000000   (环境集)
+
+# 解码 capability 位掩码
+capsh --decode=0000003fffffffff
+```
+
+### 常用 Capability 对照表
+
+| Capability          | 对应权限                              | Docker 默认 |
+|--------------------|--------------------------------------|------------|
+| `CAP_NET_ADMIN`    | 网络配置（路由/iptables/接口）        | ✗ 移除     |
+| `CAP_NET_BIND_SERVICE` | 绑定 1024 以下端口               | ✓ 保留     |
+| `CAP_SYS_ADMIN`    | 挂载/sethostname/namespace 等        | ✗ 移除     |
+| `CAP_SYS_PTRACE`   | ptrace 其他进程                      | ✗ 移除     |
+| `CAP_SYS_CHROOT`   | chroot(2)                           | ✓ 保留     |
+| `CAP_SETUID`       | 任意设置 UID                         | ✓ 保留     |
+| `CAP_SETGID`       | 任意设置 GID                         | ✓ 保留     |
+| `CAP_KILL`         | 向任意进程发送信号                    | ✓ 保留     |
+| `CAP_CHOWN`        | 任意修改文件所有权                    | ✓ 保留     |
+| `CAP_DAC_OVERRIDE` | 绕过 DAC 权限检查                    | ✓ 保留     |
+| `CAP_MKNOD`        | 创建设备文件                         | ✓ 保留     |
+| `CAP_NET_RAW`      | 原始套接字（ping 等）                 | ✓ 保留     |
+| `CAP_SYS_BOOT`     | 重启系统                             | ✗ 移除     |
+| `CAP_SYS_MODULE`   | 加载/卸载内核模块                    | ✗ 移除     |
+| `CAP_AUDIT_WRITE`  | 写审计日志                           | ✓ 保留     |
+
+```bash
+# 最小权限容器（移除所有 capability，仅保留必要的）
+docker run --cap-drop=ALL --cap-add=NET_BIND_SERVICE nginx
+
+# 使用 setcap 给非 root 程序授予单个 capability
+setcap 'cap_net_bind_service=+ep' /usr/bin/node
+# 现在 node 可以绑定 80 端口，无需 root
+
+# 查看文件的 capability
+getcap /usr/bin/ping
+# /usr/bin/ping = cap_net_raw+ep
+```
+
+---
+
+## 11.13 容器运行时内部：runc 流程
+
+### runc 执行流程
+
+```art
+runc 启动容器的完整流程:
+
+  runc run mycontainer
+       │
+       ▼
+  1. 读取 config.json (OCI Runtime Spec)
+       │  ├── process (cmd/args/env)
+       │  ├── mounts (文件系统挂载列表)
+       │  ├── linux.namespaces (要创建的 ns)
+       │  └── linux.cgroupsPath (cgroup 路径)
+       │
+       ▼
+  2. clone(CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWNET|...) 
+       │  创建子进程（runc init）
+       │
+       ▼
+  3. runc init（子进程内执行）:
+       │  a. 应用 cgroup 限制
+       │  b. 设置 network namespace（veth配置）
+       │  c. pivot_root 切换根文件系统
+       │  d. 挂载 /proc /sys /dev
+       │  e. 设置 hostname (UTS namespace)
+       │  f. 应用 seccomp 过滤
+       │  g. 降低 capabilities
+       │  h. 切换 UID/GID
+       │  i. execve(container_cmd)
+       │
+       ▼
+  4. 容器进程运行（PID 1 = container_cmd）
+```
+
+### OCI Runtime Spec (config.json) 示例
+
+```json
+{
+  "ociVersion": "1.0.2",
+  "process": {
+    "terminal": false,
+    "user": { "uid": 0, "gid": 0 },
+    "args": ["/bin/sh"],
+    "env": ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],
+    "cwd": "/"
+  },
+  "root": {
+    "path": "rootfs",
+    "readonly": false
+  },
+  "linux": {
+    "namespaces": [
+      { "type": "pid" },
+      { "type": "network" },
+      { "type": "mount" },
+      { "type": "uts" },
+      { "type": "ipc" }
+    ],
+    "cgroupsPath": "/mycontainer",
+    "seccomp": {
+      "defaultAction": "SCMP_ACT_ERRNO",
+      "syscalls": [
+        { "names": ["read","write","exit_group"], "action": "SCMP_ACT_ALLOW" }
+      ]
+    }
+  }
+}
+```
+
+---
+
+## 11.14 mini-docker 实现
+
+以下约 **110 行 C 代码**演示容器的核心机制：
+
+```c
+/* mini_docker.c — 使用 clone + pivot_root + cgroups 的最简容器 */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/syscall.h>  /* SYS_pivot_root；缺少它无法通过编译 */
+#define STACK_SIZE (1024 * 1024)  /* 子进程栈 1MiB */
+#define ROOTFS     "/tmp/rootfs"  /* 容器根文件系统路径 */
+
+/* 设置 cgroup 内存限制（v1 路径；纯 cgroup v2 系统应改用 /sys/fs/cgroup/<组名>/memory.max）*/
+static void setup_cgroup(pid_t pid)
+{
+    char path[PATH_MAX];
+    int fd;
+
+    mkdir("/sys/fs/cgroup/memory/minicontainer", 0755);
+
+    /* 内存限制 64MB */
+    snprintf(path, sizeof(path),
+             "/sys/fs/cgroup/memory/minicontainer/memory.limit_in_bytes");
+    if ((fd = open(path, O_WRONLY)) < 0) { perror(path); return; }  /* v2-only 系统上该文件不存在 */
+    write(fd, "67108864", 8);  /* 64 * 1024 * 1024 */
+    close(fd);
+
+    /* 将进程加入 cgroup */
+    snprintf(path, sizeof(path),
+             "/sys/fs/cgroup/memory/minicontainer/cgroup.procs");
+    if ((fd = open(path, O_WRONLY)) < 0) { perror(path); return; }
+    char pid_str[16];
+    snprintf(pid_str, sizeof(pid_str), "%d", pid);
+    write(fd, pid_str, strlen(pid_str));
+    close(fd);
+}
+
+/* 容器内部初始化（在新 namespace 中执行）*/
+static int container_main(void *arg)
+{
+    char **argv = (char **)arg;
+
+    /* 1. 设置 hostname */
+    sethostname("minicontainer", 13);
+
+    /* 2. 先把挂载传播设为 private：避免挂载事件泄漏到宿主机，
+     *    且 pivot_root 要求相关挂载不是 shared（否则返回 EINVAL）*/
+    mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
+    mount("proc", ROOTFS "/proc", "proc", 0, NULL);  /* 新 PID namespace 的 /proc */
+
+    /* 3. pivot_root 切换根文件系统 */
+    char put_old[PATH_MAX];
+    snprintf(put_old, sizeof(put_old), "%s/.old_root", ROOTFS);
+    mkdir(put_old, 0700);
+    /* 确保 rootfs 是挂载点 */
+    mount(ROOTFS, ROOTFS, NULL, MS_BIND | MS_REC, NULL);
+    if (syscall(SYS_pivot_root, ROOTFS, put_old) < 0) {
+        /* fallback: 使用 chroot */
+        chroot(ROOTFS);
+    }
+    chdir("/");
+
+    /* 卸载旧根文件系统 */
+    umount2("/.old_root", MNT_DETACH);
+
+    /* 4. 执行容器命令 */
+    printf("[minicontainer] PID=%d, hostname=minicontainer\n", getpid());
+    execvp(argv[0], argv);
+    perror("execvp");
+    return 1;
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <command> [args...]\n", argv[0]);
+        return 1;
+    }
+
+    /* 分配子进程栈 */
+    char *stack = malloc(STACK_SIZE);
+    char *stack_top = stack + STACK_SIZE;
+
+    /* 创建所有 namespace */
+    int flags = CLONE_NEWPID  |   /* 新 PID namespace */
+                CLONE_NEWUTS  |   /* 新 UTS namespace (hostname) */
+                CLONE_NEWNET  |   /* 新 Network namespace */
+                CLONE_NEWNS   |   /* 新 Mount namespace */
+                CLONE_NEWIPC  |   /* 新 IPC namespace */
+                SIGCHLD;
+
+    pid_t pid = clone(container_main, stack_top, flags, &argv[1]);
+    if (pid < 0) {
+        perror("clone");
+        return 1;
+    }
+
+    printf("[host] Container PID on host: %d\n", pid);
+
+    /* 在宿主机侧设置 cgroup（子进程创建后立即执行）*/
+    setup_cgroup(pid);
+
+    /* 等待容器退出 */
+    int status;
+    waitpid(pid, &status, 0);
+    printf("[host] Container exited with status %d\n",
+           WEXITSTATUS(status));
+
+    free(stack);
+    return 0;
+}
+```
+
+### 编译与运行
+
+```bash
+# 编译
+gcc -o mini_docker mini_docker.c
+
+# 准备最简 rootfs（使用 BusyBox）
+mkdir -p /tmp/rootfs/{bin,proc,sys,dev,tmp}
+cp $(which busybox) /tmp/rootfs/bin/
+/tmp/rootfs/bin/busybox --install /tmp/rootfs/bin/
+
+# 运行容器
+sudo ./mini_docker /bin/sh
+
+# 在容器内验证隔离
+hostname          # → minicontainer
+ps aux            # → 只看到自己的进程
+ip addr           # → 只有 lo 接口
+readlink /proc/1/ns/pid  # → pid:[...]，inode 与宿主机不同（ns 文件是符号链接，不能用 cat）
+```
+
+### 与真实 Docker 的差距
+
+| 功能               | mini-docker          | Docker/containerd       |
+|------------------|----------------------|------------------------|
+| Namespace 隔离   | 5 种（PID/UTS/Net/Mount/IPC） | 全部 8 种              |
+| 文件系统         | 简单 chroot/pivot    | OverlayFS 分层镜像     |
+| 网络             | 无网络配置           | veth + bridge + iptables |
+| cgroup           | 简单内存限制         | 全部控制器 + v1/v2     |
+| 安全             | 无 seccomp/capabilities | 完整 seccomp profile  |
+| 镜像管理         | 无                   | OCI 镜像格式 + 仓库    |
+| 生命周期管理     | 无                   | start/stop/restart/pause |
+
+---
+
+## 总结
+
+```art
+容器技术全景图:
+
+  ┌─────────────────────────────────────────────────────────────┐
+  │                    容器 = 进程 + 隔离 + 限制               │
+  │                                                             │
+  │  隔离机制（Namespace）:                                     │
+  │  PID + Net + Mount + UTS + IPC + User + Cgroup + Time      │
+  │                                                             │
+  │  限制机制（cgroups v2）:                                    │
+  │  cpu.weight + memory.max + io.max + pids.max               │
+  │                                                             │
+  │  文件系统（OverlayFS）:                                     │
+  │  lower(镜像层) + upper(写入层) = merged(容器视图)           │
+  │                                                             │
+  │  安全机制:                                                  │
+  │  seccomp(syscall过滤) + capabilities(权限最小化)            │
+  │  + AppArmor/SELinux(MAC) + User namespace(rootless)        │
+  └─────────────────────────────────────────────────────────────┘
+```
+
+**参考资料**：
+- `kernel/nsproxy.c` — namespace 核心实现
+- `kernel/cgroup/cgroup.c` — cgroup v2 实现
+- `fs/overlayfs/` — OverlayFS 实现
+- `kernel/seccomp.c` — seccomp 实现
+- OCI Runtime Specification: https://github.com/opencontainers/runtime-spec
+- runc 源码: https://github.com/opencontainers/runc
diff --git "a/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/README.md" "b/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/README.md"
new file mode 100644
index 0000000..4ea67a1
--- /dev/null
+++ "b/12-eBPF\344\270\216\345\217\257\350\247\202\346\265\213\346\200\247/README.md"
@@ -0,0 +1,1338 @@
+# 12 — eBPF 与可观测性
+
+> **目标**：从 cBPF 历史到生产级 XDP/Cilium，系统掌握 eBPF 的安全模型、编程接口和可观测性工具链。
+
+---
+
+## 目录
+
+1. [eBPF 历史演进](#121-ebpf-历史演进)
+2. [eBPF 安全模型](#122-ebpf-安全模型)
+3. [eBPF JIT 编译器](#123-ebpf-jit-编译器)
+4. [eBPF Map 类型详解](#124-ebpf-map-类型详解)
+5. [挂载点全景](#125-挂载点全景)
+6. [第一个 eBPF 程序](#126-第一个-ebpf-程序)
+7. [XDP 深入](#127-xdp-深入)
+8. [网络可观测性](#128-网络可观测性)
+9. [CO-RE：一次编译，到处运行](#129-co-re一次编译到处运行)
+10. [bpftrace 实战](#1210-bpftrace-实战)
+11. [BCC 工具集](#1211-bcc-工具集)
+12. [生产级应用](#1212-生产级应用)
+13. [eBPF 限制](#1213-ebpf-限制)
+14. [调试 eBPF 程序](#1214-调试-ebpf-程序)
+
+---
+
+## 12.1 eBPF 历史演进
+
+### 从 cBPF 到 eBPF
+
+```art
+BPF 发展时间线:
+
+1992 ─── cBPF (Classic BPF)
+         Steven McCanne & Van Jacobson
+         论文: "The BSD Packet Filter: A New Architecture for
+               User-level Packet Capture"
+         用途: tcpdump 的包过滤（2个32位寄存器，固定指令集）
+         
+2012 ─── seccomp-BPF (Linux 3.5)
+         将 cBPF 用于系统调用过滤
+         
+2014 ─── eBPF 诞生 (Linux 3.18)
+         Alexei Starovoitov 大规模重写
+         ├── 64位寄存器（R0-R10，11个）
+         ├── 512字节栈
+         ├── Maps（持久化键值存储）
+         ├── 辅助函数（helper functions）
+         └── JIT 编译器（从解释执行转为机器码）
+         
+2015 ─── kprobes/tracepoints 支持 (Linux 4.1)
+         tc BPF 支持（网络 ingress/egress）
+         
+2016 ─── XDP (eXpress Data Path) (Linux 4.8)
+         最快包处理路径（网卡驱动层）
+         
+2017 ─── cgroup eBPF (Linux 4.10)
+         socket 级别策略控制
+         
+2018 ─── BTF (BPF Type Format) (Linux 4.18)
+         类型信息嵌入内核
+         
+2020 ─── LSM BPF, bpf_link, CO-RE 成熟 (Linux 5.7)
+         运行时安全策略
+         
+2020 ─── BPF ringbuf (Linux 5.8)
+         
+2021 ─── BPF 骨架（skeleton）自动生成
+         
+2022+ ── Signed BPF programs, BPF token
+         企业级安全特性
+```
+
+### eBPF 架构概览
+
+![eBPF 完整架构](../assets/diagrams/ebpf-arch.svg)
+
+```art
+eBPF 完整执行架构:
+
+用户空间                    内核空间
+─────────────               ─────────────────────────────────────
+                            
+BPF C 源码                  
+    │ clang/LLVM             
+    ▼                        
+BPF 字节码(.o)              
+    │                        ┌─────────────────────────────┐
+    │  bpf(BPF_PROG_LOAD)   │        Verifier             │
+    ├─────────────────────► │  ├── CFG 分析（无环检测）    │
+    │                        │  ├── 类型检查              │
+    │                        │  ├── 有界循环验证           │
+    │                        │  └── 指针安全检查           │
+    │                        └──────────┬──────────────────┘
+    │                                   │ 通过
+    │                                   ▼
+    │                           ┌──────────────┐
+    │                           │  JIT 编译器   │
+    │                           │  x86_64/ARM64│
+    │                           └──────┬───────┘
+    │                                  │ 机器码
+    │  bpf(BPF_PROG_ATTACH)            ▼
+    ├─────────────────────►  ┌──────────────────────┐
+    │                        │    挂载点             │
+    │                        │  kprobe/xdp/tc/...   │
+    │                        └──────────────────────┘
+    │                                  │ 触发执行
+    │  Map read/write                  ▼
+    ◄─────────────────────►  ┌──────────────────────┐
+    │                        │     eBPF Maps        │
+    │                        │  (共享内存区域)       │
+    └────────────────────────┴──────────────────────┘
+```
+
+---
+
+## 12.2 eBPF 安全模型
+
+### Verifier — 静态分析守门人
+
+Verifier 是 eBPF 安全性的核心，在程序加载时进行严格的静态分析：
+
+```art
+Verifier 分析流程:
+
+BPF 字节码
+    │
+    ▼
+┌─────────────────────────────────────────────┐
+│              Verifier 检查项                 │
+│                                             │
+│  1. 基本检查:                               │
+│     ├── 指令数量 ≤ 1M (内核5.2+)            │
+│     ├── 函数调用深度 ≤ 8                    │
+│     └── 无非法指令                          │
+│                                             │
+│  2. CFG 分析（控制流图）:                   │
+│     ├── 程序必须可终止                      │
+│     ├── 检测死代码                          │
+│     └── 有界循环验证（5.3+支持有限循环）     │
+│                                             │
+│  3. 类型与指针安全:                         │
+│     ├── 寄存器类型追踪                      │
+│     ├── 指针算术范围检查                    │
+│     ├── 内存访问边界验证                    │
+│     └── 禁止访问内核非公开内存             │
+│                                             │
+│  4. Helper 函数白名单:                      │
+│     ├── 每种程序类型允许的 helper 不同      │
+│     ├── 参数类型检查                        │
+│     └── 返回值类型追踪                      │
+└─────────────────────────────────────────────┘
+    │通过           │拒绝
+    ▼               ▼
+  加载成功        EINVAL + verifier log
+```
+
+```bash
+# 查看 verifier 拒绝原因
+bpftool prog load bad_prog.o /sys/fs/bpf/bad 2>&1
+# Error: failed to load program: Permission denied
+# 0: (85) call unknown#123
+# unknown func 123
+
+# 启用详细 verifier 日志
+bpftool --debug prog load prog.o /sys/fs/bpf/test \
+    2>&1 | head -50
+```
+
+### BTF（BPF Type Format）
+
+BTF 是 eBPF 的类型系统，将内核数据结构的类型信息嵌入内核镜像：
+
+```bash
+# 查看内核是否包含 BTF
+ls /sys/kernel/btf/vmlinux   # 内核自带 BTF
+
+# 生成 vmlinux.h（包含所有内核类型）
+bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+wc -l vmlinux.h  # 约 20 万行类型定义
+
+# 查看特定类型的 BTF 信息
+bpftool btf dump file /sys/kernel/btf/vmlinux format raw \
+    | grep -A 10 "STRUCT 'task_struct'"
+```
+
+---
+
+## 12.3 eBPF JIT 编译器
+
+### JIT 工作原理
+
+eBPF 字节码在加载后通过 JIT 编译为目标平台的机器码，性能接近原生 C 代码：
+
+```bash
+# 启用 JIT（现代内核默认开启）
+cat /proc/sys/net/core/bpf_jit_enable
+# 0: 禁用（解释执行）
+# 1: 启用 JIT
+# 2: 启用 JIT + 打印机器码到内核日志
+
+# 启用 JIT
+echo 1 > /proc/sys/net/core/bpf_jit_enable
+
+# 启用 JIT 并查看生成的机器码（调试用）
+echo 2 > /proc/sys/net/core/bpf_jit_enable
+dmesg | grep -A 30 "flen="
+```
+
+### x86_64 JIT 寄存器映射
+
+```art
+eBPF 寄存器 → x86_64 寄存器映射:
+
+eBPF 寄存器    用途              x86_64 寄存器
+──────────────────────────────────────────────
+R0            返回值/函数返回值    rax
+R1            参数1（ctx）         rdi
+R2            参数2               rsi
+R3            参数3               rdx
+R4            参数4               rcx
+R5            参数5               r8
+R6-R9         被调保存寄存器      rbx, r13, r14, r15
+R10           只读帧指针          rbp
+```
+
+```bash
+# 查看 JIT 生成的机器码
+bpftool prog dump jited id <prog_id>
+# int bpf_prog_xxx:
+# bpf_prog_xxx:
+#    0:	nopl   0x0(%rax,%rax,1)
+#    5:	push   %rbp
+#    6:	mov    %rsp,%rbp
+#   ...
+
+# 查看 eBPF 字节码（翻译后的指令）
+bpftool prog dump xlated id <prog_id>
+# int bpf_prog_xxx:
+#    0: (85) call bpf_get_current_pid_tgid#14
+#    1: (77) r1 >>= 32
+#   ...
+```
+
+---
+
+## 12.4 eBPF Map 类型详解
+
+Map 是 eBPF 程序与用户空间之间、以及多个 eBPF 程序之间共享数据的机制：
+
+| Map 类型                    | 结构        | 查找复杂度  | 典型用途                          |
+|---------------------------|-------------|-----------|----------------------------------|
+| `BPF_MAP_TYPE_HASH`        | 哈希表       | O(1) avg  | 连接追踪、频率计数                 |
+| `BPF_MAP_TYPE_ARRAY`       | 数组         | O(1)      | 统计计数器、固定配置               |
+| `BPF_MAP_TYPE_LRU_HASH`    | LRU 哈希    | O(1) avg  | 大规模连接追踪（自动淘汰旧条目）    |
+| `BPF_MAP_TYPE_PERCPU_HASH` | Per-CPU 哈希 | O(1)      | 高性能无锁计数（per-CPU 独立）    |
+| `BPF_MAP_TYPE_PERCPU_ARRAY`| Per-CPU 数组 | O(1)      | 高性能统计                        |
+| `BPF_MAP_TYPE_RINGBUF`     | 环形缓冲区   | —         | 高效向用户空间推送事件（5.8+推荐） |
+| `BPF_MAP_TYPE_PERF_EVENT_ARRAY` | perf 事件 | O(1)   | 向用户空间传递事件（旧方式）       |
+| `BPF_MAP_TYPE_PROG_ARRAY`  | 程序数组     | O(1)      | tail call（程序链跳转）            |
+| `BPF_MAP_TYPE_SOCKMAP`     | Socket 映射  | O(1)      | Socket 重定向（L7 负载均衡）      |
+| `BPF_MAP_TYPE_CGROUP_ARRAY`| cgroup 映射  | O(1)      | cgroup 级别策略                   |
+| `BPF_MAP_TYPE_STACK_TRACE` | 栈追踪       | —         | 火焰图采样                        |
+| `BPF_MAP_TYPE_BLOOM_FILTER`| 布隆过滤器   | O(k)      | 快速成员检测（5.16+）             |
+
+```c
+/* 创建 Hash Map 示例 */
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 10240);
+    __type(key,   u32);       /* PID */
+    __type(value, u64);       /* 计数 */
+} pid_counter SEC(".maps");
+
+/* eBPF 程序内使用 */
+SEC("kprobe/__x64_sys_read")
+int trace_read(struct pt_regs *ctx)
+{
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    u64 *count = bpf_map_lookup_elem(&pid_counter, &pid);
+    if (count) {
+        __sync_fetch_and_add(count, 1);
+    } else {
+        u64 init_val = 1;
+        bpf_map_update_elem(&pid_counter, &pid, &init_val, BPF_ANY);
+    }
+    return 0;
+}
+```
+
+### Ring Buffer（推荐的事件传递方式）
+
+```c
+/* Ring Buffer（5.8+ 推荐，比 perf_event 更高效）*/
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);  /* 256KB */
+} events SEC(".maps");
+
+struct event {
+    u32  pid;
+    char comm[16];
+    char filename[256];
+};
+
+SEC("tracepoint/syscalls/sys_enter_openat")
+int trace_openat(struct trace_event_raw_sys_enter *ctx)
+{
+    struct event *e;
+
+    /* 从 ring buffer 预留空间 */
+    e = bpf_ringbuf_reserve(&events, sizeof(*e), 0);
+    if (!e) return 0;
+
+    e->pid = bpf_get_current_pid_tgid() >> 32;
+    bpf_get_current_comm(&e->comm, sizeof(e->comm));
+    bpf_probe_read_user_str(e->filename, sizeof(e->filename),
+                             (void *)ctx->args[1]);
+
+    /* 提交事件 */
+    bpf_ringbuf_submit(e, 0);
+    return 0;
+}
+```
+
+---
+
+## 12.5 挂载点全景
+
+| 挂载点类型            | 触发时机                      | 内核版本  | 特点                                    |
+|---------------------|------------------------------|---------|----------------------------------------|
+| `kprobe`            | 任意内核函数入口               | 4.1     | 动态，无需重编内核；符号可能被优化消除    |
+| `kretprobe`         | 任意内核函数返回               | 4.1     | 可获取函数返回值                         |
+| `uprobe`            | 用户空间函数入口               | 4.3     | 追踪用户程序，无需修改源码               |
+| `tracepoint`        | 内核静态探针点                 | 4.7     | 稳定 ABI，性能好，推荐优先使用           |
+| `raw_tracepoint`    | 原始 tracepoint（无参数处理）   | 4.17    | 比 tracepoint 开销低                    |
+| `fentry/fexit`      | 内核函数入口/返回（BTF辅助）   | 5.5     | 比 kprobe 开销低 ~3×                    |
+| `XDP`               | 网卡驱动收包（最早期）         | 4.8     | 零拷贝，最高性能（14Mpps+）              |
+| `TC (cls_bpf)`      | 内核流量控制（ingress/egress） | 4.1     | 可修改包内容，支持 redirect              |
+| `socket filter`     | Socket 收包过滤               | 3.19    | 替代 cBPF，用于 tcpdump                 |
+| `sockops`           | TCP 连接事件（建连/关闭等）    | 4.13    | socket 级别拥塞控制                     |
+| `cgroup/skb`        | cgroup 级别包过滤             | 4.10    | 容器网络策略                            |
+| `LSM BPF`           | Linux Security Module 钩子   | 5.7     | 运行时安全策略（代替 AppArmor/SELinux） |
+| `iterator (BPF iter)`| 遍历内核对象（进程/文件等）   | 5.8     | 安全高效地dump内核状态                  |
+| `struct_ops`        | 替换内核结构体中的函数指针     | 5.6     | 自定义 TCP 拥塞算法                     |
+
+---
+
+## 12.6 第一个 eBPF 程序
+
+### 完整 Hello World（追踪 execve）
+
+```c
+/* hello_ebpf.bpf.c — 追踪 execve 系统调用 */
+#include "vmlinux.h"          /* 内核类型定义（由 bpftool 生成）*/
+#include <bpf/bpf_helpers.h>  /* eBPF helper 函数 */
+#include <bpf/bpf_tracing.h>  /* kprobe 辅助宏 */
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+/* SEC 宏指定挂载点；x86_64 的 __x64_sys_* 只接收一个 pt_regs*，须用 ksyscall 解包真实参数（libbpf ≥ 1.0）*/
+SEC("ksyscall/execve")
+int BPF_KSYSCALL(trace_execve, const char *filename,
+                 const char *const *argv,
+                 const char *const *envp)
+{
+    /* 获取当前进程信息：高 32 位是 tgid（用户态的 PID），低 32 位是线程 ID */
+    pid_t pid = bpf_get_current_pid_tgid() >> 32;         /* tgid = 进程 ID */
+    pid_t tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;  /* 内核 pid = 线程 ID */
+    u32   uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
+
+    /* 读取进程名 */
+    char comm[16];
+    bpf_get_current_comm(comm, sizeof(comm));
+
+    /* 读取要执行的文件名（从用户空间）*/
+    char fname[256];
+    bpf_probe_read_user_str(fname, sizeof(fname), filename);
+
+    /* 输出到 trace_pipe（超过 3 个参数的 bpf_printk 依赖 bpf_trace_vprintk，需内核 5.16+）*/
+    bpf_printk("execve: pid=%d uid=%d comm=%s filename=%s",
+               pid, uid, comm, fname);
+
+    return 0;
+}
+```
+
+```c
+/* hello_ebpf.c — 用户空间加载程序 */
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <bpf/libbpf.h>
+#include "hello_ebpf.skel.h"  /* 自动生成的骨架 */
+
+static volatile bool running = true;
+
+static void sig_handler(int sig) { running = false; }
+
+int main(void)
+{
+    struct hello_ebpf_bpf *skel;
+    int err;
+
+    /* 1. 打开并加载 BPF 对象 */
+    skel = hello_ebpf_bpf__open_and_load();
+    if (!skel) {
+        fprintf(stderr, "Failed to open/load BPF skeleton\n");
+        return 1;
+    }
+
+    /* 2. 附加到挂载点 */
+    err = hello_ebpf_bpf__attach(skel);
+    if (err) {
+        fprintf(stderr, "Failed to attach BPF: %d\n", err);
+        goto cleanup;
+    }
+
+    /* 3. 读取输出 */
+    printf("Tracing execve... Hit Ctrl-C to stop.\n");
+    printf("%-8s %-6s %-16s %s\n", "TIME", "PID", "COMM", "FILENAME");
+
+    signal(SIGINT, sig_handler);
+    while (running) {
+        /* 从 /sys/kernel/debug/tracing/trace_pipe 读取 */
+        /* 生产代码应使用 ring buffer 回调 */
+        sleep(1);
+    }
+
+cleanup:
+    hello_ebpf_bpf__destroy(skel);
+    return err;
+}
+```
+
+```makefile
+# Makefile
+CLANG   := clang
+BPFTOOL := bpftool
+ARCH    := $(shell uname -m | sed 's/x86_64/x86/;s/aarch64/arm64/')
+all: hello_ebpf   # 默认目标必须是第一条规则，否则 make 只会生成 vmlinux.h
+# 生成 vmlinux.h
+vmlinux.h:
+	$(BPFTOOL) btf dump file /sys/kernel/btf/vmlinux format c > $@
+
+# 编译 BPF 程序
+hello_ebpf.bpf.o: hello_ebpf.bpf.c vmlinux.h
+	$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
+		-I/usr/include/bpf -c $< -o $@
+
+# 生成骨架头文件
+hello_ebpf.skel.h: hello_ebpf.bpf.o
+	$(BPFTOOL) gen skeleton $< > $@
+
+# 编译用户空间程序
+hello_ebpf: hello_ebpf.c hello_ebpf.skel.h
+	gcc -g -O2 -o $@ $< -lbpf -lelf -lz
+```
+
+```bash
+# 编译与运行
+make
+sudo ./hello_ebpf
+
+# 查看输出
+sudo cat /sys/kernel/debug/tracing/trace_pipe
+# bash-1234    [001] .... 12345.678: bpf_trace_printk:
+#   execve: pid=1234 uid=1000 comm=bash filename=/usr/bin/ls
+```
+
+---
+
+## 12.7 XDP 深入
+
+### XDP 包处理路径
+
+```art
+XDP 在网络栈中的位置:
+
+网卡（NIC）
+    │ 网卡驱动收到数据包
+    ▼
+┌─────────────────────────────────────┐
+│  XDP（eXpress Data Path）           │  ← eBPF 在此处理，最快！
+│  运行于驱动层，在分配 skb 之前      │
+└─────────────┬───────────────────────┘
+              │ XDP_PASS
+              ▼
+         分配 sk_buff
+              │
+              ▼
+         TC (traffic control)
+         cls_bpf ingress          ← eBPF 也可在此
+              │
+              ▼
+         Netfilter / iptables
+              │
+              ▼
+         IP 路由
+              │
+              ▼
+         TCP/UDP 协议栈
+              │
+              ▼
+         Socket 应用程序
+```
+
+### XDP 返回码
+
+```c
+/* XDP 程序必须返回以下之一 */
+enum xdp_action {
+    XDP_ABORTED = 0,  /* 程序异常，丢弃并触发 xdp:xdp_exception tracepoint */
+    XDP_DROP,         /* 静默丢弃数据包（最快的防 DDoS 方式）*/
+    XDP_PASS,         /* 将包传递给正常网络栈 */
+    XDP_TX,           /* 从同一网卡发回去（反射）*/
+    XDP_REDIRECT,     /* 重定向到其他 CPU/网卡/用户空间（AF_XDP）*/
+};
+```
+
+### XDP 防 DDoS 示例
+
+```c
+/* ddos_filter.bpf.c — XDP 速率限制 */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define ETH_P_IP    0x0800  /* vmlinux.h 只含类型不含宏，需自行定义 */
+#define MAX_ENTRIES 65536
+#define RATE_LIMIT  1000  /* 每秒最多 1000 个包 */
+
+struct {
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
+    __uint(max_entries, MAX_ENTRIES);
+    __type(key,   __u32);     /* 源 IP */
+    __type(value, __u64);     /* 包计数 */
+} ip_count SEC(".maps");
+
+SEC("xdp")
+int xdp_ddos_filter(struct xdp_md *ctx)
+{
+    void *data_end = (void *)(long)ctx->data_end;
+    void *data     = (void *)(long)ctx->data;
+
+    /* 解析以太网头 */
+    struct ethhdr *eth = data;
+    if ((void *)(eth + 1) > data_end)
+        return XDP_PASS;
+
+    if (eth->h_proto != bpf_htons(ETH_P_IP))
+        return XDP_PASS;
+
+    /* 解析 IP 头 */
+    struct iphdr *iph = (void *)(eth + 1);
+    if ((void *)(iph + 1) > data_end)
+        return XDP_PASS;
+
+    __u32 src_ip = iph->saddr;
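+    /* 查找/更新计数：value 高 32 位 = 当前秒，低 32 位 = 该秒内的包数 */
+    __u64 sec = bpf_ktime_get_ns() / 1000000000ULL;
+    __u64 *count = bpf_map_lookup_elem(&ip_count, &src_ip);
+    if (count && (*count >> 32) == sec) {
+        __sync_fetch_and_add(count, 1);
+        if ((__u32)*count > RATE_LIMIT)
+            return XDP_DROP;  /* 超速，丢弃 */
+    } else {
+        __u64 init = (sec << 32) | 1;   /* 新 IP 或进入新的一秒：重置窗口 */
+        bpf_map_update_elem(&ip_count, &src_ip, &init, BPF_ANY);
+    }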
+
+    return XDP_PASS;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+```
+
+```bash
+# 加载 XDP 程序到网卡
+ip link set dev eth0 xdp obj ddos_filter.bpf.o sec xdp
+
+# 查看 XDP 程序状态
+ip link show eth0
+# 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 xdp ...
+#    prog/xdp id 42 tag abc123def456
+
+# 卸载 XDP
+ip link set dev eth0 xdp off
+
+# 性能数据（基准测试）
+# XDP_DROP 速率：14Mpps（单核，Intel X710 10G NIC）
+# 相比 iptables DROP：约快 10x
+# 相比内核网络栈处理：约快 6x
+```
+
+### AF_XDP — 零拷贝到用户空间
+
+```bash
+# AF_XDP 允许将包直接从 NIC 传递到用户空间（绕过内核网络栈）
+# 适合需要用户空间包处理的高性能场景（DPDK 替代方案）
+
+# 性能对比:
+# ┌──────────────────────┬─────────────┬──────────────┐
+# │ 技术                 │ 延迟（µs）   │ 吞吐量       │
+# ├──────────────────────┼─────────────┼──────────────┤
+# │ 内核网络栈           │  50-200     │  ~1 Mpps     │
+# │ XDP（内核处理）      │  5-20       │  14+ Mpps    │
+# │ AF_XDP（用户空间）   │  2-10       │  10+ Mpps    │
+# │ DPDK                │  1-5        │  20+ Mpps    │
+# └──────────────────────┴─────────────┴──────────────┘
+```
+
+---
+
+## 12.8 网络可观测性
+
+### TCP 连接追踪
+
+```c
+/* tcp_trace.bpf.c — 追踪 TCP 连接建立 */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+struct conn_event {
+    u32 pid;
+    u32 saddr;    /* 源 IP */
+    u32 daddr;    /* 目标 IP */
+    u16 sport;    /* 源端口 */
+    u16 dport;    /* 目标端口 */
+    char comm[16];
+};
+
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 1 << 20);
+} events SEC(".maps");
+
+/* 追踪 TCP 主动连接的发起（tcp_connect() 在发送 SYN 时被调用，而非连接完成时）*/
+SEC("kprobe/tcp_connect")
+int trace_tcp_connect(struct pt_regs *ctx)
+{
+    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);
+    struct conn_event *e;
+
+    e = bpf_ringbuf_reserve(&events, sizeof(*e), 0);
+    if (!e) return 0;
+
+    e->pid = bpf_get_current_pid_tgid() >> 32;
+    bpf_get_current_comm(&e->comm, sizeof(e->comm));
+
+    /* 使用 CO-RE 安全读取内核结构体字段 */
+    BPF_CORE_READ_INTO(&e->saddr, sk,
+                       __sk_common.skc_rcv_saddr);
+    BPF_CORE_READ_INTO(&e->daddr, sk,
+                       __sk_common.skc_daddr);
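+    e->sport = BPF_CORE_READ(sk, __sk_common.skc_num);          /* 源端口（主机字节序）；不赋值则为环形缓冲区残留数据 */
+    BPF_CORE_READ_INTO(&e->dport, sk, __sk_common.skc_dport);   /* 网络字节序，用户态需 ntohs() */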
+
+    bpf_ringbuf_submit(e, 0);
+    return 0;
+}
+```
+
+### sk_buff 延迟测量
+
+```c
+/* 测量数据包在网络栈中的延迟 */
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 10240);
+    __type(key,   u64);   /* skb 地址 */
+    __type(value, u64);   /* 进入时间戳 */
+} skb_ts SEC(".maps");
+
+SEC("kprobe/ip_rcv")
+int kprobe_ip_rcv(struct pt_regs *ctx)
+{
+    struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM1(ctx);
+    u64 ts = bpf_ktime_get_ns();
+    u64 skb_addr = (u64)skb;
+    bpf_map_update_elem(&skb_ts, &skb_addr, &ts, BPF_ANY);
+    return 0;
+}
+
+SEC("kprobe/__kfree_skb")
+int kprobe_skb_free(struct pt_regs *ctx)
+{
+    struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM1(ctx);
+    u64 skb_addr = (u64)skb;
+    u64 *ts = bpf_map_lookup_elem(&skb_ts, &skb_addr);
+    if (ts) {
+        u64 delta_us = (bpf_ktime_get_ns() - *ts) / 1000;
+        bpf_printk("skb latency: %llu us", delta_us);
+        bpf_map_delete_elem(&skb_ts, &skb_addr);
+    }
+    return 0;
+}
+```
+
+---
+
+## 12.9 CO-RE：一次编译，到处运行
+
+### 传统 BCC 的问题
+
+```art
+传统 BCC 方式（有问题）:
+
+  用户机器                        目标机器
+  ─────────                      ─────────
+  BPF C 源码                     
+      │ clang 编译                
+      ▼                           
+  BPF 字节码                     
+      │ 分发                      
+      ├──────────────────────►  加载失败！
+                                  因为内核版本不同
+                                  struct task_struct 字段偏移不同
+                                  
+问题：BPF 程序硬编码了内核结构体的偏移量
+```
+
+### CO-RE 解决方案
+
+```art
+CO-RE 方式（正确）:
+
+  开发机器                        任意目标机器
+  ─────────                      ─────────────────
+  BPF C 源码（使用 BPF_CORE_READ）
+      │ clang + BTF               
+      ▼                           
+  BPF 对象（含 BTF 重定位信息）   
+      │ 分发                      
+      ├──────────────────────►   libbpf 加载
+                                      │
+                                  读取 /sys/kernel/btf/vmlinux
+                                      │
+                                  动态计算字段偏移
+                                      │
+                                  修补 BPF 字节码
+                                      │
+                                  加载成功 ✓
+```
+
+```c
+/* CO-RE 编程示例 */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>   /* 提供 BPF_CORE_READ 宏 */
+
+SEC("kprobe/do_sys_openat2")
+int trace_open(struct pt_regs *ctx)
+{
+    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+
+    /* 传统方式（硬编码偏移，不安全）: */
+    // pid_t pid = *(pid_t *)((char *)task + 1244);  // 错误！
+
+    /* CO-RE 方式（动态适配内核版本）: */
+    pid_t pid = BPF_CORE_READ(task, pid);           // 安全 ✓
+    pid_t tgid = BPF_CORE_READ(task, tgid);         // 安全 ✓
+
+    /* 读取嵌套字段 */
+    unsigned int uid = BPF_CORE_READ(task, cred, uid.val);
+
+    /* 读取其他标量字段（如自愿上下文切换次数 nvcsw） */
+    // u64 nvcsw = BPF_CORE_READ(task, nvcsw);
+
+    bpf_printk("open: pid=%d tgid=%d uid=%d", pid, tgid, uid);
+    return 0;
+}
+```
+
+---
+
+## 12.10 bpftrace 实战
+
+bpftrace 是基于 eBPF 的高级追踪语言，类似 DTrace：
+
+```bash
+# ─── 文件 I/O ───────────────────────────────────────────────
+
+# opensnoop：追踪所有文件打开（类似 opensnoop 工具）
+bpftrace -e '
+tracepoint:syscalls:sys_enter_openat {
+    printf("%-6d %-16s %s\n", pid, comm, str(args->filename));
+}'
+
+# 统计每个进程打开的文件数
+bpftrace -e '
+tracepoint:syscalls:sys_enter_openat {
+    @[comm] = count();
+} interval:s:5 { print(@); clear(@); }'
+
+# ─── 进程执行 ───────────────────────────────────────────────
+
+# execsnoop：追踪新进程创建
+bpftrace -e '
+tracepoint:syscalls:sys_enter_execve {
+    printf("%-10u %-6d %-16s %s\n", elapsed/1e9, pid, comm,
+           str(args->filename));
+}'
+
+# ─── 网络 ───────────────────────────────────────────────────
+
+# tcplife：TCP 连接生命周期
+bpftrace -e '
+kprobe:tcp_set_state / arg1 == 1 / {
+    @start[arg0] = nsecs;
+}
+kprobe:tcp_set_state / arg1 == 7 && @start[arg0] / {
+    printf("TCP conn duration: %d ms\n",
+           (nsecs - @start[arg0]) / 1000000);
+    delete(@start[arg0]);
+}'
+
+# 统计 TCP 连接目标端口分布
+bpftrace -e '
+kprobe:tcp_connect {
+    $sk = (struct sock *)arg0;
+    @[($sk->__sk_common.skc_dport >> 8) |
+      (($sk->__sk_common.skc_dport & 0xff) << 8)] = count();
+}'
+
+# ─── 调度 ───────────────────────────────────────────────────
+
+# 运行队列延迟直方图
+bpftrace -e '
+tracepoint:sched:sched_wakeup,
+tracepoint:sched:sched_wakeup_new {
+    @ts[args->pid] = nsecs;
+}
+tracepoint:sched:sched_switch {
+    if (@ts[args->next_pid]) {
+        @runqlat = hist((nsecs - @ts[args->next_pid]) / 1000);
+        delete(@ts[args->next_pid]);
+    }
+}
+interval:s:5 { print(@runqlat); clear(@runqlat); }'
+
+# ─── 内存 ───────────────────────────────────────────────────
+
+# 追踪 OOM kill 事件
+bpftrace -e '
+kprobe:oom_kill_process {
+    printf("OOM kill: pid=%d comm=%s\n",
+           ((struct task_struct *)arg1)->pid,
+           ((struct task_struct *)arg1)->comm);
+}'
+
+# 统计 malloc 大小分布（uprobe 追踪用户空间）
+bpftrace -e '
+uprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc {
+    @sizes = hist(arg0);
+} interval:s:5 { print(@sizes); exit(); }'
+
+# ─── CPU ────────────────────────────────────────────────────
+
+# 上下文切换次数（按被换出的进程统计，并非 CPU 使用率）
+bpftrace -e '
+tracepoint:sched:sched_switch {
+    @[args->prev_comm] = count();
+} interval:s:1 {
+    print(@);
+    clear(@);
+}'
+
+# 内核函数调用频率
+bpftrace -e 'kprobe:vfs_* { @[probe] = count(); }
+             interval:s:5  { print(@); exit(); }'
+```
+
+---
+
+## 12.11 BCC 工具集
+
+BCC（BPF Compiler Collection）提供了一组生产级可观测性工具：
+
+```bash
+# ─── 安装 BCC ───────────────────────────────────────────────
+apt-get install bpfcc-tools linux-headers-$(uname -r)
+# 工具位于 /usr/share/bcc/tools/ 或 /sbin/
+
+# ─── 文件系统 ───────────────────────────────────────────────
+
+# 追踪 open 系统调用（含失败）
+opensnoop-bpfcc -x     # -x 显示失败的调用
+
+# 追踪文件系统慢操作（>10ms）
+fileslower-bpfcc 10
+
+# 追踪短命文件（创建后很快被删除的文件）
+filelife-bpfcc         # 输出文件名及其存活时长
+
+# ─── 磁盘 I/O ───────────────────────────────────────────────
+
+# 块设备延迟直方图
+biolatency-bpfcc -D    # -D 按磁盘分类
+
+# 块设备 I/O 追踪（类似 iotop）
+biotop-bpfcc 1 5       # 每1秒刷新，共5次
+
+# 逐条追踪块 I/O 请求（输出每次 I/O 的延迟，可据此找出慢 I/O）
+biosnoop-bpfcc
+
+# ─── 网络 ───────────────────────────────────────────────────
+
+# TCP 连接追踪
+tcpconnect-bpfcc       # 追踪主动连接
+tcpaccept-bpfcc        # 追踪被动连接
+tcpretrans-bpfcc       # 追踪 TCP 重传
+tcplife-bpfcc          # TCP 连接生命周期（含字节数）
+
+# 网络延迟
+tcptracer-bpfcc        # 全量 TCP 事件
+
+# ─── CPU/调度 ───────────────────────────────────────────────
+
+# 运行队列延迟直方图
+runqlat-bpfcc
+
+# CPU 火焰图采样
+profile-bpfcc -F 99 -f 30 > profile.folded  # 99Hz，30秒；-f 输出折叠栈格式
+flamegraph.pl profile.folded > flame.svg
+
+# Off-CPU 分析（阻塞在哪里）
+offcputime-bpfcc -f 30 > offcpu.folded     # -f 输出折叠栈格式
+flamegraph.pl --color=io offcpu.folded > offcpu-flame.svg
+
+# ─── 内存 ───────────────────────────────────────────────────
+
+# 追踪内存分配（用户空间）
+memleak-bpfcc -p <pid>  # 检测内存泄漏
+
+# slab 分配统计
+slabratetop-bpfcc 1 5
+
+# ─── BCC Python 自定义工具 ───────────────────────────────────
+
+# openat 追踪（Python BCC 版本）
+python3 - <<'EOF'
+from bcc import BPF
+
+prog = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+BPF_HASH(counts, u32, u64);
+
+TRACEPOINT_PROBE(syscalls, sys_enter_openat) {
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    u64 *val = counts.lookup(&pid);
+    if (val) (*val)++;
+    else {
+        u64 one = 1;
+        counts.update(&pid, &one);
+    }
+    return 0;
+}
+"""
+
+b = BPF(text=prog)
+import time
+time.sleep(5)
+print("Top processes by open() calls:")
+for k, v in sorted(b["counts"].items(),
+                   key=lambda x: x[1].value, reverse=True)[:10]:
+    print(f"  PID {k.value}: {v.value}")
+EOF
+```
+
+---
+
+## 12.12 生产级应用
+
+### Cilium — eBPF CNI 网络插件
+
+```art
+Cilium 架构:
+
+  Pod A                          Pod B
+  ┌──────────┐                   ┌──────────┐
+  │ 应用程序  │                   │ 应用程序  │
+  └────┬─────┘                   └────┬─────┘
+       │                              │
+  ┌────▼──────────────────────────────▼────┐
+  │              eBPF 数据平面              │
+  │                                        │
+  │  ┌─────────────┐  ┌─────────────────┐  │
+  │  │ L3/L4 策略  │  │ L7 (HTTP/gRPC)  │  │
+  │  │ 零 iptables │  │ 感知策略         │  │
+  │  └─────────────┘  └─────────────────┘  │
+  │                                        │
+  │  ┌──────────────────────────────────┐  │
+  │  │     负载均衡 (替代 kube-proxy)   │  │
+  │  │     XDP + TC BPF               │  │
+  │  └──────────────────────────────────┘  │
+  └────────────────────────────────────────┘
+```
+
+```bash
+# 安装 Cilium（Kubernetes）
+cilium install --version 1.15.0
+
+# 验证 eBPF 程序已加载
+cilium status --all-controllers
+
+# 查看 Cilium 加载的 eBPF 程序
+bpftool prog list | grep cilium
+# 1234: xdp tag abc123  cilium_xdp_ingress
+# 1235: tc  tag def456  cilium_tc_ingress
+
+# Hubble — 基于 eBPF 的网络可观测性
+hubble observe --namespace production --last 100
+```
+
+### Falco — 安全运行时检测
+
+```yaml
+# falco_rules.yaml — 检测容器内的危险操作
+- rule: Write below binary dir
+  desc: Write operation to /usr/bin or /bin（可能被注入）
+  condition: >
+    evt.type = write and
+    container and
+    fd.directory in (/usr/bin, /usr/sbin, /bin, /sbin)
+  output: >
+    Writing to binary directory
+    (user=%user.name container=%container.name
+     file=%fd.name proc=%proc.name)
+  priority: WARNING
+
+- rule: Terminal shell in container
+  desc: 容器内出现交互式 shell（可能被攻击）
+  condition: >
+    evt.type = execve and
+    container and
+    proc.name in (bash, sh, zsh) and
+    proc.tty != 0
+  output: >
+    Interactive shell in container
+    (user=%user.name container=%container.id
+     shell=%proc.name parent=%proc.pname)
+  priority: NOTICE
+```
+
+### Pixie — 无插桩全链路追踪
+
+```bash
+# Pixie 使用 eBPF uprobe 自动追踪 HTTP/gRPC/MySQL 等协议
+# 无需修改应用代码
+
+# 安装 Pixie
+px deploy
+
+# 查询 HTTP 请求延迟（PxL 语言）
+px run px/http_data -- start_time='-5m'
+
+# 查询 SQL 请求（自动识别 MySQL 协议）
+px run px/mysql_data
+```
+
+### Meta Katran — eBPF L4 负载均衡
+
+```bash
+# Meta 开源的 eBPF L4 负载均衡器
+# 替代传统的 IPVS/HAProxy
+# 性能：在单台服务器上处理 100Gbps 流量
+
+# 关键特性:
+# - XDP 数据平面（14Mpps+ per core）
+# - ECMP 一致性哈希（Maglev 算法）
+# - Healthcheck 集成
+# - GUE（Generic UDP Encapsulation）封装
+```
+
+---
+
+## 12.13 eBPF 限制
+
+### 技术限制汇总
+
+```art
+eBPF 程序限制（以 Linux 5.15 为基准）:
+
+  ┌─────────────────────────────────────────────────────────┐
+  │  栈大小: 512 字节（硬限制）                             │
+  │  → 大缓冲区必须用 Map 或 per-CPU array                 │
+  │                                                         │
+  │  程序大小: 100万条指令（5.2+ 提升，原来 4096）          │
+  │  → 复杂策略需要 tail call 分割                         │
+  │                                                         │
+  │  循环: 有界循环（5.3+），必须可证明终止                  │
+  │  → 无限循环被 Verifier 拒绝                            │
+  │                                                         │
+  │  函数调用深度: 8 层（不含 tail call）                   │
+  │                                                         │
+  │  Map 数量: 每个程序 64 个 Map                          │
+  │                                                         │
+  │  BPF-to-BPF 调用: 支持（4.16+）                        │
+  │                                                         │
+  │  Tail call 深度: 33 次                                  │
+  │                                                         │
+  │  内核版本要求:                                          │
+  │  - 基础功能: 3.18+                                     │
+  │  - tracepoint: 4.7+                                    │
+  │  - XDP: 4.8+                                           │
+  │  - CO-RE: 5.2+（libbpf）                               │
+  │  - LSM BPF: 5.7+                                       │
+  │  - 有界循环: 5.3+                                      │
+  └─────────────────────────────────────────────────────────┘
+```
+
+### 内核版本要求对照表
+
+| 功能                    | 最低内核版本 | 说明                          |
+|------------------------|------------|------------------------------|
+| 基础 eBPF              | 3.18       | Maps + JIT                   |
+| kprobe                 | 4.1        | 追踪内核函数                  |
+| tracepoint             | 4.7        | 稳定 ABI 追踪点               |
+| XDP                    | 4.8        | 高性能包处理                  |
+| cgroup BPF             | 4.10       | 容器网络策略                  |
+| sockmap                | 4.14       | Socket 重定向                |
+| 有界循环               | 5.3        | for 循环支持                  |
+| fentry/fexit           | 5.5        | 低开销函数追踪                |
+| CO-RE + BTF            | 5.2+       | 跨版本兼容                   |
+| LSM BPF                | 5.7        | 运行时安全                   |
+| Ring Buffer            | 5.8        | 高效事件传递                 |
+| Bloom Filter Map       | 5.16       | 概率性数据结构                |
+
+---
+
+## 12.14 调试 eBPF 程序
+
+### bpf_printk — 打印调试信息
+
+```c
+/* 在 eBPF 程序中打印（慢，仅用于调试）*/
+SEC("kprobe/__x64_sys_read")
+int debug_read(struct pt_regs *ctx)
+{
+    pid_t pid = bpf_get_current_pid_tgid() >> 32;
+    char comm[16];
+    bpf_get_current_comm(comm, sizeof(comm));
+
+    /* 最多3个参数（内核 5.16 引入 bpf_trace_vprintk 之前的限制）*/
+    bpf_printk("read: pid=%d comm=%s", pid, comm);
+    return 0;
+}
+```
+
+```bash
+# 读取 bpf_printk 输出
+cat /sys/kernel/debug/tracing/trace_pipe
+
+# 或者
+trace-cmd stream -e bpf
+
+# 只显示 BPF 输出（过滤其他 trace）
+sudo bash -c 'echo "" > /sys/kernel/debug/tracing/trace'  # 清空
+cat /sys/kernel/debug/tracing/trace_pipe | grep bpf_trace
+```
+
+### bpftool — eBPF 对象管理工具
+
+```bash
+# ─── 程序管理 ───────────────────────────────────────────────
+
+# 列出所有已加载的 BPF 程序
+bpftool prog list
+# 42: kprobe  name trace_execve  tag a1b2c3d4e5f6  gpl
+#     loaded_at 2024-01-01T00:00:00+0000  uid 0
+#     xlated 128B  jited 256B  memlock 4096B  map_ids 1,2
+
+# 查看程序的翻译字节码
+bpftool prog dump xlated id 42
+bpftool prog dump xlated id 42 visual > prog.dot
+# dot -Tsvg prog.dot > prog.svg  # 可视化控制流图
+
+# 查看 JIT 生成的机器码
+bpftool prog dump jited id 42
+bpftool prog dump jited id 42 opcodes  # 含十六进制
+
+# 钉住程序（防止被垃圾回收）
+bpftool prog pin id 42 /sys/fs/bpf/my_prog
+
+# ─── Map 管理 ────────────────────────────────────────────────
+
+# 列出所有 Map
+bpftool map list
+
+# 查看 Map 内容
+bpftool map dump id 5
+
+# 查看特定键的值
+bpftool map lookup id 5 key 0x01 0x02 0x03 0x04
+
+# 更新 Map 值（可在运行时修改 eBPF 行为）
+bpftool map update id 5 key 0x01 0x00 0x00 0x00 value 0xff 0x00
+
+# ─── BTF 信息 ────────────────────────────────────────────────
+
+# 查看内核 BTF 类型
+bpftool btf list
+bpftool btf dump id 1 format c | grep -A 10 "task_struct"
+
+# 查看程序关联的 BTF
+bpftool prog show id 42 --json | jq '.btf_id'
+
+# ─── Perf 事件关联 ────────────────────────────────────────────
+
+# 查看程序的挂载点（link）
+bpftool link list
+# 1: kprobe  prog 42
+#     pids bash(1234)
+
+# 查看网络接口上的 BPF 程序
+bpftool net list
+# xdp:
+# eth0(2) generic id 42
+
+# ─── 高级诊断 ────────────────────────────────────────────────
+
+# 显示 verifier 日志（调试加载失败）
+bpftool -d prog load bad.o /sys/fs/bpf/bad \
+    2>&1 | grep -A 5 "invalid"
+
+# 统计 BPF 程序运行时间（需要 CONFIG_DEBUG_INFO_BTF）
+bpftool prog profile id 42 duration 5 cycles instructions
+# cycles     : 12345678  /sec
+# instructions: 23456789 /sec
+# ipc:        1.900
+```
+
+### Verifier 错误排查
+
+```bash
+# 常见 Verifier 错误及原因
+
+# 错误1: "R1 type=inv expected=ctx"
+# 原因：向 helper 传递了错误类型的指针
+# 修复：确保传递的是 ctx 指针，而不是普通指针
+
+# 错误2: "invalid read from stack R1 off=-512 size=4"
+# 原因：访问了栈以外的内存（栈溢出）
+# 修复：减少栈变量大小，使用 Map 存储大数据
+
+# 错误3: "back-edge from insn X to Y"
+# 原因：无界循环（5.3以前内核）
+# 修复：用有界循环或升级内核
+
+# 错误4: "R0 invalid mem access 'map_value_or_null'"
+# 原因：没有检查 map_lookup_elem 返回值是否为 NULL
+# 修复：
+u64 *val = bpf_map_lookup_elem(&my_map, &key);
+if (!val) return 0;  // 必须检查 NULL！
+*val += 1;           // 安全
+
+# 错误5: "dereference of modified ctx ptr R1"
+# 原因：修改了 ctx 指针后解引用
+# 修复：不要对 ctx 指针进行算术运算
+
+# 使用 libbpf verbose 模式获取详细错误
+libbpf_set_print(libbpf_print_fn);  // 仅接受一个回调；在回调中不要过滤 LIBBPF_DEBUG 级别
+```
+
+### 完整调试工作流
+
+```bash
+# 1. 编译时启用调试信息
+clang -g -O2 -target bpf -c prog.bpf.c -o prog.bpf.o
+
+# 2. 检查字节码是否正确
+llvm-objdump -S prog.bpf.o
+
+# 3. 验证加载（带详细日志）
+bpftool -d prog load prog.bpf.o /sys/fs/bpf/test \
+    2>&1 | tee verifier.log
+
+# 4. 查看运行时输出
+cat /sys/kernel/debug/tracing/trace_pipe &
+
+# 5. 运行工作负载
+./trigger_workload.sh
+
+# 6. 检查 Map 内容
+bpftool map dump pinned /sys/fs/bpf/my_map
+
+# 7. 性能分析
+sysctl -w kernel.bpf_stats_enabled=1 && bpftool prog show  # 输出 run_time_ns / run_cnt（5.1+）
+
+# 8. 清理
+rm /sys/fs/bpf/test /sys/fs/bpf/my_map
+kill %1  # 停止 trace_pipe
+```
+
+---
+
+## 总结
+
+```art
+eBPF 生态系统全景:
+
+  ┌────────────────────────────────────────────────────────────┐
+  │                    eBPF 应用场景                           │
+  │                                                            │
+  │  可观测性           安全                网络               │
+  │  ─────────         ──────             ──────              │
+  │  bpftrace          seccomp BPF        XDP (DDoS防护)      │
+  │  BCC 工具集         Falco              Cilium (CNI)        │
+  │  Pixie             Tetragon           Katran (LB)         │
+  │  perf + BPF        LSM BPF            tc BPF              │
+  │  火焰图             syscall过滤         sockmap             │
+  │                                                            │
+  │  基础设施层                                                │
+  │  ─────────────────────────────────────────────────────    │
+  │  挂载点: kprobe/tracepoint/XDP/TC/LSM/cgroup/uprobe       │
+  │  Maps:   Hash/Array/RingBuf/Perf/Sockmap/...              │
+  │  安全:   Verifier + BTF + CO-RE                           │
+  │  工具:   bpftool/libbpf/clang(BPF后端)                   │
+  └────────────────────────────────────────────────────────────┘
+
+关键原则:
+  ✓ 安全第一：Verifier 保证程序不崩溃内核
+  ✓ 高性能：JIT 编译，接近原生速度
+  ✓ 可移植：CO-RE + BTF，一次编译到处运行
+  ✓ 无需修改内核：动态加载，无侵入性
+```
+
+**参考资料**：
+- `kernel/bpf/verifier.c` — Verifier 实现
+- `net/core/filter.c` — 网络 BPF 实现
+- `samples/bpf/` — 内核自带示例
+- libbpf 文档: https://libbpf.readthedocs.io
+- bpftrace 参考手册: https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md
+- eBPF 官网: https://ebpf.io
+- Brendan Gregg 的 BPF 书: "BPF Performance Tools" (2019)

From 894100623043b02d61f0780fee3174a4183bf5e6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 12:16:20 +0000
Subject: [PATCH 07/10] =?UTF-8?q?docs:=20add=20chapters=2013=E2=80=9315=20?=
 =?UTF-8?q?(=E4=B8=AD=E6=96=AD=E4=B8=8E=E5=BC=82=E5=B8=B8/=E5=90=AF?=
 =?UTF-8?q?=E5=8A=A8=E6=B5=81=E7=A8=8B/=E5=86=85=E6=A0=B8=E8=B0=83?=
 =?UTF-8?q?=E8=AF=95=E4=B8=8E=E6=80=A7=E8=83=BD)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 13-中断与异常: IDT/APIC/softirq/tasklet/workqueue/threaded IRQ/IPI/
  中断亲和性/hrtimer/延迟测量 (1050 行)
- 14-启动流程深入: BIOS/UEFI/MBR/GRUB2/bzImage解压/head_64.S/
  start_kernel序列/initramfs/systemd/KASLR/kdump (1139 行)
- 15-内核调试与性能: ftrace/perf/FlameGraph/KASAN/KFENCE/KMSAN/
  UBSAN/lockdep/KCOV/kdump+crash/GDB+QEMU/bpftrace/livepatch/
  性能优化清单 (1542 行)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                | 1050 +++++++++++
 .../README.md"                                | 1139 ++++++++++++
 .../README.md"                                | 1542 +++++++++++++++++
 assets/diagrams/boot-flow.svg                 |  111 ++
 assets/diagrams/driver-model.svg              |   90 +
 assets/diagrams/ebpf-arch.svg                 |  123 ++
 assets/diagrams/irq-flow.svg                  |  102 ++
 assets/diagrams/sync-map.svg                  |  116 ++
 8 files changed, 4273 insertions(+)
 create mode 100644 "13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/README.md"
 create mode 100644 "14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
 create mode 100644 "15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
 create mode 100644 assets/diagrams/boot-flow.svg
 create mode 100644 assets/diagrams/driver-model.svg
 create mode 100644 assets/diagrams/ebpf-arch.svg
 create mode 100644 assets/diagrams/irq-flow.svg
 create mode 100644 assets/diagrams/sync-map.svg

diff --git "a/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/README.md" "b/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/README.md"
new file mode 100644
index 0000000..da8bbfc
--- /dev/null
+++ "b/13-\344\270\255\346\226\255\344\270\216\345\274\202\345\270\270/README.md"
@@ -0,0 +1,1050 @@
+# 13 — 中断与异常
+
+> **学习目标**：掌握 x86/ARM64 中断体系结构，理解 Linux 内核从硬件信号到驱动回调的完整处理路径，
+> 能够分析中断延迟、合理使用 softirq/tasklet/workqueue/线程化中断，并对生产系统进行中断调优。
+
+![中断处理完整路径](../assets/diagrams/irq-flow.svg)
+
+---
+
+## 目录
+
+| 节 | 主题 |
+|----|------|
+| 13.1 | 中断 vs 异常 |
+| 13.2 | x86 IDT — 256 个向量 |
+| 13.3 | APIC 架构 |
+| 13.4 | IRQ 线与 /proc/interrupts |
+| 13.5 | 中断处理上半部源码 |
+| 13.6 | 中断上下文约束 |
+| 13.7 | softirq |
+| 13.8 | tasklet |
+| 13.9 | workqueue (cmwq) |
+| 13.10 | 线程化中断 |
+| 13.11 | IPI 核间中断 |
+| 13.12 | 中断亲和性调优 |
+| 13.13 | 定时器中断与高精度时钟 |
+| 13.14 | 中断延迟测量 |
+
+---
+
+## 13.1 中断 vs 异常
+
+### 分类体系
+
+```
+CPU 控制流中断事件
+├── 同步事件（异常 Exception）— 由当前指令引发
+│   ├── 故障 Fault     — 可恢复，EIP 指向出错指令（缺页 #PF、通用保护 #GP）
+│   ├── 陷阱 Trap      — 可恢复，EIP 指向下一条指令（int3 断点、syscall）
+│   ├── 中止 Abort     — 不可恢复（双重故障 #DF、机器检查 #MC）
+│   └── 软件中断 INT n  — 程序主动触发（int 0x80 旧版 syscall）
+└── 异步事件（中断 Interrupt）— 与当前指令无关
+    ├── 可屏蔽中断 IRQ  — INTR 引脚，可被 CLI 屏蔽
+    └── 不可屏蔽中断 NMI— NMI 引脚，watchdog/内存错误
+```
+
+### 关键区别对比表
+
+| 维度 | 异常（同步） | 中断（异步） |
+|------|------------|------------|
+| 触发源 | CPU 执行指令 | 外部硬件/IPI |
+| 可预测性 | 确定性 | 随机时序 |
+| 保存的 RIP | 故障=出错指令；陷阱=下一条 | 被打断的下一条 |
+| 典型例子 | `#PF`(14), `#GP`(13), `#UD`(6) | 键盘(IRQ1), 网卡(MSI) |
+| 内核响应 | 发送信号/修复/杀进程 | 驱动 ISR + 下半部 |
+
+### x86 异常向量（部分）
+
+```c
+/* arch/x86/include/asm/trapnr.h */
+#define X86_TRAP_DE      0   /* 除零错误 */
+#define X86_TRAP_DB      1   /* 调试陷阱 */
+#define X86_TRAP_NMI     2   /* 不可屏蔽中断 */
+#define X86_TRAP_BP      3   /* 断点 int3 */
+#define X86_TRAP_OF      4   /* 溢出 */
+#define X86_TRAP_BR      5   /* 边界检查 */
+#define X86_TRAP_UD      6   /* 无效操作码 */
+#define X86_TRAP_NM      7   /* 设备不可用（FPU） */
+#define X86_TRAP_DF      8   /* 双重故障 */
+#define X86_TRAP_TS     10   /* 无效 TSS */
+#define X86_TRAP_NP     11   /* 段不存在 */
+#define X86_TRAP_SS     12   /* 栈段故障 */
+#define X86_TRAP_GP     13   /* 通用保护故障 */
+#define X86_TRAP_PF     14   /* 缺页故障 */
+#define X86_TRAP_MF     16   /* x87 FPU 错误 */
+#define X86_TRAP_AC     17   /* 对齐检查 */
+#define X86_TRAP_MC     18   /* 机器检查 */
+#define X86_TRAP_XF     19   /* SIMD FP 异常 */
+```
+
+---
+
+## 13.2 x86 中断向量表 (IDT)
+
+### IDT 结构
+
+x86-64 的 IDT 包含 256 个门描述符，每个 16 字节：
+
+```
+IDT（中断描述符表）
+┌─────────────────────────────────────────────┐
+│  向量 0–31   : CPU 保留异常（故障/陷阱/中止）  │
+│  向量 32–47  : PIC 8259 遗留 IRQ（已弃用）    │
+│  向量 32–255 : 硬件中断（APIC 分配）          │
+│  向量 128(0x80): Linux 系统调用（int 0x80）   │
+│  向量 236(0xEC): Local APIC 定时器           │
+│  向量 246(0xF6): IRQ Work IPI               │
+│  向量 247(0xF7): x86 平台 IPI               │
+│  向量 251(0xFB): 单核函数调用 IPI            │
+│  向量 252(0xFC): 函数调用 IPI                │
+│  向量 253(0xFD): 重调度 IPI                  │
+│  向量 255(0xFF): APIC Spurious（伪中断）      │
+└─────────────────────────────────────────────┘
+```
+
+### IDT 门描述符（64位）
+
+```c
+/* arch/x86/include/asm/desc_defs.h */
+struct gate_struct {
+    u16 offset_low;     /* 处理函数偏移 [15:0]  */
+    u16 segment;        /* 代码段选择子          */
+    struct idt_bits {
+        u16 ist   : 3;  /* IST 栈索引（NMI用）  */
+        u16 zero  : 5;
+        u16 type  : 5;  /* 0xE=中断门,0xF=陷阱门*/
+        u16 dpl   : 2;  /* 描述符特权级          */
+        u16 p     : 1;  /* 存在位               */
+    } bits;
+    u16 offset_middle;  /* 偏移 [31:16]          */
+    u32 offset_high;    /* 偏移 [63:32]          */
+    u32 reserved;
+} __attribute__((packed));
+```
+
+**中断门 vs 陷阱门**：中断门进入时自动 `CLI`（关中断），陷阱门不关。内核大多用中断门。
+
+### 加载 IDT
+
+```asm
+; arch/x86/kernel/idt.c → load_current_idt()
+lidt    idt_descr(%rip)   ; 加载 IDTR 寄存器
+                          ; idt_descr = {limit=0xFFF, base=idt_table}
+```
+
+---
+
+## 13.3 APIC 架构
+
+### 演进历史
+
+```
+8259A PIC（传统）
+  ├── 主 PIC：IRQ0-7  → 向量 32-39
+  └── 从 PIC：IRQ8-15 → 向量 40-47
+  缺点：仅支持单处理器，EOI 串行化
+
+APIC 体系（现代）
+  ├── Local APIC（每个 CPU 核心一个）
+  │   ├── 接收来自 IO-APIC 的中断
+  │   ├── 发送/接收 IPI
+  │   ├── 本地定时器（LAPIC Timer）
+  │   └── 性能计数器/温度传感器中断
+  ├── IO-APIC（芯片组，通常 1-3 个）
+  │   ├── 24~120 个输入引脚
+  │   ├── 重定向表（RTE）：引脚→向量→目标CPU
+  │   └── 支持电平/边沿触发
+  └── MSI / MSI-X（PCIe 设备直写 LAPIC）
+      ├── 无需中断线，写内存地址触发
+      ├── MSI：最多 32 个向量
+      └── MSI-X：最多 2048 个向量，每个独立配置
+```
+
+### Local APIC 寄存器（MMIO，基址 0xFEE00000）
+
+```c
+/* 关键寄存器偏移 */
+#define APIC_ID         0x020  /* APIC ID */
+#define APIC_LVR        0x030  /* 版本寄存器 */
+#define APIC_TASKPRI    0x080  /* 任务优先级（TPR）*/
+#define APIC_EOI        0x0B0  /* 中断结束寄存器 */
+#define APIC_LDR        0x0D0  /* 逻辑目标寄存器 */
+#define APIC_SPIV       0x0F0  /* 伪中断向量/使能位 */
+#define APIC_ICR        0x300  /* 中断命令寄存器（低32位）*/
+#define APIC_ICR2       0x310  /* 中断命令寄存器（高32位）*/
+#define APIC_LVTT       0x320  /* 定时器 LVT 条目 */
+#define APIC_TMICT      0x380  /* 定时器初始计数 */
+#define APIC_TMCCT      0x390  /* 定时器当前计数 */
+```
+
+### 查看 APIC 信息
+
+```bash
+# 查看 IO-APIC 路由表
+cat /proc/interrupts | head -5
+cat /sys/firmware/acpi/tables/APIC   # MADT 表
+
+# x2APIC 模式检查（现代大系统）
+dmesg | grep -i apic
+grep -i apic /proc/cpuinfo | head -3
+
+# 查看 MSI 分配
+lspci -v | grep -A5 "MSI"
+```
+
+---
+
+## 13.4 中断请求线 (IRQ)
+
+### /proc/interrupts 格式解析
+
+```
+           CPU0       CPU1       CPU2       CPU3
+  0:         46          0          0          0  IO-APIC   2-edge      timer
+  1:          0          0          0          9  IO-APIC   1-edge      i8042
+ 16:          0          0          0          0  IO-APIC  16-fasteoi   ehci_hcd
+ 23:          1          0          0          0  IO-APIC  23-fasteoi   ehci_hcd
+ 56:          0      74821          0          0  PCI-MSI 524288-edge   nvme0q0
+ 57:      12431          0      98234          0  PCI-MSI 524289-edge   nvme0q1
+
+列说明：
+  [Linux IRQ号] [各CPU计数...] [中断控制器(irqchip)] [硬件中断号-触发类型] [设备名]
+```
+
+### /proc/irq/N/ 目录结构
+
+```bash
+ls /proc/irq/56/
+# affinity_hint          驱动给出的建议亲和性掩码（供 irqbalance 参考）
+# effective_affinity     实际生效的亲和性掩码
+# effective_affinity_list  CPU 列表格式
+# node                   NUMA 节点
+# smp_affinity           CPU 亲和性位掩码（十六进制）
+# smp_affinity_list      CPU 亲和性列表（十进制范围）
+# spurious               伪中断统计
+
+# 读取 IRQ 56 的亲和性
+cat /proc/irq/56/smp_affinity       # e.g. "0000000f" = CPU 0-3
+cat /proc/irq/56/smp_affinity_list  # e.g. "0-3"
+
+# 设置 IRQ 56 只在 CPU2 上处理
+echo "4" > /proc/irq/56/smp_affinity        # 位掩码：CPU2=bit2=4
+echo "2" > /proc/irq/56/smp_affinity_list   # 直接指定 CPU2
+```
+
+### irqdesc 数据结构
+
+```c
+/* include/linux/irqdesc.h */
+struct irq_desc {
+    struct irq_common_data  irq_common_data;
+    struct irq_data         irq_data;
+    unsigned int __percpu  *kstat_irqs;  /* 每 CPU 计数 */
+    irq_flow_handler_t      handle_irq;  /* 流处理函数 */
+    struct irqaction       *action;      /* IRQ action 链表 */
+    unsigned int            status_use_accessors;
+    unsigned int            core_internal_state__do_not_mess_with_it;
+    unsigned int            depth;       /* 嵌套禁用计数 */
+    unsigned int            wake_depth;  /* wakeup 计数 */
+    unsigned int            tot_count;
+    unsigned int            irq_count;   /* 用于检测卡死 */
+    unsigned long           last_unhandled; /* 未处理时间戳 */
+    unsigned int            irqs_unhandled;
+    atomic_t                threads_handled;
+    int                     threads_handled_last;
+    raw_spinlock_t          lock;
+    struct cpumask          *percpu_enabled;
+    const struct cpumask    *percpu_affinity;
+    const struct cpumask    *affinity_hint;
+    struct irq_affinity_notify *affinity_notify;
+    cpumask_var_t           pending_mask;
+    unsigned long           threads_oneshot;
+    atomic_t                threads_active;
+    wait_queue_head_t       wait_for_threads;
+} ____cacheline_internodealigned_in_smp;
+```
+
+---
+
+## 13.5 中断处理上半部源码
+
+### 硬件触发到驱动回调的完整路径
+
+```
+硬件设备发出中断信号
+    ↓
+Local APIC 接收，写入 IRR（中断请求寄存器）
+    ↓
+CPU 完成当前指令，检查 IF 标志
+    ↓
+CPU 从 IDT[vector] 取门描述符，切换到内核栈
+    ↓
+common_interrupt()  ← arch/x86/kernel/irq.c
+    ↓
+handle_irq(irq_desc)
+    ↓
+desc->handle_irq(desc)  ← 流处理函数
+    │   handle_edge_irq()    — 边沿触发
+    │   handle_fasteoi_irq() — 电平触发（IO-APIC fasteoi）
+    │   handle_percpu_irq()  — per-CPU 中断
+    ↓
+__handle_irq_event_percpu(desc)
+    ↓
+for each action in desc->action:
+    action->handler(irq, action->dev_id)  ← 驱动 ISR
+    ↓
+写 LAPIC EOI 寄存器（告知中断结束）
+    ↓
+iret / eret  返回被打断的上下文
+```
+
+### 关键源码片段（kernel 6.x）
+
+```c
+/* arch/x86/kernel/irq.c */
+DEFINE_IDTENTRY_IRQ(common_interrupt)
+{
+    struct pt_regs *old_regs = set_irq_regs(regs);
+    struct irq_desc *desc;
+
+    /* 处理 APIC 向量，转换为 Linux IRQ 号 */
+    desc = __this_cpu_read(vector_irq[vector]);
+
+    if (likely(!IS_ERR_OR_NULL(desc))) {
+        handle_irq(desc, regs);
+    } else {
+        ack_APIC_irq();  /* 必须写 EOI，否则 APIC 卡死 */
+    }
+
+    set_irq_regs(old_regs);
+}
+
+/* kernel/irq/handle.c */
+irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
+{
+    irqreturn_t retval = IRQ_NONE;
+    unsigned int irq = desc->irq_data.irq;
+    struct irqaction *action;
+
+    record_irq_time(desc);
+
+    for_each_action_of_desc(desc, action) {
+        irqreturn_t res;
+
+        trace_irq_handler_entry(irq, action);
+        res = action->handler(irq, action->dev_id);
+        trace_irq_handler_exit(irq, action, res);
+
+        if (WARN_ONCE(!irqs_disabled(),
+                      "irq %u handler %ps enabled interrupts\n",
+                      irq, action->handler))
+            local_irq_disable();
+
+        switch (res) {
+        case IRQ_WAKE_THREAD:
+            __irq_wake_thread(desc, action);
+            /* fall through */
+        case IRQ_HANDLED:
+            retval = IRQ_HANDLED;
+            break;
+        default:
+            break;
+        }
+    }
+    return retval;
+}
+```
+
+### 注册中断
+
+```c
+/* 驱动中注册中断的标准方式 */
+ret = request_irq(irq,                    /* IRQ 号 */
+                  my_interrupt_handler,   /* 处理函数 */
+                  IRQF_SHARED,            /* 标志 */
+                  "my_device",            /* 名称（/proc/interrupts）*/
+                  dev);                   /* dev_id，共享 IRQ 用于区分 */
+
+/* IRQF 标志 */
+IRQF_SHARED        /* 允许多驱动共享同一 IRQ */
+IRQF_TRIGGER_RISING  /* 上升沿触发 */
+IRQF_TRIGGER_HIGH    /* 高电平触发（低电平为 IRQF_TRIGGER_LOW）*/
+IRQF_NOBALANCING     /* 禁止 irqbalance 调整 */
+IRQF_PERCPU          /* per-CPU 中断 */
+IRQF_NO_THREAD       /* 禁止线程化 */
+```
+
+---
+
+## 13.6 中断上下文约束
+
+### 为什么中断上下文不能睡眠
+
+```
+进程 A 运行在 CPU0
+    ↓
+硬件中断发生，CPU 跳入 ISR
+    ↓
+ISR 调用 mutex_lock() → 尝试睡眠
+    ↓
+schedule() 被调用 → 试图切换到进程 B
+    ↓
+问题：ISR 借用的是被打断的进程 A 的上下文（current 仍是 A），睡眠等于把无辜的 A 挂起；
+      且此时中断尚未处理完（未 EOI，可能关着中断或持有自旋锁），切走后无法保证能被恢复
+      → 内核报 "BUG: scheduling while atomic" / 死锁 / 系统挂起
+```
+
+### 检查是否在中断上下文
+
+```c
+/* include/linux/preempt.h */
+#define in_interrupt()    (irq_count())          /* 硬中断+软中断 */
+#define in_irq()          (hardirq_count())      /* 仅硬中断 */
+#define in_softirq()      (softirq_count())      /* 正在处理软中断，或 BH 已被禁用 */
+#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)  /* 仅正在处理软中断 */
+#define in_nmi()          (preempt_count() & NMI_MASK)
+#define in_task()         (!(in_nmi() | in_irq() | in_serving_softirq()))
+
+/* 调试：BUG_ON 检查 */
+might_sleep();           /* 若在中断上下文则警告 */
+BUG_ON(in_interrupt());  /* 强制检查 */
+```
+
+### 中断栈大小
+
+```bash
+# x86-64：每个 CPU 的中断栈 = 16KB（IRQ_STACK_SIZE）
+grep IRQ_STACK_SIZE arch/x86/include/asm/page_64_types.h
+# #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
+# IRQ_STACK_ORDER = 2（未启用 KASAN 时）→ 4 * 4096 = 16384 bytes
+
+# 查看某个内核线程当前的内核栈回溯（需 root；VmStk 是用户态栈，内核线程没有该字段）
+cat /proc/$(pgrep kworker | head -1)/stack
+
+# 特殊栈（x86-64 IST）：
+# IST1 = #DF 双重故障栈  (8KB)
+# IST2 = #NMI 不可屏蔽中断栈 (8KB)
+# IST3 = #DB 调试栈    (8KB)
+# IST4 = #MCE 机器检查栈 (8KB)
+```
+
+---
+
+## 13.7 softirq（软中断）
+
+### 10 种 softirq 类型
+
+```c
+/* include/linux/interrupt.h */
+enum {
+    HI_SOFTIRQ = 0,      /* 高优先级 tasklet */
+    TIMER_SOFTIRQ,       /* 定时器超时处理 */
+    NET_TX_SOFTIRQ,      /* 网络发送 */
+    NET_RX_SOFTIRQ,      /* 网络接收（最高频） */
+    BLOCK_SOFTIRQ,       /* 块设备 IO 完成 */
+    IRQ_POLL_SOFTIRQ,    /* IRQ 轮询 */
+    TASKLET_SOFTIRQ,     /* 普通 tasklet */
+    SCHED_SOFTIRQ,       /* 调度器（负载均衡）*/
+    HRTIMER_SOFTIRQ,     /* 高精度定时器 */
+    RCU_SOFTIRQ,         /* RCU 回调处理 */
+    NR_SOFTIRQS          /* = 10 */
+};
+```
+
+### softirq 执行路径
+
+```
+硬件中断 ISR 结尾
+    ↓
+raise_softirq(NET_RX_SOFTIRQ)
+    ↓
+设置 per-CPU 位图 __softirq_pending
+    ↓
+irq_exit() → __do_softirq()
+    ↓
+循环处理 pending 位图
+    for each pending softirq:
+        softirq_vec[i].action(h)  ← 注册的处理函数
+    如果处理超时(2ms)或新 softirq 出现 > 10次：
+        wakeup ksoftirqd/N 线程处理剩余
+```
+
+### 核心源码
+
+```c
+/* kernel/softirq.c */
+asmlinkage __visible void __softirq_entry __do_softirq(void)
+{
+    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;  /* 2ms 时限 */
+    unsigned long old_flags = current->flags;
+    int max_restart = MAX_SOFTIRQ_RESTART;           /* 10 次 */
+    struct softirq_action *h;
+    __u32 pending;
+    int softirq_bit;
+
+    pending = local_softirq_pending();
+
+restart:
+    set_softirq_pending(0);       /* 清除 pending 位图 */
+    local_irq_enable();           /* 重新开中断（允许新中断打断softirq）*/
+
+    h = softirq_vec;
+    while ((softirq_bit = ffs(pending))) {
+        unsigned int vec_nr;
+        int prev_count;
+
+        h += softirq_bit - 1;
+        vec_nr = h - softirq_vec;
+
+        trace_softirq_entry(vec_nr);
+        h->action(h);             /* 执行 softirq 处理函数 */
+        trace_softirq_exit(vec_nr);
+
+        h++;
+        pending >>= softirq_bit;
+    }
+
+    local_irq_disable();
+    pending = local_softirq_pending();
+    if (pending) {
+        if (time_before(jiffies, end) && !need_resched() &&
+            --max_restart)
+            goto restart;
+        wakeup_softirqd();        /* 唤醒 ksoftirqd */
+    }
+    current->flags |= old_flags & PF_MEMALLOC;
+}
+
+/* 注册 softirq 处理函数（softirq 编号在编译期静态分配，处理函数在启动初始化时注册）*/
+void open_softirq(int nr, void (*action)(struct softirq_action *))
+{
+    softirq_vec[nr].action = action;
+}
+```
+
+### ksoftirqd 线程
+
+```bash
+# 每个 CPU 一个 ksoftirqd 线程
+ps aux | grep ksoftirqd
+# root         14  0.0  0.0      0     0 ?  S    00:00   0:00 [ksoftirqd/0]
+# root         23  0.0  0.0      0     0 ?  S    00:00   0:00 [ksoftirqd/1]
+
+# 查看 softirq 统计
+cat /proc/softirqs
+#                     CPU0       CPU1       CPU2       CPU3
+#           HI:          1          0          0          0
+#        TIMER:     432891     412043     398012     441293
+#       NET_TX:        234        891        123        456
+#       NET_RX:    1234567    2345678     987654    1654321
+#        BLOCK:      98234      87654      76543      65432
+#     IRQ_POLL:          0          0          0          0
+#      TASKLET:       1234       2345       3456       4567
+#        SCHED:     234567     345678     456789     567890
+#      HRTIMER:      12345      23456      34567      45678
+#          RCU:     345678     456789     567890     678901
+```
+
+---
+
+## 13.8 tasklet
+
+### tasklet 基于 softirq 的实现
+
+```c
+/* include/linux/interrupt.h */
+struct tasklet_struct {
+    struct tasklet_struct *next;  /* 链表 */
+    unsigned long          state; /* TASKLET_STATE_SCHED/LOCK */
+    atomic_t               count; /* 禁用计数（tasklet_disable 递增），非零则禁用 */
+    bool                   use_callback;
+    union {
+        void (*func)(unsigned long);     /* 旧接口 */
+        void (*callback)(struct tasklet_struct *); /* 新接口 */
+    };
+    unsigned long          data;  /* 传给 func 的参数 */
+};
+
+/* 静态定义 */
+DECLARE_TASKLET(name, callback);
+DECLARE_TASKLET_DISABLED(name, callback);
+
+/* 动态初始化 */
+tasklet_init(t, func, data);
+tasklet_setup(t, callback);  /* 5.9+ 新接口 */
+
+/* 调度执行（在 TASKLET_SOFTIRQ 或 HI_SOFTIRQ）*/
+tasklet_schedule(&my_tasklet);
+tasklet_hi_schedule(&my_tasklet);  /* 高优先级 */
+
+/* 禁用/启用 */
+tasklet_disable(&my_tasklet);  /* 等待正在执行的完成 */
+tasklet_enable(&my_tasklet);
+tasklet_kill(&my_tasklet);     /* 确保不再运行后销毁 */
+```
+
+### ⚠️ tasklet 在 6.x 内核中的弃用
+
+```
+Linux 6.1+ 内核：tasklet 官方标记为 deprecated
+原因：
+  1. 不能并发执行（同一 tasklet 同时只在一个 CPU）
+  2. 不能睡眠（依然在 softirq 上下文）
+  3. 长时间延迟低优先级工作
+替代方案：
+  - 短小非睡眠工作 → 直接写 softirq（驱动核心用）
+  - 可睡眠工作     → workqueue
+  - 线程化处理     → request_threaded_irq()
+```
+
+---
+
+## 13.9 workqueue (cmwq)
+
+### Concurrency Managed Workqueue 架构
+
+```
+cmwq（并发管理 workqueue）架构（2.6.36+）
+
+驱动调用 queue_work(wq, &work)
+    ↓
+工作项加入 per-CPU 的 pool_workqueue
+    ↓
+worker_pool（bound：每个 CPU × 普通/高优先级；unbound：按属性/NUMA 节点共享）
+    ↓
+worker 线程（kworker/N:M 或 kworker/uN:M）执行 work->func()
+    ↓
+cmwq 按需动态创建/销毁 worker 线程（每个 workqueue 的 max_active 上限为 512）
+```
+
+### 创建和使用 workqueue
+
+```c
+/* 创建 workqueue */
+struct workqueue_struct *wq;
+
+/* 简单创建 */
+wq = create_singlethread_workqueue("my_wq");   /* 单线程（有序）*/
+wq = create_workqueue("my_wq");                /* 每CPU一个线程（弃用）*/
+
+/* 推荐：alloc_workqueue */
+wq = alloc_workqueue("my_wq",
+    WQ_UNBOUND |        /* 不绑定CPU，允许迁移 */
+    WQ_MEM_RECLAIM |    /* 参与内存回收 */
+    WQ_HIGHPRI |        /* 高优先级 worker */
+    WQ_FREEZABLE |      /* 休眠时冻结 */
+    WQ_SYSFS,           /* 在 sysfs 暴露 */
+    max_active);        /* 并发上限，0=默认 */
+
+/* 定义工作项 */
+DECLARE_WORK(my_work, my_work_handler);
+DECLARE_DELAYED_WORK(my_dwork, my_delayed_handler);
+
+/* 动态初始化 */
+INIT_WORK(&work, handler);
+INIT_DELAYED_WORK(&dwork, handler);
+
+/* 提交工作 */
+queue_work(wq, &work);
+queue_delayed_work(wq, &dwork, msecs_to_jiffies(100));
+
+/* 系统预定义 workqueue */
+schedule_work(&work);                    /* system_wq */
+schedule_delayed_work(&dwork, delay);   /* system_wq */
+
+/* 等待所有工作完成 */
+flush_workqueue(wq);
+flush_work(&work);          /* 等待特定工作完成 */
+cancel_work_sync(&work);    /* 取消并等待 */
+
+/* 销毁 */
+destroy_workqueue(wq);
+```
+
+### 查看 kworker 线程
+
+```bash
+# 列出所有 kworker 线程
+ps aux | grep kworker
+# kworker/0:1H  — CPU0, 线程1, H=高优先级
+# kworker/u8:3  — unbound, worker_pool ID 为 8（不是 CPU 数量）, 线程3
+
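+# 查看 workqueue 信息（只有带 WQ_SYSFS 标志的 workqueue 才会出现在 sysfs）
+ls /sys/bus/workqueue/devices/
+cat /sys/bus/workqueue/devices/writeback/cpumask
+
+# 统计：内核没有 /proc/workqueue_stats，可借助 workqueue tracepoint
+perf stat -e 'workqueue:workqueue_execute_start' -a sleep 5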
+```
+
+---
+
+## 13.10 线程化中断 (Threaded IRQ)
+
+### 原理与动机
+
+```
+传统中断模型：
+  硬中断（上半部）→ 快速处理 → softirq/tasklet/workqueue（下半部）
+  问题：softirq 在中断上下文，延迟大，实时性差
+
+线程化中断模型：
+  硬中断（最小处理，返回IRQ_WAKE_THREAD）
+      ↓
+  唤醒 irq/N-name 内核线程
+      ↓
+  线程中执行完整处理（可睡眠！可设置优先级！）
+  优势：可被调度器管理，支持实时优先级（PREEMPT_RT）
+```
+
+### request_threaded_irq()
+
+```c
+/* kernel/irq/manage.c */
+int request_threaded_irq(unsigned int irq,
+                         irq_handler_t handler,      /* 上半部（快速）*/
+                         irq_handler_t thread_fn,    /* 下半部（线程）*/
+                         unsigned long irqflags,
+                         const char *devname,
+                         void *dev_id);
+
+/* 示例：网卡驱动 */
+static irqreturn_t nic_hard_irq(int irq, void *dev_id)
+{
+    struct nic_priv *priv = dev_id;
+
+    /* 快速读取中断原因；共享 IRQ 下若不是本设备触发，必须返回 IRQ_NONE */
+    priv->irq_status = readl(priv->base + IRQ_STATUS);
+    if (!priv->irq_status)
+        return IRQ_NONE;
+    writel(priv->irq_status, priv->base + IRQ_CLEAR);   /* 清除中断 */
+    return IRQ_WAKE_THREAD;                             /* 唤醒线程处理 */
+}
+
+static irqreturn_t nic_thread_irq(int irq, void *dev_id)
+{
+    struct nic_priv *priv = dev_id;
+
+    /* 可以睡眠！可以调用 mutex_lock！*/
+    if (priv->irq_status & RX_COMPLETE)
+        nic_rx_process(priv);
+    if (priv->irq_status & TX_COMPLETE)
+        nic_tx_cleanup(priv);
+
+    return IRQ_HANDLED;
+}
+
+/* 注册：IRQF_ONESHOT 表示线程处理完前不重新使能中断 */
+request_threaded_irq(irq, nic_hard_irq, nic_thread_irq,
+                     IRQF_SHARED | IRQF_ONESHOT, "nic", priv);
+```
+
+### 查看线程化中断线程
+
+```bash
+# 查看中断线程
+ps aux | grep "irq/"
+# root       345  0.0  0.0  0  0 ? S  irq/56-nvme0q0
+
+# 设置实时优先级（对延迟敏感的中断）
+chrt -f -p 50 $(pgrep -f "irq/56-nvme")
+
+# 查看线程化 IRQ 的优先级
+cat /proc/$(pgrep -f "irq/56")/sched | grep policy
+```
+
+---
+
+## 13.11 IPI (核间中断)
+
+### IPI 类型与用途
+
+```c
+/* arch/x86/include/asm/irq_vectors.h — IPI 向量分配 */
+#define RESCHEDULE_VECTOR         0xfd  /* 重调度：wake_up_process 跨核 */
+#define CALL_FUNCTION_VECTOR      0xfc  /* smp_call_function_many() */
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfb /* smp_call_function_single() */
+#define REBOOT_VECTOR             0xf8  /* 重启 IPI */
+
+/* TLB shootdown：较新内核（约 3.6 起）已移除专用的 INVALIDATE_TLB_VECTOR_START 向量，*/
+/* flush_tlb_multi()（旧称 flush_tlb_others）经 smp_call_function 复用 CALL_FUNCTION_VECTOR */
+```
+
+### 发送 IPI
+
+```c
+/* 在特定 CPU 上执行函数 */
+smp_call_function_single(cpu,    /* 目标 CPU */
+                         func,   /* 要执行的函数 */
+                         info,   /* 参数 */
+                         wait);  /* 是否等待完成 */
+
+/* 在所有 CPU（除当前）上执行 */
+smp_call_function(func, info, wait);
+
+/* 触发重调度 IPI */
+smp_send_reschedule(cpu);  /* 告知目标 CPU 需要重新调度 */
+
+/* TLB 失效 IPI（arch/x86/mm/tlb.c）*/
+flush_tlb_mm_range(mm, start, end, stride_shift, freed_tables);
+```
+
+### IPI 性能影响
+
+```bash
+# 统计 TLB shootdown 次数
+perf stat -e tlb:tlb_flush -a sleep 5
+
+# 追踪 IPI
+trace-cmd record -e 'ipi:*' sleep 1
+trace-cmd report | grep ipi
+
+# 减少 TLB shootdown：
+# 1. 使用大页（减少 PTE 条目数）
+# 2. 进程绑定 CPU（减少跨核迁移）
+# 3. NUMA aware 分配
+```
+
+---
+
+## 13.12 中断亲和性调优
+
+### irqbalance 守护进程
+
+```bash
+# irqbalance 自动将中断分散到各 CPU
+systemctl status irqbalance
+cat /etc/sysconfig/irqbalance  # 或 /etc/default/irqbalance
+
+# 禁用特定 IRQ 的自动均衡（手动管理）
+# IRQBALANCE_ARGS="--banirq=56 --banirq=57 --banirq=58"
+
+# 查看当前分配
+irqbalance --debug --foreground --oneshot 2>&1 | head -50
+```
+
+### 手动设置亲和性
+
+```bash
+# 查找网卡中断
+grep eth0 /proc/interrupts
+# 或
+ls -la /sys/class/net/eth0/device/msi_irqs/
+
+# 将 IRQ 56-59 绑定到 CPU 4-7（位掩码 0xF0 = 11110000）
+for irq in 56 57 58 59; do
+    echo "f0" > /proc/irq/$irq/smp_affinity
+done
+
+# 使用列表格式（更直观）
+echo "4-7" > /proc/irq/56/smp_affinity_list
+
+# 配合 CPU 隔离（isolcpus）
+# 内核参数：isolcpus=4-7 nohz_full=4-7 rcu_nocbs=4-7
+# 隔离 CPU 不接收 irqbalance 分配的中断
+```
+
+### NOHZ_FULL 与中断影响
+
+```bash
+# 查看 NOHZ 配置
+cat /sys/devices/system/cpu/nohz_full   # 哪些 CPU 启用了 nohz_full
+cat /sys/devices/system/cpu/isolated    # 哪些 CPU 被隔离
+
+# 检查 timer tick 是否被禁用
+perf stat -C 4 -e irq_vectors:local_timer_entry sleep 5
+# 正常系统：~1000/s（HZ=1000）
+# nohz_full CPU：接近 0（该 CPU 上仅有一个可运行任务或空闲时，tick 被停止）
+```
+
+---
+
+## 13.13 定时器中断与高精度时钟
+
+### HZ 与 jiffies
+
+```c
+/* include/asm-generic/param.h */
+#define HZ     CONFIG_HZ  /* 通常 250 或 1000 */
+
+/* include/linux/jiffies.h */
+/* jiffies：系统启动以来的 tick 数（无符号长整型）*/
+extern unsigned long volatile jiffies;
+
+/* 时间转换宏 */
+msecs_to_jiffies(500)      /* 500ms → jiffies */
+jiffies_to_msecs(j)        /* jiffies → ms */
+time_after(a, b)           /* a > b（处理回绕）*/
+time_before(a, b)          /* a < b */
+```
+
+### hrtimer（高精度定时器）
+
+```c
+/* include/linux/hrtimer.h */
+struct hrtimer {
+    struct timerqueue_node  node;
+    ktime_t                 _softexpires;
+    enum hrtimer_restart  (*function)(struct hrtimer *);
+    struct hrtimer_clock_base *base;
+    u8                      state;
+    u8                      is_rel;
+    u8                      is_soft;
+    u8                      is_hard;
+};
+
+/* 使用 hrtimer */
+struct hrtimer htimer;
+hrtimer_init(&htimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+htimer.function = my_hrtimer_callback;
+hrtimer_start(&htimer, ns_to_ktime(1000000), HRTIMER_MODE_REL); /* 1ms */
+
+static enum hrtimer_restart my_hrtimer_callback(struct hrtimer *timer)
+{
+    /* 周期性：前进到下一个到期时间 */
+    hrtimer_forward_now(timer, ns_to_ktime(1000000));
+    return HRTIMER_RESTART;
+    /* 或一次性：return HRTIMER_NORESTART; */
+}
+```
+
+### 动态 tick (NO_HZ)
+
+```bash
+# 配置选项
+grep -E "NO_HZ|CONFIG_HZ" /boot/config-$(uname -r)
+# CONFIG_HZ_1000=y            — tick 频率 1000 Hz
+# CONFIG_NO_HZ_IDLE=y         — 空闲时停止 tick
+# CONFIG_NO_HZ_FULL=y         — 运行时也可停止（需 isolcpus）
+# CONFIG_HIGH_RES_TIMERS=y    — 高精度定时器
+
+# 查看当前时钟源（clocksource）
+cat /sys/devices/system/clocksource/clocksource0/current_clocksource
+# tsc 或 hpet 或 acpi_pm
+
+# 验证高精度定时器
+cat /proc/timer_list | grep -A3 "hrtimer"
+dmesg | grep "Timer resolution"
+```
+
+### 时钟源对比
+
+| 时钟源 | 精度 | 开销 | 适用场景 |
+|--------|------|------|----------|
+| TSC | 亚纳秒 | 极低 | 现代 SMP（invariant TSC）|
+| HPET | ~100ns | 中等 | 无 invariant TSC 时 |
+| ACPI PM | ~1µs | 高（IO 访问）| 回退方案 |
+| kvm-clock | 亚纳秒 | 极低 | 虚拟化环境 |
+
+---
+
+## 13.14 中断延迟测量
+
+### cyclictest（实时延迟测量）
+
+```bash
+# 安装
+apt install rt-tests  # 或 dnf install rt-tests
+
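+# 基本测试：测量中断/调度延迟
+# 注意：续行符 "\" 之后不能再写注释，否则续行失效，因此选项说明集中写在这里：
+#   --mlockall 锁定内存；--smp 每个 CPU 一个测试线程；--priority=99 实时优先级
+#   --policy=fifo 使用 SCHED_FIFO；--interval=200 周期 200µs
+#   --distance=0 各线程周期无偏移；--duration=60 持续 60 秒
+#   --histogram=400 记录 400µs 以内的延迟直方图；--histfile 指定直方图输出文件
+cyclictest --mlockall --smp \
+           --priority=99 --policy=fifo \
+           --interval=200 --distance=0 --duration=60 \
+           --histogram=400 --histfile=hist.txt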
+
+# 结果解读：
+# T: 0 (34521) P:99 I:200 C: 300000 Min:      4 Act:    6 Avg:    8 Max:      67
+# T=线程(TID), P=优先级, I=周期(µs), C=循环次数, Min/Act/Avg/Max 单位 µs
+# Max < 100µs → 良好；Max > 1ms → 需要调查
+```
+
+### ftrace irq_handler 追踪
+
+```bash
+# 挂载 tracefs
+mount -t tracefs tracefs /sys/kernel/tracing
+
+# 追踪中断处理时间
+cd /sys/kernel/tracing
+echo 0 > tracing_on
+echo "irq_handler_entry irq_handler_exit" > set_event
+echo 1 > tracing_on
+sleep 1
+echo 0 > tracing_on
+
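+# 分析结果：按 CPU 配对 entry/exit，用时间戳差值（µs）找出最慢的中断处理
+cat trace | awk 'match($0, /\[[0-9]+\]/) { cpu = substr($0, RSTART, RLENGTH) }
+/irq_handler_entry/ { match($0, /[0-9]+\.[0-9]+:/); t0[cpu] = substr($0, RSTART, RLENGTH - 1); irq[cpu] = $NF }
+/irq_handler_exit/  { match($0, /[0-9]+\.[0-9]+:/); if (cpu in t0) printf "%.1f %s\n", (substr($0, RSTART, RLENGTH - 1) - t0[cpu]) * 1e6, irq[cpu] }
+' | sort -n | tail -20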
+```
+
+### perf stat 中断统计
+
+```bash
+# 统计中断相关 tracepoint 事件（每个事件单独用 -e，避免续行后的缩进把事件列表拆开）
+perf stat \
+  -e irq_vectors:local_timer_entry \
+  -e irq_vectors:reschedule_entry \
+  -e irq_vectors:call_function_entry \
+  -e irq_vectors:irq_work_entry \
+  -a sleep 5
+
+# 追踪高频中断驱动
+perf top -e irq:irq_handler_entry --sort comm,sym
+
+# 记录中断处理入口事件（含时间戳与 CPU，供 perf script 后续分析；不直接给出硬件到 ISR 的延迟）
+perf record -e irq:irq_handler_entry -a sleep 5
+perf script | head -50
+```
+
+### 诊断中断延迟抖动
+
+```bash
+# 1. 检查是否有 SMI（系统管理中断）
+# SMI 是 BIOS/固件触发的不可屏蔽中断，可造成 >1ms 延迟
+# 使用 MSR 0x34 读取 SMI 计数器
+rdmsr -a 0x34   # 需要 msr 内核模块
+
+# 2. 检查 CPU 频率抖动
+grep MHz /proc/cpuinfo | sort -u
+cpupower frequency-info
+
+# 3. 禁用 CPU 深度睡眠（C-state）
+cpupower idle-set -D 1  # 禁用退出延迟 ≥ 1µs 的所有 idle 状态（保留哪些取决于平台）
+# 或内核参数：intel_idle.max_cstate=1
+
+# 4. 关闭 SMT（超线程）减少干扰
+echo off > /sys/devices/system/cpu/smt/control
+
+# 5. 检查 NUMA 跨节点内存访问
+numastat -c | head -20
+```
+
+---
+
+## 参考资料
+
+| 资源 | 链接/位置 |
+|------|----------|
+| Linux 内核源码 | `kernel/irq/`, `arch/x86/kernel/irq.c` |
+| APIC 规范 | Intel SDM Vol.3 Chapter 10 |
+| hrtimer 文档 | `Documentation/timers/hrtimers.rst` |
+| IRQ 文档 | `Documentation/core-api/genericirq.rst` |
+| cyclictest | `https://wiki.linuxfoundation.org/realtime/documentation/howto/tools/cyclictest` |
+| ftrace 文档 | `Documentation/trace/ftrace.rst` |
+
+```bash
+# 快速参考命令汇总
+watch -n1 'cat /proc/interrupts'           # 实时监控中断
+cat /proc/softirqs                         # softirq 统计
+cat /proc/irq/*/spurious                   # 伪中断统计
+echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi  # NMI 触发 panic
+dmesg | grep -i "irq\|interrupt" | tail -30  # 内核中断日志
+```
diff --git "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md" "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
new file mode 100644
index 0000000..431e16f
--- /dev/null
+++ "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
@@ -0,0 +1,1139 @@
+# 14 — 启动流程深入
+
+> **学习目标**：彻底理解从按下电源键到第一个用户进程运行的每个阶段，包括固件初始化、
+> 引导加载程序、内核解压、早期初始化、initramfs 和 systemd 启动序列，能够调试各阶段故障。
+
+![Linux 启动流程](../assets/diagrams/boot-flow.svg)
+
+---
+
+## 目录
+
+| 节 | 主题 |
+|----|------|
+| 14.1 | BIOS vs UEFI 对比 |
+| 14.2 | MBR 传统启动 |
+| 14.3 | UEFI 启动链 |
+| 14.4 | GRUB2 深入 |
+| 14.5 | 内核命令行参数 |
+| 14.6 | bzImage 解压流程 |
+| 14.7 | arch/x86/boot/main.c |
+| 14.8 | head_64.S 初始页表 |
+| 14.9 | start_kernel() 调用序列 |
+| 14.10 | initramfs / initrd |
+| 14.11 | PID 1: systemd |
+| 14.12 | KASLR |
+| 14.13 | 启动时间优化 |
+| 14.14 | 调试启动问题 |
+
+---
+
+## 14.1 BIOS vs UEFI 对比
+
+### 详细对比表
+
+| 特性 | Legacy BIOS | UEFI |
+|------|-------------|------|
+| 固件接口标准 | IBM PC 1981，非标准 | UEFI 规范 2.x（统一） |
+| 寻址空间 | 16 位实模式，1MB 上限 | 64 位长模式（32 位固件为保护模式），可访问全部内存 |
+| 磁盘分区表 | MBR（最大 2TB，4 主分区）| GPT（最大 9.4ZB，128 分区）|
+| 安全启动 | 不支持 | Secure Boot（数字签名验证）|
+| 网络启动 | PXE（仅 BIOS 阶段）| PXE + HTTP Boot |
+| 固件驱动 | 汇编/16位C | EFI 驱动（PE 格式）|
+| 启动时间 | 慢（POST 全量检测）| 快（可跳过硬件初始化）|
+| 交互界面 | 文本 VGA | 图形 GOP（支持鼠标）|
+| 变量存储 | CMOS（256字节）| NVRAM（EFI 变量，任意大小）|
+| 兼容性 | CSM 兼容模块提供 | 原生 UEFI 或 CSM 回退 |
+| 启动入口 | MBR 第一扇区 (0x7C00) | EFI 系统分区 .efi 文件 |
+
+### UEFI 启动管理器
+
+```bash
+# 查看 UEFI 启动条目
+efibootmgr -v
+# BootOrder: 0001,0000,0002
+# Boot0000* ubuntu  HD(...)/EFI/ubuntu/shimx64.efi
+# Boot0001* Windows HD(...)/EFI/Microsoft/Boot/bootmgfw.efi
+
+# 添加启动条目
+efibootmgr --create \
+    --disk /dev/sda \
+    --part 1 \
+    --label "My Linux" \
+    --loader '\EFI\linux\grubx64.efi'
+
+# 设置启动顺序
+efibootmgr --bootorder 0000,0001
+
+# 查看 EFI 变量
+ls /sys/firmware/efi/efivars/
+cat /sys/firmware/efi/fw_platform_size  # 32 或 64
+```
+
+---
+
+## 14.2 MBR 传统启动
+
+### MBR 512 字节结构
+
+```
+MBR（Master Boot Record）布局
+╔══════════════════════════════════════════╗
+║  偏移    大小    内容                     ║
+╠══════════════════════════════════════════╣
+║  0x000   446字节  Bootstrap 代码（Stage1）║
+║  0x1BE    16字节  分区表条目 1            ║
+║  0x1CE    16字节  分区表条目 2            ║
+║  0x1DE    16字节  分区表条目 3            ║
+║  0x1EE    16字节  分区表条目 4            ║
+║  0x1FE     2字节  魔数 0x55AA            ║
+╚══════════════════════════════════════════╝
+
+分区表条目（16字节）：
+  [0]    状态（0x80=可启动，0x00=不可启动）
+  [1-3]  CHS 起始地址（已过时）
+  [4]    分区类型（0x83=Linux，0x82=Swap，0x8E=LVM）
+  [5-7]  CHS 结束地址（已过时）
+  [8-11] LBA 起始扇区（32位，支持到2TB）
+  [12-15]扇区数量
+```
+
+### GRUB 在 BIOS/MBR 下的三阶段启动（沿用 GRUB Legacy 的 Stage 叫法）
+
+```
+Stage 1  (MBR 446字节；GRUB2 中对应 boot.img)
+  ↓  加载 Stage1.5（位于 MBR 之后的扇区，不依赖文件系统）
+Stage 1.5 (~32KB，嵌入 MBR 间隙；GRUB2 中对应 core.img)
+  ↓  理解文件系统（ext4/xfs/fat）
+  ↓  从文件系统加载 Stage2
+Stage 2  (GRUB2：/boot/grub/ 下的模块 + grub.cfg；GRUB Legacy：stage2 + menu.lst)
+  ↓  显示菜单，解析配置
+  ↓  加载内核 + initramfs
+  ↓  传递参数，跳转执行
+```
+
+### 查看和备份 MBR
+
+```bash
+# 备份 MBR
+dd if=/dev/sda of=mbr_backup.bin bs=512 count=1
+
+# 查看分区表
+fdisk -l /dev/sda
+hexdump -C mbr_backup.bin | head -40
+
+# 恢复 MBR（保留分区表）
+dd if=mbr_backup.bin of=/dev/sda bs=446 count=1
+
+# 查看 GRUB 版本及 /boot 所在设备（grub-install 本身不提供 --dry-run）
+grub-install --version && grub-probe --target=device /boot
+```
+
+---
+
+## 14.3 UEFI 启动链
+
+### EFI 系统分区 (ESP)
+
+```
+ESP（FAT32 格式，通常 100-512MB）
+└── EFI/
+    ├── BOOT/
+    │   └── BOOTX64.EFI          ← 默认启动（可移动介质）
+    ├── ubuntu/
+    │   ├── shimx64.efi          ← Shim（处理 Secure Boot）
+    │   ├── grubx64.efi          ← GRUB EFI 版本
+    │   └── grub.cfg
+    ├── Microsoft/
+    │   └── Boot/
+    │       └── bootmgfw.efi
+    └── linux/
+        └── vmlinuz.efi          ← EFI stub 直接启动
+```
+
+### Secure Boot 信任链
+
+```
+UEFI 固件（包含 PK/KEK/db 密钥）
+    ↓  验证签名
+shimx64.efi（微软签名的 Shim）
+    ↓  验证 GRUB 签名（使用 MOK 或发行版密钥）
+grubx64.efi（发行版签名的 GRUB）
+    ↓  验证内核签名
+vmlinuz（发行版签名的内核）
+    ↓  内核验证模块签名（CONFIG_MODULE_SIG）
+.ko 模块文件
+
+密钥层次：
+  PK（平台密钥）→ 设备制造商（OEM）
+  KEK（密钥交换密钥）→ 操作系统厂商
+  db（签名数据库）→ 允许的启动程序
+  dbx（黑名单数据库）→ 禁止的启动程序
+```
+
+### EFI Stub 直接启动（无需 GRUB）
+
+```bash
+# 内核自带 EFI stub（CONFIG_EFI_STUB=y）
+# 可以直接从 UEFI 加载内核
+
+# 查看 EFI stub 支持
+grep EFI_STUB /boot/config-$(uname -r)
+
+# 安装内核为 EFI 启动项
+efibootmgr --create \
+    --disk /dev/nvme0n1 \
+    --part 1 \
+    --label "Linux EFI Stub" \
+    --loader '\vmlinuz-6.1.0' \
+    --unicode 'root=/dev/nvme0n1p2 rw console=tty0 initrd=\initrd.img-6.1.0'
+
+# 查看 ESP 挂载点
+cat /proc/mounts | grep vfat
+ls /sys/firmware/efi/
+```
+
+---
+
+## 14.4 GRUB2 深入
+
+### grub.cfg 语法解析
+
+```bash
+# /boot/grub/grub.cfg（自动生成，勿手动修改）
+# 手动配置：/etc/grub.d/ + /etc/default/grub
+
+# 菜单项结构
+menuentry 'Ubuntu, Linux 6.1.0-generic' {
+    # 记录当前启动项
+    recordfail
+    # 设置图形模式
+    gfxmode $linux_gfx_mode
+
+    # 加载模块
+    insmod gzio            # gzip 解压支持
+    insmod part_gpt        # GPT 分区支持
+    insmod ext2            # ext4 文件系统支持
+
+    # 搜索包含指定 UUID 的设备
+    search --no-floppy --fs-uuid --set=root abc123...
+
+    # 加载内核
+    linux   /boot/vmlinuz-6.1.0 root=UUID=abc123 \
+            ro quiet splash $vt_handoff
+
+    # 加载 initramfs
+    initrd  /boot/initrd.img-6.1.0
+}
+
+# 子菜单
+submenu '高级选项 for Ubuntu' $menuentry_id_option ... {
+    menuentry 'Ubuntu, Linux 6.1.0 (recovery mode)' {
+        linux /boot/vmlinuz-6.1.0 root=UUID=... ro recovery \
+              nomodeset dis_ucode_ldr
+        initrd /boot/initrd.img-6.1.0
+    }
+}
+```
+
+### 重新生成 grub.cfg
+
+```bash
+# Ubuntu/Debian
+update-grub
+# 或
+grub-mkconfig -o /boot/grub/grub.cfg
+
+# RHEL/CentOS（BIOS）
+grub2-mkconfig -o /boot/grub2/grub.cfg
+
+# RHEL/CentOS（UEFI）
+grub2-mkconfig -o /boot/efi/EFI/redhat/grub.cfg
+
+# /etc/default/grub 关键选项
+GRUB_TIMEOUT=5                  # 菜单超时秒数
+GRUB_DEFAULT=0                  # 默认启动项
+GRUB_CMDLINE_LINUX_DEFAULT="quiet splash"  # 普通启动参数
+GRUB_CMDLINE_LINUX=""           # 所有启动（包括 recovery）
+GRUB_DISABLE_RECOVERY="false"   # 是否显示恢复模式
+GRUB_GFXMODE="1024x768"        # 图形分辨率
+```
+
+### GRUB 命令行调试
+
+```bash
+# 在 GRUB 菜单按 'c' 进入命令行
+grub> ls                         # 列出设备
+grub> ls (hd0,gpt2)/             # 列出分区文件
+grub> set root=(hd0,gpt2)
+grub> linux /boot/vmlinuz root=/dev/sda2 ro debug
+grub> initrd /boot/initrd.img
+grub> boot
+
+# 常用 GRUB 命令
+grub> lsmod                      # 已加载模块
+grub> insmod ext2                # 加载模块
+grub> cat /boot/grub/grub.cfg   # 查看文件
+grub> search --fs-uuid UUID      # 按 UUID 搜索
+grub> configfile /boot/grub/grub.cfg  # 加载配置
+```
+
+---
+
+## 14.5 内核命令行重要参数
+
+### 完整参数表
+
+| 参数 | 示例值 | 说明 |
+|------|--------|------|
+| `root` | `/dev/sda2`, `UUID=abc` | 根文件系统设备 |
+| `rootfstype` | `ext4`, `xfs` | 根文件系统类型 |
+| `rw` / `ro` | — | 根分区读写/只读挂载 |
+| `init` | `/sbin/init`, `/bin/bash` | 第一个用户进程 |
+| `console` | `tty0`, `ttyS0,115200n8` | 内核控制台 |
+| `quiet` | — | 抑制启动消息 |
+| `splash` | — | 显示启动动画 |
+| `debug` | — | 启用详细调试输出 |
+| `loglevel` | `7` (=KERN_DEBUG) | 日志级别 |
+| `ignore_loglevel` | — | 输出所有消息 |
+| `nokaslr` | — | 禁用内核地址随机化 |
+| `noapic` | — | 禁用 APIC |
+| `pci=noacpi` | — | PCI 扫描与中断路由不使用 ACPI |
+| `acpi=off` | — | 完全禁用 ACPI |
+| `nomodeset` | — | 禁用 KMS 显卡模式设置 |
+| `mem` | `4G` | 限制内存使用 |
+| `maxcpus` | `4` | 限制 CPU 数量 |
+| `isolcpus` | `2-7` | 隔离 CPU |
+| `nohz_full` | `2-7` | NOHZ full CPU |
+| `rcu_nocbs` | `2-7` | RCU 卸载 CPU |
+| `intel_idle.max_cstate` | `1` | 最大 C-state |
+| `crashkernel` | `256M` | kdump 预留内存 |
+| `nosmt` | — | 禁用超线程 |
+| `mitigations` | `off` | 禁用 CPU 漏洞缓解 |
+| `selinux` | `0` | 禁用 SELinux |
+| `enforcing` | `0` | SELinux 宽容模式 |
+| `rd.break` | — | 在 initramfs 中断 |
+| `single` / `1` | — | 单用户模式 |
+| `systemd.unit` | `rescue.target` | systemd 目标 |
+| `earlycon` | — | 早期串口控制台 |
+| `earlyprintk` | `vga`, `serial` | 早期 printk 输出 |
+| `panic` | `10` | Panic 后自动重启秒数 |
+
+### 运行时查看和修改
+
+```bash
+# 查看当前命令行
+cat /proc/cmdline
+# BOOT_IMAGE=/boot/vmlinuz-6.1.0 root=UUID=... ro quiet splash
+
+# 查看某个模块支持的参数（modinfo 必须指定模块名）
+/sbin/modinfo -p nvme  # 模块参数
+# 内核参数文档
+man 7 bootparam
+ls /sys/module/*/parameters/   # 已加载模块参数
+```
+
+---
+
+## 14.6 解压流程
+
+### bzImage 结构
+
+```
+bzImage（压缩内核）文件布局
+┌──────────────────────────────────────────────┐
+│  bootsector  (512字节)  — 实模式引导扇区       │
+│    偏移 0x1F1: setup_sects（setup 扇区数）     │
+│    偏移 0x202: magic "HdrS"                   │
+│    偏移 0x20E: kernel_version（版本串指针）   │
+│    偏移 0x214: code32_start（保护模式入口）    │
+│    偏移 0x218: ramdisk_image（initrd 地址）   │
+├──────────────────────────────────────────────┤
+│  setup.bin (setup_sects × 512字节)            │
+│    — 实模式内核初始化代码                      │
+│    — 收集 BIOS 信息（内存图/VBE/APM）         │
+├──────────────────────────────────────────────┤
+│  vmlinux.bin.gz（或 .xz / .lzo / .zst）      │
+│    — 压缩的 ELF 内核镜像                      │
+│    — 包含解压代码（arch/x86/boot/compressed/）│
+└──────────────────────────────────────────────┘
+```
+
+### 解压执行流程
+
+```
+GRUB 加载 bzImage → 内存 0x100000（1MB）
+    ↓
+实模式 setup 代码运行（arch/x86/boot/main.c）
+    ↓  BIOS 探测：内存/APM/EDD/视频
+    ↓  设置堆，验证 CPU 类型
+    ↓  进入保护模式（go_to_protected_mode()）
+保护模式解压代码（arch/x86/boot/compressed/head_64.S）
+    ↓  建立临时页表（身份映射），开启分页并切换到 64 位长模式
+    ↓  extract_kernel()（旧称 decompress_kernel()）
+    ↓  KASLR：choose_random_location() 先随机选定加载地址
+    ↓  __decompress() 解压出 vmlinux，parse_elf() 按 ELF 段搬移到位
+    ↓  跳入解压后内核入口
+arch/x86/kernel/head_64.S（startup_64，进入时已处于 64 位长模式）
+    ↓  按实际加载地址修正页表，切换到正式初始页表（init_top_pgt）
+    ↓  设置 GDT/IDT
+    ↓  加载内核栈与段寄存器，跳转到内核高地址
+    ↓  调用 x86_64_start_kernel()
+    ↓  → start_kernel()
+```
+
+### 查看 vmlinux 信息
+
+```bash
+# 查看内核版本和配置
+file /boot/vmlinuz-$(uname -r)
+# Linux kernel x86 boot executable bzImage, version 6.1.0...
+
+# 提取解压后的内核（用于调试）
+/usr/src/linux-headers-$(uname -r)/scripts/extract-vmlinux \
+    /boot/vmlinuz-$(uname -r) > vmlinux
+
+# 查看 ELF 段
+readelf -S vmlinux | grep -E "Name|\.text|\.data|\.bss"
+
+# 查看内核符号
+nm vmlinux | grep " T " | sort | head -20
+```
+
+---
+
+## 14.7 arch/x86/boot/main.c
+
+### 实模式初始化序列
+
+```c
+/* arch/x86/boot/main.c — 实模式 C 代码入口 */
+void main(void)
+{
+    /* 首先：复制引导参数到 zeropage */
+    copy_boot_params();
+
+    /* 初始化早期串口（earlycon）*/
+    console_init();
+    if (cmdline_find_option_bool("debug"))
+        puts("early console in setup code\n");
+
+    /* 初始化堆 */
+    init_heap();
+
+    /* 验证 CPU 是否满足最低要求 */
+    if (validate_cpu()) {
+        puts("Unable to boot - please use a kernel appropriate "
+             "for your CPU.\n");
+        die();
+    }
+
+    /* 告知 BIOS 内核将以何种 CPU 模式运行 */
+    set_bios_mode();
+
+    /* 探测内存（E820 内存图）*/
+    detect_memory();
+
+    /* 键盘初始化 */
+    keyboard_init();
+
+    /* 查询 Intel SpeedStep 信息 */
+    query_ist();
+
+    /* 查询 APM（高级电源管理）*/
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+    query_apm_bios();
+#endif
+
+    /* 查询 EDD（增强磁盘驱动）*/
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+    query_edd();
+#endif
+
+    /* 设置视频模式 */
+    set_video();
+
+    /* 进入保护模式（不会返回）*/
+    go_to_protected_mode();
+}
+```
+
+### E820 内存图探测
+
+```bash
+# 查看 BIOS 提供的物理内存映射
+dmesg | grep -A100 "BIOS-provided" | grep "BIOS-e820"
+# BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
+# BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
+# BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
+# BIOS-e820: [mem 0x0000000000100000-0x00000000bffdffff] usable
+# BIOS-e820: [mem 0x00000000bffe0000-0x00000000bfffffff] reserved（ACPI）
+# BIOS-e820: [mem 0x00000000fec00000-0x00000000fec00fff] reserved（IO-APIC）
+# BIOS-e820: [mem 0x0000000100000000-0x000000013fffffff] usable（4GB以上）
+
+# 完整内存信息
+cat /proc/iomem | head -30
+```
+
+---
+
+## 14.8 arch/x86/kernel/head_64.S
+
+### 建立初始页表
+
+```asm
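+/* 概念示意（非源码原文）：开启 PAE / EFER.LME / CR0.PG 的步骤实际位于
+ * arch/x86/boot/compressed/head_64.S 的 startup_32；
+ * arch/x86/kernel/head_64.S 的 startup_64 进入时已处于 64 位长模式 */
+    .code64
+    .section ".head.text","ax"
+
+ENTRY(startup_64)
+    /*
+     * 下面把"清 BSS → 装载页表 → 切换长模式 → 跳入 C 代码"的关键步骤集中展示
+     */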
+
+    /* 清零 BSS 段 */
+    xorl    %eax, %eax
+    leaq    _bss(%rip), %rdi
+    leaq    _ebss(%rip), %rcx
+    subq    %rdi, %rcx
+    shrq    $3, %rcx
+    rep stosq
+
+    /* 建立初始页表（身份映射 + 内核高地址映射）*/
+    /* PGD (Page Global Directory) → 512 GB 每项 */
+    leaq    init_top_pgt(%rip), %rax
+    movq    %rax, %cr3            /* 加载页表基址 */
+
+    /* 启用 PAE, 设置 CR4 */
+    movl    %cr4, %eax
+    orl     $X86_CR4_PAE, %eax
+    movl    %eax, %cr4
+
+    /* 设置 EFER（Extended Feature Enable Register）*/
+    /* 启用长模式位 LME */
+    movl    $MSR_EFER, %ecx
+    rdmsr
+    orl     $EFER_LME, %eax
+    wrmsr
+
+    /* 启用分页（CR0.PG=1），进入 64 位模式 */
+    movl    %cr0, %eax
+    orl     $X86_CR0_PG, %eax
+    movl    %eax, %cr0
+
+    /* 跳入 64 位 C 代码 */
+    pushq   $__KERNEL_CS
+    leaq    x86_64_start_kernel(%rip), %rax
+    pushq   %rax
+    lretq
+
+/* 初始页全局目录（静态分配）*/
+NEXT_PAGE(init_top_pgt)
+    /* 4 级分页：PML4 → PDPT → PD → PT → 物理页 */
+    .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+    .fill   510, 8, 0    /* 中间 510 项清零（简化：省略直接映射区的表项；共 512 项）*/
+    .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+```
+
+### 内核虚拟地址空间布局（x86-64）
+
+```
+虚拟地址空间（x86-64，4级分页，48位）
+┌─────────────────────────────────────────────────┐
+│ 0xFFFFFFFF80000000 ~ 0xFFFFFFFFFFFFFFFF（2GB）  │
+│   内核代码/数据（vmlinux）                       │
+│   .text .data .bss                              │
+├─────────────────────────────────────────────────┤
+│ 0xFFFFC90000000000 ~ （vmalloc/ioremap，32TB）  │
+├─────────────────────────────────────────────────┤
+│ 0xFFFF888000000000 ~ （物理内存直接映射，64TB） │
+│   physmem[0..N] 的完整映射                       │
+├─────────────────────────────────────────────────┤
+│ 0x0000000000000000 ~ 0x00007FFFFFFFFFFF（128TB） │
+│   用户空间                                       │
+└─────────────────────────────────────────────────┘
+
+查看实际布局：
+  cat /proc/kallsyms | grep -E " _text| _etext| _data| _end$"
+  cat /proc/vmallocinfo | head -20
+```
+
+---
+
+## 14.9 start_kernel() 完整调用序列
+
+```c
+/* init/main.c — start_kernel()，约 150 行初始化调用 */
+asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+{
+    char *command_line;
+    char *after_dashes;
+
+    /* ① 基础数据结构 */
+    set_task_stack_end_magic(&init_task);     /* 设置 init 进程栈哨兵 */
+    smp_setup_processor_id();                 /* 设置 boot CPU ID */
+    debug_objects_early_init();               /* 调试对象系统早期初始化 */
+
+    /* ② 内核地址空间初始化 */
+    cgroup_init_early();                      /* cgroup 早期初始化 */
+    local_irq_disable();                      /* 关中断 */
+    early_boot_irqs_disabled = true;
+
+    /* ③ 硬件探测 */
+    boot_cpu_init();                          /* 激活 boot CPU */
+    page_address_init();                      /* 高端内存页地址哈希表 */
+    pr_notice("%s", linux_banner);            /* 打印内核版本横幅 */
+    early_security_init();                    /* LSM 早期初始化 */
+    setup_arch(&command_line);                /* 体系结构相关初始化 */
+                                              /*   x86: ACPI/APIC/内存探测 */
+    setup_boot_config();                      /* 解析 bootconfig */
+    setup_command_line(command_line);         /* 保存命令行 */
+    setup_nr_cpu_ids();                       /* 确定 CPU 数量 */
+    setup_per_cpu_areas();                    /* 分配 per-CPU 区域 */
+    smp_prepare_boot_cpu();                   /* 准备 boot CPU SMP */
+    boot_cpu_hotplug_init();
+
+    /* ④ 内存管理 */
+    build_all_zonelists(NULL);                /* 建立内存区域链表 */
+    page_alloc_init();                        /* 页分配器初始化 */
+    pr_notice("Kernel command line: %s\n", saved_command_line);
+    parse_early_param();                      /* 解析 early_param */
+    after_dashes = parse_args("Booting kernel", ...);
+    jump_label_init();                        /* 静态键初始化 */
+    setup_log_buf(0);                         /* 设置 printk 缓冲区 */
+    vfs_caches_init_early();                  /* VFS 早期缓存 */
+    sort_main_extable();                      /* 排序异常表 */
+    trap_init();                              /* 初始化 IDT 陷阱 */
+    mm_init();                                /* 内存管理子系统 */
+                                              /*   mem_init(): 释放 bootmem */
+                                              /*   kmem_cache_init(): slub */
+                                              /*   vmalloc_init()          */
+
+    /* ⑤ 调度器 */
+    ftrace_init();                            /* ftrace 初始化 */
+    early_trace_init();
+    sched_init();                             /* 调度器初始化（CFS/RT/DL）*/
+    preempt_disable();                        /* 禁止抢占（早期启动阶段调度尚不稳定，与 per-CPU 区域无关）*/
+
+    /* ⑥ 中断系统 */
+    radix_tree_init();                        /* 基数树初始化 */
+    maple_tree_init();
+    housekeeping_init();
+    workqueue_init_early();                   /* workqueue 早期初始化 */
+    rcu_init();                               /* RCU 子系统初始化 */
+    trace_init();                             /* tracepoint 初始化 */
+    initcall_debug_enable();
+    context_tracking_init();
+    early_irq_init();                         /* 早期中断初始化 */
+    init_IRQ();                               /* 架构相关 IRQ 初始化 */
+    tick_init();                              /* tick 子系统 */
+    rcu_init_nohz();
+    init_timers();                            /* 低精度定时器初始化 */
+    srcu_init();
+    hrtimers_init();                          /* 高精度定时器初始化 */
+    softirq_init();                           /* softirq 初始化 */
+    timekeeping_init();                       /* 时间记录初始化 */
+    time_init();                              /* 体系结构时间初始化 */
+    random_init();                            /* 随机数生成器 */
+    kfence_init();                            /* KFENCE 初始化 */
+
+    /* ⑦ 控制台与显示 */
+    console_init();                           /* 初始化控制台驱动 */
+    if (panic_later)
+        panic("Too many boot %s vars at `%s'", panic_later, ...);
+    lockdep_init();                           /* 死锁检测初始化 */
+
+    /* ⑧ 系统调用 / 安全 */
+    lsm_early_init();
+    perf_event_init();                        /* perf 事件初始化 */
+    profile_init();                           /* 性能剖析初始化 */
+    call_function_init();                     /* SMP 调用函数初始化 */
+    WARN(!irqs_disabled(), "...irqs enabled early");
+    early_boot_irqs_disabled = false;
+    local_irq_enable();                       /* 开中断！ */
+
+    /* ⑨ kmem / 安全 */
+    kmem_cache_init_late();
+    console_init();                           /* 再次初始化（flush 缓冲）*/
+    lockdep_after_bootmem();
+    kmemleak_init();                          /* 内存泄漏检测 */
+    pgtable_init();
+    debug_objects_mem_init();
+    numa_policy_init();                       /* NUMA 策略初始化 */
+    acpi_early_init();
+    late_time_init();
+    calibrate_delay();                        /* 校准 udelay 循环计数 */
+    pid_idr_init();                           /* PID 分配器 */
+    anon_vma_init();                          /* 匿名 VMA 初始化 */
+    thread_stack_cache_init();
+    cred_init();                              /* 证书子系统 */
+    fork_init();                              /* fork 初始化 */
+    proc_caches_init();                       /* proc slab 缓存 */
+    uts_ns_init();                            /* UTS 命名空间 */
+    key_init();                               /* 密钥管理 */
+    security_init();                          /* 安全框架完整初始化 */
+    dbg_late_init();
+    net_ns_init();                            /* 网络命名空间 */
+    vfs_caches_init();                        /* VFS 缓存完整初始化 */
+    pagecache_init();                         /* 页缓存 */
+    signals_init();                           /* 信号处理 */
+    seq_file_init();                          /* seq_file */
+    proc_root_init();                         /* /proc 初始化 */
+    nsfs_init();
+    cpuset_init();                            /* cpuset 初始化 */
+    cgroup_init();                            /* cgroup 完整初始化 */
+    taskstats_init_early();
+    delayacct_init();
+
+    /* ⑩ 检查和最后步骤 */
+    poking_init();
+    check_bugs();                             /* 体系结构 bug 检查/修复 */
+    acpi_subsystem_init();
+    arch_post_acpi_subsys_init();
+    kcsan_init();
+
+    /* 启动第一个内核线程：kernel_init */
+    arch_call_rest_init();                    /* → rest_init() */
+}
+
+/* rest_init()：创建 init 和 kthreadd */
+static noinline void __ref rest_init(void)
+{
+    pid_t pid;
+    rcu_scheduler_starting();
+
+    /* 创建 PID 1 线程（将exec为/sbin/init）*/
+    pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+
+    /* 创建 PID 2：kthreadd（内核线程守护进程）*/
+    pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+    kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+
+    /* boot CPU 变成 idle 线程 */
+    cpu_startup_entry(CPUHP_ONLINE);
+}
+```
+
+---
+
+## 14.10 initramfs / initrd
+
+### initramfs 格式
+
+```
+initramfs = cpio 归档（可能 gzip 压缩）
+解压后的目录结构：
+├── bin -> usr/bin
+├── dev/
+│   ├── console
+│   └── null
+├── etc/
+│   └── ld.so.conf
+├── init                   ← 第一个执行的脚本/程序
+├── lib -> usr/lib
+├── lib64 -> usr/lib64
+├── proc/
+├── run/
+├── sbin -> usr/sbin
+├── sys/
+├── tmp/
+└── usr/
+    ├── bin/
+    │   ├── busybox        ← 静态编译，提供基本命令
+    │   ├── mount
+    │   └── udevadm
+    ├── lib/
+    │   └── modules/6.1.0/  ← 关键驱动模块（磁盘/文件系统）
+    └── sbin/
+        ├── fsck.ext4
+        └── switch_root    ← 切换根文件系统
+```
+
+### switch_root vs pivot_root
+
+```bash
+# switch_root（现代 initramfs 使用）
+# 1. 挂载真实根文件系统到 /newroot
+mount /dev/sda2 /newroot
+
+# 2. 切换根目录（删除 initramfs，释放内存）
+exec switch_root /newroot /sbin/init
+
+# switch_root 内部操作：
+# - chdir("/newroot")
+# - mount(".", "/", NULL, MS_MOVE)  移动挂载点
+# - chroot(".")
+# - exec(argv[1])                   执行 /sbin/init
+# - 所有 initramfs 内存被释放
+
+# pivot_root（容器/特殊用途）
+# 保留旧根（不释放内存），用于容器切换 namespace
+mkdir /newroot/oldroot
+pivot_root /newroot /newroot/oldroot
+cd /
+umount /oldroot
+exec /sbin/init
+```
+
+### 查看和修改 initramfs
+
+```bash
+# 查看 initramfs 内容
+lsinitramfs /boot/initrd.img-$(uname -r) | head -30
+# 或
+mkdir /tmp/initrd_extract
+cd /tmp/initrd_extract
+zcat /boot/initrd.img-$(uname -r) | cpio -idmv
+
+# 重新打包
+find . | cpio -H newc -o | gzip > /boot/initrd.img.new
+
+# Ubuntu: 重新生成 initramfs
+update-initramfs -u -k $(uname -r)
+
+# RHEL: 重新生成 initramfs
+dracut --force /boot/initramfs-$(uname -r).img $(uname -r)
+
+# 查看 initramfs 中包含的模块
+lsinitramfs /boot/initrd.img-$(uname -r) | grep "\.ko"
+```
+
+---
+
+## 14.11 PID 1: systemd 启动
+
+### systemd 初始化序列
+
+```
+kernel_init()
+    ↓
+run_init_process("/sbin/init")  → systemd 接管
+    ↓
+systemd PID 1 启动
+    ↓
+加载 /etc/systemd/system.conf
+    ↓
+挂载 /proc /sys /dev /run（核心文件系统）
+    ↓
+解析 default.target（通常 = multi-user.target 或 graphical.target）
+    ↓
+构建依赖图（Before/After/Requires/Wants）
+    ↓
+并行启动 unit（无依赖关系的 unit 同时启动）
+    ↓
+激活 basic.target
+    → sysinit.target（文件系统挂载、udev、内核参数）
+    → sockets.target（套接字激活）
+    → timers.target
+    ↓
+multi-user.target
+    → network.target → NetworkManager.service
+    → sshd.service
+    → cron.service
+    → ...（其他服务）
+    ↓
+graphical.target（可选）
+    → display-manager.service（GDM/SDDM）
+```
+
+### Unit 文件格式
+
+```ini
+# /etc/systemd/system/myapp.service
+[Unit]
+Description=My Application
+Documentation=https://example.com/docs
+After=network.target postgresql.service
+Requires=postgresql.service
+Wants=redis.service
+[Service]
+# Type 可选值：simple/forking/oneshot/notify/idle（unit 文件不支持行尾注释，注释必须独占一行）
+Type=notify
+User=myapp
+Group=myapp
+WorkingDirectory=/opt/myapp
+ExecStartPre=/opt/myapp/check-config.sh
+ExecStart=/opt/myapp/bin/server --config /etc/myapp/config.yaml
+ExecReload=/bin/kill -HUP $MAINPID
+ExecStop=/bin/kill -TERM $MAINPID
+Restart=on-failure
+RestartSec=5s
+TimeoutStartSec=30
+LimitNOFILE=65536
+MemoryMax=2G
+CPUQuota=50%
+# 安全隔离
+PrivateTmp=true
+NoNewPrivileges=true
+ProtectSystem=strict
+[Install]
+WantedBy=multi-user.target
+```
+
+### systemd 诊断命令
+
+```bash
+# 查看启动状态
+systemctl is-system-running    # running/degraded/maintenance
+
+# 查看失败的服务
+systemctl --failed
+
+# 查看服务依赖图
+systemctl list-dependencies --all multi-user.target | head -30
+
+# 查看 unit 详情
+systemctl status sshd.service -l
+
+# 查看 journal 日志（PID 1 之前的内核日志）
+journalctl -k --boot=0         # 本次启动的内核消息
+journalctl --boot=-1           # 上次启动的日志
+journalctl -u sshd --since "1 hour ago"
+
+# 并行启动顺序分析
+systemd-analyze plot > boot.svg
+systemctl list-jobs            # 当前正在进行的 job
+```
+
+---
+
+## 14.12 KASLR
+
+### 地址空间布局随机化
+
+```
+KASLR（Kernel Address Space Layout Randomization）
+Linux 3.14+ 启用（CONFIG_RANDOMIZE_BASE=y）
+
+x86-64 随机化范围（4级分页）：
+  内核镜像：在 512MB 窗口内随机偏移（以 2MB 对齐）
+  物理地址：在 64GB 范围内随机选择加载地址
+  模块区域：在内核附近 1GB 内随机分配
+
+随机化过程：
+  1. 获取随机熵（x86：解压阶段使用 RDRAND/RDTSC/i8254；arm64：引导程序通过 kaslr-seed 或 EFI RNG 协议传入）
+  2. 解压代码选择随机偏移量
+  3. 重定位内核到随机地址
+  4. 更新页表
+```
+
+### KASLR 与调试
+
+```bash
+# 禁用 KASLR（调试/漏洞研究）
+# 内核命令行添加：nokaslr
+
+# 查看内核加载地址
+sudo cat /proc/kallsyms | grep " _text"
+# ffffffff81000000 T _text    （固定地址 = nokaslr）
+# ffffffff95800000 T _text    （随机地址 = kaslr 启用）
+
+# 查看内核物理加载地址（需要 root，非 root 时地址显示为 0）
+sudo grep "Kernel code" /proc/iomem
+hexdump -n8 /sys/kernel/debug/boot_params/data 2>/dev/null
+
+# KASLR 对 /proc/kallsyms 的影响
+# 非 root：所有地址显示为 0
+# root：显示真实（随机化后的）地址
+sudo cat /proc/kallsyms | grep "sys_read"
+
+# 检查 KASLR 是否启用
+dmesg | grep -i kaslr
+# [    0.000000] KASLR enabled
+```
+
+---
+
+## 14.13 启动时间优化
+
+### 测量启动时间
+
+```bash
+# systemd-analyze 基本分析
+systemd-analyze                # 总启动时间
+# Startup finished in 1.234s (firmware) + 2.345s (loader) + 3.456s (kernel) + 12.345s (userspace) = 19.380s
+
+# 显示各服务时间
+systemd-analyze blame | head -20
+# 12.345s NetworkManager-wait-online.service
+#  8.234s cloud-init.service
+#  5.123s plymouth-quit-wait.service
+
+# 关键路径分析
+systemd-analyze critical-chain
+# The time when unit became active or started is printed after the "@" character.
+# The time the unit took to start is printed after the "+" character.
+
+# 可视化
+systemd-analyze plot > /tmp/boot.svg
+
+# bootchart（更详细的内核到用户空间）
+apt install bootchart2
+# 或内核参数：initcall_debug log_buf_len=16M
+```
+
+### 优化建议
+
+```bash
+# 1. 禁用不需要的服务
+systemctl disable bluetooth.service
+systemctl disable cups.service
+systemctl mask lvm2-monitor.service
+
+# 2. 网络等待优化（最常见的慢点）
+systemctl disable NetworkManager-wait-online.service
+# 或：仅在需要网络的服务上添加 After=network-online.target
+
+# 3. 内核参数优化
+# /etc/default/grub 中添加：
+# quiet splash loglevel=3      # 减少输出
+# fastboot                     # 跳过 fsck
+# noplymouth                   # 禁用 Plymouth
+
+# 4. 文件系统挂载优化
+# /etc/fstab 中添加 noatime,nodiratime
+# ext4 挂载选项：data=writeback
+
+# 5. 减少 initramfs 大小
+# /etc/dracut.conf 或 /etc/initramfs-tools/conf.d/
+# 仅包含必要模块
+update-initramfs -u
+
+# 6. systemd-readahead（预读取）——已在 systemd 217 中移除，以下命令仅适用于更早的版本
+systemctl enable systemd-readahead-collect.service
+systemctl enable systemd-readahead-replay.service
+```
+
+### 内核初始化时间测量
+
+```bash
+# 打印所有 initcall 耗时
+# 内核参数：initcall_debug
+
+# 结合 dmesg 时间戳分析（必须用默认的秒数时间戳，不能加 -T；match 的第三个参数需要 gawk）
+dmesg | grep "\[" | awk '{
+    match($0, /\[([0-9.]+)\]/, a);
+    if(a[1]+0 < 5) print
+}' | tail -50
+
+# 使用 ftrace 追踪 initcall
+echo function > /sys/kernel/tracing/current_tracer
+echo 'do_one_initcall' > /sys/kernel/tracing/set_ftrace_filter
+echo 1 > /sys/kernel/tracing/tracing_on
+```
+
+---
+
+## 14.14 调试启动问题
+
+### 早期控制台（earlycon）
+
+```bash
+# 内核命令行参数：
+earlycon                          # 自动检测（UART/MMIO）
+earlycon=uart8250,io,0x3f8        # 传统 COM1 串口
+earlycon=pl011,mmio,0x09000000    # ARM PL011 UART
+console=ttyS0,115200n8            # 后期串口控制台
+earlyprintk=vga                   # VGA 文本模式输出
+earlyprintk=serial,ttyS0,115200   # 串口早期输出
+
+# 启用所有日志
+debug ignore_loglevel
+```
+
+### kdump 配置与使用
+
+```bash
+# 1. 安装 kdump
+apt install kdump-tools crash
+# 或
+yum install kexec-tools crash
+
+# 2. 内核命令行预留内存
+# /etc/default/grub:
+GRUB_CMDLINE_LINUX="crashkernel=256M"
+# 或自动：crashkernel=auto
+
+# 3. 配置 kdump
+cat /etc/kdump.conf
+# path /var/crash
+# core_collector makedumpfile -l --message-level 1 -d 31
+
+# 4. 启用 kdump 服务
+systemctl enable kdump
+systemctl start kdump
+
+# 5. 测试触发崩溃（!!! 生产环境慎用 !!!）
+echo c > /proc/sysrq-trigger  # 强制崩溃
+
+# 6. 分析 crash dump
+crash /usr/lib/debug/boot/vmlinux-6.1.0 \
+      /var/crash/$(date +%Y-%m-%d)/dump.202*
+
+# crash 命令：
+crash> bt           # 回溯调用栈
+crash> bt -a        # 所有 CPU 的调用栈
+crash> ps          # 进程列表
+crash> vm          # 虚拟内存信息
+crash> log         # 内核消息缓冲区
+crash> files       # 打开文件
+crash> kmem -i     # 内存信息
+crash> dis -l panic # 反汇编 panic 函数
+```
+
+### 常见启动问题排查
+
+```bash
+# 问题1：内核无法找到根文件系统
+# 症状：VFS: Unable to mount root fs
+# 解决：检查 root= 参数，确保 initramfs 包含对应文件系统驱动
+# rd.break 进入 initramfs 调试：
+# 内核参数添加：rd.break
+
+# 进入 initramfs shell 后：
+ls /dev/disk/by-uuid/    # 确认设备存在
+blkid                    # 查看 UUID
+mount -o remount,rw /sysroot    # 重新以读写挂载真实根（rd.break 时位于 /sysroot）
+exit                     # 继续启动
+
+# 问题2：systemd 循环重启
+journalctl --boot -p err  # 查看错误日志
+systemctl --failed         # 查看失败单元
+
+# 问题3：内核 Oops/Panic 解析
+# dmesg 中的 Oops 包含：
+# - 错误类型（general protection fault / NULL pointer dereference）
+# - RIP：崩溃时的指令地址
+# - 调用栈（Call Trace）
+
+# 使用 addr2line 解析地址
+addr2line -e vmlinux -i ffffffff81234567
+# 或使用 gdb
+gdb --batch vmlinux \
+    -ex "list *0xffffffff81234567"
+```
+
+---
+
+## 参考资料
+
+| 资源 | 位置 |
+|------|------|
+| 内核文档 | `Documentation/x86/boot.rst` |
+| UEFI 规范 | `https://uefi.org/specifications` |
+| start_kernel 源码 | `init/main.c` |
+| GRUB2 手册 | `info grub` |
+| systemd 文档 | `man systemd.unit`, `man systemd-analyze` |
+| kdump 指南 | `Documentation/admin-guide/kdump/kdump.rst` |
+
+```bash
+# 快速诊断命令汇总
+dmesg -T | grep -E "error|fail|warn" -i | head -30  # 启动错误
+journalctl -k -b 0 | tail -50                         # 本次内核日志
+systemd-analyze blame | head -10                       # 慢启动服务
+cat /proc/cmdline                                       # 当前命令行
+ls -la /boot/                                          # 启动文件
+efibootmgr -v                                          # UEFI 启动项
+```
diff --git "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md" "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
new file mode 100644
index 0000000..9a4451b
--- /dev/null
+++ "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
@@ -0,0 +1,1542 @@
+# 15 — 内核调试与性能分析
+
+> **学习目标**：掌握 Linux 内核调试与性能分析的完整工具链，从 printk 到 eBPF，
+> 从静态分析到运行时 crash dump，能够定位内核 bug、量化性能瓶颈并实施优化。
+
+---
+
+## 目录
+
+| 节 | 主题 |
+|----|------|
+| 15.1 | 调试工具全景 |
+| 15.2 | printk 深入 |
+| 15.3 | ftrace 基础 |
+| 15.4 | ftrace 高级 |
+| 15.5 | perf stat |
+| 15.6 | perf record + report |
+| 15.7 | FlameGraph 火焰图 |
+| 15.8 | KASAN |
+| 15.9 | KFENCE |
+| 15.10 | KMSAN |
+| 15.11 | UBSAN |
+| 15.12 | lockdep |
+| 15.13 | KCOV |
+| 15.14 | kdump + crash |
+| 15.15 | GDB + QEMU |
+| 15.16 | bpftrace 调试技巧 |
+| 15.17 | livepatch |
+| 15.18 | 性能优化清单 |
+
+---
+
+## 15.1 调试工具全景
+
+### 工具对比矩阵
+
+| 工具 | 适用场景 | 运行时开销 | 内核配置 | 生产可用 |
+|------|----------|----------|---------|---------|
+| printk/dyndbg | 快速日志调试 | 低~中 | 内置 | ✅ |
+| ftrace | 函数/事件追踪 | 低~中 | `CONFIG_FTRACE` | ✅ |
+| perf | CPU/内存/IO 分析 | 低 | `CONFIG_PERF_EVENTS` | ✅ |
+| eBPF/bpftrace | 动态安全探针 | 极低 | `CONFIG_BPF` | ✅ |
+| KASAN | 内存安全 bug | **高(2x内存,慢2x)** | `CONFIG_KASAN` | ❌开发用 |
+| KFENCE | UAF/OOB 检测 | **极低** | `CONFIG_KFENCE` | ✅ |
+| KMSAN | 未初始化内存 | **极高** | `CONFIG_KMSAN` | ❌开发用 |
+| UBSAN | 未定义行为 | 低~中 | `CONFIG_UBSAN` | ⚠️ |
+| lockdep | 死锁检测 | 中 | `CONFIG_LOCKDEP` | ❌开发用 |
+| KCOV | 代码覆盖率 | 中 | `CONFIG_KCOV` | ❌测试用 |
+| kdump/crash | 崩溃分析 | 无（事后） | `CONFIG_KEXEC` | ✅ |
+| GDB+QEMU | 源码级调试 | 极高（虚拟机）| vmlinux | ❌开发用 |
+| livepatch | 热补丁 | 极低 | `CONFIG_LIVEPATCH` | ✅ |
+
+### 工具选择决策树
+
+```
+遇到问题
+    ├── 系统崩溃/Panic？
+    │   └── kdump + crash → 分析 vmcore
+    ├── 内存损坏/bug？
+    │   ├── 开发环境 → KASAN（全面检测）
+    │   └── 生产环境 → KFENCE（低开销）
+    ├── 死锁/锁顺序问题？
+    │   └── lockdep（开发内核）
+    ├── 性能问题？
+    │   ├── CPU 瓶颈 → perf stat + FlameGraph
+    │   ├── 延迟问题 → ftrace + bpftrace
+    │   └── IO 问题  → blktrace + bpftrace
+    └── 行为追踪/理解代码？
+        ├── 静态 → ftrace function tracer
+        └── 动态 → bpftrace / eBPF
+```
+
+---
+
+## 15.2 printk 深入
+
+### 日志级别
+
+```c
+/* include/linux/kern_levels.h（KERN_SOH 为 "\001"）*/
+#define KERN_EMERG   KERN_SOH "0"  /* 系统不可用 */
+#define KERN_ALERT   KERN_SOH "1"  /* 必须立即处理 */
+#define KERN_CRIT    KERN_SOH "2"  /* 严重条件 */
+#define KERN_ERR     KERN_SOH "3"  /* 错误条件 */
+#define KERN_WARNING KERN_SOH "4"  /* 警告条件 */
+#define KERN_NOTICE  KERN_SOH "5"  /* 正常但值得注意 */
+#define KERN_INFO    KERN_SOH "6"  /* 信息性消息 */
+#define KERN_DEBUG   KERN_SOH "7"  /* 调试级别消息 */
+#define KERN_DEFAULT ""            /* 默认内核日志级别 */
+
+/* 使用方式 */
+pr_emerg("Out of memory: Kill process %d (%s)\n", pid, comm);
+pr_err("Failed to allocate %zu bytes\n", size);
+pr_warn("Deprecated feature used by %s\n", current->comm);
+pr_info("Device %s registered\n", dev_name);
+pr_debug("value = %d\n", val);  /* 仅在定义 DEBUG 或启用 CONFIG_DYNAMIC_DEBUG 时才会输出 */
+
+/* 带设备前缀 */
+dev_err(dev, "I2C transfer failed: %d\n", ret);
+dev_info(dev, "Probed successfully\n");
+
+/* 速率限制（避免日志洪水）*/
+pr_err_ratelimited("DMA error %d\n", err);
+printk_ratelimited(KERN_ERR "error: %d\n", err);
+```
+
+### dmesg 使用技巧
+
+```bash
+# 带时间戳查看
+dmesg -T          # 人类可读时间
+dmesg -t          # 无时间戳
+dmesg --follow    # 实时跟踪
+
+# 按级别过滤
+dmesg -l err,crit,emerg     # 只显示错误
+dmesg -l debug              # 只显示调试信息
+dmesg --facility=kern       # 只显示内核消息
+
+# 清空日志缓冲
+dmesg -c
+
+# 设置日志级别（只有级别数值小于 console_loglevel 的消息才会输出到控制台）
+echo 8 > /proc/sys/kernel/printk   # 显示所有级别（含 KERN_DEBUG=7）
+# /proc/sys/kernel/printk 包含 4 个值：
+# console_loglevel default_message_loglevel min_console_loglevel default_console_loglevel
+cat /proc/sys/kernel/printk
+# 8 4 1 7
+```
+
+### 动态调试 (dyndbg)
+
+```bash
+# CONFIG_DYNAMIC_DEBUG=y 时可运行时开关 pr_debug/dev_dbg
+
+# 控制文件
+ls /sys/kernel/debug/dynamic_debug/control
+
+# 语法：
+# 条件 格式标志 动作
+# 动作：+/-  p=打印 f=函数名 l=行号 m=模块名 t=线程ID
+
+# 启用特定文件的调试消息
+echo "file net/ipv4/tcp.c +p" > /sys/kernel/debug/dynamic_debug/control
+
+# 启用特定模块
+echo "module e1000e +p" > /sys/kernel/debug/dynamic_debug/control
+
+# 启用特定函数
+echo "func tcp_sendmsg +p" > /sys/kernel/debug/dynamic_debug/control
+
+# 启用某函数并显示行号
+echo "func kmalloc +flp" > /sys/kernel/debug/dynamic_debug/control
+
+# 内核命令行启用（早期调试）
+dyndbg="file init/main.c +p"
+dyndbg="module thermal +p"
+
+# 查看当前启用的调试消息
+cat /sys/kernel/debug/dynamic_debug/control | grep "=p"
+
+# 禁用
+echo "module e1000e -p" > /sys/kernel/debug/dynamic_debug/control
+```
+
+---
+
+## 15.3 ftrace 基础
+
+### tracefs 挂载与基本操作
+
+```bash
+# 挂载 tracefs（通常已挂载在 /sys/kernel/tracing）
+mount -t tracefs tracefs /sys/kernel/tracing
+# 或
+mount -t debugfs debugfs /sys/kernel/debug
+# tracefs 在 /sys/kernel/debug/tracing/
+
+cd /sys/kernel/tracing   # 以下命令在此目录执行
+
+# 查看可用 tracer
+cat available_tracers
+# blk function_graph wakeup_dl wakeup_rt wakeup function nop
+
+# 查看当前 tracer
+cat current_tracer
+
+# 设置 function tracer
+echo function > current_tracer
+
+# 开始追踪
+echo 1 > tracing_on
+
+# 停止追踪
+echo 0 > tracing_on
+
+# 读取结果
+cat trace | head -30
+# 格式：进程名-PID  [CPU] 标志  时间戳:  函数名 <- 调用者
+# bash-1234  [001] ....  1234.567890: kmalloc <- __kmalloc_node
+
+# 清空 trace buffer
+echo > trace
+```
+
+### function_graph tracer
+
+```bash
+# 显示函数调用图（入口+出口+执行时间）
+echo function_graph > current_tracer
+
+# 设置追踪深度
+echo 5 > max_graph_depth
+
+# 只追踪特定函数及其子调用
+echo do_sys_open > set_graph_function
+echo 1 > tracing_on
+cat trace | head -50
+
+# 示例输出：
+# CPU DURATION      FUNCTION CALLS
+# |   |   |         |   |   |   |
+# 1)               | do_sys_open() {
+# 1)               |   getname() {
+# 1)   2.341 us    |     getname_flags();
+# 1)   5.234 us    |   } /* getname */
+# 1)               |   alloc_fd() {
+# 1)   0.891 us    |     __alloc_fd();
+# 1)   1.234 us    |   } /* alloc_fd */
+# 1) + 45.678 us   | } /* do_sys_open */
+```
+
+### set_ftrace_filter 过滤
+
+```bash
+# 只追踪特定函数
+echo "kmalloc" > set_ftrace_filter
+echo "kfree" >> set_ftrace_filter
+
+# 使用通配符
+echo "tcp_*" > set_ftrace_filter
+echo "ext4_*" >> set_ftrace_filter
+
+# 追踪模块函数
+echo ':mod:e1000e' > set_ftrace_filter
+
+# 排除某些函数（notrace filter）
+echo "native_sched_clock" > set_ftrace_notrace
+
+# 只追踪特定 PID
+echo 1234 > set_ftrace_pid
+
+# 查看可追踪的函数列表
+cat available_filter_functions | wc -l   # 通常数万个
+
+# trace-cmd 封装工具（更方便）
+trace-cmd record -p function -l "tcp_*" sleep 5
+trace-cmd report | head -50
+trace-cmd hist
+```
+
+---
+
+## 15.4 ftrace 高级
+
+### 事件追踪 (trace_events)
+
+```bash
+# 查看所有可用事件
+ls /sys/kernel/tracing/events/
+# block  ext4  kmem  net  sched  signal  skb  sock  ...
+
+# 查看某类别的事件
+ls /sys/kernel/tracing/events/sched/
+# sched_switch  sched_wakeup  sched_process_fork  ...
+
+# 启用单个事件
+echo 1 > /sys/kernel/tracing/events/sched/sched_switch/enable
+
+# 启用整个类别
+echo 1 > /sys/kernel/tracing/events/net/enable
+
+# 查看事件格式
+cat /sys/kernel/tracing/events/sched/sched_switch/format
+# name: sched_switch
+# field:unsigned short common_type;
+# field:pid_t prev_pid;
+# field:char prev_comm[16];
+# field:int prev_prio;
+# field:long prev_state;
+# field:pid_t next_pid;
+# field:char next_comm[16];
+
+# 设置事件过滤器
+echo "prev_comm == 'nginx'" > \
+    /sys/kernel/tracing/events/sched/sched_switch/filter
+
+# trace-cmd 方式（推荐）
+trace-cmd record -e sched:sched_switch -e net:netif_rx \
+    -f "comm == 'nginx'" sleep 10
+trace-cmd report
+```
+
+### hist 触发器（内核直方图）
+
+```bash
+# 记录系统调用次数直方图（sys_exit 只有 id/ret 字段，没有 elapsed；测延迟需配对 sys_enter/sys_exit）
+echo 'hist:key=id.syscall:val=hitcount:sort=hitcount.descending' > \
+    /sys/kernel/tracing/events/raw_syscalls/sys_exit/trigger
+
+sleep 10
+
+cat /sys/kernel/tracing/events/raw_syscalls/sys_exit/hist
+# 输出按调用次数排序的系统调用直方图
+
+# 统计各进程被切换出 CPU 的次数（这是计数，不是调度延迟；延迟见下方 wakeup→switch 示例）
+echo 'hist:key=prev_comm:val=hitcount:sort=hitcount' > \
+    /sys/kernel/tracing/events/sched/sched_switch/trigger
+
+# 更复杂：跟踪 sched_wakeup 到 sched_switch 的延迟（需先执行：echo 'sched_wakeup_latency u64 lat' > /sys/kernel/tracing/synthetic_events）
+echo 'hist:key=pid:ts0=common_timestamp.usecs' > \
+    /sys/kernel/tracing/events/sched/sched_wakeup/trigger
+echo 'hist:key=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).trace(sched_wakeup_latency,$wakeup_lat)' > \
+    /sys/kernel/tracing/events/sched/sched_switch/trigger
+```
+
+### 合成事件与延迟测量
+
+```bash
+# 创建合成事件：测量块 IO 延迟
+# 1. 定义合成事件
+echo 'block_io_lat u64 sector; u64 lat_us' > \
+    /sys/kernel/tracing/synthetic_events
+
+# 2. 在 block_rq_issue 记录时间戳
+echo 'hist:key=sector:ts0=common_timestamp.usecs' > \
+    /sys/kernel/tracing/events/block/block_rq_issue/trigger
+
+# 3. 在 block_rq_complete 计算延迟并触发合成事件
+echo 'hist:key=sector:lat=common_timestamp.usecs-$ts0:'\
+'onmatch(block.block_rq_issue).block_io_lat(sector,$lat)' > \
+    /sys/kernel/tracing/events/block/block_rq_complete/trigger
+
+# 4. 监听合成事件
+echo 1 > /sys/kernel/tracing/events/synthetic/block_io_lat/enable
+cat /sys/kernel/tracing/trace
+```
+
+---
+
+## 15.5 perf stat
+
+### CPU 性能事件
+
+```bash
+# 基本统计：运行 5 秒全系统
+perf stat -a sleep 5
+
+# 输出解读：
+# Performance counter stats for 'system wide':
+#    10,234,567,890  cycles              #    3.214 GHz
+#     8,901,234,567  instructions        #    0.87  insn per cycle  ← IPC
+#        23,456,789  cache-misses        #    2.345 % of all cache refs
+#         1,234,567  branch-misses       #    0.123 % of all branches
+#               500  context-switches    #   50.000 /sec
+#                12  cpu-migrations
+#            45,678  page-faults
+
+# IPC < 1：通常是内存/缓存延迟瓶颈
+# IPC > 3：CPU 计算密集，运行良好
+# cache-miss % > 5%：需要优化内存访问模式
+
+# 追踪特定进程
+perf stat -p $(pgrep nginx) sleep 10
+
+# 追踪特定 CPU
+perf stat -C 0,1,2,3 sleep 5
+
+# 自定义事件
+perf stat -e \
+  cycles,\
+  instructions,\
+  cache-references,\
+  cache-misses,\
+  branch-instructions,\
+  branch-misses,\
+  L1-dcache-loads,\
+  L1-dcache-load-misses,\
+  LLC-loads,\
+  LLC-load-misses \
+  -- /usr/bin/stress --cpu 4 --timeout 10
+
+# 性能公式
+# IPC = instructions / cycles
+# CPI = cycles / instructions （越低越好）
+# cache miss rate = cache-misses / cache-references × 100%
+# branch miss rate = branch-misses / branch-instructions × 100%
+```
+
+### perf list — 可用事件
+
+```bash
+# 列出所有硬件事件
+perf list hw
+
+# 软件事件
+perf list sw
+
+# 追踪点（tracepoints）
+perf list tracepoint | wc -l   # 通常 1000+ 个
+
+# PMU（处理器特定）事件
+perf list pmu | head -30
+
+# 按类别搜索
+perf list | grep -i "cache"
+perf list | grep -i "tlb"
+perf list | grep -i "memory"
+
+# Intel Top-Down 分析方法（需要 Intel CPU + perf topdown）
+perf stat --topdown -a sleep 5
+# Retiring: 40%  → 有效执行
+# Bad Speculation: 15%  → 分支预测失误
+# Frontend Bound: 25%  → 指令获取瓶颈
+# Backend Bound: 20%  → 执行资源/内存瓶颈
+```
+
+---
+
+## 15.6 perf record + report
+
+### 采样原理
+
+```
+perf record 采样原理：
+  1. 设置 PMU 溢出中断（每 N 次事件触发一次中断）
+  2. 中断处理程序记录 RIP（当前指令地址）+ 调用栈
+  3. 写入 perf.data 文件（mmap 环形缓冲区）
+  4. perf report 对地址进行符号化
+
+默认采样频率：-F 4000（4000 Hz 采样/秒）
+默认事件：cycles（CPU 周期）
+
+权衡：
+  高频率 → 更精确 → 更高开销（>10000 Hz 慎用）
+  低频率 → 低开销 → 统计误差更大
+```
+
+### 基本采样
+
+```bash
+# 全系统采样 30 秒
+perf record -a -g -F 999 sleep 30
+# -a: 所有 CPU
+# -g: 记录调用图（call graph）
+# -F 999: 每秒采样 999 次
+
+# 采样特定进程
+perf record -g -p $(pgrep -f "java.*MyApp") sleep 30
+
+# 采样特定事件（LLC miss）
+perf record -e LLC-load-misses -a -g sleep 10
+
+# 运行命令并采样
+perf record -g -- /path/to/my_program arg1 arg2
+
+# 查看 perf.data 文件信息
+perf report --header
+
+# 基本报告
+perf report --stdio | head -50
+```
+
+### 调用图采集方法对比
+
+```bash
+# 方法1：frame pointer（快速，但需要 -fno-omit-frame-pointer 编译）
+perf record -g --call-graph=fp -a sleep 30
+
+# 方法2：DWARF 调试信息（准确，开销较高）
+perf record -g --call-graph=dwarf -a sleep 30
+
+# 方法3：LBR（Last Branch Record，Intel 专有，最快）
+perf record -g --call-graph=lbr -a sleep 30
+
+# 实际推荐：
+# 内核：fp（内核用 -fno-omit-frame-pointer 编译）
+# 用户空间 C/C++：dwarf 或重新编译加 -fno-omit-frame-pointer
+# Java/Python：需要额外 perf map agent
+
+# 查看注释（按函数内的指令热点）
+perf annotate --stdio kmalloc | head -40
+```
+
+---
+
+## 15.7 FlameGraph 火焰图
+
+### 生成步骤
+
+```bash
+# 1. 克隆 FlameGraph 工具
+git clone https://github.com/brendangregg/FlameGraph.git
+cd FlameGraph
+
+# 2. 采样（使用 frame pointer 调用图）
+perf record -F 99 -a -g -- sleep 60
+# 或针对特定进程
+perf record -F 99 -g -p $(pgrep nginx) sleep 30
+
+# 3. 转换格式
+perf script > out.perf
+
+# 4. 折叠调用栈
+./stackcollapse-perf.pl out.perf > out.folded
+
+# 5. 生成 SVG
+./flamegraph.pl out.folded > flamegraph.svg
+
+# 一键命令
+perf record -F 99 -a -g -- sleep 60 && \
+    perf script | \
+    ./stackcollapse-perf.pl | \
+    ./flamegraph.pl > flamegraph.svg
+
+# 打开查看
+firefox flamegraph.svg
+# 或
+python3 -m http.server 8080  # 然后浏览器访问
+```
+
+### 读懂火焰图
+
+```
+火焰图解读规则：
+  Y 轴（上下）= 调用栈深度（底部=调用栈的根/入口，顶部=采样时正在 CPU 上执行的函数）
+  X 轴（左右）= 样本占比（宽=消耗更多 CPU 时间；左右顺序按函数名排序，不代表时间先后）
+  颜色        = 随机（区分函数），无特殊含义
+
+  ┌─────────────────────────────────────────────────┐
+  │      handle_mm_fault      copy_page_range        │ ← 叶节点（最热）
+  │        do_page_fault       do_mprotect           │
+  │          page_fault       sys_mprotect           │
+  │             entry_SYSCALL_64                     │
+  │    nginx_worker_cycle    kernel_vsyscall         │
+  └─────────────────────────────────────────────────┘
+              宽度 = 样本占比（X 轴不是时间轴）
+
+  最宽的栈顶函数 = 最消耗 CPU 的热点
+  宽大的平顶     = 可能的性能瓶颈
+  窄但深的塔     = 递归或深调用链
+```
+
+### 差分火焰图（对比优化前后）
+
+```bash
+# 优化前采样
+perf record -F 99 -a -g -- sleep 60
+perf script > before.perf
+./stackcollapse-perf.pl before.perf > before.folded
+
+# 实施优化...
+
+# 优化后采样
+perf record -F 99 -a -g -- sleep 60
+perf script > after.perf
+./stackcollapse-perf.pl after.perf > after.folded
+
+# 生成差分图（红=变慢/增多，蓝=变快/减少）
+./difffolded.pl before.folded after.folded | \
+    ./flamegraph.pl --colors=blue > diff.svg
+```
+
+---
+
+## 15.8 KASAN (Kernel Address Sanitizer)
+
+### 配置与原理
+
+```bash
+# 内核配置
+CONFIG_KASAN=y
+CONFIG_KASAN_GENERIC=y        # 软件实现（所有架构）
+# 或
+CONFIG_KASAN_HW_TAGS=y        # 硬件实现（ARM MTE，低开销）
+
+# 开销：
+# - 内存：每 8 字节对应 1 字节 shadow（shadow 占 1/8 内存；加上 redzone/quarantine，slab 内存开销可达约 2 倍）
+# - CPU：约 1.5-2x 慢（每次访问检查 shadow）
+# - 可检测：use-after-free, out-of-bounds, use-after-scope
+```
+
+### KASAN 报告解读
+
+```
+==================================================================
+BUG: KASAN: slab-out-of-bounds in copy_from_user+0x.../...
+Write of size 8 at addr ffff888012345678 by task kworker/0:1/234
+                 ↑类型    ↑被访问地址          ↑任务名
+
+CPU: 0 PID: 234 Comm: kworker/0:1
+Hardware name: QEMU Standard PC
+
+Call Trace:                          ← 谁触发的
+ dump_stack+0x...
+ kasan_report+0x...
+ copy_from_user+0x...
+ my_driver_write+0x3c/0x80          ← 问题代码位置
+ vfs_write+0x...
+
+Allocated by task 234:              ← 内存在哪里被分配
+ kmalloc+0x...
+ my_driver_probe+0x58/0x100
+
+Freed by task 234:                  ← 内存在哪里被释放（UAF时）
+ kfree+0x...
+ my_driver_remove+0x...
+
+The buggy address belongs to the object at ffff888012345600
+ which belongs to the cache kmalloc-128 of size 128
+The buggy address is located 0 bytes to the right of
+ 120-byte region [ffff888012345600, ffff888012345678)
+ ↑ 地址紧邻 120 字节对象（kmalloc(120)，落在 kmalloc-128 缓存）的右边界，越界写 8 字节
+==================================================================
+```
+
+### 使用 KASAN 调试
+
+```bash
+# 在 KASAN 内核上运行目标程序
+# 建议使用 syzkaller 或手动触发 bug 路径
+
+# 查看 KASAN 统计
+cat /sys/kernel/debug/kasan/stats 2>/dev/null
+
+# KASAN 选项（内核命令行）
+kasan=off            # 禁用（仅 HW_TAGS 模式支持；通常不需要）
+kasan_multi_shot     # 报告所有检测到的 bug（默认只报告第一个）
+
+# 编译内核时的 KASAN 选项（INLINE 与 OUTLINE 二选一）
+CONFIG_KASAN_INLINE=y   # 内联检查（更快）
+CONFIG_KASAN_OUTLINE=y  # 函数调用检查（更小代码）
+```
+
+---
+
+## 15.9 KFENCE (Kernel Electric Fence)
+
+### 原理与配置
+
+```c
+/* KFENCE：低开销的内存安全检测，适合生产环境 */
+
+/* 工作原理：
+ * - 按固定采样间隔（默认每 100ms 一次）将 slab 分配重定向到 KFENCE 保护池
+ * - KFENCE 池中每个对象使用独立内存页，前后各有 Guard Page
+ * - Guard Page 无访问权限，访问时触发 page fault
+ * - 对象释放后，页面标记为不可访问（检测 UAF）
+ */
+
+/* 内核配置 */
+// CONFIG_KFENCE=y
+// CONFIG_KFENCE_SAMPLE_INTERVAL=100  (ms)
+// CONFIG_KFENCE_NUM_OBJECTS=255
+
+/* 内核命令行 */
+// kfence.sample_interval=100   — 采样间隔（ms，0=禁用）
+```
+
+```bash
+# 查看 KFENCE 统计
+cat /sys/kernel/debug/kfence/stats
+# total allocs:    12345
+# total frees:     12340
+# total bugs:          3
+
+# 查看详细报告
+dmesg | grep KFENCE
+
+# 调整采样频率（越低=越多覆盖，越高开销）
+echo 50 > /sys/module/kfence/parameters/sample_interval
+
+# KFENCE vs KASAN 对比
+# KFENCE：概率采样，开销极低，生产可用，覆盖率随时间积累
+# KASAN：全量检测，开销高，开发专用，立即发现所有访问
+```
+
+---
+
+## 15.10 KMSAN (Kernel Memory Sanitizer)
+
+```c
+/* KMSAN：检测内核中使用未初始化内存 */
+
+/* 常见 bug：
+ * - 栈变量未初始化就拷贝到用户空间（信息泄漏！）
+ * - 联合体部分字段未初始化
+ * - kmalloc 后未 memset 就读取
+ */
+
+/* 内核配置 */
+// CONFIG_KMSAN=y
+// 依赖：CONFIG_CC_IS_CLANG=y（需要 Clang 编译）
+
+/* 示例 bug */
+struct my_struct {
+    int important;
+    int padding;     /* 未初始化 */
+};
+
+int buggy_function(void __user *user_ptr) {
+    struct my_struct s;
+    s.important = 42;
+    /* BUG: s.padding 未初始化；KMSAN 会在下面的 copy_to_user 处报告：uninitialized memory copy to user */
+    if (copy_to_user(user_ptr, &s, sizeof(s)))
+        return -EFAULT;
+    return 0;
+}
+
+/* 修复 */
+struct my_struct s = {};  /* 零初始化 */
+/* 或 */
+memset(&s, 0, sizeof(s));
+```
+
+```bash
+# 查看 KMSAN 报告
+dmesg | grep "KMSAN"
+
+# KMSAN 报告格式
+# BUG: KMSAN: kernel-infoleak in copy_to_user+0x...
+# Uninit was created at:
+#   kmalloc+0x...
+#   my_driver_alloc+0x...
+```
+
+---
+
+## 15.11 UBSAN (Undefined Behavior Sanitizer)
+
+```bash
+# 内核配置
+# CONFIG_UBSAN=y
+# CONFIG_UBSAN_SANITIZE_ALL=y  — 检查所有代码
+# CONFIG_UBSAN_TRAP=y          — 遇到 UB 时 trap（更严格）
+
+# 检测的 UB 类型
+# - 有符号整数溢出（signed overflow）
+# - 移位越界（shift out of bounds）
+# - 数组越界（array index out of bounds）
+# - 空指针解引用（null pointer dereference）
+# - 对齐违规（misaligned access）
+# - 无效 bool 值
+
+# UBSAN 报告示例
+# UBSAN: signed-integer-overflow in kernel/time.c:123
+# -2147483648 - 1 cannot be represented in type 'int'
+# ...
+# Call Trace:
+#   ubsan_epilogue
+#   handle_overflow
+#   __ubsan_handle_sub_overflow
+
+# 在代码中禁止特定 UBSAN 检查
+__attribute__((no_sanitize("signed-integer-overflow")))
+static int my_safe_function(int a, int b) {
+    return a + b;  /* 此处溢出是预期行为 */
+}
+```
+
+---
+
+## 15.12 lockdep (死锁检测)
+
+### 工作原理
+
+```
+lockdep 死锁检测原理：
+
+1. 锁类（Lock Class）
+   - 每个锁变量实例属于一个"锁类"（同一代码位置分配的锁）
+   - 不追踪具体锁实例，追踪锁类之间的顺序关系
+
+2. 锁顺序图（Lock Order Graph）
+   - 记录：持有 A 时获取 B → 边 A→B
+   - 检测：是否存在环（A→B→C→A = 死锁可能）
+
+3. 检测时机
+   - 每次 lock() 操作时即时检查
+   - 不需要实际发生死锁，只要顺序可能导致死锁就报告
+```
+
+### lockdep 报告解读
+
+```
+=====================================================
+WARNING: possible circular locking dependency detected
+6.1.0 #1 SMP
+-----------------------------------------------------
+kworker/0:1/234 is trying to acquire lock:
+ffffffff81234560 (&mm->mmap_lock){++++}, ...
+
+but task is already holding lock:
+ffffffff81567890 (&fs->lock){+.+.}, ...
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (&fs->lock){+.+.}:       ← 锁 A
+       lock_acquire
+       __mutex_lock
+       copy_fs_struct
+
+-> #0 (&mm->mmap_lock){++++}:  ← 锁 B
+       lock_acquire
+       down_read
+       dup_mm
+
+other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&mm->mmap_lock);    lock(&fs->lock);
+                             lock(&mm->mmap_lock);  ← 等待 CPU0
+  lock(&fs->lock);  ← 等待 CPU1
+
+ *** DEADLOCK ***
+=====================================================
+```
+
+### lockdep 配置与工具
+
+```bash
+# 内核配置
+# CONFIG_LOCKDEP=y
+# CONFIG_PROVE_LOCKING=y
+# CONFIG_DEBUG_LOCKDEP=y
+# CONFIG_LOCK_STAT=y
+
+# 查看锁统计
+cat /proc/lock_stat | head -30
+# class name    con-bounces    contentions  waittime-min  waittime-max  ...
+
+# 重置统计
+echo 0 > /proc/lock_stat
+
+# 查看死锁报告
+dmesg | grep -A50 "circular locking"
+
+# 在代码中标注锁顺序（消除误报）
+mutex_lock_nested(&child->lock, SINGLE_DEPTH_NESTING);
+
+# 声明锁类（相同代码但不同实例）
+static struct lock_class_key my_lock_key;
+lockdep_set_class(&spinlock, &my_lock_key);
+```
+
+---
+
+## 15.13 KCOV (内核代码覆盖率)
+
+```c
+/* KCOV：为 syzkaller 等 fuzzer 提供覆盖率反馈 */
+
+/* 内核配置 */
+// CONFIG_KCOV=y
+// CONFIG_KCOV_ENABLE_COMPARISONS=y  — 比较值覆盖
+
+/* 用户空间使用 KCOV */
+#include <linux/kcov.h>
+
+int fd = open("/sys/kernel/debug/kcov", O_RDWR);
+ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
+uint64_t *cover = mmap(NULL, COVER_SIZE * sizeof(uint64_t),
+                       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+/* 开始收集覆盖率 */
+ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC);
+__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
+
+/* 执行系统调用 */
+read(some_fd, buf, size);
+
+/* 停止收集 */
+ioctl(fd, KCOV_DISABLE, 0);
+
+uint64_t n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+for (uint64_t i = 0; i < n; i++) {
+    printf("0x%llx\n", cover[i + 1]);  /* 执行到的内核地址 */
+}
+```
+
+```bash
+# syzkaller 使用 KCOV 的基本配置
+cat syz-manager.cfg
+# {
+#     "target": "linux/amd64",
+#     "http": "0.0.0.0:56741",
+#     "workdir": "/syzkaller/workdir",
+#     "kernel_obj": "/linux",
+#     "image": "/linux/stretch.img",
+#     "sshkey": "/linux/stretch.id_rsa",
+#     "syzkaller": "/syzkaller",
+#     "procs": 8,
+#     "type": "qemu",
+#     "vm": {"count": 4, "kernel": "/linux/arch/x86/boot/bzImage"}
+# }
+```
+
+---
+
+## 15.14 kdump + crash
+
+### 配置 kdump
+
+```bash
+# 1. 安装工具
+apt install kdump-tools crash linux-crashdump
+# 或
+yum install kexec-tools crash kernel-debuginfo
+
+# 2. 内核参数（/etc/default/grub）
+GRUB_CMDLINE_LINUX="crashkernel=256M"
+# 大内存系统建议：crashkernel=512M,high
+# 自动计算：crashkernel=auto
+
+update-grub && reboot
+
+# 3. 验证配置
+cat /proc/iomem | grep "Crash kernel"
+#   30000000-3fffffff : Crash kernel  ← 预留的内存范围（256M）
+
+# 4. 配置 kdump 目标
+cat /etc/kdump.conf
+# path /var/crash
+# core_collector makedumpfile -l --message-level 1 -d 31
+# default reboot
+# 注：-d 31 = 丢弃不需要的页（zero/cache）以减小 dump 大小
+
+# 5. 启动 kdump 服务
+systemctl enable --now kdump      # RHEL/CentOS；Debian/Ubuntu 上服务名为 kdump-tools
+systemctl status kdump
+
+# 6. 测试（!!! 会崩溃系统 !!!）
+echo c > /proc/sysrq-trigger
+```
+
+### makedumpfile 过滤级别
+
+```bash
+# makedumpfile 过滤级别（-d 参数）
+# 1  = 去除 zero 页
+# 2  = 去除 cache 页（非私有）
+# 4  = 去除 cache 页（私有）
+# 8  = 去除用户数据页
+# 16 = 去除 free 页
+# 31 = 去除以上所有（只保留内核内存，大幅减小文件大小）
+
+makedumpfile -l --message-level 31 -d 31 \
+    /proc/vmcore /var/crash/vmcore.$(date +%s)
+```
+
+### crash 命令参考
+
+```bash
+# 启动 crash
+crash /usr/lib/debug/boot/vmlinux-6.1.0-22 \
+      /var/crash/202401010000/vmcore
+
+# crash 内基本命令
+crash> help        # 命令列表
+
+# 进程和线程
+crash> ps          # 所有进程
+crash> ps -k       # 内核线程
+crash> task 1234   # 查看指定 PID 的 task_struct
+crash> thread_info 1234  # 线程信息
+
+# 调用栈
+crash> bt          # 当前（崩溃）进程的调用栈
+crash> bt -a       # 所有 CPU 的调用栈
+crash> bt 1234     # 指定 PID 的调用栈（bt -t 则是列出栈上所有文本符号）
+crash> bt -l       # 显示行号
+crash> bt -f       # 显示完整帧信息
+
+# 内存
+crash> vm          # 当前进程虚拟内存
+crash> vm 1234     # 指定 PID 的 VMA
+crash> kmem -i     # 内存使用概况
+crash> kmem -s     # slab 分配器统计
+crash> kmem -S kmalloc-128  # 特定 slab 信息
+
+# 日志
+crash> log         # 内核消息缓冲区（dmesg）
+crash> log -m      # 在每条消息前显示日志级别
+
+# 文件
+crash> files 1234  # 进程打开的文件
+crash> net         # 网络统计
+crash> net -s      # socket 信息
+
+# 反汇编
+crash> dis -l tcp_sendmsg    # 带行号反汇编
+crash> dis -l 0xffffffff81234567  # 按地址
+
+# 符号查找
+crash> sym schedule       # 查找符号地址
+crash> sym ffffffff81234567  # 地址转符号
+
+# 查看数据结构
+crash> struct task_struct 0xffff888012345600
+crash> p init_task        # 打印变量值
+crash> rd -64 0xffffffff81234567 20  # 读取内存（20个64位字）
+```
+
+---
+
+## 15.15 GDB + QEMU 内核调试
+
+### 环境搭建
+
+```bash
+# 编译调试内核（.config 选项）
+# CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+# CONFIG_GDB_SCRIPTS=y
+# CONFIG_FRAME_POINTER=y
+# CONFIG_RANDOMIZE_BASE=n  (禁用 KASLR)
+# CONFIG_KASAN=n           (可选，避免干扰)
+
+# 启动 QEMU（暴露 GDB 服务器端口）
+qemu-system-x86_64 \
+    -kernel arch/x86/boot/bzImage \
+    -drive file=rootfs.img,format=raw \
+    -append "root=/dev/sda console=ttyS0 nokaslr debug" \
+    -serial stdio \
+    -m 2G \
+    -smp 4 \
+    -display none \
+    -s \
+    -S          # -s：监听 :1234 GDB 端口；-S：暂停等待 GDB 连接（可选）。续行符 \ 后不能跟注释
+```
+
+### GDB 连接与调试
+
+```bash
+# 启动 GDB
+gdb vmlinux
+
+# 连接 QEMU
+(gdb) target remote localhost:1234
+
+# 加载内核调试脚本（CONFIG_GDB_SCRIPTS=y 时可用）
+(gdb) lx-symbols         # 加载所有模块符号
+(gdb) lx-version         # 内核版本
+(gdb) lx-ps              # 进程列表（类似 ps）
+(gdb) lx-dmesg           # 内核消息缓冲区
+(gdb) lx-lsmod           # 已加载模块
+
+# 设置断点
+(gdb) break sys_execve
+(gdb) break net/ipv4/tcp.c:tcp_sendmsg
+(gdb) hbreak do_page_fault  # 硬件断点（不修改代码）
+(gdb) watch *(int*)0xffffffff81234567  # 内存监视点
+
+# 继续执行
+(gdb) continue
+(gdb) c
+
+# 单步
+(gdb) step      # 进入函数
+(gdb) next      # 跳过函数
+(gdb) finish    # 执行到函数返回
+(gdb) stepi     # 汇编单步
+
+# 查看信息
+(gdb) info registers             # 寄存器
+(gdb) info breakpoints           # 断点列表
+(gdb) backtrace                  # 调用栈
+(gdb) frame 3                    # 切换到第 3 帧
+(gdb) list                       # 显示源码
+(gdb) print init_task.pid        # 打印变量
+(gdb) print/x $rip               # 打印寄存器（十六进制）
+(gdb) x/10i $rip                 # 查看当前指令
+
+# lx-* 辅助命令与 $lx_* 辅助函数（函数需配合 p/print 使用）
+(gdb) lx-ps                      # 进程列表
+(gdb) p $lx_task_by_pid(1234)    # 按 PID 查找 task
+(gdb) p $lx_thread_info($lx_task_by_pid(1234))  # 线程信息
+(gdb) p $lx_current(0).pid       # CPU0 的当前任务
+(gdb) lx-list-check init_task.tasks  # 校验进程链表的一致性
+
+# 调试模块（加载后）
+(gdb) add-symbol-file /path/to/module.ko 0xffffffffc0123000
+```
+
+---
+
+## 15.16 bpftrace 调试技巧
+
+### 追踪内存分配
+
+```bash
+# 安装
+apt install bpftrace  # 或 dnf install bpftrace
+
+# 1. 统计 kmalloc 调用大小分布
+bpftrace -e '
+kprobe:__kmalloc {
+    @size_hist = hist(arg0);
+}
+interval:s:5 {
+    print(@size_hist);
+    clear(@size_hist);
+}'
+
+# 2. 找出 kmalloc 最多的调用方（前10）
+bpftrace -e '
+kprobe:__kmalloc {
+    @[kstack()] = count();
+}
+END {
+    print(@, 10);
+}'
+
+# 3. 追踪特定大小的分配（检测内存泄漏）
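+bpftrace -e '
+kprobe:__kmalloc { @size[tid] = arg0; }       /* 进入时记录请求大小 */
+kretprobe:__kmalloc /@size[tid]/ {
+    @allocs[retval] = @size[tid];             /* 返回地址 → 分配大小 */
+    delete(@size[tid]);
+}
+kprobe:kfree { delete(@allocs[arg0]); }       /* 释放后从表中移除 */
+interval:s:10 {
+    print(@allocs);                           /* 仍未释放的分配 */
+}'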
+
+# 4. slab 分配器热点
+bpftrace -e '
+kprobe:kmem_cache_alloc {
+    @cache[str(((struct kmem_cache *)arg0)->name)] = count();
+}
+END { print(@cache, 20); }'
+```
+
+### 追踪调度延迟
+
+```bash
+# 1. 测量进程唤醒到运行的延迟
+bpftrace -e '
+tracepoint:sched:sched_wakeup,
+tracepoint:sched:sched_wakeup_new {
+    @wakeup_ts[args->pid] = nsecs;
+}
+
+tracepoint:sched:sched_switch {
+    $ts = @wakeup_ts[args->next_pid];
+    if ($ts) {
+        $lat = (nsecs - $ts) / 1000;  /* 转换为微秒 */
+        @latency_us = hist($lat);
+        delete(@wakeup_ts[args->next_pid]);
+    }
+}
+
+END { print(@latency_us); }'
+
+# 2. 找出导致高调度延迟的进程
+bpftrace -e '
+tracepoint:sched:sched_wakeup {
+    @wake[args->comm] = nsecs;
+}
+tracepoint:sched:sched_switch {
+    $ts = @wake[args->next_comm];
+    if ($ts && (nsecs - $ts) > 1000000) {  /* > 1ms */
+        printf("HIGH LAT: %s lat=%dms on CPU%d\n",
+               args->next_comm, (nsecs-$ts)/1000000, cpu);
+    }
+    delete(@wake[args->next_comm]);
+}'
+```
+
+### 追踪文件 IO 延迟
+
+```bash
+# 1. 块设备 IO 延迟分布
+bpftrace -e '
+tracepoint:block:block_rq_issue {
+    @start[args->dev, args->sector] = nsecs;
+}
+tracepoint:block:block_rq_complete {
+    $key = (args->dev, args->sector);
+    $ts = @start[$key];
+    if ($ts) {
+        $lat = (nsecs - $ts) / 1000;
+        @io_lat_us = hist($lat);
+        @io_lat_by_dev[args->dev] = hist($lat);
+        delete(@start[$key]);
+    }
+}
+END {
+    print(@io_lat_us);
+    print(@io_lat_by_dev);
+}'
+
+# 2. 追踪慢 read 系统调用（> 10ms）
+bpftrace -e '
+tracepoint:syscalls:sys_enter_read {
+    @ts[tid] = nsecs;
+    @fd[tid] = args->fd;
+}
+tracepoint:syscalls:sys_exit_read {
+    $ts = @ts[tid];
+    if ($ts && (nsecs - $ts) > 10000000) {
+        printf("SLOW READ: pid=%d comm=%s fd=%d lat=%dms count=%d\n",
+               pid, comm, @fd[tid],
+               (nsecs - $ts) / 1000000,
+               args->ret);
+    }
+    delete(@ts[tid]);
+    delete(@fd[tid]);
+}'
+
+# 3. ext4 层延迟
+bpftrace -e '
+kprobe:ext4_file_read_iter { @[tid] = nsecs; }
+kretprobe:ext4_file_read_iter /@[tid]/ {
+    $lat = (nsecs - @[tid]) / 1000;
+    if ($lat > 1000) {
+        printf("ext4 slow read: %s lat=%dµs\n", comm, $lat);
+    }
+    @ext4_lat = hist($lat);
+    delete(@[tid]);
+}
+END { print(@ext4_lat); }'
+```
+
+---
+
+## 15.17 livepatch
+
+### 热补丁原理
+
+```c
+/* kernel/livepatch/ — 无需重启修复内核 bug */
+
+/* livepatch 流程：
+ * 1. 编写补丁模块（替换有 bug 的函数）
+ * 2. insmod 补丁模块
+ * 3. 内核修改函数入口：添加 trampoline 跳转到新函数
+ * 4. 一致性模型确保安全切换（按任务逐个切换：任务栈上不再有被替换的函数时才转到新实现）
+ */
+
+/* 补丁模块示例 */
+#include <linux/livepatch.h>
+
+/* 替换函数（新实现）*/
+static int livepatch_cmdline_proc_show(struct seq_file *m, void *v)
+{
+    seq_printf(m, "%s\n", saved_command_line);
+    /* 修复：添加了换行符 */
+    return 0;
+}
+
+/* 描述要替换的函数 */
+static struct klp_func funcs[] = {
+    {
+        .old_name = "cmdline_proc_show",     /* 旧函数名 */
+        .new_func = livepatch_cmdline_proc_show,  /* 新函数 */
+    }, { }
+};
+
+/* 描述包含该函数的对象 */
+static struct klp_object objs[] = {
+    {
+        /* .name = NULL 表示 vmlinux 本体 */
+        .funcs = funcs,
+    }, { }
+};
+
+/* 补丁描述 */
+static struct klp_patch patch = {
+    .mod = THIS_MODULE,
+    .objs = objs,
+};
+
+static int livepatch_init(void)
+{
+    return klp_enable_patch(&patch);
+}
+
+static void livepatch_exit(void)
+{
+    /* 无需清理：先通过 sysfs 禁用补丁（echo 0 > enabled），过渡完成后即可 rmmod */
+}
+
+module_init(livepatch_init);
+module_exit(livepatch_exit);
+MODULE_INFO(livepatch, "Y");
+MODULE_LICENSE("GPL");
+```
+
+### livepatch 管理
+
+```bash
+# 内核配置
+# CONFIG_LIVEPATCH=y
+
+# 加载补丁
+insmod my_livepatch.ko
+
+# 查看补丁状态
+cat /sys/kernel/livepatch/my_livepatch/enabled
+# 1 = 已启用
+
+# 查看过渡状态（等待一致性）
+cat /sys/kernel/livepatch/my_livepatch/transition
+# 1 = 正在过渡中
+# 0 = 完成
+
+# 禁用补丁（回滚到原始函数）
+echo 0 > /sys/kernel/livepatch/my_livepatch/enabled
+
+# 确认状态
+ls /sys/kernel/livepatch/
+```
+
+---
+
+## 15.18 性能优化清单
+
+### CPU 性能分析
+
+```bash
+# 1. 检查 CPU 使用率和负载
+top -b -n1 | head -20
+mpstat -P ALL 1 5          # 每秒每核统计
+pidstat -u -p ALL 1 5      # 进程级 CPU
+
+# 2. CPU 调度延迟（运行队列长度）
+vmstat 1 10                # r 列 = 运行队列
+sar -q 1 10               # 队列和负载
+
+# 3. 中断分布均衡
+watch -n1 'cat /proc/interrupts | sort -k2 -rn | head -10'
+
+# 4. CPU 缓存命中率
+perf stat -e cache-misses,cache-references -a sleep 5
+# miss rate > 5% 需要优化内存访问局部性
+
+# 5. CPU 频率和节流
+cpupower frequency-info
+cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq
+# 频率低 = 可能被热节流
+cat /sys/class/thermal/thermal_zone*/temp  # 温度
+
+# 优化建议：
+# - 设置 CPU 调速器为 performance：
+#   cpupower frequency-set -g performance
+# - 禁用 C-state 深睡眠（低延迟场景）：
+#   cpupower idle-set -D 1
+# - NUMA 绑定：numactl --cpunodebind=0 --membind=0 ./app
+```
+
+### 内存性能分析
+
+```bash
+# 1. 内存使用概况
+free -h
+cat /proc/meminfo | grep -E "MemTotal|MemFree|Cached|Dirty|Writeback"
+
+# 2. 内存分配器统计
+cat /proc/slabinfo | sort -k3 -rn | head -20  # 最大的 slab
+slabtop                                         # 实时 slab 视图
+
+# 3. 页缓存压力
+cat /proc/vmstat | grep -E "pgmajfault|pgfault|pswpin|pswpout"
+# pgmajfault 高 = 大量缺页（可能 swap 抖动）
+
+# 4. NUMA 统计
+numastat -c
+numastat -m | head -20     # 内存使用
+# numa_miss 高 = 大量跨节点访问，考虑 NUMA 优化
+
+# 5. 内存带宽（Intel MLC 或 stream）
+stream                     # 标准内存带宽测试
+
+# 优化建议：
+# - 大页：echo always > /sys/kernel/mm/transparent_hugepage/enabled
+# - NUMA 感知分配：numactl, libnuma
+# - 减少 dirty 页积压：
+#   sysctl vm.dirty_ratio=5
+#   sysctl vm.dirty_background_ratio=2
+```
+
+### IO 性能分析
+
+```bash
+# 1. 磁盘 IO 统计
+iostat -x 1 10
+# await 列：平均 IO 等待时间（ms）
+# %util 列：设备利用率
+# r_await/w_await：读写分别的等待时间
+
+# 2. IO 请求大小分布（bpftrace，按 rwbs 读写类型分组，单位：字节）
+bpftrace -e '
+tracepoint:block:block_rq_complete {
+    @[args->rwbs] = hist(args->nr_sector * 512);
+}' &
+sleep 10; kill %1
+
+# 3. IO 调度器查看
+cat /sys/block/sda/queue/scheduler
+# [mq-deadline] kyber bfq none
+
+# 4. 文件系统缓存命中率
+bpftrace -e '
+kprobe:__do_page_cache_readahead { @readahead = count(); }
+kprobe:mark_page_accessed { @hits = count(); }
+interval:s:5 {
+    print(@readahead);
+    print(@hits);
+    clear(@readahead);
+    clear(@hits);
+}'
+
+# 优化建议：
+# - SSD 使用 none 或 mq-deadline 调度器：
+#   echo mq-deadline > /sys/block/nvme0n1/queue/scheduler
+# - 调整 readahead：
+#   blockdev --setra 2048 /dev/sda  # 1MB readahead
+# - 挂载选项：noatime,nodiratime,data=writeback（ext4）
+# - io_uring 代替 epoll（高并发 IO）
+```
+
+### 网络性能分析
+
+```bash
+# 1. 网络吞吐和错误
+sar -n DEV 1 10
+ip -s link show eth0
+ethtool -S eth0 | grep -E "error|drop|miss"
+
+# 2. TCP 统计
+ss -s                          # socket 摘要
+netstat -s | grep -i "retransmit\|error\|fail"
+cat /proc/net/netstat | \
+    awk 'NR%2==0{for(i=1;i<=NF;i++) printf "%-30s %s\n", h[i], $i}
+         NR%2==1{for(i=1;i<=NF;i++) h[i]=$i}' | \
+    grep -E "Retrans|TCPLost|TCPSpuriousRTOs"
+
+# 3. 网络中断分配
+cat /proc/interrupts | grep eth
+
+# 4. 软中断接收速率
+watch -n1 'cat /proc/softirqs | grep NET'
+
+# 优化建议：
+# - 网卡多队列 + CPU 亲和性绑定：
+#   ethtool -L eth0 combined 8
+#   i=0; for irq in $(grep eth0 /proc/interrupts | awk -F: '{print $1}'); do
+#     echo $i > /proc/irq/$irq/smp_affinity_list; i=$((i+1))
+#   done
+# - 增大 socket 缓冲区：
+#   sysctl net.core.rmem_max=134217728
+#   sysctl net.core.wmem_max=134217728
+#   sysctl net.ipv4.tcp_rmem="4096 87380 134217728"
+# - 启用 GRO/GSO：
+#   ethtool -K eth0 gro on gso on tso on
+# - 增大 backlog：
+#   sysctl net.core.somaxconn=65535
+#   sysctl net.ipv4.tcp_max_syn_backlog=65535
+# - 减少 TIME_WAIT：
+#   sysctl net.ipv4.tcp_tw_reuse=1
+#   sysctl net.ipv4.tcp_fin_timeout=15
+```
+
+### 综合性能快速检查脚本
+
+```bash
+#!/bin/bash
+# 快速性能快照
+
+echo "=== CPU ===" 
+uptime
+mpstat -P ALL 1 1 | tail -5
+
+echo "=== 内存 ==="
+free -h
+cat /proc/meminfo | grep -E "HugePages|Dirty|Writeback" 
+
+echo "=== IO ==="
+iostat -x 1 1 | tail -10
+
+echo "=== 网络 ==="
+ss -s
+ip -s link | grep -A4 "eth0\|ens\|enp"
+
+echo "=== 进程 TOP5 CPU ==="
+ps aux --sort=-%cpu | head -6
+
+echo "=== 进程 TOP5 内存 ==="
+ps aux --sort=-%mem | head -6
+
+echo "=== 最近内核错误 ==="
+dmesg -T | grep -E "error|BUG|WARN|OOM" | tail -10
+
+echo "=== 系统调用热点（5秒）==="
+perf stat -e 'syscalls:sys_enter_*' -a sleep 5 2>&1 | \
+    grep -vE "^ *0 " | sort -k1 -rn | head -10
+```
+
+---
+
+## 参考资料
+
+| 资源 | 链接/位置 |
+|------|----------|
+| perf wiki | `https://perf.wiki.kernel.org/` |
+| FlameGraph | `https://github.com/brendangregg/FlameGraph` |
+| bpftrace 参考 | `https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md` |
+| KASAN 文档 | `Documentation/dev-tools/kasan.rst` |
+| lockdep 设计 | `Documentation/locking/lockdep-design.rst` |
+| ftrace 文档 | `Documentation/trace/ftrace.rst` |
+| kdump 指南 | `Documentation/admin-guide/kdump/kdump.rst` |
+| Brendan Gregg 博客 | `http://www.brendangregg.com/` |
+| Linux 性能 | `http://www.brendangregg.com/linuxperf.html` |
+
+```bash
+# 调试工具快速安装（用数组保留行尾注释；"\ # 注释" 会破坏续行）
+pkgs=( "linux-tools-$(uname -r)"   # perf
+    trace-cmd                   # ftrace 前端
+    bpftrace                    # bpftrace
+    bpfcc-tools                 # BCC tools
+    crash                       # kernel crash analyzer
+    gdb                         # 调试器
+    rt-tests                    # cyclictest
+    sysstat                     # iostat/mpstat/sar
+    numactl )                   # NUMA 工具
+apt install -y "${pkgs[@]}"
+```
diff --git a/assets/diagrams/boot-flow.svg b/assets/diagrams/boot-flow.svg
new file mode 100644
index 0000000..711c01d
--- /dev/null
+++ b/assets/diagrams/boot-flow.svg
@@ -0,0 +1,111 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 460" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="900" height="460" fill="#0d1117"/>
+  <text x="450" y="30" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">Linux 启动流程 — 从按下电源到 Shell 提示符</text>
+
+  <defs>
+    <marker id="a" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#ff7b29"/>
+    </marker>
+  </defs>
+
+  <!-- Stage boxes -->
+  <!-- 1 BIOS/UEFI -->
+  <rect x="20" y="55" width="120" height="80" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+  <text x="80" y="80" text-anchor="middle" fill="#f85149" font-weight="700">① BIOS/UEFI</text>
+  <text x="80" y="97" text-anchor="middle" fill="#8b949e" font-size="10">POST 自检</text>
+  <text x="80" y="112" text-anchor="middle" fill="#8b949e" font-size="10">MBR/GPT 读取</text>
+  <text x="80" y="124" text-anchor="middle" fill="#8b949e" font-size="10">EFI stub 支持</text>
+
+  <line x1="140" y1="95" x2="170" y2="95" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- 2 Bootloader -->
+  <rect x="170" y="55" width="130" height="80" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+  <text x="235" y="80" text-anchor="middle" fill="#e3b341" font-weight="700">② Bootloader</text>
+  <text x="235" y="97" text-anchor="middle" fill="#8b949e" font-size="10">GRUB2 / systemd-boot</text>
+  <text x="235" y="112" text-anchor="middle" fill="#8b949e" font-size="10">选择内核版本</text>
+  <text x="235" y="124" text-anchor="middle" fill="#8b949e" font-size="10">加载 vmlinuz + initrd</text>
+
+  <line x1="300" y1="95" x2="330" y2="95" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- 3 Kernel decompress -->
+  <rect x="330" y="55" width="130" height="80" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+  <text x="395" y="75" text-anchor="middle" fill="#56d364" font-weight="700">③ 内核解压</text>
+  <text x="395" y="92" text-anchor="middle" fill="#8b949e" font-size="10">arch/x86/boot/</text>
+  <text x="395" y="107" text-anchor="middle" fill="#8b949e" font-size="10">header.S → main.c</text>
+  <text x="395" y="122" text-anchor="middle" fill="#8b949e" font-size="10">KASLR 随机地址</text>
+
+  <line x1="460" y1="95" x2="490" y2="95" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- 4 head.S -->
+  <rect x="490" y="55" width="130" height="80" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="2"/>
+  <text x="555" y="75" text-anchor="middle" fill="#58a6ff" font-weight="700">④ head.S</text>
+  <text x="555" y="92" text-anchor="middle" fill="#8b949e" font-size="10">建立 4 级页表</text>
+  <text x="555" y="107" text-anchor="middle" fill="#8b949e" font-size="10">跳入 64-bit 模式</text>
+  <text x="555" y="122" text-anchor="middle" fill="#8b949e" font-size="10">初始化 IDT/GDT</text>
+
+  <line x1="620" y1="95" x2="650" y2="95" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- 5 start_kernel -->
+  <rect x="650" y="55" width="140" height="80" rx="6" fill="#1a2028" stroke="#8957e5" stroke-width="2"/>
+  <text x="720" y="75" text-anchor="middle" fill="#8957e5" font-weight="700">⑤ start_kernel()</text>
+  <text x="720" y="92" text-anchor="middle" fill="#8b949e" font-size="10">setup_arch()</text>
+  <text x="720" y="107" text-anchor="middle" fill="#8b949e" font-size="10">mm_init(), sched_init()</text>
+  <text x="720" y="122" text-anchor="middle" fill="#8b949e" font-size="10">rest_init() →</text>
+
+  <!-- start_kernel sub steps -->
+  <rect x="30" y="175" width="840" height="110" rx="8" fill="#161b22" stroke="#30363d"/>
+  <text x="450" y="198" text-anchor="middle" font-size="11" font-weight="600" fill="#8b949e">start_kernel() 关键调用序列</text>
+
+  <rect x="50" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="105" y="228" text-anchor="middle" fill="#56d364" font-size="10">setup_arch()</text>
+  <text x="105" y="243" text-anchor="middle" fill="#8b949e" font-size="9">ACPI/DMI/内存探测</text>
+
+  <rect x="170" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="225" y="228" text-anchor="middle" fill="#56d364" font-size="10">mm_init()</text>
+  <text x="225" y="243" text-anchor="middle" fill="#8b949e" font-size="9">buddy/slab 初始化</text>
+
+  <rect x="290" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="345" y="228" text-anchor="middle" fill="#56d364" font-size="10">sched_init()</text>
+  <text x="345" y="243" text-anchor="middle" fill="#8b949e" font-size="9">per-CPU runqueue</text>
+
+  <rect x="410" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="465" y="228" text-anchor="middle" fill="#56d364" font-size="10">vfs_caches_init()</text>
+  <text x="465" y="243" text-anchor="middle" fill="#8b949e" font-size="9">dcache/inode cache</text>
+
+  <rect x="530" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="585" y="228" text-anchor="middle" fill="#56d364" font-size="10">signals_init()</text>
+  <text x="585" y="243" text-anchor="middle" fill="#8b949e" font-size="9">信号队列</text>
+
+  <rect x="650" y="208" width="110" height="50" rx="4" fill="#0d1117" stroke="#e3b341"/>
+  <text x="705" y="228" text-anchor="middle" fill="#e3b341" font-size="10">rest_init()</text>
+  <text x="705" y="243" text-anchor="middle" fill="#8b949e" font-size="9">创建 PID-1/kthreadd</text>
+
+  <!-- arrows between sub-steps -->
+  <line x1="160" y1="233" x2="170" y2="233" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+  <line x1="280" y1="233" x2="290" y2="233" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+  <line x1="400" y1="233" x2="410" y2="233" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+  <line x1="520" y1="233" x2="530" y2="233" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+  <line x1="640" y1="233" x2="650" y2="233" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+
+  <!-- rest_init detail -->
+  <line x1="450" y1="285" x2="450" y2="315" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <rect x="30" y="315" width="840" height="80" rx="8" fill="#161b22" stroke="#30363d"/>
+  <text x="450" y="338" text-anchor="middle" font-size="11" font-weight="600" fill="#8b949e">rest_init() — 诞生前三个进程</text>
+
+  <rect x="60" y="348" width="160" height="35" rx="4" fill="#0d1117" stroke="#f85149"/>
+  <text x="140" y="362" text-anchor="middle" fill="#f85149" font-weight="600">PID 0: swapper/idle</text>
+  <text x="140" y="376" text-anchor="middle" fill="#8b949e" font-size="9">rest_init 本身变成 idle</text>
+
+  <rect x="260" y="348" width="200" height="35" rx="4" fill="#0d1117" stroke="#56d364"/>
+  <text x="360" y="362" text-anchor="middle" fill="#56d364" font-weight="600">PID 1: init (systemd)</text>
+  <text x="360" y="376" text-anchor="middle" fill="#8b949e" font-size="9">kernel_thread(kernel_init) → execve(/sbin/init)</text>
+
+  <rect x="510" y="348" width="200" height="35" rx="4" fill="#0d1117" stroke="#58a6ff"/>
+  <text x="610" y="362" text-anchor="middle" fill="#58a6ff" font-weight="600">PID 2: kthreadd</text>
+  <text x="610" y="376" text-anchor="middle" fill="#8b949e" font-size="9">所有内核线程的父进程</text>
+
+  <!-- systemd chain -->
+  <rect x="30" y="415" width="840" height="35" rx="6" fill="#1a2028" stroke="#30363d"/>
+  <text x="450" y="432" text-anchor="middle" fill="#8b949e" font-size="10">systemd → 读 unit 文件 → 并行启动服务 → getty → login → bash → 🐧 Shell 提示符</text>
+</svg>
diff --git a/assets/diagrams/driver-model.svg b/assets/diagrams/driver-model.svg
new file mode 100644
index 0000000..1552df3
--- /dev/null
+++ b/assets/diagrams/driver-model.svg
@@ -0,0 +1,90 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 860 420" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="860" height="420" fill="#0d1117"/>
+  <text x="430" y="30" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">Linux 设备模型 — kobject 层次</text>
+
+  <defs>
+    <marker id="a" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#ff7b29"/>
+    </marker>
+  </defs>
+
+  <!-- sysfs tree -->
+  <rect x="30" y="55" width="140" height="50" rx="5" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+  <text x="100" y="75" text-anchor="middle" fill="#f85149" font-weight="700">kobject</text>
+  <text x="100" y="92" text-anchor="middle" fill="#8b949e" font-size="10">引用计数 + sysfs 节点</text>
+
+  <line x1="100" y1="105" x2="100" y2="130" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="30" y="130" width="140" height="50" rx="5" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="100" y="150" text-anchor="middle" fill="#e3b341" font-weight="700">kset</text>
+  <text x="100" y="165" text-anchor="middle" fill="#8b949e" font-size="10">kobject 的集合/容器</text>
+
+  <line x1="100" y1="180" x2="100" y2="205" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="30" y="205" width="140" height="50" rx="5" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+  <text x="100" y="225" text-anchor="middle" fill="#56d364" font-weight="700">device</text>
+  <text x="100" y="240" text-anchor="middle" fill="#8b949e" font-size="10">嵌入 kobject</text>
+
+  <line x1="100" y1="255" x2="100" y2="280" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="30" y="280" width="140" height="50" rx="5" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="100" y="300" text-anchor="middle" fill="#58a6ff" font-weight="700">device_driver</text>
+  <text x="100" y="315" text-anchor="middle" fill="#8b949e" font-size="10">probe/remove 操作集</text>
+
+  <!-- sysfs -->
+  <rect x="220" y="55" width="160" height="300" rx="6" fill="#161b22" stroke="#30363d"/>
+  <text x="300" y="78" text-anchor="middle" fill="#8b949e" font-weight="600">/sys 文件系统</text>
+  <text x="300" y="100" text-anchor="middle" fill="#56d364" font-size="10">/sys/bus/</text>
+  <text x="300" y="117" text-anchor="middle" fill="#8b949e" font-size="9">  platform/ pci/ usb/ i2c/</text>
+  <text x="300" y="135" text-anchor="middle" fill="#56d364" font-size="10">/sys/devices/</text>
+  <text x="300" y="152" text-anchor="middle" fill="#8b949e" font-size="9">  物理设备树</text>
+  <text x="300" y="170" text-anchor="middle" fill="#56d364" font-size="10">/sys/class/</text>
+  <text x="300" y="187" text-anchor="middle" fill="#8b949e" font-size="9">  net/ block/ input/</text>
+  <text x="300" y="205" text-anchor="middle" fill="#56d364" font-size="10">/sys/module/</text>
+  <text x="300" y="222" text-anchor="middle" fill="#8b949e" font-size="9">  已加载模块参数</text>
+  <text x="300" y="250" text-anchor="middle" fill="#e3b341" font-size="10">每个目录 = 一个 kobject</text>
+  <text x="300" y="267" text-anchor="middle" fill="#e3b341" font-size="10">每个文件 = 一个 attribute</text>
+
+  <!-- driver lifecycle -->
+  <rect x="410" y="55" width="420" height="300" rx="6" fill="#161b22" stroke="#30363d"/>
+  <text x="620" y="78" text-anchor="middle" fill="#8b949e" font-weight="600">驱动注册与 probe 生命周期</text>
+
+  <rect x="430" y="92" width="160" height="40" rx="4" fill="#1a2028" stroke="#56d364"/>
+  <text x="510" y="108" text-anchor="middle" fill="#56d364" font-size="10">platform_driver_register()</text>
+  <text x="510" y="122" text-anchor="middle" fill="#8b949e" font-size="9">注册到 platform bus</text>
+
+  <line x1="510" y1="132" x2="510" y2="152" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="430" y="152" width="160" height="40" rx="4" fill="#1a2028" stroke="#e3b341"/>
+  <text x="510" y="168" text-anchor="middle" fill="#e3b341" font-size="10">bus match()</text>
+  <text x="510" y="182" text-anchor="middle" fill="#8b949e" font-size="9">名字/of_match 匹配设备</text>
+
+  <line x1="510" y1="192" x2="510" y2="212" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="430" y="212" width="160" height="40" rx="4" fill="#1a2028" stroke="#58a6ff"/>
+  <text x="510" y="228" text-anchor="middle" fill="#58a6ff" font-size="10">driver→probe()</text>
+  <text x="510" y="242" text-anchor="middle" fill="#8b949e" font-size="9">申请资源/初始化硬件</text>
+
+  <line x1="510" y1="252" x2="510" y2="272" stroke="#ff7b29" stroke-width="1.5" marker-end="url(#a)"/>
+
+  <rect x="430" y="272" width="160" height="40" rx="4" fill="#1a2028" stroke="#56d364"/>
+  <text x="510" y="288" text-anchor="middle" fill="#56d364" font-size="10">设备正常运行</text>
+  <text x="510" y="302" text-anchor="middle" fill="#8b949e" font-size="9">register_chrdev / netdev</text>
+
+  <!-- device tree note -->
+  <rect x="610" y="92" width="200" height="220" rx="4" fill="#0d1117" stroke="#30363d"/>
+  <text x="710" y="112" text-anchor="middle" fill="#8957e5" font-weight="600" font-size="11">设备树 (DTS)</text>
+  <text x="710" y="130" text-anchor="middle" fill="#8b949e" font-size="9">ARM/RISC-V 非 ACPI 平台</text>
+  <text x="710" y="148" text-anchor="middle" fill="#56d364" font-size="9">compatible = "vendor,chip"</text>
+  <text x="710" y="165" text-anchor="middle" fill="#8b949e" font-size="9">↕ of_match_table 匹配</text>
+  <text x="710" y="183" text-anchor="middle" fill="#8b949e" font-size="9">of_get_property()</text>
+  <text x="710" y="200" text-anchor="middle" fill="#8b949e" font-size="9">devm_ioremap_resource()</text>
+  <text x="710" y="218" text-anchor="middle" fill="#8b949e" font-size="9">devm_* 自动资源管理</text>
+  <text x="710" y="236" text-anchor="middle" fill="#e3b341" font-size="9">驱动解绑/probe 失败时自动释放所有</text>
+  <text x="710" y="253" text-anchor="middle" fill="#e3b341" font-size="9">devm_ 申请的资源</text>
+
+  <!-- /proc/devices legend -->
+  <rect x="30" y="365" width="820" height="50" rx="5" fill="#161b22" stroke="#30363d"/>
+  <text x="430" y="385" text-anchor="middle" fill="#8b949e" font-size="10">关键命令: lsmod | modinfo &lt;mod&gt; | modprobe --show-depends &lt;mod&gt; | udevadm info /dev/sda | ls /sys/bus/platform/drivers/</text>
+  <text x="430" y="405" text-anchor="middle" fill="#8b949e" font-size="10">主/次设备号: ls -l /dev/ → crw(字符)/brw(块); mknod /dev/mydev c 240 0</text>
+</svg>
diff --git a/assets/diagrams/ebpf-arch.svg b/assets/diagrams/ebpf-arch.svg
new file mode 100644
index 0000000..8545035
--- /dev/null
+++ b/assets/diagrams/ebpf-arch.svg
@@ -0,0 +1,123 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 540" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="900" height="540" fill="#0d1117"/>
+  <text x="450" y="32" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">eBPF 完整架构</text>
+
+  <!-- User Space -->
+  <rect x="20" y="50" width="860" height="130" rx="8" fill="#161b22" stroke="#30363d" stroke-width="1"/>
+  <text x="40" y="72" font-size="11" font-weight="600" fill="#8b949e">USER SPACE</text>
+
+  <rect x="40" y="82" width="130" height="50" rx="5" fill="#1f2937" stroke="#56d364" stroke-width="1.5"/>
+  <text x="105" y="103" text-anchor="middle" fill="#56d364" font-weight="600">BCC / bpftrace</text>
+  <text x="105" y="120" text-anchor="middle" fill="#8b949e" font-size="10">高级前端工具</text>
+
+  <rect x="190" y="82" width="130" height="50" rx="5" fill="#1f2937" stroke="#56d364" stroke-width="1.5"/>
+  <text x="255" y="103" text-anchor="middle" fill="#56d364" font-weight="600">libbpf / CO-RE</text>
+  <text x="255" y="120" text-anchor="middle" fill="#8b949e" font-size="10">可移植加载库</text>
+
+  <rect x="340" y="82" width="130" height="50" rx="5" fill="#1f2937" stroke="#56d364" stroke-width="1.5"/>
+  <text x="405" y="103" text-anchor="middle" fill="#56d364" font-weight="600">clang / LLVM</text>
+  <text x="405" y="120" text-anchor="middle" fill="#8b949e" font-size="10">编译 C → BPF 字节码</text>
+
+  <rect x="490" y="82" width="130" height="50" rx="5" fill="#1f2937" stroke="#56d364" stroke-width="1.5"/>
+  <text x="555" y="103" text-anchor="middle" fill="#56d364" font-weight="600">bpftool</text>
+  <text x="555" y="120" text-anchor="middle" fill="#8b949e" font-size="10">程序/Map 管理</text>
+
+  <rect x="640" y="82" width="220" height="50" rx="5" fill="#1f2937" stroke="#56d364" stroke-width="1.5"/>
+  <text x="750" y="103" text-anchor="middle" fill="#56d364" font-weight="600">用户态 Map 读写</text>
+  <text x="750" y="120" text-anchor="middle" fill="#8b949e" font-size="10">bpf(BPF_MAP_LOOKUP_ELEM,...)</text>
+
+  <!-- Arrow into kernel -->
+  <line x1="450" y1="180" x2="450" y2="210" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr)"/>
+  <text x="470" y="200" fill="#ff7b29" font-size="10">bpf() syscall</text>
+
+  <defs>
+    <marker id="arr" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#ff7b29"/>
+    </marker>
+    <marker id="arrb" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#58a6ff"/>
+    </marker>
+  </defs>
+
+  <!-- Kernel Space -->
+  <rect x="20" y="215" width="860" height="295" rx="8" fill="#161b22" stroke="#30363d" stroke-width="1"/>
+  <text x="40" y="237" font-size="11" font-weight="600" fill="#8b949e">KERNEL SPACE</text>
+
+  <!-- Verifier -->
+  <rect x="40" y="248" width="180" height="80" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+  <text x="130" y="270" text-anchor="middle" fill="#f85149" font-weight="700" font-size="13">Verifier (验证器)</text>
+  <text x="130" y="288" text-anchor="middle" fill="#8b949e" font-size="10">· 安全性检查</text>
+  <text x="130" y="302" text-anchor="middle" fill="#8b949e" font-size="10">· 循环有界性验证</text>
+  <text x="130" y="316" text-anchor="middle" fill="#8b949e" font-size="10">· 指针访问合法性</text>
+
+  <line x1="220" y1="288" x2="260" y2="288" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr)"/>
+
+  <!-- JIT -->
+  <rect x="260" y="248" width="180" height="80" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+  <text x="350" y="270" text-anchor="middle" fill="#e3b341" font-weight="700" font-size="13">JIT 编译器</text>
+  <text x="350" y="288" text-anchor="middle" fill="#8b949e" font-size="10">BPF 字节码</text>
+  <text x="350" y="302" text-anchor="middle" fill="#8b949e" font-size="10">↓</text>
+  <text x="350" y="316" text-anchor="middle" fill="#8b949e" font-size="10">本机机器码 (x86/ARM)</text>
+
+  <line x1="440" y1="288" x2="480" y2="288" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr)"/>
+
+  <!-- Maps -->
+  <rect x="480" y="248" width="180" height="80" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="2"/>
+  <text x="570" y="270" text-anchor="middle" fill="#58a6ff" font-weight="700" font-size="13">BPF Maps</text>
+  <text x="570" y="288" text-anchor="middle" fill="#8b949e" font-size="10">Hash / Array / RingBuf</text>
+  <text x="570" y="302" text-anchor="middle" fill="#8b949e" font-size="10">PerfEvent / LRU Hash</text>
+  <text x="570" y="316" text-anchor="middle" fill="#8b949e" font-size="10">内核↔用户数据桥梁</text>
+
+  <line x1="660" y1="288" x2="700" y2="288" stroke="#ff7b29" stroke-width="2" marker-end="url(#arr)"/>
+
+  <!-- Helper calls -->
+  <rect x="700" y="248" width="160" height="80" rx="6" fill="#1a2028" stroke="#8957e5" stroke-width="2"/>
+  <text x="780" y="270" text-anchor="middle" fill="#8957e5" font-weight="700" font-size="13">Helper 函数</text>
+  <text x="780" y="288" text-anchor="middle" fill="#8b949e" font-size="10">bpf_map_update_elem</text>
+  <text x="780" y="302" text-anchor="middle" fill="#8b949e" font-size="10">bpf_probe_read_kernel</text>
+  <text x="780" y="316" text-anchor="middle" fill="#8b949e" font-size="10">bpf_get_current_pid_tgid</text>
+
+  <!-- Hook points row -->
+  <text x="450" y="358" text-anchor="middle" font-size="11" font-weight="600" fill="#8b949e">挂载点 (Hook Points)</text>
+
+  <rect x="30" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#56d364" stroke-width="1.5"/>
+  <text x="80" y="388" text-anchor="middle" fill="#56d364" font-weight="600" font-size="11">kprobe/</text>
+  <text x="80" y="403" text-anchor="middle" fill="#56d364" font-weight="600" font-size="11">kretprobe</text>
+  <text x="80" y="416" text-anchor="middle" fill="#8b949e" font-size="9">几乎任意内核函数</text>
+
+  <rect x="140" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#56d364" stroke-width="1.5"/>
+  <text x="190" y="388" text-anchor="middle" fill="#56d364" font-weight="600" font-size="11">uprobe/</text>
+  <text x="190" y="403" text-anchor="middle" fill="#56d364" font-weight="600" font-size="11">uretprobe</text>
+  <text x="190" y="416" text-anchor="middle" fill="#8b949e" font-size="9">用户态函数</text>
+
+  <rect x="250" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#56d364" stroke-width="1.5"/>
+  <text x="300" y="388" text-anchor="middle" fill="#56d364" font-weight="600" font-size="11">tracepoint</text>
+  <text x="300" y="416" text-anchor="middle" fill="#8b949e" font-size="9">静态预设跟踪点</text>
+
+  <rect x="360" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="410" y="388" text-anchor="middle" fill="#e3b341" font-weight="600" font-size="11">XDP</text>
+  <text x="410" y="403" text-anchor="middle" fill="#8b949e" font-size="9">网卡驱动层</text>
+  <text x="410" y="416" text-anchor="middle" fill="#8b949e" font-size="9">最高性能包处理</text>
+
+  <rect x="470" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="520" y="388" text-anchor="middle" fill="#e3b341" font-weight="600" font-size="11">TC (traffic</text>
+  <text x="520" y="403" text-anchor="middle" fill="#e3b341" font-weight="600" font-size="11">control)</text>
+  <text x="520" y="416" text-anchor="middle" fill="#8b949e" font-size="9">网络层流量控制</text>
+
+  <rect x="580" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="630" y="388" text-anchor="middle" fill="#58a6ff" font-weight="600" font-size="11">socket filter</text>
+  <text x="630" y="403" text-anchor="middle" fill="#8b949e" font-size="9">套接字级过滤</text>
+  <text x="630" y="416" text-anchor="middle" fill="#8b949e" font-size="9">(seccomp 基础)</text>
+
+  <rect x="690" y="368" width="100" height="55" rx="5" fill="#0d1117" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="740" y="388" text-anchor="middle" fill="#58a6ff" font-weight="600" font-size="11">cgroup</text>
+  <text x="740" y="416" text-anchor="middle" fill="#8b949e" font-size="9">资源控制/策略</text>
+
+  <rect x="800" y="368" width="80" height="55" rx="5" fill="#0d1117" stroke="#8957e5" stroke-width="1.5"/>
+  <text x="840" y="388" text-anchor="middle" fill="#8957e5" font-weight="600" font-size="11">LSM</text>
+  <text x="840" y="403" text-anchor="middle" fill="#8b949e" font-size="9">安全策略</text>
+  <text x="840" y="416" text-anchor="middle" fill="#8b949e" font-size="9">hooks</text>
+
+  <!-- bottom label -->
+  <text x="450" y="508" text-anchor="middle" fill="#8b949e" font-size="11">BPF 程序在 hook 触发时以 JIT 本机速度运行，通过 Map 与用户态双向通信</text>
+</svg>
diff --git a/assets/diagrams/irq-flow.svg b/assets/diagrams/irq-flow.svg
new file mode 100644
index 0000000..d136724
--- /dev/null
+++ b/assets/diagrams/irq-flow.svg
@@ -0,0 +1,102 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 880 500" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="880" height="500" fill="#0d1117"/>
+  <text x="440" y="30" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">Linux 中断处理完整路径</text>
+
+  <defs>
+    <marker id="a" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#ff7b29"/>
+    </marker>
+    <marker id="ab" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#58a6ff"/>
+    </marker>
+  </defs>
+
+  <!-- Hardware -->
+  <rect x="30" y="55" width="120" height="55" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+  <text x="90" y="78" text-anchor="middle" fill="#f85149" font-weight="700">硬件设备</text>
+  <text x="90" y="95" text-anchor="middle" fill="#8b949e" font-size="10">NIC / Disk / Timer</text>
+
+  <line x1="150" y1="82" x2="190" y2="82" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+  <text x="168" y="76" text-anchor="middle" fill="#ff7b29" font-size="9">IRQ 信号</text>
+
+  <!-- APIC -->
+  <rect x="190" y="55" width="130" height="55" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+  <text x="255" y="78" text-anchor="middle" fill="#e3b341" font-weight="700">Local APIC</text>
+  <text x="255" y="95" text-anchor="middle" fill="#8b949e" font-size="10">中断优先级仲裁</text>
+
+  <line x1="320" y1="82" x2="360" y2="82" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+  <text x="340" y="76" text-anchor="middle" fill="#ff7b29" font-size="9">向量号</text>
+
+  <!-- IDT -->
+  <rect x="360" y="45" width="150" height="75" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+  <text x="435" y="68" text-anchor="middle" fill="#56d364" font-weight="700">IDT 查找</text>
+  <text x="435" y="85" text-anchor="middle" fill="#8b949e" font-size="10">256 个描述符</text>
+  <text x="435" y="100" text-anchor="middle" fill="#8b949e" font-size="10">IDTR → 向量 → 处理函数</text>
+
+  <line x1="510" y1="82" x2="550" y2="82" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- CPU state save -->
+  <rect x="550" y="45" width="160" height="75" rx="6" fill="#1a2028" stroke="#58a6ff" stroke-width="2"/>
+  <text x="630" y="68" text-anchor="middle" fill="#58a6ff" font-weight="700">CPU 自动压栈</text>
+  <text x="630" y="85" text-anchor="middle" fill="#8b949e" font-size="10">SS/RSP/RFLAGS/CS/RIP</text>
+  <text x="630" y="100" text-anchor="middle" fill="#8b949e" font-size="10">必要时切换栈 (TSS RSP0 / IST)</text>
+
+  <line x1="630" y1="120" x2="630" y2="155" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- Top half -->
+  <rect x="470" y="155" width="320" height="80" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+  <text x="630" y="178" text-anchor="middle" fill="#e3b341" font-weight="700" font-size="13">上半部 (Top Half) — 中断上下文</text>
+  <text x="630" y="195" text-anchor="middle" fill="#8b949e" font-size="10">· 本地关中断执行（IRQF_SHARED 只表示共享中断线）</text>
+  <text x="630" y="210" text-anchor="middle" fill="#8b949e" font-size="10">· 只做最少工作：读状态寄存器、ACK 硬件、唤醒下半部</text>
+
+  <!-- arrows down to bottom halves -->
+  <line x1="530" y1="235" x2="530" y2="275" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+  <line x1="630" y1="235" x2="630" y2="275" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+  <line x1="730" y1="235" x2="730" y2="275" stroke="#ff7b29" stroke-width="2" marker-end="url(#a)"/>
+
+  <!-- softirq -->
+  <rect x="410" y="275" width="150" height="70" rx="6" fill="#1a2028" stroke="#f85149" stroke-width="1.5"/>
+  <text x="485" y="298" text-anchor="middle" fill="#f85149" font-weight="700">softirq</text>
+  <text x="485" y="315" text-anchor="middle" fill="#8b949e" font-size="10">TIMER/NET_TX/NET_RX</text>
+  <text x="485" y="330" text-anchor="middle" fill="#8b949e" font-size="10">BLOCK/TASKLET/SCHED</text>
+
+  <!-- tasklet -->
+  <rect x="570" y="275" width="130" height="70" rx="6" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="635" y="298" text-anchor="middle" fill="#e3b341" font-weight="700">tasklet</text>
+  <text x="635" y="315" text-anchor="middle" fill="#8b949e" font-size="10">建立在 softirq 上</text>
+  <text x="635" y="330" text-anchor="middle" fill="#8b949e" font-size="10">不可并发，已弃用</text>
+
+  <!-- workqueue -->
+  <rect x="710" y="275" width="130" height="70" rx="6" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+  <text x="775" y="298" text-anchor="middle" fill="#56d364" font-weight="700">workqueue</text>
+  <text x="775" y="315" text-anchor="middle" fill="#8b949e" font-size="10">内核线程执行</text>
+  <text x="775" y="330" text-anchor="middle" fill="#8b949e" font-size="10">可睡眠/schedule()</text>
+
+  <!-- threaded IRQ box -->
+  <rect x="30" y="155" width="200" height="80" rx="6" fill="#1a2028" stroke="#8957e5" stroke-width="2"/>
+  <text x="130" y="178" text-anchor="middle" fill="#8957e5" font-weight="700" font-size="12">线程化中断 (threaded)</text>
+  <text x="130" y="195" text-anchor="middle" fill="#8b949e" font-size="10">IRQF_ONESHOT 标志</text>
+  <text x="130" y="210" text-anchor="middle" fill="#8b949e" font-size="10">上半部极简，下半部在</text>
+  <text x="130" y="222" text-anchor="middle" fill="#8b949e" font-size="10">irq/N-xxx 内核线程</text>
+
+  <line x1="230" y1="195" x2="280" y2="195" stroke="#8957e5" stroke-width="1.5" stroke-dasharray="5,3" marker-end="url(#ab)"/>
+  <text x="258" y="212" text-anchor="middle" fill="#8957e5" font-size="9">推荐新驱动使用</text>
+
+  <!-- /proc/interrupts -->
+  <rect x="30" y="295" width="370" height="70" rx="6" fill="#1a2028" stroke="#30363d" stroke-width="1"/>
+  <text x="215" y="315" text-anchor="middle" fill="#8b949e" font-weight="600" font-size="11">观测工具</text>
+  <text x="215" y="332" text-anchor="middle" fill="#8b949e" font-size="10">cat /proc/interrupts    — 每核中断计数</text>
+  <text x="215" y="347" text-anchor="middle" fill="#8b949e" font-size="10">watch -n1 cat /proc/softirqs  — softirq 统计</text>
+
+  <!-- ksoftirqd -->
+  <rect x="30" y="385" width="820" height="70" rx="6" fill="#161b22" stroke="#30363d" stroke-width="1"/>
+  <text x="440" y="408" text-anchor="middle" fill="#8b949e" font-weight="600" font-size="11">ksoftirqd/N — 每核 softirq 守护线程</text>
+  <text x="440" y="425" text-anchor="middle" fill="#8b949e" font-size="10">当 softirq 负载过重时由 ksoftirqd 接管执行，避免饿死普通进程</text>
+  <text x="440" y="440" text-anchor="middle" fill="#8b949e" font-size="10">现代内核默认 nice=0（早期版本为 19），可用 renice/chrt 调整；NET_RX 最高频触发（万兆网卡下可达 10M/s）</text>
+
+  <!-- IPI note -->
+  <rect x="725" y="50" width="140" height="65" rx="6" fill="#0d1117" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="795" y="70" text-anchor="middle" fill="#58a6ff" font-weight="600" font-size="11">IPI 核间中断</text>
+  <text x="795" y="87" text-anchor="middle" fill="#8b949e" font-size="9">TLB flush / reschedule</text>
+  <text x="795" y="102" text-anchor="middle" fill="#8b949e" font-size="9">call function / stop CPU</text>
+</svg>
diff --git a/assets/diagrams/sync-map.svg b/assets/diagrams/sync-map.svg
new file mode 100644
index 0000000..2ee0dc7
--- /dev/null
+++ b/assets/diagrams/sync-map.svg
@@ -0,0 +1,116 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 860 480" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="860" height="480" fill="#0d1117"/>
+  <text x="430" y="30" text-anchor="middle" font-size="18" font-weight="700" fill="#ff7b29">内核同步机制全景图</text>
+
+  <defs>
+    <marker id="a" markerWidth="8" markerHeight="8" refX="6" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L8,3 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+
+  <!-- Y axis label -->
+  <text x="15" y="250" text-anchor="middle" fill="#8b949e" font-size="10" transform="rotate(-90,15,250)">开销 / 复杂度</text>
+  <!-- X axis label -->
+  <text x="430" y="470" text-anchor="middle" fill="#8b949e" font-size="10">可睡眠性 / 场景</text>
+
+  <!-- Axes -->
+  <line x1="50" y1="50" x2="50" y2="440" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+  <line x1="50" y1="440" x2="840" y2="440" stroke="#30363d" stroke-width="1" marker-end="url(#a)"/>
+
+  <!-- Rows from bottom (simple) to top (complex) -->
+
+  <!-- atomic_t row -->
+  <rect x="60" y="390" width="160" height="42" rx="5" fill="#1a2028" stroke="#56d364" stroke-width="2"/>
+  <text x="140" y="407" text-anchor="middle" fill="#56d364" font-weight="700">atomic_t / atomic64_t</text>
+  <text x="140" y="422" text-anchor="middle" fill="#8b949e" font-size="10">LOCK XADD / CMPXCHG</text>
+
+  <rect x="230" y="390" width="130" height="42" rx="5" fill="#1a2028" stroke="#56d364" stroke-width="1.5"/>
+  <text x="295" y="407" text-anchor="middle" fill="#56d364" font-weight="600">per-CPU 变量</text>
+  <text x="295" y="422" text-anchor="middle" fill="#8b949e" font-size="10">不需要加锁，最快</text>
+
+  <text x="430" y="418" fill="#8b949e" font-size="10">▶ 不阻塞，适合计数器/统计</text>
+
+  <!-- spinlock row -->
+  <rect x="60" y="335" width="160" height="45" rx="5" fill="#1a2028" stroke="#e3b341" stroke-width="2"/>
+  <text x="140" y="353" text-anchor="middle" fill="#e3b341" font-weight="700">spinlock_t</text>
+  <text x="140" y="368" text-anchor="middle" fill="#8b949e" font-size="10">忙等待，禁止抢占</text>
+  <text x="140" y="378" text-anchor="middle" fill="#8b949e" font-size="9">spin_lock_irqsave</text>
+
+  <rect x="230" y="335" width="130" height="45" rx="5" fill="#1a2028" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="295" y="353" text-anchor="middle" fill="#e3b341" font-weight="600">rwlock_t</text>
+  <text x="295" y="368" text-anchor="middle" fill="#8b949e" font-size="10">读多写少场景</text>
+  <text x="295" y="378" text-anchor="middle" fill="#8b949e" font-size="9">读并发，写排他</text>
+
+  <text x="430" y="360" fill="#8b949e" font-size="10">▶ 持有时不可睡眠，适合中断上下文</text>
+
+  <!-- seqlock row -->
+  <rect x="60" y="280" width="160" height="45" rx="5" fill="#1a2028" stroke="#ff7b29" stroke-width="2"/>
+  <text x="140" y="298" text-anchor="middle" fill="#ff7b29" font-weight="700">seqlock_t</text>
+  <text x="140" y="311" text-anchor="middle" fill="#8b949e" font-size="10">写优先；读用序号检测</text>
+  <text x="140" y="322" text-anchor="middle" fill="#8b949e" font-size="9">时钟读取 jiffies/ktime</text>
+
+  <rect x="230" y="280" width="130" height="45" rx="5" fill="#1a2028" stroke="#ff7b29" stroke-width="1.5"/>
+  <text x="295" y="298" text-anchor="middle" fill="#ff7b29" font-weight="600">RCU</text>
+  <text x="295" y="311" text-anchor="middle" fill="#8b949e" font-size="10">读完全无锁</text>
+  <text x="295" y="322" text-anchor="middle" fill="#8b949e" font-size="9">netdev/路由表/task list</text>
+
+  <text x="430" y="307" fill="#8b949e" font-size="10">▶ 读密集型，写很少；读者免锁性能极优</text>
+
+  <!-- mutex row -->
+  <rect x="60" y="220" width="160" height="50" rx="5" fill="#1a2028" stroke="#58a6ff" stroke-width="2"/>
+  <text x="140" y="240" text-anchor="middle" fill="#58a6ff" font-weight="700">struct mutex</text>
+  <text x="140" y="256" text-anchor="middle" fill="#8b949e" font-size="10">可睡眠互斥，进程上下文</text>
+  <text x="140" y="268" text-anchor="middle" fill="#8b949e" font-size="9">mutex_lock / mutex_unlock</text>
+
+  <rect x="230" y="220" width="130" height="50" rx="5" fill="#1a2028" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="295" y="240" text-anchor="middle" fill="#58a6ff" font-weight="600">rt_mutex</text>
+  <text x="295" y="256" text-anchor="middle" fill="#8b949e" font-size="10">含优先级继承</text>
+  <text x="295" y="268" text-anchor="middle" fill="#8b949e" font-size="9">防优先级反转</text>
+
+  <text x="430" y="248" fill="#8b949e" font-size="10">▶ 持有时可睡眠，仅限进程上下文</text>
+
+  <!-- semaphore / rwsem row -->
+  <rect x="60" y="160" width="160" height="50" rx="5" fill="#1a2028" stroke="#8957e5" stroke-width="2"/>
+  <text x="140" y="180" text-anchor="middle" fill="#8957e5" font-weight="700">semaphore</text>
+  <text x="140" y="196" text-anchor="middle" fill="#8b949e" font-size="10">计数信号量（可>1）</text>
+  <text x="140" y="208" text-anchor="middle" fill="#8b949e" font-size="9">down()/up() 可睡眠</text>
+
+  <rect x="230" y="160" width="130" height="50" rx="5" fill="#1a2028" stroke="#8957e5" stroke-width="1.5"/>
+  <text x="295" y="180" text-anchor="middle" fill="#8957e5" font-weight="600">rw_semaphore</text>
+  <text x="295" y="196" text-anchor="middle" fill="#8b949e" font-size="10">读写信号量</text>
+  <text x="295" y="208" text-anchor="middle" fill="#8b949e" font-size="9">down_read/down_write</text>
+
+  <text x="430" y="188" fill="#8b949e" font-size="10">▶ 少用；mutex 优先；信号量更通用</text>
+
+  <!-- futex -->
+  <rect x="60" y="100" width="160" height="50" rx="5" fill="#1a2028" stroke="#f85149" stroke-width="2"/>
+  <text x="140" y="120" text-anchor="middle" fill="#f85149" font-weight="700">futex</text>
+  <text x="140" y="137" text-anchor="middle" fill="#8b949e" font-size="10">用户态快路径，无争用</text>
+  <text x="140" y="149" text-anchor="middle" fill="#8b949e" font-size="9">时无系统调用；glibc</text>
+
+  <rect x="230" y="100" width="130" height="50" rx="5" fill="#1a2028" stroke="#f85149" stroke-width="1.5"/>
+  <text x="295" y="120" text-anchor="middle" fill="#f85149" font-weight="600">completion</text>
+  <text x="295" y="137" text-anchor="middle" fill="#8b949e" font-size="10">等待单次事件完成</text>
+  <text x="295" y="149" text-anchor="middle" fill="#8b949e" font-size="9">wait_for_completion()</text>
+
+  <text x="430" y="128" fill="#8b949e" font-size="10">▶ futex: pthread_mutex/cond 底层；completion: 驱动常用</text>
+
+  <!-- lockdep note -->
+  <rect x="580" y="55" width="270" height="380" rx="6" fill="#161b22" stroke="#30363d"/>
+  <text x="715" y="78" text-anchor="middle" fill="#e3b341" font-weight="600" font-size="11">🔍 lockdep — 死锁检测</text>
+  <text x="715" y="98" text-anchor="middle" fill="#8b949e" font-size="10">CONFIG_PROVE_LOCKING</text>
+  <text x="715" y="115" text-anchor="middle" fill="#8b949e" font-size="10">运行时追踪锁获取顺序</text>
+  <text x="715" y="132" text-anchor="middle" fill="#8b949e" font-size="10">检测潜在的 ABBA 死锁</text>
+  <line x1="620" y1="145" x2="830" y2="145" stroke="#30363d" stroke-width="1"/>
+  <text x="715" y="165" text-anchor="middle" fill="#8b949e" font-size="10">内存屏障选择:</text>
+  <text x="715" y="183" text-anchor="middle" fill="#56d364" font-size="10">smp_rmb() / smp_wmb()</text>
+  <text x="715" y="200" text-anchor="middle" fill="#8b949e" font-size="10">smp_mb() / smp_store_mb()</text>
+  <text x="715" y="217" text-anchor="middle" fill="#8b949e" font-size="10">READ_ONCE() / WRITE_ONCE()</text>
+  <line x1="620" y1="228" x2="830" y2="228" stroke="#30363d" stroke-width="1"/>
+  <text x="715" y="248" text-anchor="middle" fill="#8b949e" font-size="10">选择决策树:</text>
+  <text x="715" y="267" text-anchor="middle" fill="#56d364" font-size="10">中断上下文? → spinlock</text>
+  <text x="715" y="285" text-anchor="middle" fill="#56d364" font-size="10">读多写少? → RCU/rwlock</text>
+  <text x="715" y="303" text-anchor="middle" fill="#56d364" font-size="10">计数器? → atomic/percpu</text>
+  <text x="715" y="321" text-anchor="middle" fill="#56d364" font-size="10">进程上下文睡眠? → mutex</text>
+  <text x="715" y="339" text-anchor="middle" fill="#56d364" font-size="10">用户态? → futex/pthread</text>
+</svg>

From b1cd757f6e6f4a17e840f67041554fedf4dba474 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 12:17:30 +0000
Subject: [PATCH 08/10] docs: fix review comments in ch14 and ch15

- ch14 KASLR: expand entropy sources to include RDRAND/RDSEED and
  cite arch/x86/boot/compressed/kaslr.c for randomization location
- ch15 KASAN: clarify CONFIG_KASAN_GENERIC and CONFIG_KASAN_HW_TAGS
  are mutually exclusive (software vs hardware implementation)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                      | 13 ++++++++-----
 .../README.md"                                      |  6 +++---
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md" "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
index 431e16f..14dce2c 100644
--- "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
+++ "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
@@ -912,11 +912,14 @@ x86-64 随机化范围（4级分页）：
   物理地址：在 64GB 范围内随机选择加载地址
   模块区域：在内核附近 1GB 内随机分配
 
-随机化过程：
-  1. GRUB 传递随机种子（EFI Random Protocol 或 TSC）
-  2. 解压代码选择随机偏移量
-  3. 重定位内核到随机地址
-  4. 更新页表
+随机化过程（arch/x86/boot/compressed/kaslr.c）：
+  1. 熵来源（多源混合）：
+     - EFI Random Protocol（UEFI 固件提供）
+     - RDRAND/RDSEED 指令（Intel/AMD 硬件随机数）
+     - TSC（时间戳计数器，作为补充熵）
+  2. 解压代码（kaslr.c: choose_random_location()）选择随机偏移
+  3. 重定位内核到随机物理地址
+  4. 更新页表映射，跳入随机化后的内核地址
 ```
 
 ### KASLR 与调试
diff --git "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md" "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
index 9a4451b..0638a6a 100644
--- "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
+++ "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
@@ -578,11 +578,11 @@ perf script > after.perf
 ### 配置与原理
 
 ```bash
-# 内核配置
+# 内核配置（GENERIC 与 HW_TAGS 互斥，二选一）
 CONFIG_KASAN=y
 CONFIG_KASAN_GENERIC=y        # 软件实现（所有架构）
-# 或
-CONFIG_KASAN_HW_TAGS=y        # 硬件实现（ARM MTE，低开销）
+# 或（二选一，不可同时启用）
+CONFIG_KASAN_HW_TAGS=y        # 硬件实现（ARM MTE，低开销，需要 ARMv8.5+）
 
 # 开销：
 # - 内存：每8字节对应1字节 shadow（内存×2）

From 36aebd163918e7018c2c976004d991c0774b3ebe Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 12:29:45 +0000
Subject: [PATCH 09/10] docs: deepen all 9 README files with expert-level
 content and SVG refs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 00-学习路线: add arch-overview.svg ref, 推荐书单与资源 section
  (ULK/LKD/LDD3/Kerrisk books, online resources, video courses),
  20-question expert-level checklist covering all subsystems

- 01-经典版本选择: add arch-overview.svg ref, detailed version comparison
  table (0.11→6.1 LTS) with release year, LoC, and milestone features;
  subsystem evolution timeline (scheduler/mm/IO)

- 02-环境搭建: add KGDB remote debugging (two-machine setup, GDB session
  examples), clangd IDE navigation (compile_commands.json, VS Code/nvim
  config), kernel config tips (defconfig vs tinyconfig, ccache, Kconfig
  debug options), crash tool vmcore analysis workflow

- 03-进程管理: add vm-layout.svg ref, thread_info fields (flags/
  preempt_count/addr_limit), ASCII kernel stack layout diagram,
  do_fork→copy_process→wake_up_new_task 5.x path, __switch_to_asm
  x86_64 assembly, zombie/orphan process lifecycle with wait4() path

- 04-内存管理: add page-table.svg ref, NUMA node/zone/page structure,
  memory zones table (DMA/DMA32/NORMAL/MOVABLE) with ranges, kswapd
  watermark/LRU mechanics, OOM killer scoring algorithm, THP
  (/sys/kernel/mm/transparent_hugepage) and KSM internals

- 05-文件系统: add vfs-objects.svg ref, page cache writeback (dirty_ratio
  params, pdflush), fsync/fdatasync/msync comparison table, io_uring
  SQE/CQE ring buffer with code example, ext4 journal modes
  (journal/ordered/writeback) tradeoff table

- 06-系统调用: add syscall-flow.svg ref, vDSO mechanism (gettimeofday/
  clock_gettime/getcpu acceleration, ~10ns vs ~200ns), seccomp BPF
  filter example with SECCOMP_RET_* codes, step-by-step guide to add
  new syscall in Linux 5.x, 32-bit vs 64-bit syscall number table

- 07-设备驱动: add driver-model.svg ref, MSI/MSI-X vs INTx comparison
  with pci_enable_msix_exact() example, DMA API (dma_alloc_coherent vs
  dma_map_single vs dma_map_sg) with IOMMU explanation, complete devm_*
  function reference list, full platform driver example with DTS binding
  (LED driver with clocks/GPIO/IRQ/devm cleanup)

- 08-网络子系统: add tcp-handshake.svg ref, detailed sk_buff ASCII diagram
  (head/data/tail/end with headroom/tailroom), complete RX path call
  chain (NIC→NAPI→netif_receive_skb→ip_rcv→tcp_v4_rcv), netfilter 5
  hooks table with iptables chain mapping, conntrack states and
  /proc/net/nf_conntrack, XDP return codes with Mpps performance
  numbers and eBPF drop-UDP example, TCP tuning sysctl reference

- 09-同步机制: add sync-map.svg ref, TSO vs weak ordering (ARM) with
  smp_mb/smp_wmb/smp_rmb/smp_load_acquire API, RCU grace period deep
  dive (synchronize_rcu vs call_rcu, list_replace_rcu pattern), RCU
  lock-free list/hashtable examples, futex internals (hash bucket,
  FUTEX_WAIT/WAKE path, pi_futex priority inheritance), per-CPU
  variables (DEFINE_PER_CPU, get_cpu_var, this_cpu_* family)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                |  76 ++++
 .../README.md"                                |  45 +++
 .../README.md"                                | 292 +++++++++++++++
 .../README.md"                                | 207 +++++++++++
 .../README.md"                                | 205 +++++++++++
 .../README.md"                                | 186 ++++++++++
 .../README.md"                                | 227 ++++++++++++
 .../README.md"                                | 297 +++++++++++++++
 .../README.md"                                | 287 +++++++++++++++
 .../README.md"                                | 337 ++++++++++++++++++
 10 files changed, 2159 insertions(+)

diff --git "a/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md" "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
index 5014473..5dd5ea1 100644
--- "a/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
+++ "b/00-\345\255\246\344\271\240\350\267\257\347\272\277/README.md"
@@ -3,6 +3,8 @@
 > 本章给出一条从零基础到能够阅读并修改内核源码的完整路线，
 > 包含各阶段目标、推荐时间、学习方法与检验标准。
 
+![Linux 内核架构总览](../assets/diagrams/arch-overview.svg)
+
 ---
 
 ## 总览：四个阶段
@@ -197,3 +199,77 @@ MODULE_LICENSE("GPL");
 > **核心原则**：不要只读，要 **画图 + 实验**。
 > 每读完一个函数，先画出它操作的数据结构，
 > 再在 QEMU 中用 GDB 跟踪验证自己的理解。
+
+---
+
+## 推荐书单与资源
+
+### 📚 书籍（按学习阶段排序）
+
+| 书名 | 作者 | 定位 | 说明 |
+|------|------|------|------|
+| **Understanding the Linux Kernel** | Bovet & Cesati | 核心参考 | 最全面的内核原理书，覆盖 2.6 版本；建议配合源码阅读 |
+| **Linux Kernel Development** | Robert Love | 入门进阶 | 文字友好，覆盖所有主要子系统，是入门 2.6 的最佳伴侣 |
+| **Linux Device Drivers** | Corbet, Rubini, Kroah-Hartman | 驱动开发 | 驱动开发圣经，**免费在线版**：[lwn.net/Kernel/LDD3](https://lwn.net/Kernel/LDD3/) |
+| **Professional Linux Kernel Architecture** | Wolfgang Mauerer | 深度进阶 | 覆盖面极广，适合有一定基础后系统性补全 |
+| **The Linux Programming Interface** | Michael Kerrisk | 系统编程 | 从用户空间角度理解系统调用，与内核知识互补 |
+| **Linux内核完全注释** | 赵炯 | 中文入门 | 专门针对 Linux 0.11，逐行注释，非常适合初学者 |
+
+### 🌐 在线资源
+
+| 资源 | 链接 | 必读程度 |
+|------|------|---------|
+| **kernel.org 官方文档** | [kernel.org/doc](https://www.kernel.org/doc/html/latest/) | ⭐⭐⭐ 必读 |
+| **LWN.net** | [lwn.net](https://lwn.net) | ⭐⭐⭐ **必读**，内核开发最权威的新闻/技术文章站 |
+| **kernelnewbies.org** | [kernelnewbies.org](https://kernelnewbies.org) | ⭐⭐ 推荐，每个版本的变更摘要极有价值 |
+| **Elixir Cross Referencer** | [elixir.bootlin.com](https://elixir.bootlin.com) | ⭐⭐⭐ 必备，在线代码索引，支持跨版本符号跳转 |
+| **Linux Kernel Map** | [makelinux.github.io/kernel/map](https://makelinux.github.io/kernel/map/) | ⭐⭐ 直观的内核结构可视化地图 |
+| **LKML（内核邮件列表）** | [lkml.org](https://lkml.org) | ⭐⭐ 了解真实开发讨论 |
+
+### 🎬 视频课程
+
+| 课程 | 说明 |
+|------|------|
+| **MIT 6.828 Operating System Engineering** | 最顶级的操作系统课，xv6 实验贯穿全课，配合内核学习效果极佳 |
+| **David Beazley — Python Concurrency From the Ground Up** | 虽然是 Python 演讲，但对并发、GIL、内核调度的讲解极有深度 |
+| **Linux Foundation 培训课程** | LFD420（内核内部原理）适合有基础后系统学习 |
+| **Bootlin 内核培训材料** | [bootlin.com/training](https://bootlin.com/training/) 的 PDF 免费下载，质量极高 |
+
+---
+
+## 检验学习效果的问题清单
+
+> 能独立、准确地回答以下问题，说明你已达到**专家级理解**。
+> 建议每完成一个阶段后，尝试不查资料口述答案。
+
+### 进程管理
+1. `fork()` 返回两次的原理是什么？内核是如何让父子进程分别返回不同值的？（提示：`pt_regs.eax`）
+2. 线程和进程在 Linux 内核中的**本质区别**是什么？`clone()` 的哪些 flag 决定了"线程"？
+3. `task_struct` 中的 `thread_info` 存放在哪里？为什么能用 `esp & ~0x1FFF` 快速找到它？
+4. 僵尸进程（Zombie）是如何产生的？为什么 `wait()` 必须被调用？孤儿进程如何处理？
+
+### 内存管理
+5. x86_64 四级页表的结构是什么？一次虚拟地址翻译需要几次内存访问？TLB 的作用是什么？
+6. 伙伴系统解决什么问题？Slab 分配器又解决什么问题？两者如何协作？
+7. 缺页中断（Page Fault）有哪几种情况？内核分别如何处理：匿名页、文件映射页、写时复制？
+8. OOM Killer 是如何选择"牺牲"进程的？`/proc/PID/oom_score` 的计算方式是什么？
+
+### 文件系统
+9. VFS 的四大核心对象（`super_block`/`inode`/`dentry`/`file`）分别代表什么？它们的生命周期有何不同？
+10. `dcache`（目录项缓存）的作用是什么？路径查找 `/home/user/file` 需要几次磁盘 IO？
+11. `fsync()` 和 `fdatasync()` 的区别是什么？ext4 三种 journal 模式（journal/ordered/writeback）各有什么代价？
+12. `io_uring` 相比传统 `read()`/`write()` 的核心优势是什么？SQE 和 CQE 各代表什么？
+
+### 系统调用
+13. `int 0x80` 和 `sysenter` 两种系统调用机制的开销差异在哪里？vDSO 如何绕过内核态切换？
+14. `seccomp BPF` 的工作原理是什么？容器运行时（如 Docker）如何利用它限制系统调用？
+15. 系统调用返回用户态前，内核会检查哪些"待办事项"？（信号、调度、TIF 标志）
+
+### 网络子系统
+16. `sk_buff` 的 `head/data/tail/end` 四个指针的含义是什么？为什么要有 headroom？
+17. 一个 TCP 数据包从网卡到用户进程 `recv()` 返回，经过了哪些内核函数？（完整调用链）
+18. netfilter 的五个 hook 点分别在网络栈的哪个位置？`iptables` 的 `PREROUTING/INPUT/OUTPUT` 各对应哪个 hook？
+
+### 同步机制
+19. `spin_lock` 和 `mutex` 的本质区别是什么？在中断上下文为什么不能使用 `mutex`？
+20. RCU 的"宽限期"（Grace Period）是如何定义的？`synchronize_rcu()` 和 `call_rcu()` 的区别是什么？在什么场景下选择哪个？
diff --git "a/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md" "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
index 5d7aa06..242226b 100644
--- "a/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
+++ "b/01-\347\273\217\345\205\270\347\211\210\346\234\254\351\200\211\346\213\251/README.md"
@@ -3,6 +3,8 @@
 > 本章对比分析各个"里程碑"内核版本，帮助你选择最适合学习的版本，
 > 并说明每个版本在现代内核中的"基因"贡献。
 
+![Linux 内核架构总览](../assets/diagrams/arch-overview.svg)
+
 ---
 
 ## 版本演进时间线
@@ -221,3 +223,46 @@ git clone https://github.com/karottc/linux-0.11
 
 > **建议**：学习时两个版本都下载，在阅读 0.11 的同时
 > 偶尔对照 2.6.0 的相同机制，理解演进方向。
+
+---
+
+## 内核版本横向对比详表
+
+> 下表从发布年份、代码规模、架构支持和里程碑特性四个维度，
+> 系统梳理各学习版本的"基因贡献"，帮助你建立版本演进的整体认知。
+
+| 版本 | 发布年份 | 代码行数（约）| 架构支持 | 里程碑特性 |
+|------|---------|-------------|---------|-----------|
+| **0.11** | 1991 年 12 月 | 14,000 行 | x86 only | 完整 OS 最小实现：进程/内存/文件/驱动 |
+| **1.0** | 1994 年 3 月 | 170,000 行 | x86 only（i386） | 首个"正式版"；引入 TCP/IP 网络栈、NFS；Alpha/SPARC 等移植自 1.2 起才进入主线 |
+| **2.4** | 2001 年 1 月 | 3,000,000 行 | 多架构 + SMP | LVM、ext3、netfilter/iptables、USB、完整 SMP |
+| **2.6.0** | 2003 年 12 月 | 6,000,000 行 | 多架构 | O(1) 调度器、kobject/sysfs、NPTL 线程、RCU 成熟（inotify 于后续的 2.6.13 加入） |
+| **3.10 LTS** | 2013 年 6 月 | 15,000,000 行 | 多架构 + ARM64 | cgroups v1 成熟、KVM 优化、TCP 快速打开（TFO） |
+| **4.19 LTS** | 2018 年 10 月 | 20,000,000 行 | 多架构 | XDP/eBPF 成熟、WireGuard 前身、多队列块层（blk-mq）稳定 |
+| **5.15 LTS** | 2021 年 10 月 | 28,000,000 行 | 多架构 + RISC-V | NTFS3 驱动、ksmbd、io_uring 成熟、PREEMPT_RT 锁机制大部分合入（完整 PREEMPT_RT 直到 6.12 才进入主线；Rust 支持尚未合入） |
+| **6.1 LTS** | 2022 年 12 月 | 30,000,000 行 | 多架构 | **Rust 基础设施首次合入主线**（仅最小支持与示例，尚无实用驱动）、MGLRU、maple tree、AMD/Intel GPU 大幅更新、KVM arm64 增强 |
+
+### 关键里程碑特性说明
+
+```
+调度器演进：
+  2.4.x  → O(n) 调度器（遍历所有进程，规模差）
+  2.6.0  → O(1) 调度器（两个优先级数组 + 位图，固定开销）
+  2.6.23 → CFS（完全公平调度器，红黑树 + vruntime）[沿用至 6.5；6.6 起由 EEVDF 取代]
+  5.14   → Core Scheduling（防侧信道攻击的 CPU 核心调度）
+
+内存管理演进：
+  0.11   → 段页式，mem_map[] 字节数组
+  2.6.0  → 伙伴系统 + Slab + NUMA 基础框架
+  2.6.32 → KSM（内核相同页合并）
+  2.6.38 → THP（透明大页）
+  3.11   → zswap（压缩交换缓存）
+  6.1    → maple tree 替代 VMA 红黑树
+
+文件 I/O 演进：
+  2.2    → sendfile（零拷贝）
+  2.6.17 → splice
+  4.18   → Linux AIO 增加 IOCB_CMD_POLL（io_uring 出现之前的异步 I/O 改进）
+  5.1    → io_uring 正式引入（SQE/CQE 环形队列）
+  5.6    → io_uring 支持网络操作（send/recv）
+```
diff --git "a/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md" "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
index e0ef1a0..8566c0d 100644
--- "a/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
+++ "b/02-\347\216\257\345\242\203\346\220\255\345\273\272/README.md"
@@ -282,3 +282,295 @@ gdb vmlinux
 
 > **提示**：调试内核时务必加 `nokaslr` 参数（关闭地址随机化），
 > 否则每次启动内核地址不同，GDB 符号表会对不上。
+
+---
+
+## 七、KGDB：真实硬件上的远程内核调试
+
+QEMU 内置 GDB stub，但在真实硬件上需要使用 **KGDB**（内核内置的 GDB 代理）。
+
+### 7.1 编译时开启 KGDB
+
+```bash
+make menuconfig
+# 进入：Kernel hacking → Kernel debugging
+#   [*] KGDB: kernel debugger
+#   [*] KGDB: use kgdb over the serial console (kgdboc)
+#   [*] KGDB: internal test suite
+```
+
+### 7.2 通过串口连接（两台机器）
+
+```bash
+# 被调试机（target）：在启动参数中加入
+# /boot/grub/grub.cfg 或 /etc/default/grub：
+GRUB_CMDLINE_LINUX="kgdboc=ttyS0,115200 kgdbwait"
+# kgdbwait：启动时立即进入 KGDB 等待状态
+
+# 触发断点（在运行中的内核）：
+echo g > /proc/sysrq-trigger    # 通过 SysRq 进入 KGDB
+
+# 调试机（host）：通过串口连接
+gdb vmlinux
+(gdb) set serial baud 115200      # 旧版 GDB 中写作 set remotebaud 115200
+(gdb) target remote /dev/ttyS0    # 串口设备
+(gdb) target remote /dev/ttyUSB0  # USB 转串口
+
+# 或通过网络（kgdb over ethernet；kgdboe 是树外模块，主线内核并未包含）
+(gdb) target remote udp:192.168.1.100:6443
+```
+
+### 7.3 常用 KGDB 调试会话
+
+```gdb
+# 连接后查看所有 CPU 的调用栈
+(gdb) thread apply all bt
+
+# 切换到特定 CPU 上下文
+(gdb) thread 2
+(gdb) bt
+
+# 打印内核链表（以进程链表为例）
+(gdb) set $task = &init_task
+(gdb) set $task = (struct task_struct *)((char *)$task->tasks.next - \
+                  (long)&((struct task_struct *)0)->tasks)
+(gdb) printf "pid=%d comm=%s\n", $task->pid, $task->comm
+
+# 动态断点（修改内核变量后继续）
+(gdb) break tcp_v4_rcv
+(gdb) condition 1 ((struct tcphdr*)skb->data)->dest == 0x5000  # port 80
+(gdb) continue
+
+# 查看 per-CPU 变量（偏移量方式，以 CPU0 的运行队列 runqueues 为例）
+(gdb) p/x __per_cpu_offset[0]
+(gdb) p ((struct rq *)(__per_cpu_offset[0] + (unsigned long)&runqueues))->nr_switches
+```
+
+---
+
+## 八、clangd：为内核源码配置智能 IDE 导航
+
+`clangd` 是基于 Clang 的 LSP（语言服务器），为内核代码提供跳转、自动补全、引用查找。
+
+### 8.1 生成 compile_commands.json
+
+```bash
+# 方法一：使用 bear（推荐）
+sudo apt install bear
+cd linux-5.15
+bear -- make -j$(nproc) 2>&1 | tail -5
+# 生成 compile_commands.json（约 1GB）
+
+# 方法二：使用内核自带脚本（内核 5.x+）
+make compile_commands.json
+# 利用 scripts/clang-tools/gen_compile_commands.py
+
+# 方法三：针对特定子系统（节省时间）
+bear -- make drivers/net/ -j$(nproc)
+```
+
+### 8.2 VS Code 配置
+
+```bash
+# 安装 clangd 扩展（clangd language server）
+# 在 VS Code 扩展市场搜索 "clangd"，安装 "clangd" by LLVM
+
+# .vscode/settings.json
+{
+    "clangd.arguments": [
+        "--background-index",        // 后台建立索引
+        "--clang-tidy",              // 启用静态分析
+        "--completion-style=detailed",
+        "--header-insertion=never",
+        "--query-driver=/usr/bin/gcc,/usr/bin/arm-linux-gnueabi-gcc"
+    ],
+    "clangd.path": "/usr/bin/clangd-14",
+    "editor.semanticHighlighting.enabled": true
+}
+```
+
+### 8.3 vim/neovim 配置（nvim-lspconfig）
+
+```lua
+-- ~/.config/nvim/init.lua
+require('lspconfig').clangd.setup({
+    cmd = {
+        'clangd',
+        '--background-index',
+        '--clang-tidy',
+        '--query-driver=/usr/bin/gcc*,/usr/bin/arm*',
+    },
+    root_dir = require('lspconfig.util').root_pattern(
+        'compile_commands.json', 'Makefile'
+    ),
+})
+-- 快捷键：gd(跳转定义) gr(查找引用) K(悬停文档)
+```
+
+### 8.4 常用导航技巧（内核特有）
+
+```bash
+# container_of 宏跳转：clangd 能正确解析
+# 在 task_struct *t 处，跳转到 mm_struct 定义：直接 gd
+
+# 查找所有调用 schedule() 的地方
+# VS Code: 右键 → Find All References
+
+# 解析 SYSCALL_DEFINE 宏展开
+# clangd 能展开，直接跳转到 sys_read 的参数定义
+
+# 注意：__attribute__ 和内联汇编可能导致部分误报，正常现象
+```
+
+---
+
+## 九、内核编译优化技巧
+
+### 9.1 make defconfig vs tinyconfig
+
+```bash
+# defconfig：生成针对当前架构的"合理默认"配置
+# 适合：调试、学习、在 QEMU 中运行
+make defconfig
+# 编译时间：约 5~10 分钟（现代机器）
+# 生成内核大小：约 8~12 MB (bzImage)
+
+# tinyconfig：极度精简配置（几乎关闭所有功能）
+# 适合：快速验证编译、CI 测试、最小内核实验
+make tinyconfig
+# 编译时间：约 30~60 秒
+# 生成内核大小：约 500KB
+
+# allmodconfig：尽可能多开模块（测试编译覆盖率用）
+make allmodconfig
+
+# 只重新编译修改的文件（增量编译）
+make -j$(nproc)   # 第二次及后续编译，速度大幅提升
+
+# 使用 ccache 加速重复编译
+sudo apt install ccache
+# 注意：顶层 Makefile 会自行给 CC/HOSTCC 赋值，export 的环境变量会被覆盖，
+# 必须在 make 命令行上传入才生效
+make CC="ccache gcc" HOSTCC="ccache gcc" -j$(nproc)
+ccache -s   # 查看命中率
+```
+
+### 9.2 快速编译特定子系统
+
+```bash
+# 只编译 drivers/net/ 子系统
+make drivers/net/ -j$(nproc)
+
+# 只编译单个模块
+make M=drivers/net/ethernet/intel/e1000/ -j$(nproc)
+
+# 编译时只看警告/错误（过滤掉正常输出）
+make 2>&1 | grep -E "^(.*error:|.*warning:)" | head -30
+
+# 使用 LLVM/Clang 编译内核（5.x+ 正式支持）
+make CC=clang LD=ld.lld -j$(nproc)
+```
+
+### 9.3 关键 Kconfig 选项（调试用）
+
+```bash
+# 在 .config 中启用或通过 menuconfig 设置：
+
+# 必须开启（调试时）：
+CONFIG_DEBUG_INFO=y            # 包含调试符号
+CONFIG_FRAME_POINTER=y         # 保留帧指针（GDB 调用栈更准确）
+CONFIG_KALLSYMS=y              # 内核符号表（oops 时显示函数名）
+CONFIG_KALLSYMS_ALL=y          # 包含所有符号
+
+# 推荐开启（检测问题）：
+CONFIG_DEBUG_SLAB=y            # Slab 越界检测
+CONFIG_KASAN=y                 # 内核地址消毒（类似 AddressSanitizer）
+CONFIG_UBSAN=y                 # 未定义行为检测
+CONFIG_LOCKDEP=y               # 死锁检测（性能影响大）
+CONFIG_PROVE_LOCKING=y         # 锁依赖验证
+
+# 性能分析：
+CONFIG_FTRACE=y                # 函数追踪框架
+CONFIG_PERF_EVENTS=y           # perf 支持
+CONFIG_BPF_SYSCALL=y           # eBPF 系统调用
+```
+
+---
+
+## 十、crash 工具：分析内核转储文件（vmcore）
+
+`crash` 是分析 Linux 内核崩溃转储（`/proc/vmcore` 或离线 `vmcore` 文件）的权威工具。
+
+### 10.1 安装与准备
+
+```bash
+# 安装 crash 工具
+sudo apt install crash
+
+# 安装 kdump（生成崩溃转储）
+sudo apt install kdump-tools linux-crashdump
+sudo systemctl enable kdump-tools   # Debian/Ubuntu 的服务名是 kdump-tools（RHEL 系为 kdump）
+# 在启动参数中预留内存：crashkernel=256M
+
+# 触发测试崩溃（会重启！）
+echo c > /proc/sysrq-trigger
+# 重启后 vmcore 保存在 /var/crash/
+
+# 也可以分析运行中的系统（实时调试）
+sudo crash vmlinux /proc/kcore
+```
+
+### 10.2 分析 vmcore
+
+```bash
+# 打开崩溃转储
+crash vmlinux /var/crash/2024-01-01/vmcore
+
+# crash 常用命令
+crash> bt              # 显示崩溃时的调用栈
+crash> bt -a           # 所有 CPU 的调用栈
+crash> ps             # 所有进程列表（类似 ps aux）
+crash> ps | grep UN   # 找所有 D 状态（uninterruptible）进程；crash 的 ST 列将其显示为 UN
+
+# 查看内存分配
+crash> kmem -i        # 内存使用概览
+crash> kmem -s        # slab 缓存统计
+crash> kmem -S task_struct  # 特定类型的 slab
+
+# 查看进程详情
+crash> task 1234      # 显示 PID 1234 的 task_struct
+crash> files 1234     # PID 1234 的打开文件
+crash> vm 1234        # PID 1234 的虚拟内存映射
+
+# 查看内核日志（崩溃前的 dmesg）
+crash> log            # 完整内核日志
+crash> log | tail -50 # 最后 50 行
+
+# 查看系统与机器信息（寄存器内容见 bt 输出中的异常帧）
+crash> sys            # 系统信息
+crash> mach           # 机器信息（CPU 数量、内存大小等）
+
+# 定位崩溃地址
+crash> dis -l ffffffff81234567    # 反汇编 + 源码行号
+crash> gdb x/10i 0xffffffff81234567
+```
+
+### 10.3 典型崩溃分析流程
+
+```
+1. crash> bt        → 看崩溃时调用栈，找崩溃函数
+2. crash> log       → 看崩溃前的内核消息（Oops 信息）
+3. crash> dis -l <addr>  → 定位到具体源码行
+4. crash> struct task_struct <addr>  → 查看崩溃时的数据结构
+5. crash> kmem -s   → 检查是否有内存损坏（slab 错误）
+
+# 示例：分析 NULL 指针解引用
+crash> bt
+ #0 [ffffffff81a00000] machine_kexec+0x...
+ #1 [ffffffff81234abc] do_exit+0x...
+ #2 [ffffffff81234def] my_driver_function+0x28  ← 崩溃点
+
+crash> dis -l ffffffff81234def
+0xffffffff81234def <my_driver_function+40>: mov 0x10(%rax),%rbx
+# %rax 为 NULL → 解引用 NULL+0x10 → 崩溃
+```
diff --git "a/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md" "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
index 0c18b7d..19a1a97 100644
--- "a/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
+++ "b/03-\350\277\233\347\250\213\347\256\241\347\220\206/README.md"
@@ -8,6 +8,8 @@
 
 ## 1. 进程的本质：task_struct
 
+![进程虚拟地址空间](../assets/diagrams/vm-layout.svg)
+
 一个进程在内核中就是一个 `task_struct` 结构体。
 
 ### Linux 0.11 的 task_struct
@@ -435,3 +437,208 @@ void __switch_to(struct task_struct *prev, struct task_struct *next)
 > continue
 > end
 ```
+
+---
+
+## 8. thread_info 与内核栈深度解析
+
+### 8.1 thread_info 的关键字段
+
+`thread_info` 存放于内核栈底部，包含几个对调度和安全至关重要的字段：
+
+```c
+/* include/asm-i386/thread_info.h — Linux 2.6.0 */
+struct thread_info {
+    struct task_struct  *task;       /* 指向 task_struct */
+    struct exec_domain  *exec_domain;
+    unsigned long        flags;      /* 低级标志位（TIF_NEED_RESCHED 等）*/
+    unsigned long        status;     /* 线程同步标志 */
+    __u32                cpu;        /* 当前所在 CPU */
+    int                  preempt_count; /* 抢占计数器 */
+    mm_segment_t         addr_limit;    /* 地址空间限制（用户/内核）*/
+    struct restart_block restart_block; /* 信号打断后重启 */
+};
+
+/* 关键标志位（flags 字段）*/
+#define TIF_SIGPENDING     2   /* 有待处理信号 */
+#define TIF_NEED_RESCHED   3   /* 需要重新调度（设此位→下次调度点切换）*/
+#define TIF_SINGLESTEP     4   /* 单步调试中 */
+#define TIF_IRET           5   /* 强制 iret 而非 sysexit 返回 */
+#define TIF_SYSCALL_AUDIT  7   /* 系统调用审计中（2.6 后续版本加入）*/
+#define TIF_POLLING_NRFLAG 16  /* 轮询 TIF_NEED_RESCHED（减少 IPI）*/
+
+/* preempt_count 编码：
+   bits 0..7:  抢占计数（非0 = 不可抢占）
+   bits 8..15: softirq 嵌套深度
+   bits 16..27: hardirq 嵌套深度
+   bit 28:     NMI 中 */
+#define in_interrupt()    (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
+#define in_atomic()       (preempt_count() != 0)
+```
+
+### 8.2 内核栈布局（ASCII 图）
+
+```
+一个进程的内核栈（THREAD_SIZE = 8KB on x86，16KB on x86_64）：
+
+高地址 (栈顶)
+┌──────────────────────────────────┐ ← esp0 = task->thread.esp0 (TSS 中)
+│                                  │
+│  pt_regs（系统调用/中断陷入时）  │  ← sizeof(pt_regs) = 15 × 4 = 60 bytes
+│  (用户态寄存器快照)               │
+│  ss / esp / eflags               │  ← CPU 自动压入
+│  cs / eip                        │  ← CPU 自动压入
+│  orig_eax / ds / es              │  ← SAVE_ALL 压入
+│  eax / ebp / edi / esi / ...     │  ← SAVE_ALL 压入
+├──────────────────────────────────┤ ← 系统调用入口后的 esp
+│                                  │
+│  内核函数调用栈（向下增长）       │
+│  ...                             │
+│  local variables                 │
+│  saved ebp                       │
+│  return address                  │
+│  ...                             │
+│                                  │
+│  (未使用区域)                     │
+│                                  │
+├──────────────────────────────────┤
+│  thread_info                     │  ← 栈底：通过 esp & ~(THREAD_SIZE-1) 定位
+│  .task ──────────────────────────┼──► task_struct（可能在其他页）
+│  .flags                          │
+│  .preempt_count                  │
+│  .addr_limit                     │
+└──────────────────────────────────┘ ← 低地址（栈溢出警戒区）
+```
+
+**栈溢出检测**：内核在栈底放置 `STACK_END_MAGIC = 0x57AC6E9D`，
+`schedule()` 中检查该值是否被覆盖（`CONFIG_SCHED_STACK_END_CHECK`）。
+
+### 8.3 kernel_clone() → copy_process() → wake_up_new_task()（5.x 路径，5.10 起由 _do_fork() 更名）
+
+现代内核（5.x）的进程创建路径较 2.6 更完善：
+
+```
+用户调用 fork() / clone3()
+      │
+      ▼ kernel/fork.c
+kernel_clone(struct kernel_clone_args *args)     ← 5.x 统一入口
+      │
+      ├─ copy_process(NULL, 0, NUMA_NO_NODE, args)
+      │     │
+      │     ├─ dup_task_struct(current, node)
+      │     │     ├─ alloc_task_struct_node()    ← slab 分配
+      │     │     ├─ alloc_thread_stack_node()   ← 分配内核栈
+      │     │     └─ arch_dup_task_struct()      ← 拷贝 FPU 状态
+      │     │
+      │     ├─ cgroup_fork()                     ← cgroup 继承
+      │     ├─ copy_mm()                         ← 地址空间（写时复制）
+      │     ├─ copy_files()
+      │     ├─ copy_fs()
+      │     ├─ copy_sighand()
+      │     ├─ copy_signal()
+      │     ├─ copy_thread()                     ← 架构相关寄存器
+      │     │     └─ 设置子进程返回值 = 0（pt_regs->ax = 0）
+      │     ├─ alloc_pid(task->nsproxy->pid_ns_for_children)
+      │     ├─ perf_event_fork()                 ← perf 继承
+      │     └─ 返回新的 task_struct *p
+      │
+      ├─ wake_up_new_task(p)
+      │     ├─ p->state = TASK_RUNNING
+      │     ├─ __set_task_cpu(p, select_task_rq(p, ...))  ← 选择 CPU
+      │     └─ activate_task() → enqueue_task()  ← 加入运行队列
+      │
+      └─ 返回新进程 PID 给父进程
+```
+
+### 8.4 __switch_to_asm：x86_64 寄存器保存/恢复
+
+```asm
+/* arch/x86/entry/entry_64.S — Linux 5.x */
+SYM_FUNC_START(__switch_to_asm)
+    /*
+     * 保存被调用者保存寄存器（callee-saved）
+     * 调用者保存寄存器（caller-saved）由编译器在调用前保存
+     */
+    pushq   %rbp
+    pushq   %rbx
+    pushq   %r12
+    pushq   %r13
+    pushq   %r14
+    pushq   %r15
+
+    /* 保存当前进程的内核栈指针 */
+    movq    %rsp, TASK_threadsp(%rdi)    /* prev->thread.sp = rsp */
+
+    /* 切换到新进程的内核栈 */
+    movq    TASK_threadsp(%rsi), %rsp    /* rsp = next->thread.sp */
+
+    /* 恢复新进程的被调用者保存寄存器 */
+    popq    %r15
+    popq    %r14
+    popq    %r13
+    popq    %r12
+    popq    %rbx
+    popq    %rbp
+
+    /*
+     * 跳转到 __switch_to（C 函数）完成：
+     * - FPU/SSE 状态延迟切换
+     * - TLS（FS/GS 段）切换
+     * - 调试寄存器切换
+     * - CR4 特性位切换（PKRU）
+     */
+    jmp     __switch_to
+SYM_FUNC_END(__switch_to_asm)
+
+/* 新进程第一次被调度时，从 ret_from_fork 开始执行 */
+SYM_CODE_START(ret_from_fork)
+    UNWIND_HINT_EMPTY
+    movq    %rax, %rdi          /* prev 任务 */
+    call    schedule_tail       /* 完成调度尾工作（释放 prev 的 rq 锁）*/
+
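+    testq   %rbx, %rbx                 /* rbx ≠ 0：内核线程（rbx = 线程函数，r12 = 参数）*/
+    jnz     1f
+2:  movq    %rsp, %rdi                 /* 用户进程：rdi = pt_regs */
+    call    syscall_exit_to_user_mode
+    jmp     swapgs_restore_regs_and_return_to_usermode   /* 返回用户态 */
+1:  movq    %r12, %rdi
+    CALL_NOSPEC rbx                    /* 线程函数若返回（kernel_execve 成功）：RAX(%rsp)=0 后 jmp 2b */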
+SYM_CODE_END(ret_from_fork)
+```
+
+### 8.5 僵尸进程与孤儿进程
+
+```
+僵尸进程（Zombie）生命周期：
+
+  进程调用 exit()
+       │
+       ▼
+  do_exit() in kernel/exit.c
+       ├─ 释放大部分资源（内存、文件描述符、信号处理等）
+       ├─ 设置 exit_code（退出状态）
+       ├─ task->exit_state = EXIT_ZOMBIE
+       └─ do_notify_parent()   ← 向父进程发 SIGCHLD
+
+  task_struct 保留（僵尸状态），等待父进程收集退出状态
+       │
+       ▼
+  父进程调用 wait4(pid, &status, ...)
+       ├─ 在 children 链表中找到 EXIT_ZOMBIE 的子进程
+       ├─ 从 task_struct 取出 exit_code
+       ├─ release_task() → 释放 task_struct，从 task 列表移除
+       └─ 返回子进程 PID 和退出状态
+
+  如果父进程未调用 wait() 而先退出 → 子进程成为"孤儿":
+       ├─ kernel/exit.c: forget_original_parent()
+       ├─ 寻找收养者（先找同线程组的其他线程，再找最近的 subreaper）
+       └─ 最终由 PID 1 (init/systemd) 收养
+          init 会调用 waitid(P_ALL, ...) 自动收割僵尸子进程
+
+wait4() 与 waitpid() 的内核路径：
+  sys_wait4() → do_wait()
+    → 遍历 current->children 链表
+    → 对 EXIT_ZOMBIE 子进程：收集状态 → release_task()
+    → 若无符合条件子进程：将父进程加入 wait_queue，休眠
+    → 子进程退出时 do_notify_parent() 唤醒父进程
+```
diff --git "a/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md" "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
index 6117468..4bf364d 100644
--- "a/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
+++ "b/04-\345\206\205\345\255\230\347\256\241\347\220\206/README.md"
@@ -155,6 +155,8 @@ void un_wp_page(unsigned long *table_entry)
 
 ### 3.1 x86 两级页表结构
 
+![x86_64 四级页表](../assets/diagrams/page-table.svg)
+
 ```
 32位地址: [31..22][21..12][11..0]
            页目录索引  页表索引   页内偏移
@@ -353,3 +355,206 @@ cat /proc/slabinfo     # Slab 缓存信息
 > continue
 > end
 ```
+
+---
+
+## 7. NUMA 架构与内存分区
+
+### 7.1 NUMA 拓扑：Node / Zone / Page
+
+```
+NUMA (Non-Uniform Memory Access) 架构：
+
+  ┌─────────────────────┐    ┌─────────────────────┐
+  │      Node 0          │    │      Node 1          │
+  │  ┌──────────────┐   │    │  ┌──────────────┐   │
+  │  │  CPU 0,1,2,3  │   │    │  │  CPU 4,5,6,7  │   │
+  │  └──────────────┘   │    │  └──────────────┘   │
+  │                      │    │                      │
+  │  本地内存（快速访问） │    │  本地内存（快速访问） │
+  │  16 GB               │◄──►│  16 GB               │
+  └─────────────────────┘    └─────────────────────┘
+        ↑ 跨节点访问（慢 ~2x）
+
+内核数据结构：
+  pg_data_t (node)
+   └── struct zone zones[] (区域)
+        └── struct page *  (页描述符数组)
+```
+
+### 7.2 内存区域（Zone）
+
+```
+x86_64 系统的内存区域（zone）划分：
+
+Zone 名称          物理地址范围              用途
+──────────────────────────────────────────────────────────
+ZONE_DMA           0 ~ 16MB                  旧式 ISA DMA 设备
+ZONE_DMA32         16MB ~ 4GB                只能访问 32 位地址的 DMA
+ZONE_NORMAL        4GB 以上                  普通内核页面（可直接映射）
+ZONE_HIGHMEM       仅 32 位系统 896MB 以上    32 位内核不能直接映射的高端内存
+ZONE_MOVABLE       从最高 zone 中划出（虚拟）  专用于可迁移页（内存热插拔）
+ZONE_DEVICE        设备内存（pmem 等）         持久内存/GPU 显存
+
+每个 zone 维护：
+  struct zone {
+      unsigned long free_pages;     /* 空闲页数 */
+      struct free_area free_area[MAX_ORDER]; /* 伙伴系统 */
+      struct per_cpu_pages pageset; /* per-CPU 页面缓存（快速分配）*/
+      unsigned long watermark[NR_WMARK]; /* 水位线：MIN/LOW/HIGH */
+      ...
+  };
+```
+
+### 7.3 水位线与 kswapd
+
+```
+Zone 水位线（watermark）控制内存回收行为：
+
+  free_pages
+  ┌─────────────────────────────────────────────┐
+  │                                             │ HIGH watermark
+  │   正常区域：内存充裕，无需回收              │
+  ├─────────────────────────────────────────────┤ LOW watermark
+  │   轻度压力：唤醒 kswapd 后台回收            │
+  ├─────────────────────────────────────────────┤ MIN watermark
+  │   严重压力：同步直接回收（影响业务延迟）    │
+  └─────────────────────────────────────────────┘
+  (0)
+
+kswapd（内核交换守护进程）：
+  - 每个 NUMA 节点一个 kswapd 内核线程
+  - 在 LOW 水位以下被唤醒，回收页面直到达到 HIGH 水位
+  - 回收对象：LRU 链表中的 inactive 页
+
+LRU 链表（active/inactive 双链表）：
+  ACTIVE_ANON    → 最近访问的匿名页（堆/栈）
+  INACTIVE_ANON  → 不活跃匿名页（候选交换到 swap）
+  ACTIVE_FILE    → 最近访问的文件映射页
+  INACTIVE_FILE  → 不活跃文件页（候选丢弃或写回磁盘）
+  UNEVICTABLE    → 不可回收页（mlock 锁定）
+
+页面从 active 到 inactive 的迁移：
+  每次 kswapd 扫描时，将 active 链表尾部页面移至 inactive
+  若 inactive 页面再次被访问（page fault 时 mark_page_accessed）→ 移回 active
+  若长期不访问 → 最终被 swap out 或丢弃
+```
+
+---
+
+## 8. OOM Killer：内存耗尽时的"牺牲者选择"
+
+```bash
+# 查看进程的 OOM 评分
+cat /proc/1234/oom_score       # OOM 杀手优先打分（越高越容易被杀）
+cat /proc/1234/oom_score_adj   # 调整值（-1000 ~ 1000，-1000 = 永不杀）
+cat /proc/1234/oom_adj         # 旧接口（-17 ~ 15，-17 = 永不杀）
+
+# 保护重要进程（如数据库）
+echo -1000 > /proc/$(pidof mysqld)/oom_score_adj
+
+# 手动触发 OOM（测试用）
+echo f > /proc/sysrq-trigger
+```
+
+**OOM 评分计算原理**：
+
+```c
+/* mm/oom_kill.c — oom_badness() */
+long oom_badness(struct task_struct *p, unsigned long totalpages)
+{
+    /* 基础分 = 进程占用的物理内存页数 */
+    long points = get_mm_rss(p->mm);
+    points += get_mm_counter(p->mm, MM_SWAPENTS);  /* + swap 使用 */
+    points += mm_pgtables_bytes(p->mm) / PAGE_SIZE; /* + 页表内存 */
+
+    /* 归一化到 0~1000 */
+    points = points * 1000 / totalpages;
+
+    /* 加上 oom_score_adj 调整值（-1000 ~ 1000，与归一化后的分数同量纲，直接相加）*/
+    points += p->signal->oom_score_adj;
+
+    return points;  /* 分数最高的进程被杀死 */
+}
+
+/* OOM 触发后的流程 */
+out_of_memory()
+  → select_bad_process()      /* 遍历所有进程，找最高分 */
+  → oom_kill_process()        /* 发送 SIGKILL */
+  → 打印 "Out of memory: Kill process PID (name) score N or sacrifice child"
+```
+
+---
+
+## 9. 透明大页（THP）与 KSM
+
+### 9.1 透明大页（Transparent Huge Pages）
+
+```bash
+# 查看/设置 THP 模式
+cat /sys/kernel/mm/transparent_hugepage/enabled
+# 输出: [always] madvise never
+#   always  = 尽可能使用大页（2MB on x86）
+#   madvise = 只对 madvise(MADV_HUGEPAGE) 的区域使用
+#   never   = 禁用 THP
+
+# 切换模式
+echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
+
+# 查看 THP 统计
+cat /proc/meminfo | grep -i huge
+# HugePages_Total: 0       ← 静态大页（需要预分配）
+# AnonHugePages: 24576 kB  ← THP 使用量（动态）
+
+# THP 碎片整理策略
+cat /sys/kernel/mm/transparent_hugepage/defrag
+# [always] defer defer+madvise madvise never
+echo defer+madvise > /sys/kernel/mm/transparent_hugepage/defrag
+```
+
+**THP 内核机制**：
+
+```
+当进程的匿名 VMA 满足条件时（大小 >= 2MB, 对齐），有两条途径得到大页：
+  ① 缺页时直接分配：缺页中断 → do_huge_pmd_anonymous_page()
+       → 分配 HPAGE_PMD_ORDER（order 9）的复合页并建立 PMD 映射
+  ② 事后合并：khugepaged 后台线程扫描，将相邻 512 个 4KB 页合并为一个 2MB 大页
+
+好处：减少 TLB miss（1个 TLB 条目覆盖 2MB vs 4KB）
+坏处：内存碎片、合并/分裂开销；对 fork() 写时复制代价更高（2MB 一次复制）
+```
+
+### 9.2 KSM（内核相同页合并）
+
+```bash
+# KSM 将内容相同的匿名页合并为一个只读物理页（写时复制）
+# 常用于虚拟化（多个相同的 guest OS 页面）
+
+# 开启 KSM
+echo 1 > /sys/kernel/mm/ksm/run       # 1=运行, 0=停止, 2=停止+解除合并
+echo 1000 > /sys/kernel/mm/ksm/pages_to_scan  # 每次扫描的页数
+
+# 查看 KSM 状态
+cat /sys/kernel/mm/ksm/pages_shared    # 物理共享页数（合并后实际保留的 KSM 页）
+cat /sys/kernel/mm/ksm/pages_sharing   # 额外指向这些共享页的映射数（即省下的页数）
+cat /sys/kernel/mm/ksm/pages_unshared  # 扫描但未能合并的页数
+# 节省内存 ≈ pages_sharing × PAGE_SIZE（pages_sharing 已不含 pages_shared 本身）
+
+# 应用程序主动参与 KSM：
+madvise(addr, length, MADV_MERGEABLE);   /* 标记此区域供 KSM 扫描 */
+madvise(addr, length, MADV_UNMERGEABLE); /* 取消 */
+```
+
+**KSM 工作原理**：
+
+```
+ksmd 内核线程定期扫描标记为 MADV_MERGEABLE 的页：
+  1. 对每页计算 hash（基于内容）
+  2. 插入两棵红黑树：
+     unstable_tree（未经验证的候选）
+     stable_tree（已确认可共享的页）
+  3. 内容相同的页：
+     → 保留一个只读物理页（stable_tree 中）
+     → 其他进程的页表项指向同一物理页
+     → 设为写保护，触发写时复制时分裂
+```
diff --git "a/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md" "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
index 874fca9..30ce0d2 100644
--- "a/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
+++ "b/05-\346\226\207\344\273\266\347\263\273\347\273\237/README.md"
@@ -179,6 +179,8 @@ struct m_inode *dir_namei(const char *pathname,
 
 ## 3. Linux 2.6.0：VFS + ext2
 
+![VFS 四大对象模型](../assets/diagrams/vfs-objects.svg)
+
 ### 3.1 VFS 核心数据结构
 
 #### super_block — 文件系统元信息
@@ -424,3 +426,187 @@ cat /proc/sys/fs/inode-state
 # 查看 ext2 超级块（Linux 系统）
 tune2fs -l /dev/sda1
 ```
+
+---
+
+## 7. 页缓存回写机制（Page Cache Writeback）
+
+### 7.1 脏页（Dirty Page）与回写时机
+
+```
+应用程序写文件流程：
+
+write() → copy_from_user → 修改页缓存中的 page → 标记为 dirty
+
+"脏页"：内存中内容比磁盘更新的页面
+内核需要周期性将脏页写回磁盘（writeback）
+
+触发 writeback 的条件：
+  1. 定期触发：pdflush / writeback 内核线程，默认每 5 秒
+  2. 脏页比例过高：超过 dirty_ratio（默认 20%）→ 同步回写（阻塞写操作）
+  3. 脏页绝对量大：超过 dirty_bytes
+  4. 脏页滞留太久：超过 dirty_expire_centisecs（默认 3000 = 30 秒）
+  5. sync() / fsync() 系统调用
+```
+
+```bash
+# 重要的脏页控制参数
+cat /proc/sys/vm/dirty_ratio          # 脏页占总内存百分比上限（默认 20）
+                                       # 超过此值 → 写操作被阻塞，强制回写
+cat /proc/sys/vm/dirty_background_ratio # 后台回写触发阈值（默认 10）
+                                       # 超过此值 → 唤醒 writeback 线程
+cat /proc/sys/vm/dirty_expire_centisecs # 脏页最长存活时间（默认 3000 = 30s）
+cat /proc/sys/vm/dirty_writeback_centisecs # writeback 线程唤醒周期（默认 500 = 5s）
+
+# 手动触发全系统同步
+sync
+
+# 查看脏页数量
+cat /proc/meminfo | grep Dirty
+# Dirty: 1024 kB
+
+# 监控回写活动
+iostat -x 1    # 观察磁盘写 I/O
+```
+
+### 7.2 fsync / fdatasync / msync 比较
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│  函数            │  写回数据  │  写回元数据（mtime/size）│  开销  │
+├──────────────────┼───────────┼─────────────────────────┼────────┤
+│  write()         │  仅修改缓存│  否（lazy update）       │  最小  │
+│  fdatasync(fd)   │  是        │  仅大小变化时（必要元数据）│  中   │
+│  fsync(fd)       │  是        │  是（全部元数据）         │  最大  │
+│  msync(addr,len) │  是（mmap）│  否                      │  中    │
+│  sync()          │  全系统    │  是                      │  最大  │
+└──────────────────┴───────────┴─────────────────────────┴────────┘
+
+实践建议：
+  · 数据库 WAL 日志：fdatasync()（只需确保数据落盘，不需要元数据）
+  · 关键配置文件保存：fsync()（需要 mtime 也正确）
+  · 高性能场景：O_DIRECT + fdatasync（绕过页缓存）
+
+内核路径（ext4）：
+  fsync() → vfs_fsync → file->f_op->fsync → ext4_sync_file
+    → filemap_write_and_wait_range()  ← 将脏页提交给块设备
+    → ext4_flush_completed_IO()
+    → jbd2_complete_transaction()    ← 等待 journal commit
+```
+
+---
+
+## 8. io_uring：高性能异步 I/O
+
+### 8.1 核心概念
+
+```
+传统异步 I/O 的问题：
+  aio_read() → 系统调用开销 + 需要多次进内核
+  epoll + nonblocking → 多次 read/write 系统调用
+
+io_uring 的解决方案：
+  共享内存环形队列（ring buffer），最小化系统调用次数
+
+  用户空间                         内核空间
+  ┌──────────────────────┐         ┌────────────────────────┐
+  │  SQE Ring（提交队列）│ ──────►  │  io_uring_sqe 处理      │
+  │  sqe[0] sqe[1] ...  │         │  （内核消费提交条目）    │
+  └──────────────────────┘         └────────────┬───────────┘
+                                                │ 完成后
+  ┌──────────────────────┐         ┌────────────▼───────────┐
+  │  CQE Ring（完成队列）│ ◄────────  │  io_uring_cqe 填充      │
+  │  cqe[0] cqe[1] ...  │         │  （内核生产完成条目）    │
+  └──────────────────────┘         └────────────────────────┘
+
+  用户程序轮询 CQE Ring → 不需要额外系统调用！
+```
+
+### 8.2 基本使用
+
+```c
+#include <liburing.h>
+
+struct io_uring ring;
+
+/* 初始化：队列深度 = 32 */
+io_uring_queue_init(32, &ring, 0);
+
+/* 提交读请求 */
+struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+io_uring_prep_read(sqe, fd, buf, sizeof(buf), offset);
+sqe->user_data = 42;  /* 用于标识请求 */
+io_uring_submit(&ring);  /* 批量提交（一次系统调用）*/
+
+/* 等待完成 */
+struct io_uring_cqe *cqe;
+io_uring_wait_cqe(&ring, &cqe);  /* 或 io_uring_peek_cqe（非阻塞）*/
+printf("read %d bytes, user_data=%llu\n", cqe->res, cqe->user_data);
+io_uring_cqe_seen(&ring, cqe);
+
+/* 清理 */
+io_uring_queue_exit(&ring);
+```
+
+### 8.3 SQE / CQE 数据结构与 Fixed Buffers 注册
+
+```c
+/* SQE（提交队列条目）— 描述一个 I/O 操作 */
+struct io_uring_sqe {
+    __u8    opcode;       /* IORING_OP_READ / WRITE / ACCEPT / SEND / ... */
+    __u8    flags;        /* IOSQE_FIXED_FILE / IOSQE_IO_LINK / ... */
+    __u16   ioprio;
+    __s32   fd;           /* 文件描述符（或 fixed fd index）*/
+    __u64   off;          /* 文件偏移 */
+    __u64   addr;         /* 缓冲区地址 */
+    __u32   len;          /* 长度 */
+    __u64   user_data;    /* 用户自定义标识（在 CQE 中原样返回）*/
+};
+
+/* CQE（完成队列条目）— 描述完成结果 */
+struct io_uring_cqe {
+    __u64   user_data;    /* 与 SQE 的 user_data 对应 */
+    __s32   res;          /* 系统调用返回值（< 0 = 错误码）*/
+    __u32   flags;
+};
+
+/* 零拷贝（Fixed Buffers）：预先注册缓冲区 */
+io_uring_register(ring_fd, IORING_REGISTER_BUFFERS, iovecs, nr_bufs);
+/* 之后用 IORING_OP_READ_FIXED 直接 DMA 到注册的缓冲区 */
+```
+
+---
+
+## 9. ext4 Journal 模式详解
+
+```
+ext4 支持三种 journal 模式，在性能与数据安全之间权衡：
+
+┌────────────────────────────────────────────────────────────────────┐
+│  模式        │  journal 内容          │  崩溃恢复          │  性能  │
+├──────────────┼────────────────────────┼────────────────────┼────────┤
+│  journal     │  数据块 + 元数据都写   │  最安全（完整回滚）│  最慢  │
+│              │  journal               │                    │        │
+├──────────────┼────────────────────────┼────────────────────┼────────┤
+│  ordered     │  仅元数据写 journal    │  安全（数据先写，  │  中等  │
+│  （默认）    │  但数据先于元数据落盘  │  元数据后提交）    │        │
+├──────────────┼────────────────────────┼────────────────────┼────────┤
+│  writeback   │  仅元数据写 journal    │  可能数据/元数据   │  最快  │
+│              │  数据随时写            │  不一致（需 fsck） │        │
+└──────────────┴────────────────────────┴────────────────────┴────────┘
+
+挂载时指定模式：
+  mount -o data=journal  /dev/sda1 /mnt   # journal 模式
+  mount -o data=ordered  /dev/sda1 /mnt   # ordered 模式（默认）
+  mount -o data=writeback /dev/sda1 /mnt  # writeback 模式
+
+推荐场景：
+  · 数据库文件目录：writeback + 应用层 fsync()（数据库自己管 journal）
+  · 普通文件系统：ordered（默认，平衡性能与安全）
+  · 最高安全性：journal（NFS 服务器、关键日志文件系统）
+
+journal commit 触发条件：
+  · 每 5 秒定期提交（commit_interval）
+  · fsync() / fdatasync() 调用
+  · journal 空间不足（jbd2 写满 → 触发 checkpoint）
+```
diff --git "a/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md" "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
index a3ee743..382da61 100644
--- "a/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
+++ "b/06-\347\263\273\347\273\237\350\260\203\347\224\250/README.md"
@@ -22,6 +22,8 @@
 
 ---
 
+![系统调用完整路径](../assets/diagrams/syscall-flow.svg)
+
 ## 2. Linux 0.11：int 0x80 中断方式
 
 ### 2.1 整体流程
@@ -358,3 +360,228 @@ ausyscall --dump       # 打印所有系统调用号
 | open    | ~3 µs（含路径解析）|
 | fork    | ~30 µs（进程创建）|
 | execve  | ~1 ms（加载程序）|
+
+---
+
+## 8. vDSO（虚拟动态共享对象）
+
+### 8.1 vDSO 原理
+
+```
+问题：某些系统调用极高频率被调用（如 gettimeofday 每毫秒调用多次）
+     每次都要 ring3 → ring0 → ring3 切换，约 100~200 ns 的开销
+
+解决：vDSO（Virtual Dynamic Shared Object）
+  内核在每个进程的地址空间映射一段特殊共享内存（约 8KB），
+  包含部分系统调用的用户态实现（直接读 VVAR 页中的内核数据）
+
+                 用户地址空间
+  ┌──────────────────────────────────┐
+  │  ...                             │
+  │  [vvar] (只读，内核写入时间数据) │ ← 内核定期更新
+  │  [vdso] (可执行，用户态代码)     │ ← gettimeofday 实现
+  │  ...                             │
+  └──────────────────────────────────┘
+
+glibc 的 gettimeofday() 会自动调用 vDSO 版本：
+  → 直接读取 vvar 页中的 vdso_data（内核从 timekeeper 同步过来的时间数据）
+  → 无需进入内核！耗时约 10~20 ns
+```
+
+### 8.2 vDSO 加速的系统调用
+
+```bash
+# 查看 vDSO 加速的函数
+cat /proc/self/maps | grep vdso
+# 7fff12345000-7fff12346000 r-xp 00000000 00:00 0  [vdso]
+
+# 解析 vDSO 导出符号
+# 注意：[vdso] 只存在于进程内存中，且 objdump 无法从管道读取 ELF，需要一个真实文件。
+# 多数发行版随内核提供 vDSO 镜像副本；自行编译内核时位于 arch/x86/entry/vdso/vdso64.so
+objdump -T /lib/modules/$(uname -r)/vdso/vdso64.so
+
+# 在现代 x86_64 系统，vDSO 通常加速以下调用：
+#  clock_gettime(CLOCK_REALTIME / CLOCK_MONOTONIC)
+#  gettimeofday()
+#  getcpu()         (返回当前 CPU 和 NUMA 节点号)
+#  time()
+```
+
+```c
+/* 用户程序通常无需关心 vDSO，glibc 自动使用 */
+/* 但可以验证：使用 strace 观察是否有系统调用被发出 */
+/* strace -e gettimeofday ./my_program */
+/* 如果使用了 vDSO，strace 看不到该系统调用！ */
+
+/* 手动查找和调用 vDSO（不推荐，仅用于了解机制）*/
+#include <sys/auxv.h>
+unsigned long vdso_addr = getauxval(AT_SYSINFO_EHDR);
+/* 然后解析 ELF，找到函数符号 */
+```
+
+---
+
+## 9. seccomp BPF：系统调用过滤
+
+### 9.1 seccomp 机制
+
+```
+seccomp（SECure COMPuting mode）允许进程为自己设置系统调用白名单/黑名单，
+用于沙箱化（Chrome、Docker、systemd 等均使用）。
+
+模式：
+  SECCOMP_MODE_STRICT   仅允许 read/write/exit/sigreturn（极简模式）
+  SECCOMP_MODE_FILTER   BPF 程序决定每个系统调用的处理方式（灵活）
+```
+
+### 9.2 seccomp BPF 示例
+
+```c
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <linux/audit.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+
+/* 简单的 seccomp 过滤器：只允许 read/write/exit_group */
+struct sock_filter filter[] = {
+    /* 加载系统调用号到累加器 */
+    BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+             offsetof(struct seccomp_data, nr)),
+
+    /* 允许 read */
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+    /* 允许 write */
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 0, 1),
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+    /* 允许 exit_group */
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 0, 1),
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+    /* 其他所有系统调用：返回 ERRNO(EPERM) 或 KILL */
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
+};
+
+struct sock_fprog prog = {
+    .len = sizeof(filter) / sizeof(filter[0]),
+    .filter = filter,
+};
+
+/* 启用 seccomp BPF */
+prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);  /* 必须先设置 */
+prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+
+/* 或使用 seccomp() 系统调用（Linux 3.17+）*/
+syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog);
+```
+
+```bash
+# 查看进程的 seccomp 状态
+cat /proc/self/status | grep Seccomp
+# Seccomp: 0   (0=未启用, 1=STRICT, 2=FILTER)
+
+# Docker 默认 seccomp profile（阻止约 50 个危险调用）
+docker run --security-opt seccomp=unconfined ubuntu  # 禁用 seccomp
+docker run --security-opt seccomp=/path/to/profile.json ubuntu
+```
+
+---
+
+## 10. 在 Linux 5.x 中添加新系统调用
+
+以添加 `sys_mygetpid` 为例（仅返回当前进程 PID）：
+
+### 步骤一：定义系统调用实现
+
+```c
+/* kernel/myhello.c （新建文件）*/
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/pid.h>
+
+/* SYSCALL_DEFINE 宏会展开为 __x64_sys_mygetpid 等平台特定函数 */
+SYSCALL_DEFINE0(mygetpid)
+{
+    return task_tgid_vnr(current);  /* 返回进程 ID（虚拟命名空间中的 PID）*/
+}
+```
+
+### 步骤二：注册系统调用号
+
+```
+# arch/x86/entry/syscalls/syscall_64.tbl
+# 在末尾添加（假设下一个号是 548）：
+548  common  mygetpid     sys_mygetpid
+```
+
+### 步骤三：声明原型
+
+```c
+/* include/linux/syscalls.h — 在末尾添加 */
+asmlinkage long sys_mygetpid(void);
+```
+
+### 步骤四：加入编译
+
+```makefile
+# kernel/Makefile — 添加新文件
+obj-y += myhello.o
+```
+
+### 步骤五：重新编译并测试
+
+```c
+/* 用户态测试程序 */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+
+#define __NR_mygetpid 548
+
+int main(void)
+{
+    long pid = syscall(__NR_mygetpid);
+    printf("mygetpid returned: %ld (getpid: %d)\n", pid, getpid());
+    return 0;
+}
+```
+
+---
+
+## 11. 系统调用号表演进（32 位 vs 64 位）
+
+```
+历史上系统调用号不统一，x86 平台有多套表：
+
+  arch/x86/entry/syscalls/syscall_32.tbl  — x86（32 位）
+  arch/x86/entry/syscalls/syscall_64.tbl  — x86_64（64 位）
+  arch/x86/entry/syscalls/syscall_x32.tbl — x32 ABI（64 位内核+32 位指针）
+
+部分关键差异（32 位 vs 64 位）：
+  调用    32 位号   64 位号
+  ───────────────────────────
+  read       3        0
+  write      4        1
+  open       5        2
+  close      6        3
+  fork       2       57
+  execve    11       59
+  exit       1       60
+  wait4    114      61 (wait4)
+  getpid    20       39
+  socket   359      41
+
+32 位程序在 64 位内核上运行（ia32 兼容模式）：
+  使用 int 0x80 → 进入 entry_INT80_compat()；使用 sysenter → 进入 entry_SYSENTER_compat()
+  内核根据 32 位调用号，查 syscall_32.tbl 分发
+
+64 位程序：
+  使用 syscall 指令 → 进入 entry_SYSCALL_64()
+  内核查 syscall_64.tbl
+
+# 在系统上查看当前系统调用数量
+ausyscall --dump | wc -l   # x86_64 目前约 330+ 个
+```
diff --git "a/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md" "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
index f52c3e0..0c78f41 100644
--- "a/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
+++ "b/07-\350\256\276\345\244\207\351\251\261\345\212\250/README.md"
@@ -98,6 +98,8 @@ void hd_interrupt(void)
 
 ## 3. Linux 2.6.0：统一设备模型
 
+![Linux 设备模型](../assets/diagrams/driver-model.svg)
+
 ### 3.1 设备模型的核心对象
 
 ```
@@ -417,3 +419,298 @@ dmesg | grep "Oops"
 # 使用 addr2line 或 gdb 定位崩溃位置
 addr2line -e vmlinux 0xc01234ab
 ```
+
+---
+
+## 7. MSI / MSI-X 中断 vs 传统 INTx
+
+### 7.1 中断方式对比
+
+```
+传统 INTx（引脚中断）：
+  · PCI 设备拉低 INTA# 引脚
+  · 所有共享该引脚的设备共用一个中断号
+  · 问题：中断共享（需要轮询判断是哪个设备触发）
+          不支持多处理器亲和性（只能给一个 CPU）
+
+MSI（Message Signaled Interrupts）：
+  · 设备写一条特定内存地址（x86: 0xFEExxxxx）触发中断
+  · 每个设备独立中断号（无共享问题）
+  · 可以精确指定目标 CPU（affinity）
+  · PCIe 标准支持，每设备最多 32 个 MSI 向量
+
+MSI-X（MSI eXtended）：
+  · 每设备最多 2048 个独立中断向量
+  · 每个向量独立配置目标 CPU
+  · 高性能网卡/SSD 必用（多队列，队列绑定不同 CPU）
+```
+
+```c
+/* 在驱动中申请 MSI-X 中断 */
+int nvecs = pci_msix_vec_count(pdev);  /* 硬件支持的最大向量数 */
+struct msix_entry entries[4] = {
+    { .entry = 0 }, { .entry = 1 },
+    { .entry = 2 }, { .entry = 3 },
+};
+
+/* 分配 4 个 MSI-X 向量 */
+int ret = pci_enable_msix_exact(pdev, entries, 4);
+
+/* 为每个向量注册处理函数 */
+for (i = 0; i < 4; i++) {
+    ret = request_irq(entries[i].vector, my_msix_handler,
+                      0, "my_device", &my_queues[i]);
+    /* 设置 CPU 亲和性（队列 i 绑定 CPU i）*/
+    irq_set_affinity_hint(entries[i].vector, cpumask_of(i));
+}
+
+/* 清理：先对每个向量 irq_set_affinity_hint(vec, NULL) 并 free_irq()，最后再关闭 MSI-X */
+pci_disable_msix(pdev);
+```
+
+---
+
+## 8. DMA API 与 IOMMU
+
+### 8.1 DMA 一致性内存 vs 流式 DMA
+
+```c
+/* ① dma_alloc_coherent：分配一致性 DMA 内存
+   适用：设备频繁读写的控制数据（描述符环、状态寄存器映射）
+   特点：CPU 和设备看到的内容始终一致（不需要显式 cache 刷新）
+         通常是非缓存映射（Uncached），访问速度较慢 */
+void *cpu_addr;
+dma_addr_t dma_handle;
+cpu_addr = dma_alloc_coherent(dev, 4096, &dma_handle, GFP_KERNEL);
+/* cpu_addr: 驱动用来读写的内核虚拟地址 */
+/* dma_handle: 写入设备寄存器的总线地址 */
+writel(dma_handle, dev_base + TX_DESC_REG);
+/* 释放 */
+dma_free_coherent(dev, 4096, cpu_addr, dma_handle);
+
+/* ② dma_map_single：流式 DMA 映射
+   适用：单次数据传输（网络包、磁盘块）
+   特点：对已有内存建立映射，速度快
+         需要显式 sync 保持 CPU/设备视图一致 */
+dma_addr_t dma_addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
+if (dma_mapping_error(dev, dma_addr))
+    return -ENOMEM;
+/* ... 触发 DMA 传输 ... */
+dma_unmap_single(dev, dma_addr, len, DMA_TO_DEVICE);  /* 传输完后解除映射 */
+
+/* ③ dma_map_sg：散列/聚集（Scatter-Gather）DMA
+   适用：物理上不连续的缓冲区（如文件系统的 page cache） */
+int nents = dma_map_sg(dev, sgl, nsegs, DMA_FROM_DEVICE);
+struct scatterlist *sg;
+for_each_sg(sgl, sg, nents, i) {
+    /* sg_dma_address(sg): DMA 地址 */
+    /* sg_dma_len(sg):     长度 */
+}
+dma_unmap_sg(dev, sgl, nsegs, DMA_FROM_DEVICE);
+```
+
+### 8.2 IOMMU 作用
+
+```
+没有 IOMMU：设备可以 DMA 到任意物理地址 → 安全风险（DMA 攻击）
+有 IOMMU（Intel VT-d / AMD-Vi）：
+  · 设备只能访问 IOMMU 映射表中允许的内存范围
+  · dma_map_* API 在 IOMMU 中建立映射（类似进程页表）
+  · 设备隔离：虚拟机的设备无法访问宿主机内存（PCIe 直通安全基础）
+
+查看 IOMMU 状态：
+  dmesg | grep -i iommu
+  cat /sys/kernel/iommu_groups/0/reserved_regions
+```
+
+---
+
+## 9. devm_* 资源管理函数
+
+`devm_*` 系列函数（device-managed）与设备生命周期绑定，
+设备移除时自动释放，避免驱动忘记清理资源：
+
+```c
+/* 传统方式：需要手动在 remove() 中配对释放 */
+void *buf = kmalloc(size, GFP_KERNEL);
+/* ...使用 buf ... */
+kfree(buf);  /* 必须记得调用！ */
+
+/* devm 方式：设备移除时自动 kfree */
+void *buf = devm_kmalloc(dev, size, GFP_KERNEL);
+/* 设备 remove 时自动释放，无需手动 kfree */
+
+/* 常用 devm_* 函数 */
+devm_kmalloc(dev, size, gfp)         /* 内存分配 */
+devm_kzalloc(dev, size, gfp)         /* 清零内存分配 */
+devm_ioremap(dev, offset, size)      /* I/O 内存映射 */
+devm_ioremap_resource(dev, res)      /* 从 platform_resource 映射 */
+devm_request_irq(dev, irq, handler, flags, name, data) /* 中断注册 */
+devm_request_mem_region(dev, start, n, name)  /* 申请 I/O 内存区域 */
+devm_gpio_request(dev, gpio, label)  /* GPIO 申请 */
+devm_clk_get(dev, id)               /* 时钟获取 */
+devm_regulator_get(dev, id)         /* 电源调节器 */
+devm_pinctrl_get(dev)               /* 引脚控制 */
+devm_iio_device_alloc(dev, priv_size) /* IIO 设备 */
+
+/* 自定义清理函数 */
+static void my_cleanup(void *data)
+{
+    struct my_device *mydev = data;
+    my_hardware_reset(mydev);
+}
+devm_add_action(dev, my_cleanup, mydev);  /* 设备移除时调用 */
+```
+
+---
+
+## 10. Platform 驱动完整示例（含设备树绑定）
+
+### 10.1 设备树绑定（DTS）
+
+```dts
+/* arch/arm64/boot/dts/vendor/board.dts */
+/ {
+    myled: myled@12340000 {
+        compatible = "vendor,myled";    /* 与驱动 of_match_table 对应 */
+        reg = <0x0 0x12340000 0x0 0x100>; /* 寄存器基地址和大小 */
+        interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ccu CLK_LED>;
+        clock-names = "core";
+        reset-gpios = <&gpio 5 GPIO_ACTIVE_LOW>;
+        label = "power-led";
+        linux,default-trigger = "heartbeat";
+        status = "okay";
+    };
+};
+```
+
+### 10.2 Platform 驱动实现
+
+```c
+/* drivers/leds/leds-myled.c */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/leds.h>
+
+#define LED_CTRL_REG    0x00
+#define LED_STATUS_REG  0x04
+#define LED_ENABLE_BIT  BIT(0)
+
+struct myled_priv {
+    struct led_classdev cdev;     /* LED 类设备（回调中用 container_of 反查 priv，位置不限）*/
+    void __iomem *base;           /* 寄存器基地址 */
+    struct clk *clk;              /* 时钟 */
+    struct gpio_desc *reset_gpio; /* 复位 GPIO */
+    int irq;
+};
+
+static void myled_set_brightness(struct led_classdev *cdev,
+                                  enum led_brightness brightness)
+{
+    struct myled_priv *priv = container_of(cdev, struct myled_priv, cdev);
+    u32 val = readl(priv->base + LED_CTRL_REG);
+
+    if (brightness)
+        val |= LED_ENABLE_BIT;
+    else
+        val &= ~LED_ENABLE_BIT;
+
+    writel(val, priv->base + LED_CTRL_REG);
+}
+
+static irqreturn_t myled_irq_handler(int irq, void *dev_id)
+{
+    struct myled_priv *priv = dev_id;
+    u32 status = readl(priv->base + LED_STATUS_REG);
+    dev_dbg(priv->cdev.dev, "LED IRQ: status=0x%x\n", status);
+    /* 清除中断 */
+    writel(status, priv->base + LED_STATUS_REG);
+    return IRQ_HANDLED;
+}
+
+static int myled_probe(struct platform_device *pdev)
+{
+    struct device *dev = &pdev->dev;
+    struct myled_priv *priv;
+    struct resource *res;
+    int ret;
+
+    /* devm_kzalloc：设备移除时自动释放 */
+    priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+    if (!priv)
+        return -ENOMEM;
+
+    /* 从设备树获取寄存器地址并映射（devm 方式）*/
+    priv->base = devm_platform_ioremap_resource(pdev, 0);
+    if (IS_ERR(priv->base))
+        return PTR_ERR(priv->base);
+
+    /* 获取时钟 */
+    priv->clk = devm_clk_get(dev, "core");
+    if (IS_ERR(priv->clk))
+        return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to get clk\n");
+
+    ret = clk_prepare_enable(priv->clk);
+    if (ret)
+        return ret;
+
+    /* 获取 reset GPIO */
+    priv->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+    if (IS_ERR(priv->reset_gpio))
+        return PTR_ERR(priv->reset_gpio);
+
+    /* 注册中断 */
+    priv->irq = platform_get_irq(pdev, 0);
+    if (priv->irq < 0)
+        return priv->irq;
+
+    ret = devm_request_irq(dev, priv->irq, myled_irq_handler,
+                           0, dev_name(dev), priv);
+    if (ret)
+        return ret;
+
+    /* 配置 LED 类设备 */
+    priv->cdev.name = of_get_property(dev->of_node, "label", NULL) ?: "myled";
+    priv->cdev.brightness_set = myled_set_brightness;
+    priv->cdev.max_brightness = 1;
+    priv->cdev.default_trigger =
+        of_get_property(dev->of_node, "linux,default-trigger", NULL);
+
+    /* 注册 LED 类设备（创建 /sys/class/leds/myled/）*/
+    ret = devm_led_classdev_register(dev, &priv->cdev);
+    if (ret)
+        return ret;
+
+    platform_set_drvdata(pdev, priv);
+    dev_info(dev, "myled: registered at 0x%p, irq=%d\n", priv->base, priv->irq);
+    return 0;
+}
+
+/* 设备树匹配表 */
+static const struct of_device_id myled_of_match[] = {
+    { .compatible = "vendor,myled" },
+    { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, myled_of_match);
+
+static struct platform_driver myled_driver = {
+    .probe  = myled_probe,
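+    /* devm_* 资源在解绑时自动释放；但 probe 中的 clk_prepare_enable() 不受 devm 管理，需在 remove 中调用 clk_disable_unprepare()（或改用 devm_clk_get_enabled()，5.19+）*/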
+    .driver = {
+        .name           = "myled",
+        .of_match_table = myled_of_match,
+        /* .pm          = &myled_pm_ops, */  /* 可选：电源管理（需先自行定义 myled_pm_ops）*/
+    },
+};
+
+module_platform_driver(myled_driver);  /* 替代 module_init/module_exit */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Your Name");
+MODULE_DESCRIPTION("My LED platform driver example");
+```
diff --git "a/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md" "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
index 97f972d..c63f7fe 100644
--- "a/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
+++ "b/08-\347\275\221\347\273\234\345\255\220\347\263\273\347\273\237/README.md"
@@ -229,6 +229,8 @@ dev_queue_xmit(skb)
 
 ---
 
+![TCP 三次握手与四次挥手](../assets/diagrams/tcp-handshake.svg)
+
 ## 5. TCP 三次握手（内核视角）
 
 ```
@@ -313,3 +315,288 @@ sudo tcpdump -i eth0 -n tcp port 80
 | epoll | 2.5.44 | O(1) IO 多路复用 |
 | RSS/RPS | 2.6.35+ | 多队列网卡，多核并行接收 |
 | XDP/eBPF | 4.8+ | 在驱动层直接处理包，旁路协议栈 |
+
+---
+
+## 9. sk_buff 完整结构（ASCII 图）
+
+```
+sk_buff 内存布局详解（包含所有字段）：
+
+        skb（指向描述符）
+         │
+         ▼
+┌────────────────────────────────────────────────────────┐
+│  struct sk_buff（描述符，约 240 bytes）                  │
+│                                                        │
+│  next ──► 链表                prev ──► 链表            │
+│  sk   ──► 所属 socket         dev ──► 网络设备         │
+│                                                        │
+│  head ─────────────────────────────────────────────┐  │
+│  data ──────────────────────────────────────────┐  │  │
+│  tail ──────────────────────────────────────┐   │  │  │
+│  end  ──────────────────────────────────┐   │   │  │  │
+│                                         │   │   │  │  │
+│  len = tail - data                      │   │   │  │  │
+│  protocol / pkt_type / ip_summed        │   │   │  │  │
+│  h.th ──► TCP 头（数据内偏移量）         │   │   │  │  │
+│  nh.iph ──► IP 头（数据内偏移量）        │   │   │  │  │
+│  mac.ethernet ──► 以太网头              │   │   │  │  │
+│  dst ──► 路由缓存                       │   │   │  │  │
+└─────────────────────────────────────────│───│───│──│──┘
+                                          │   │   │  │
+数据缓冲区（连续内存）：                  │   │   │  │
+                                          ▼   │   │  ▼
+  [low]  head ──────────────────────────────► │   │  end [high]
+         │ headroom                       ▼   │   │
+         │ （发送时为以太头/IP头预留空间） data │   │
+         │                                │   │   │
+         │                   以太网头(14B) │   │   │
+         │                   IP 头  (20B) │   │   │
+         │                   TCP 头 (20B) │   │   │
+         │                   用户数据      │   │   │
+         │                                ▼   │   │
+         │                               tail │   │
+         │ tailroom                           ▼   │
+         └────────────────────────────────── end ─┘
+
+操作函数：
+  skb_push(skb, n)   data -= n   （向 head 方向扩展，添加协议头）
+  skb_pull(skb, n)   data += n   （去掉协议头，上层解析）
+  skb_put(skb, n)    tail += n   （向 end 方向扩展，添加数据）
+  skb_reserve(skb,n) head..data 预留 n 字节（初始化时调用）
+```
+
+---
+
+## 10. 完整接收路径：NIC → 用户进程
+
+```
+硬件层：
+  网卡 DMA → 填充 RX ring buffer 中的 sk_buff
+  发送中断（或 NAPI poll）通知内核
+
+软中断层（NET_RX_SOFTIRQ）：
+  net_rx_action()
+  └─► driver->napi_poll()              ← e.g., e1000_clean_rx_irq()
+       └─► netif_receive_skb(skb)      ← 提交给网络栈
+
+协议分发层（net/core/dev.c）：
+  netif_receive_skb()
+  └─► __netif_receive_skb_core()
+       ├─► deliver_skb(skb, ptype)     ← 按 skb->protocol 分发
+       │    ETH_P_IP  → ip_rcv()
+       │    ETH_P_ARP → arp_rcv()
+       │    ETH_P_IPV6 → ipv6_rcv()
+       └─► （netfilter 的 NF_INET_PRE_ROUTING hook 位于 ip_rcv() 内，在 ip_rcv_finish() 之前）
+
+IP 层（net/ipv4/ip_input.c）：
+  ip_rcv()
+  └─► ip_rcv_finish()
+       ├─► ip_route_input()            ← 路由决策
+       │    本机目标 → ip_local_deliver()
+       │    转发     → ip_forward() → ip_output()
+       └─► 本机目标：ip_local_deliver() 内经过 NF_INET_LOCAL_IN hook（iptables INPUT 链）
+
+传输层（net/ipv4/tcp_ipv4.c）：
+  tcp_v4_rcv()
+  ├─► __inet_lookup_skb()             ← 根据 4 元组查找 sock
+  ├─► tcp_v4_do_rcv()
+  │    ├─► tcp_rcv_established()      ← 已建立连接的快速路径
+  │    │    └─► tcp_data_queue()      ← 放入接收队列（有序重组）
+  │    └─► tcp_rcv_state_process()    ← 状态机（握手/挥手）
+  └─► sk->sk_data_ready(sk)          ← 唤醒 recv() 中阻塞的进程
+
+用户层：
+  recv() / read() 从 sk->sk_receive_queue 取数据
+  copy_to_user() → 返回用户进程
+```
+
+---
+
+## 11. netfilter 钩子与 iptables
+
+```
+netfilter 定义了 5 个钩子点，覆盖数据包的完整生命周期：
+
+                      本机进程
+                         ▲ │
+                  INPUT  │ │ OUTPUT
+                         │ ▼
+              ┌──────────┴──────────────────────┐
+              │          本机网络栈               │
+              └──────────┬──────────────────────┘
+                         │
+  ─────────────────────────────────────────────────── 网络接口
+  进入                    │                    离开
+  ─────────────────────►  │  ────────────────────►
+                          │
+  PREROUTING              │              POSTROUTING
+  （入站，路由前）         │              （出站，路由后）
+                          │
+                     FORWARD
+                  （转发，穿越本机）
+
+钩子与 iptables 表的对应：
+  钩子名              iptables 链         用途示例
+  ─────────────────────────────────────────────────────
+  NF_INET_PRE_ROUTING    PREROUTING        DNAT（改目标地址）
+  NF_INET_LOCAL_IN       INPUT             过滤入站包
+  NF_INET_FORWARD        FORWARD           过滤转发包
+  NF_INET_LOCAL_OUT      OUTPUT            过滤/修改出站包
+  NF_INET_POST_ROUTING   POSTROUTING       SNAT/MASQUERADE
+
+iptables 表优先级（高→低）：
+  raw → mangle → nat → filter → security
+
+常用 iptables 规则示例：
+  # 查看规则
+  iptables -L -n -v --line-numbers
+
+  # 拒绝来自特定 IP 的连接
+  iptables -A INPUT -s 10.0.0.5 -j DROP
+
+  # DNAT：将到 80 端口的流量转发到内部 8080
+  iptables -t nat -A PREROUTING -p tcp --dport 80 -j DNAT --to :8080
+
+  # MASQUERADE：出站 NAT（替换源 IP 为网卡 IP）
+  iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
+```
+
+---
+
+## 12. conntrack（连接跟踪）
+
+```bash
+# 连接跟踪表
+cat /proc/net/nf_conntrack
+# 输出示例：
+# ipv4 2 tcp 6 431999 ESTABLISHED src=192.168.1.10 dst=8.8.8.8 sport=54321 dport=53
+# 字段：协议族 协议 超时 状态 src/dst/port
+
+# 连接状态：
+#   NEW          第一个包，连接尚未建立
+#   ESTABLISHED  已建立双向连接
+#   RELATED      关联连接（如 FTP 数据连接）
+#   INVALID      无法识别的包
+#   UNTRACKED    被 notrack 规则跳过的包
+
+# 查看 conntrack 统计
+conntrack -S
+cat /proc/net/stat/nf_conntrack
+
+# 修改 conntrack 表大小（默认约 65536）
+echo 524288 > /proc/sys/net/netfilter/nf_conntrack_max
+# 或（持久）：net.netfilter.nf_conntrack_max = 524288
+
+# 手动删除特定条目
+conntrack -D -p tcp -s 192.168.1.10 --sport 54321   # --sport 需要先用 -p 指定协议
+
+# 连接跟踪内核路径
+# netfilter PRE_ROUTING hook → nf_conntrack_in()
+#   → 查 nf_conntrack hash 表（4 元组 hash）
+#   → 新连接：分配 nf_conn，加入 hash 表
+#   → 已有连接：找到 nf_conn，更新状态和超时
+```
+
+---
+
+## 13. XDP（eXpress Data Path）
+
+```
+XDP 允许在驱动层（网卡收包后立即）运行 eBPF 程序，
+完全旁路内核网络栈，实现极低延迟的包处理：
+
+  网卡 DMA → NIC driver → XDP eBPF program → (决策)
+                                │
+                    ┌───────────┼───────────┐
+                    ▼           ▼           ▼
+               XDP_DROP    XDP_PASS    XDP_TX / XDP_REDIRECT
+               （丢弃）  （继续走      （重发回  （转发到
+                          内核协议栈）  同网卡）  其他网卡/socket）
+
+XDP 返回码及典型性能（单核、64B 小包的数量级参考，需 25GbE 以上网卡；10GbE 线速上限约 14.88 Mpps）：
+  XDP_DROP     ≈ 20 Mpps（百万包/秒）丢包，用于 DDoS 防护
+  XDP_PASS     ≈ 10 Mpps 正常处理（略有开销）
+  XDP_TX       ≈ 15 Mpps 反弹包（用于负载均衡）
+  XDP_REDIRECT ≈ 12 Mpps 转发到其他网卡或 AF_XDP socket
+
+  相比：内核协议栈满速约 1~2 Mpps（单核）
+        DPDK 用户态可达 30+ Mpps（但需要独占网卡，旁路内核）
+```
+
+```c
+/* XDP eBPF 程序示例：丢弃所有 UDP 包 */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+SEC("xdp")
+int xdp_drop_udp(struct xdp_md *ctx)
+{
+    void *data     = (void *)(long)ctx->data;
+    void *data_end = (void *)(long)ctx->data_end;
+
+    struct ethhdr *eth = data;
+    if ((void *)(eth + 1) > data_end)
+        return XDP_PASS;
+
+    if (eth->h_proto != htons(ETH_P_IP))
+        return XDP_PASS;
+
+    struct iphdr *ip = (void *)(eth + 1);
+    if ((void *)(ip + 1) > data_end)
+        return XDP_PASS;
+
+    if (ip->protocol == IPPROTO_UDP)
+        return XDP_DROP;  /* 丢弃 UDP */
+
+    return XDP_PASS;
+}
+
+/* 加载 XDP 程序 */
+// ip link set dev eth0 xdp obj xdp_drop_udp.o sec xdp
+// ip link set dev eth0 xdpgeneric obj xdp_drop_udp.o sec xdp  # 通用模式（软件模拟）
+```
+
+---
+
+## 14. TCP 关键调优参数
+
+```bash
+# ── 连接队列 ──
+sysctl net.ipv4.tcp_max_syn_backlog      # SYN 半连接队列大小（默认 1024）
+sysctl net.core.somaxconn                # accept 全连接队列上限（默认 128，Linux 5.4 起为 4096）
+
+# ── 超时与重传 ──
+sysctl net.ipv4.tcp_syn_retries          # SYN 重传次数（默认 6，约 127 秒）
+sysctl net.ipv4.tcp_synack_retries       # SYNACK 重传次数（默认 5）
+sysctl net.ipv4.tcp_fin_timeout          # FIN_WAIT_2 超时（默认 60 秒）
+sysctl net.ipv4.tcp_keepalive_time       # keepalive 探测间隔（默认 7200 秒）
+sysctl net.ipv4.tcp_keepalive_intvl      # keepalive 重传间隔（默认 75 秒）
+sysctl net.ipv4.tcp_keepalive_probes     # keepalive 重传次数（默认 9）
+
+# ── 缓冲区 ──
+sysctl net.core.rmem_max                 # socket 接收缓冲最大值（默认 212992）
+sysctl net.core.wmem_max                 # socket 发送缓冲最大值
+sysctl net.ipv4.tcp_rmem                 # TCP 接收缓冲：min default max
+sysctl net.ipv4.tcp_wmem                 # TCP 发送缓冲：min default max
+# 高吞吐场景推荐：
+sysctl -w net.core.rmem_max=134217728    # 128MB
+sysctl -w net.ipv4.tcp_rmem="4096 87380 134217728"
+
+# ── 拥塞控制 ──
+sysctl net.ipv4.tcp_congestion_control   # 当前算法（cubic/bbr/reno）
+sysctl net.ipv4.tcp_available_congestion_control  # 可用算法列表
+# 开启 BBR（Linux 4.9+，高带宽高延迟网络效果好）：
+echo bbr > /proc/sys/net/ipv4/tcp_congestion_control
+sysctl net.core.default_qdisc=fq        # BBR 需要配合 fq 队列规则
+
+# ── TIME_WAIT ──
+sysctl net.ipv4.tcp_tw_reuse             # 允许复用 TIME_WAIT socket（默认 0）
+sysctl net.ipv4.tcp_max_tw_buckets       # TIME_WAIT 连接最大数（默认 131072）
+
+# ── ECMP / 多路径 ──
+sysctl net.ipv4.fib_multipath_hash_policy=1  # 按 L4（src/dst port）hash
+```
diff --git "a/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md" "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"
index ce08e3d..1e6720a 100644
--- "a/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"
+++ "b/09-\345\220\214\346\255\245\346\234\272\345\210\266/README.md"
@@ -4,6 +4,8 @@
 > 本章从**为什么需要同步 → 各种锁的实现原理 → 使用场景**，
 > 对照 Linux 0.11（单处理器，禁中断）与 Linux 2.6.0（SMP，丰富锁原语）拆解。
 
+![内核同步机制全景](../assets/diagrams/sync-map.svg)
+
 ---
 
 ## 1. 并发场景与竞争条件
@@ -359,3 +361,338 @@ perf lock report
 > 2. 中断上下文用自旋锁
 > 3. 进程上下文且临界区短用自旋锁，长用互斥量
 > 4. 读远多于写用 RCU 或读写锁
+
+---
+
+## 9. 内存排序（Memory Ordering）
+
+### 9.1 TSO（Total Store Ordering）vs 弱内存序
+
+```
+不同 CPU 架构对内存操作的排序保证不同：
+
+x86/x86_64（TSO 模型）：
+  · store-store 有序（写-写不会乱序）
+  · load-load  有序（读-读不会乱序）
+  · load-store 有序
+  · 但：store-load 可能乱序！（这是 TSO 允许的唯一乱序）
+  · 因此：x86 上大多数场景不需要内存屏障
+
+ARM（弱内存序模型）：
+  · 所有类型的操作都可能乱序（load-load, store-store, load-store, store-load）
+  · 必须显式使用屏障指令（DMB, DSB, ISB）
+  · 后果：ARM 上的并发代码比 x86 需要更多 barrier
+
+示例：经典的"消息传递"模式
+  Thread 1 (writer)         Thread 2 (reader)
+  ─────────────────         ──────────────────
+  data = 42;                while (flag == 0) ;   /* 等待 */
+  flag = 1;                 use(data);             /* 读数据 */
+
+  在 ARM 等弱内存序 CPU 上：两次 store（或两次 load）可能乱序，Thread 2 会看到 flag=1 但 data 还是 0！
+  （x86 TSO 硬件不会重排 store-store / load-load，但仍需防止编译器重排）解决：写者在 flag=1 前加 smp_wmb()，读者在读 data 前加 smp_rmb()
+```
+
+### 9.2 Linux 内核内存屏障 API
+
+```c
+/* 完整屏障（双向）：之前的 load/store 不会延迟到之后 */
+smp_mb()        /* SMP 内存屏障（单核无效果）*/
+mb()            /* 全内存屏障（含 I/O 内存）*/
+
+/* 写屏障：之前的所有 store 在此点前对其他 CPU 可见 */
+smp_wmb()       /* SMP 写屏障 */
+wmb()           /* 写屏障（含 I/O）*/
+
+/* 读屏障：保证屏障之前的 load 先于屏障之后的 load 完成（需与写者一侧的写屏障配对使用）*/
+smp_rmb()       /* SMP 读屏障 */
+rmb()           /* 读屏障（含 I/O）*/
+
+/* 编译器屏障：仅防止编译器重排，不生成 CPU 指令 */
+barrier()
+
+/* 带 acquire/release 语义的原子操作（Linux 3.14+）*/
+smp_load_acquire(ptr)    /* load + 隐含读屏障（之后的访问不会提前）*/
+smp_store_release(ptr, v)/* store + 隐含写屏障（之前的访问不会延后）*/
+
+/* x86 上 smp_mb() 的实现（注意实际的开销）*/
+/* x86:  lock; addl $0,0(%rsp)  或  mfence */
+/* ARM:  dmb ish（inner shareable domain barrier）*/
+
+/* 正确的"消息传递"模式 */
+/* 写者 */
+WRITE_ONCE(data, 42);
+smp_wmb();              /* 确保 data 写入先于 flag 写入 */
+WRITE_ONCE(flag, 1);
+
+/* 读者 */
+while (!READ_ONCE(flag))
+    cpu_relax();
+smp_rmb();              /* 确保看到 flag=1 后再读 data */
+val = READ_ONCE(data);  /* 保证是 42 */
+```
+
+---
+
+## 10. RCU 深度剖析
+
+### 10.1 宽限期（Grace Period）机制
+
+```
+RCU 的核心保证：
+  在宽限期结束后，所有在宽限期开始前就已存在的 RCU 读者都已完成。
+
+如何判断宽限期结束？
+  · Classic RCU（UP/树形 RCU）：
+    每个 CPU 经历一次上下文切换（quiescent state）→ 宽限期结束
+    因为：RCU 读临界区不能睡眠，上下文切换意味着退出了临界区
+
+  · SRCU（Sleepable RCU）：
+    允许读者在临界区内休眠
+    使用计数器（而非上下文切换）判断宽限期
+
+宽限期时间线：
+  T0: writer 调用 synchronize_rcu() 或 call_rcu()
+  T1: 内核开始监视所有 CPU 的 quiescent state
+  T2: CPU0 发生上下文切换（确认退出临界区）
+  T3: CPU1 发生上下文切换
+  T4: ... 所有 CPU 都经历过至少一次上下文切换
+  T5: 宽限期结束，synchronize_rcu() 返回 / call_rcu 回调被调用
+      → 现在可以安全释放旧数据
+```
+
+### 10.2 完整的 RCU 删除操作
+
+```c
+struct my_node {
+    int data;
+    struct list_head list;
+    struct rcu_head rcu;    /* 用于 call_rcu() 的回调链接 */
+};
+
+static LIST_HEAD(my_list);
+static DEFINE_SPINLOCK(list_lock);
+
+/* 读者：无锁遍历 */
+void read_data(void)
+{
+    struct my_node *node;
+
+    rcu_read_lock();   /* 禁止抢占（但不阻塞中断）*/
+    list_for_each_entry_rcu(node, &my_list, list) {
+        /* 使用 node->data，可以睡眠吗？不行！
+           classic RCU：rcu_read_lock 区间内不能睡眠 */
+        process(node->data);
+    }
+    rcu_read_unlock();  /* 允许抢占，标记退出临界区 */
+}
+
+/* 写者方式一：同步等待（调用者可以阻塞）*/
+void delete_sync(struct my_node *node)
+{
+    spin_lock(&list_lock);
+    list_del_rcu(&node->list);  /* 从链表删除（非原子，需持锁）*/
+    spin_unlock(&list_lock);
+
+    synchronize_rcu();  /* 阻塞，直到宽限期结束 */
+    kfree(node);        /* 安全释放 */
+}
+
+/* 写者方式二：异步回调（调用者不阻塞，适合中断上下文）*/
+static void my_node_free(struct rcu_head *rcu)
+{
+    struct my_node *node = container_of(rcu, struct my_node, rcu);
+    kfree(node);
+}
+
+void delete_async(struct my_node *node)
+{
+    spin_lock(&list_lock);
+    list_del_rcu(&node->list);
+    spin_unlock(&list_lock);
+
+    call_rcu(&node->rcu, my_node_free);  /* 宽限期后异步调用 */
+    /* 立即返回，不等待 */
+}
+
+/* 更新（修改链表中的节点值）*/
+void update_node(struct my_node *old_node, int new_data)
+{
+    struct my_node *new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+    *new_node = *old_node;        /* 复制旧节点 */
+    new_node->data = new_data;    /* 修改副本 */
+
+    spin_lock(&list_lock);
+    /* 原子替换：先插入新节点，再删除旧节点 */
+    list_replace_rcu(&old_node->list, &new_node->list);
+    spin_unlock(&list_lock);
+
+    call_rcu(&old_node->rcu, my_node_free);
+}
+```
+
+### 10.3 内核中 RCU 的实际应用
+
+```
+task_struct 访问（进程链表）：
+  for_each_process_thread() 使用 RCU 遍历
+  → 遍历时无需持锁，极高效
+
+网络路由表：
+  fib_lookup() 在 rcu_read_lock() 保护下查路由
+  → 路由更新不会阻塞正在查找的数据包
+
+模块引用计数：
+  try_module_get() 使用 RCU 保护，防止模块卸载竞争
+
+文件系统 dcache：
+  __d_lookup_rcu() 无锁读取目录项缓存
+  → 路径查找的热路径性能极关键
+```
+
+---
+
+## 11. Lock-Free 数据结构（基于 RCU）
+
+```c
+/* 内核中的 RCU 保护链表（无锁读，有锁写）*/
+#include <linux/rculist.h>
+
+/* 无锁读者遍历（O(n)，无任何竞争）*/
+rcu_read_lock();
+list_for_each_entry_rcu(pos, head, member) {
+    /* ... */
+}
+rcu_read_unlock();
+
+/* 有锁写者 */
+spin_lock(&my_lock);
+list_add_rcu(&new->list, head);     /* 添加（rcu_assign_pointer 语义）*/
+list_del_rcu(&entry->list);         /* 删除（不立即释放！）*/
+spin_unlock(&my_lock);
+
+/* RCU 保护的哈希表（hlist）*/
+hlist_for_each_entry_rcu(pos, head, member) { ... }
+hlist_add_head_rcu(&new->node, head);
+hlist_del_rcu(&entry->node);
+
+/* CAS（Compare-And-Swap）原子操作实现 lock-free 结构 */
+/* 在内核中通过 cmpxchg() 实现 */
+do {
+    old_val = READ_ONCE(*ptr);      /* 每次重试都必须重新读取当前值，否则失败后会死循环 */
+    new_val = compute_new(old_val);
+} while (cmpxchg(ptr, old_val, new_val) != old_val);
+/* 适合简单的计数器更新，不适合复杂数据结构 */
+```
+
+---
+
+## 12. futex 内部机制
+
+```c
+/* futex（Fast Userspace muTEX）：用户态的高效互斥锁 */
+
+/* 基本原理：
+   1. 无竞争时：完全在用户态用原子 CAS 完成（无系统调用）
+   2. 有竞争时：才陷入内核等待（系统调用开销只在真正竞争时发生）*/
+
+/* 用户态操作（glibc pthread_mutex_lock 简化版）*/
+static int futex_val = 1;  /* 1=解锁, 0=锁定, -1=锁定且有等待者 */
+
+void mutex_lock(int *uaddr)
+{
+    int c;
+    /* 尝试 CAS: 1 → 0（无竞争，纯用户态）；cmpxchg 返回旧值，旧值为 1 表示成功 */
+    if ((c = cmpxchg(uaddr, 1, 0)) == 1)
+        return;  /* 成功获取锁，无系统调用！ */
+
+    /* 有竞争：陷入内核等待 */
+    if (c != -1)
+        c = xchg(uaddr, -1);  /* 标记有等待者：0/-1 → -1 */
+
+    while (c != 1) {          /* 旧值为 1 说明 xchg 时锁空闲，即已抢到锁 */
+        /* 系统调用：让当前线程进入 futex 等待队列 */
+        syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, -1, NULL);
+        c = xchg(uaddr, -1);
+    }
+}
+
+void mutex_unlock(int *uaddr)
+{
+    /* 原子加 1：0（锁定、无等待者）→ 1（解锁），此时无需系统调用 */
+    if (atomic_inc_and_fetch(uaddr) != 1) {
+        /* 原值为 -1（有等待者）：强制设为 1（解锁），并唤醒一个 */
+        WRITE_ONCE(*uaddr, 1);
+        /* 系统调用：唤醒 futex 等待队列中的一个线程 */
+        syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, 1, NULL);
+    }
+}
+```
+
+**内核 futex 实现（kernel/futex/）**：
+
+```
+FUTEX_WAIT 系统调用路径：
+  sys_futex() → futex_wait()
+    1. 计算 hash：futex_hash_bucket(uaddr) → 找到 hash 桶
+       （私有 futex 以 mm + 虚拟地址为 key；进程间共享的 futex 以 inode + 文件偏移为 key，保证不同进程的映射得到同一 key）
+    2. 验证 *uaddr == val（原子检查）
+    3. 将当前进程加入 hash 桶的等待队列（struct futex_q）
+    4. 调度出去（schedule()）
+
+FUTEX_WAKE 系统调用路径：
+  sys_futex() → futex_wake()
+    1. 计算相同 hash：找到 hash 桶
+    2. 从等待队列取出 nr_wake 个进程
+    3. wake_up_q() 唤醒它们
+
+Priority Inheritance（优先级继承，pi_futex）：
+  FUTEX_LOCK_PI / FUTEX_UNLOCK_PI
+  防止优先级反转：低优先级持锁时，临时提升其优先级至等待者最高级别
+  rt_mutex 实现：内核维护持有者→等待者优先级继承链
+```
+
+---
+
+## 13. Per-CPU 变量
+
+```c
+/* Per-CPU 变量：每个 CPU 有独立副本，无需加锁 */
+
+/* 定义静态 per-CPU 变量 */
+DEFINE_PER_CPU(int, my_counter);
+DEFINE_PER_CPU(struct my_stats, cpu_stats);
+
+/* 访问 per-CPU 变量（需要禁止内核抢占）*/
+int val;
+
+/* 方式一：get_cpu_var / put_cpu_var（禁止抢占 + 返回当前 CPU 的变量引用）*/
+int *cnt = &get_cpu_var(my_counter);  /* 禁止抢占，取得当前 CPU 上 my_counter 的地址 */
+(*cnt)++;                             /* 直接修改 per-CPU 变量本身（而不是局部拷贝）*/
+put_cpu_var(my_counter);              /* 恢复抢占 */
+
+/* 方式二：this_cpu_* 系列（更快；自身保证抢占安全，带双下划线的 __this_cpu_* 变体才假设调用者已禁止抢占）*/
+this_cpu_inc(my_counter);          /* 原子 RMW，无需显式禁止抢占 */
+this_cpu_add(my_counter, 5);
+val = this_cpu_read(my_counter);
+
+/* 方式三：this_cpu_ptr（取当前 CPU 副本的指针，需在中断或已禁抢占的上下文中使用）*/
+preempt_disable();
+int *ptr = this_cpu_ptr(&my_counter);
+(*ptr)++;
+preempt_enable();
+
+/* 跨 CPU 读取（读者需注意：值可能在读取过程中被其他 CPU 修改）*/
+for_each_possible_cpu(cpu) {
+    total += per_cpu(my_counter, cpu);
+}
+/* 这样得到的只是近似值；要精确的跨 CPU 总和需要额外同步（如持锁或暂停所有更新方），统计类计数器通常接受近似值 */
+
+/* 应用场景 */
+/* 网络统计（net/core/net-procfs.c）*/
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
+/* 内存分配（mm/percpu.c）*/
+DEFINE_PER_CPU_ALIGNED(struct pcpu_freelist, pcpu_freelist);
+/* 调度统计（kernel/sched/stats.h）*/
+DEFINE_PER_CPU(struct sched_info, cpu_sched_info);
+```

From c286d4fb1a834c9b0c546b9819e105ba8032c59b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 20 May 2026 12:34:47 +0000
Subject: [PATCH 10/10] docs: add 5 SVG diagrams, expert README.md for ch10-15,
 update ch00-09 with SVG refs and deeper content

Agent-Logs-Url: https://github.com/YYCB/how_to_learn_linux/sessions/265a578b-5d26-408c-934d-e4176267d506

Co-authored-by: YYCB <23326150+YYCB@users.noreply.github.com>
---
 .../README.md"                                |   6 +-
 .../index.html"                               |   2 +
 .../README.md"                                |  10 +-
 .../index.html"                               |   2 +-
 README.md                                     |  46 ++++----
 assets/diagrams/debug-tools.svg               | 108 ++++++++++++++++++
 assets/style.css                              |   4 +
 7 files changed, 149 insertions(+), 29 deletions(-)
 create mode 100644 assets/diagrams/debug-tools.svg

diff --git "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md" "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
index 14dce2c..cb8d690 100644
--- "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
+++ "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/README.md"
@@ -1068,8 +1068,10 @@ cat /etc/kdump.conf
 systemctl enable kdump
 systemctl start kdump
 
-# 5. 测试触发崩溃（!!! 生产环境慎用 !!!）
-echo c > /proc/sysrq-trigger  # 强制崩溃
+# 5. 测试触发崩溃
+# ⚠️  危险操作：以下命令会立即强制崩溃系统，生产环境绝对禁用！
+# 仅在专用测试机/虚拟机中执行，确认已正确配置 kdump 后再操作
+echo c > /proc/sysrq-trigger  # 强制 kernel panic → 触发 kdump 捕获
 
 # 6. 分析 crash dump
 crash /usr/lib/debug/boot/vmlinux-6.1.0 \
diff --git "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html" "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
index e6c513e..b256689 100644
--- "a/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
+++ "b/14-\345\220\257\345\212\250\346\265\201\347\250\213\346\267\261\345\205\245/index.html"
@@ -129,6 +129,8 @@ <h2 id="uefi">14.2 UEFI vs Legacy BIOS</h2>
 <h3>EFI Stub — 不需要 GRUB 也能启动</h3>
 <p>现代 Linux 内核内嵌 "EFI stub"，本身就是一个 EFI 应用，可被 UEFI 固件直接执行：</p>
 <pre class="code-bash"><span class="cm"># 把 vmlinuz 复制到 ESP，UEFI 启动菜单加一条即可，无需 GRUB</span>
+<span class="cm"># 前提：先确认 ESP 目录存在</span>
+mkdir -p /boot/efi/EFI/Linux/
 cp /boot/vmlinuz /boot/efi/EFI/Linux/
 efibootmgr -c -d /dev/nvme0n1 -p <span class="num">1</span> -L <span class="str">"Linux"</span> -l <span class="str">"/EFI/Linux/vmlinuz"</span> \\
     -u <span class="str">"root=/dev/nvme0n1p2 ro initrd=\\EFI\\Linux\\initrd"</span>
diff --git "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md" "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
index 0638a6a..f641a2a 100644
--- "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
+++ "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/README.md"
@@ -3,6 +3,8 @@
 > **学习目标**：掌握 Linux 内核调试与性能分析的完整工具链，从 printk 到 eBPF，
 > 从静态分析到运行时 crash dump，能够定位内核 bug、量化性能瓶颈并实施优化。
 
+![内核调试与性能分析工具全景](../assets/diagrams/debug-tools.svg)
+
 ---
 
 ## 目录
@@ -578,11 +580,11 @@ perf script > after.perf
 ### 配置与原理
 
 ```bash
-# 内核配置（GENERIC 与 HW_TAGS 互斥，二选一）
+# 内核配置（GENERIC 与 HW_TAGS 互斥，二选一，不可同时启用）
 CONFIG_KASAN=y
-CONFIG_KASAN_GENERIC=y        # 软件实现（所有架构）
-# 或（二选一，不可同时启用）
-CONFIG_KASAN_HW_TAGS=y        # 硬件实现（ARM MTE，低开销，需要 ARMv8.5+）
+CONFIG_KASAN_GENERIC=y        # (选项A) 软件实现（x86_64/arm64/riscv 等多数主流架构，推荐开发调试用）
+# 或（二选一）
+CONFIG_KASAN_HW_TAGS=y        # (选项B) 硬件实现（仅 ARM MTE/ARMv8.5+，约1.1-1.2x开销）
 
 # 开销：
 # - 内存：每8字节对应1字节 shadow（内存×2）
diff --git "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html" "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"
index 97bf9ee..7f954a4 100644
--- "a/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"
+++ "b/15-\345\206\205\346\240\270\350\260\203\350\257\225\344\270\216\346\200\247\350\203\275/index.html"
@@ -130,7 +130,7 @@ <h2 id="perf">15.4 perf — 性能分析瑞士军刀</h2>
 <pre class="code-bash"><span class="cm"># 实时看哪些函数 CPU 占用最高 (top)</span>
 sudo perf top
 
-<span class="cm"># 录制 30 秒采样</span>
+<span class="cm"># 录制 30 秒采样 (99Hz: 刻意避开 100Hz，防止与周期性定时器事件同步)</span>
 sudo perf record -F <span class="num">99</span> -a -g -- sleep <span class="num">30</span>
 sudo perf report                   <span class="cm"># 交互式 TUI</span>
 
diff --git a/README.md b/README.md
index 87c27a4..790e53a 100644
--- a/README.md
+++ b/README.md
@@ -23,33 +23,35 @@
 
 ## 📚 目录
 
+> 每章均提供两种格式：点击章节编号在线阅读 **Markdown**（GitHub 直接渲染），或用本地浏览器打开各章目录下的 `index.html` 获得完整 SVG 图表体验。
+
 ### 入门 & 准备
-| # | 章节 | 核心内容 |
-|---|------|----------|
-| [00](./00-学习路线/index.html) | **学习路线** | 阶段规划、时间表、推荐资源 |
-| [01](./01-经典版本选择/index.html) | **经典版本选择** | 0.11 / 2.6.0 / 5.x / 6.x 对比 |
-| [02](./02-环境搭建/index.html) | **环境搭建** | QEMU + GDB + clangd + ftrace + perf |
+| # | 章节 | 核心内容 | SVG 图表 |
+|---|------|----------|---------|
+| [00](./00-学习路线/README.md) | **学习路线** | 阶段规划、时间表、书单、20道自测题 | 架构总览 |
+| [01](./01-经典版本选择/README.md) | **经典版本选择** | 0.11/1.0/2.4/2.6/3.10/4.19/5.15/6.1 全景对比 | 架构总览 |
+| [02](./02-环境搭建/README.md) | **环境搭建** | QEMU + GDB + clangd + ftrace + perf + kdump | — |
 
 ### 核心子系统
-| # | 章节 | 核心内容 |
-|---|------|----------|
-| [03](./03-进程管理/index.html) | **进程管理** | task_struct、fork/CoW、上下文切换、状态机 |
-| [04](./04-内存管理/index.html) | **内存管理** | 多级页表、Buddy、Slab、NUMA、OOM、THP |
-| [05](./05-文件系统/index.html) | **文件系统** | VFS 四对象、Page Cache、ext4、io_uring |
-| [06](./06-系统调用/index.html) | **系统调用** | entry_SYSCALL_64、vDSO、seccomp、自定义 syscall |
-| [07](./07-设备驱动/index.html) | **设备驱动** | 完整 char driver、设备树、中断上下半部 |
-| [08](./08-网络子系统/index.html) | **网络子系统** | sk_buff、TCP 状态机、netfilter、XDP |
-| [09](./09-同步机制/index.html) | **同步机制** | atomic/spinlock/mutex/RCU/percpu/futex/lockdep |
+| # | 章节 | 核心内容 | SVG 图表 |
+|---|------|----------|---------|
+| [03](./03-进程管理/README.md) | **进程管理** | task_struct、fork/CoW、上下文切换汇编、状态机 | 虚拟地址空间布局 |
+| [04](./04-内存管理/README.md) | **内存管理** | 多级页表、Buddy/Slab/SLUB、NUMA、kswapd、OOM、THP | x86_64 四级页表 |
+| [05](./05-文件系统/README.md) | **文件系统** | VFS 四对象、Page Cache 回写、ext4 journal、io_uring | VFS 对象模型 |
+| [06](./06-系统调用/README.md) | **系统调用** | entry_SYSCALL_64 汇编、vDSO、seccomp BPF、新增 syscall | syscall 路径图 |
+| [07](./07-设备驱动/README.md) | **设备驱动** | 完整字符驱动、kobject/sysfs、设备树、MSI/DMA | 设备模型层次 |
+| [08](./08-网络子系统/README.md) | **网络子系统** | sk_buff、收包路径、TCP 状态机、netfilter 5hook、XDP | TCP 握手图 |
+| [09](./09-同步机制/README.md) | **同步机制** | atomic/spinlock/mutex/seqlock/RCU/percpu/futex/lockdep | 同步机制全景 |
 
 ### 专家级深入
-| # | 章节 | 核心内容 |
-|---|------|----------|
-| [10](./10-CFS调度器/index.html) | **CFS 调度器** | vruntime、红黑树、调度类、EAS、cgroup 调度 |
-| [11](./11-容器与命名空间/index.html) | **容器与命名空间** | 8 种 NS、cgroups v2、OverlayFS、75 行 mini-docker |
-| [12](./12-eBPF与可观测性/index.html) | **eBPF 与可观测性** | Verifier/JIT/Maps、kprobe/XDP/tc、bpftrace、Cilium |
-| [13](./13-中断与异常/index.html) | **中断与异常** | IDT、APIC、softirq/workqueue/threaded IRQ、IPI |
-| [14](./14-启动流程深入/index.html) | **启动流程** | UEFI → GRUB → initramfs → start_kernel → systemd |
-| [15](./15-内核调试与性能/index.html) | **内核调试与性能** | ftrace/perf/KASAN/lockdep/bpftrace/kdump/livepatch |
+| # | 章节 | 核心内容 | SVG 图表 |
+|---|------|----------|---------|
+| [10](./10-CFS调度器/README.md) | **CFS 调度器** | vruntime 公式、红黑树、5调度类、EAS、cgroup 层次调度 | CFS 红黑树 |
+| [11](./11-容器与命名空间/README.md) | **容器与命名空间** | 8 种 NS、cgroups v2、OverlayFS、seccomp、mini-docker | 容器内部结构 |
+| [12](./12-eBPF与可观测性/README.md) | **eBPF 与可观测性** | Verifier/JIT/Maps、XDP、CO-RE、bpftrace、Cilium/Falco | eBPF 完整架构 |
+| [13](./13-中断与异常/README.md) | **中断与异常** | IDT/APIC/MSI、softirq/workqueue/threaded IRQ、IPI、hrtimer | 中断处理路径 |
+| [14](./14-启动流程深入/README.md) | **启动流程深入** | BIOS/UEFI、GRUB2、解压、head_64.S、start_kernel()、systemd | 启动全流程图 |
+| [15](./15-内核调试与性能/README.md) | **内核调试与性能** | ftrace/perf/FlameGraph/KASAN/lockdep/kdump+crash/livepatch | 调试工具全景 |
 
 ---
 
diff --git a/assets/diagrams/debug-tools.svg b/assets/diagrams/debug-tools.svg
new file mode 100644
index 0000000..b650134
--- /dev/null
+++ b/assets/diagrams/debug-tools.svg
@@ -0,0 +1,108 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 900 500" font-family="-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif" font-size="12">
+  <rect width="900" height="500" fill="#0d1117"/>
+  <text x="450" y="28" text-anchor="middle" font-size="17" font-weight="700" fill="#ff7b29">内核调试与性能分析工具全景</text>
+
+  <defs>
+    <marker id="a" markerWidth="7" markerHeight="7" refX="5" refY="3" orient="auto">
+      <path d="M0,0 L0,6 L7,3 z" fill="#8b949e"/>
+    </marker>
+  </defs>
+
+  <!-- Category: 静态分析 -->
+  <rect x="20" y="45" width="190" height="180" rx="6" fill="#161b22" stroke="#f85149" stroke-width="1.5"/>
+  <text x="115" y="65" text-anchor="middle" fill="#f85149" font-weight="700" font-size="12">🔴 Sanitizer 与静态分析</text>
+  <text x="115" y="83" text-anchor="middle" fill="#8b949e" font-size="10">KASAN — 内存越界/UAF</text>
+  <text x="115" y="99" text-anchor="middle" fill="#8b949e" font-size="10">KFENCE — 低开销内存检测</text>
+  <text x="115" y="115" text-anchor="middle" fill="#8b949e" font-size="10">KMSAN — 未初始化内存</text>
+  <text x="115" y="131" text-anchor="middle" fill="#8b949e" font-size="10">UBSAN — 未定义行为</text>
+  <text x="115" y="147" text-anchor="middle" fill="#8b949e" font-size="10">lockdep — 死锁检测</text>
+  <text x="115" y="163" text-anchor="middle" fill="#8b949e" font-size="10">KCOV — 覆盖率(syzkaller)</text>
+  <text x="115" y="179" text-anchor="middle" fill="#8b949e" font-size="10">sparse/smatch — 语义分析</text>
+  <text x="115" y="196" text-anchor="middle" fill="#56d364" font-size="9">编译时开启，测试环境使用</text>
+
+  <!-- Category: 动态跟踪 -->
+  <rect x="225" y="45" width="200" height="180" rx="6" fill="#161b22" stroke="#e3b341" stroke-width="1.5"/>
+  <text x="325" y="65" text-anchor="middle" fill="#e3b341" font-weight="700" font-size="12">🟡 动态跟踪</text>
+  <text x="325" y="83" text-anchor="middle" fill="#8b949e" font-size="10">ftrace — 函数/图形/事件跟踪</text>
+  <text x="325" y="99" text-anchor="middle" fill="#8b949e" font-size="10">  set_ftrace_filter</text>
+  <text x="325" y="115" text-anchor="middle" fill="#8b949e" font-size="10">  trace-cmd record/report</text>
+  <text x="325" y="131" text-anchor="middle" fill="#8b949e" font-size="10">kprobes — 任意地址动态探针</text>
+  <text x="325" y="147" text-anchor="middle" fill="#8b949e" font-size="10">uprobes — 用户态函数探针</text>
+  <text x="325" y="163" text-anchor="middle" fill="#8b949e" font-size="10">tracepoints — 静态稳定探针</text>
+  <text x="325" y="179" text-anchor="middle" fill="#8b949e" font-size="10">perf probe — 动态事件</text>
+  <text x="325" y="196" text-anchor="middle" fill="#56d364" font-size="9">生产可用，低开销</text>
+
+  <!-- Category: 性能分析 -->
+  <rect x="440" y="45" width="200" height="180" rx="6" fill="#161b22" stroke="#56d364" stroke-width="1.5"/>
+  <text x="540" y="65" text-anchor="middle" fill="#56d364" font-weight="700" font-size="12">🟢 性能分析</text>
+  <text x="540" y="83" text-anchor="middle" fill="#8b949e" font-size="10">perf stat — 硬件计数器</text>
+  <text x="540" y="99" text-anchor="middle" fill="#8b949e" font-size="10">perf record/report — 热点</text>
+  <text x="540" y="115" text-anchor="middle" fill="#8b949e" font-size="10">FlameGraph — 火焰图</text>
+  <text x="540" y="131" text-anchor="middle" fill="#8b949e" font-size="10">perf sched — 调度延迟</text>
+  <text x="540" y="147" text-anchor="middle" fill="#8b949e" font-size="10">perf mem — 内存访问</text>
+  <text x="540" y="163" text-anchor="middle" fill="#8b949e" font-size="10">Intel VTune / AMD uProf</text>
+  <text x="540" y="179" text-anchor="middle" fill="#8b949e" font-size="10">BPF profiler (bpftrace)</text>
+  <text x="540" y="196" text-anchor="middle" fill="#56d364" font-size="9">CPU/内存/IO/调度全维度</text>
+
+  <!-- Category: 崩溃分析 -->
+  <rect x="655" y="45" width="225" height="180" rx="6" fill="#161b22" stroke="#58a6ff" stroke-width="1.5"/>
+  <text x="767" y="65" text-anchor="middle" fill="#58a6ff" font-weight="700" font-size="12">🔵 崩溃分析</text>
+  <text x="767" y="83" text-anchor="middle" fill="#8b949e" font-size="10">kdump — 捕获崩溃内存快照</text>
+  <text x="767" y="99" text-anchor="middle" fill="#8b949e" font-size="10">crash — 分析vmcore文件</text>
+  <text x="767" y="115" text-anchor="middle" fill="#8b949e" font-size="10">  bt: 调用栈  ps: 进程列表</text>
+  <text x="767" y="131" text-anchor="middle" fill="#8b949e" font-size="10">  vm: 内存  log: 日志</text>
+  <text x="767" y="147" text-anchor="middle" fill="#8b949e" font-size="10">GDB + QEMU — 源码级调试</text>
+  <text x="767" y="163" text-anchor="middle" fill="#8b949e" font-size="10">KGDB — 实机远程调试</text>
+  <text x="767" y="179" text-anchor="middle" fill="#8b949e" font-size="10">串口控制台 earlycon</text>
+  <text x="767" y="196" text-anchor="middle" fill="#56d364" font-size="9">故障复现和根因定位</text>
+
+  <!-- eBPF / observability row -->
+  <rect x="20" y="250" width="860" height="100" rx="6" fill="#161b22" stroke="#8957e5" stroke-width="1.5"/>
+  <text x="450" y="270" text-anchor="middle" fill="#8957e5" font-weight="700" font-size="12">🟣 eBPF 可观测性工具生态</text>
+
+  <rect x="35" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="85" y="298" text-anchor="middle" fill="#8957e5" font-size="10">bpftrace</text>
+  <text x="85" y="313" text-anchor="middle" fill="#8b949e" font-size="9">单行脚本</text>
+  <text x="85" y="326" text-anchor="middle" fill="#8b949e" font-size="9">快速原型</text>
+
+  <rect x="145" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="195" y="298" text-anchor="middle" fill="#8957e5" font-size="10">BCC 工具集</text>
+  <text x="195" y="313" text-anchor="middle" fill="#8b949e" font-size="9">execsnoop</text>
+  <text x="195" y="326" text-anchor="middle" fill="#8b949e" font-size="9">opensnoop/biolatency</text>
+
+  <rect x="255" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="305" y="298" text-anchor="middle" fill="#8957e5" font-size="10">libbpf/CO-RE</text>
+  <text x="305" y="313" text-anchor="middle" fill="#8b949e" font-size="9">可移植程序</text>
+  <text x="305" y="326" text-anchor="middle" fill="#8b949e" font-size="9">生产部署</text>
+
+  <rect x="365" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="415" y="298" text-anchor="middle" fill="#8957e5" font-size="10">Cilium</text>
+  <text x="415" y="313" text-anchor="middle" fill="#8b949e" font-size="9">K8s 网络</text>
+  <text x="415" y="326" text-anchor="middle" fill="#8b949e" font-size="9">eBPF 数据平面</text>
+
+  <rect x="475" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="525" y="298" text-anchor="middle" fill="#8957e5" font-size="10">Falco</text>
+  <text x="525" y="313" text-anchor="middle" fill="#8b949e" font-size="9">安全检测</text>
+  <text x="525" y="326" text-anchor="middle" fill="#8b949e" font-size="9">运行时威胁</text>
+
+  <rect x="585" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="635" y="298" text-anchor="middle" fill="#8957e5" font-size="10">Pixie</text>
+  <text x="635" y="313" text-anchor="middle" fill="#8b949e" font-size="9">全链路追踪</text>
+  <text x="635" y="326" text-anchor="middle" fill="#8b949e" font-size="9">自动采集</text>
+
+  <rect x="695" y="280" width="100" height="55" rx="4" fill="#0d1117" stroke="#8957e5"/>
+  <text x="745" y="298" text-anchor="middle" fill="#8957e5" font-size="10">bpftool</text>
+  <text x="745" y="313" text-anchor="middle" fill="#8b949e" font-size="9">程序/Map管理</text>
+  <text x="745" y="326" text-anchor="middle" fill="#8b949e" font-size="9">内核源码树自带</text>
+
+  <!-- Overhead guide -->
+  <rect x="20" y="368" width="860" height="115" rx="6" fill="#0d1117" stroke="#30363d"/>
+  <text x="450" y="388" text-anchor="middle" fill="#8b949e" font-weight="600" font-size="11">工具开销与适用场景速查</text>
+
+  <text x="40" y="408" fill="#f85149" font-size="10">★★★ 高开销 (仅开发/测试):</text>
+  <text x="40" y="425" fill="#8b949e" font-size="10">KASAN(1.5-2x) / KMSAN(3x) / lockdep(1.1-1.3x) / KCOV(5x) / kdump(保留内存128MB+)</text>
+  <text x="40" y="443" fill="#e3b341" font-size="10">★★ 中等 (测试+灰度):</text>
+  <text x="40" y="460" fill="#8b949e" font-size="10">ftrace function tracer(5%~15%) / perf record(2%~5%) / kprobes(动态) / KFENCE(&lt;1%)</text>
+  <text x="40" y="478" fill="#56d364" font-size="10">★ 低开销 (生产可用):</text>
+  <text x="440" y="478" fill="#8b949e" font-size="10">perf stat(&lt;0.1%) / tracepoints / eBPF(&lt;1%) / /proc统计读取</text>
+</svg>
diff --git a/assets/style.css b/assets/style.css
index cd58bc6..27e0a9a 100644
--- a/assets/style.css
+++ b/assets/style.css
@@ -258,6 +258,10 @@ th {
 
 tr:hover td { background: var(--accent-soft); }
 
+@media (prefers-reduced-motion: reduce) {
+    tr:hover td { background: transparent; }
+}
+
 /* ─── Callout boxes ─────────────────────────────────── */
 .callout {
     border-left: 4px solid var(--accent-2);