From 698931daa6999d3a2ee450abfca70001e135b2d6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:18:28 +0000 Subject: [PATCH 1/3] Initial plan From 7bb62fc53fc10146a22e1f7e7705e0300d93757d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:32:20 +0000 Subject: [PATCH 2/3] Implement Technical Deep Dives: allocators, lock-free, DMA, IRQ, error handling Agent-Logs-Url: https://github.com/orgito1015/Rust-in-the-Linux-Kernel/sessions/4c59cfad-a102-4eaa-84c6-cfed7e0725a2 Co-authored-by: orgito1015 <86354243+orgito1015@users.noreply.github.com> --- ROADMAP.md | 10 +- docs/09-technical-deep-dives.md | 530 ++++++++++++++++++ research/snippets/INDEX.md | 123 +++- research/snippets/concurrency/lock_free.rs | 423 ++++++++++++++ research/snippets/drivers/dma_operations.rs | 388 +++++++++++++ .../snippets/drivers/interrupt_handler.rs | 425 ++++++++++++++ research/snippets/error/error_propagation.rs | 353 ++++++++++++ research/snippets/memory/memory_allocator.rs | 301 ++++++++++ 8 files changed, 2540 insertions(+), 13 deletions(-) create mode 100644 docs/09-technical-deep-dives.md create mode 100644 research/snippets/concurrency/lock_free.rs create mode 100644 research/snippets/drivers/dma_operations.rs create mode 100644 research/snippets/drivers/interrupt_handler.rs create mode 100644 research/snippets/error/error_propagation.rs create mode 100644 research/snippets/memory/memory_allocator.rs diff --git a/ROADMAP.md b/ROADMAP.md index 0a90959..2a61840 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -55,11 +55,11 @@ Linux kernel. The project spans from foundational knowledge through advanced ana ### Technical Deep Dives -- [ ] Memory allocator integration patterns -- [ ] Lock-free data structure implementations -- [ ] DMA and hardware interaction safety -- [ ] Interrupt handler patterns in Rust -- [ ] Error propagation mechanisms +- [x] Memory allocator integration patterns +- [x] Lock-free data structure implementations +- [x] DMA and hardware interaction safety +- [x] Interrupt handler patterns in Rust +- [x] Error propagation mechanisms ### Community & Process diff --git a/docs/09-technical-deep-dives.md b/docs/09-technical-deep-dives.md new file mode 100644 index 0000000..a3bac72 --- /dev/null +++ b/docs/09-technical-deep-dives.md @@ -0,0 +1,530 @@ +# Technical Deep Dives + +This document provides in-depth coverage of the five most important low-level patterns for writing +correct and safe Rust code in the Linux kernel: + +1. [Memory Allocator Integration Patterns](#1-memory-allocator-integration-patterns) +2. [Lock-Free Data Structure Implementations](#2-lock-free-data-structure-implementations) +3. [DMA and Hardware Interaction Safety](#3-dma-and-hardware-interaction-safety) +4. [Interrupt Handler Patterns in Rust](#4-interrupt-handler-patterns-in-rust) +5. [Error Propagation Mechanisms](#5-error-propagation-mechanisms) + +Each section describes the concept, shows the key Rust idioms, and links to the corresponding +runnable code snippet. + +--- + +## 1. Memory Allocator Integration Patterns + +**Code snippet**: [`research/snippets/memory/memory_allocator.rs`](../research/snippets/memory/memory_allocator.rs) + +### Allocator Overview + +### The Kernel Allocators + +| Allocator | API | Use case | +|-----------|-----|----------| +| SLUB/SLAB | `kmalloc` / `kfree` | General objects ≤ 4 MiB | +| Vmalloc | `vmalloc` / `vfree` | Large, non-contiguous buffers | +| Page allocator | `alloc_pages` / `free_pages` | Page-aligned, physically contiguous | +| Slab cache | `kmem_cache_create` / `kmem_cache_alloc` | Many same-sized objects | +| Per-CPU | `alloc_percpu` / `free_percpu` | Per-CPU variables | + +### GFP Flags + +GFP (Get Free Pages) flags control *how* an allocation is performed: + +```rust +// Process context — may sleep and reclaim memory +let buf = Box::try_new([0u8; 128])?; // GFP_KERNEL under the hood + +// Interrupt context — must NOT sleep +// Real code: kmalloc(size, GFP_ATOMIC) +// The kernel crate exposes this via a separate allocator type. +``` + +Key flags: + +- **`GFP_KERNEL`**: Standard; may sleep. Use in process context. +- **`GFP_ATOMIC`**: Non-blocking; may fail. Use in IRQ/softirq context. +- **`GFP_DMA`** / **`GFP_DMA32`**: Restrict to DMA-addressable memory zones. +- **`GFP_NOWAIT`**: Like `GFP_ATOMIC` but without emergency reserves. + +### Rust Mapping + +```rust +// C +void *p = kmalloc(sizeof(struct foo), GFP_KERNEL); +if (!p) return -ENOMEM; +// ... use p ... +kfree(p); + +// Rust equivalent — no manual free needed +let p: Box = Box::try_new(Foo::default())?; +// p is automatically freed when it goes out of scope +``` + +### Slab Caches + +When many objects of the same type are allocated and freed frequently, a dedicated slab cache +(`kmem_cache`) is faster and reduces fragmentation: + +```rust +// Create the cache once at module init +let cache = SlabCache::new(c_str!("my_objects"), size_of::())?; + +// Allocate from it in the hot path +let obj_ptr = cache.alloc()?; + +// Free individual objects back to the cache +// kmem_cache_free(cache, obj_ptr); + +// Cache is destroyed automatically when SlabCache is dropped +``` + +### Guidelines + +- Always use `try_new` / `try_with_capacity` variants — they return `Result` rather than panicking. +- Prefer `Box` / `Vec` from the kernel crate over raw pointer manipulation. +- Use `vmalloc` only when you need large, virtually-contiguous memory and physical + contiguity is not required. +- Use slab caches for hot-path allocations of fixed-size objects. + +--- + +## 2. Lock-Free Data Structure Implementations + +**Code snippet**: [`research/snippets/concurrency/lock_free.rs`](../research/snippets/concurrency/lock_free.rs) + +### Lock-Free Overview +`AtomicUsize`, `AtomicPtr`, and the compare-and-swap (CAS) primitive. + +### Memory Ordering + +Choosing the correct `Ordering` is critical for correctness: + +```rust +use core::sync::atomic::Ordering; + +// Relaxed — atomicity only; no ordering guarantees. +// Good for independent counters. +counter.fetch_add(1, Ordering::Relaxed); + +// Acquire — all subsequent loads/stores happen AFTER this load. +// Used on the reader side of a producer/consumer protocol. +let value = ptr.load(Ordering::Acquire); + +// Release — all preceding loads/stores happen BEFORE this store. +// Used on the writer side. +ptr.store(new_value, Ordering::Release); + +// AcqRel — combined Acquire + Release for read-modify-write operations. +let old = ptr.compare_exchange(expected, new, Ordering::AcqRel, Ordering::Acquire); + +// SeqCst — total sequential consistency; most expensive. +// Use only when multiple independent atomics must be observed in order. +``` + +### Treiber Lock-Free Stack + +The Treiber stack is the canonical lock-free stack. It uses a single `AtomicPtr` for the head +and a CAS loop to push and pop without a mutex: + +```rust +// Push: allocate node, CAS head to point to it +pub fn push(&self, value: T) -> Result<()> { + let node_ptr = Box::into_raw(Box::try_new(Node { value, next: null_mut() })?); + loop { + let old_head = self.head.load(Ordering::Relaxed); + unsafe { (*node_ptr).next = old_head }; + if self.head.compare_exchange_weak(old_head, node_ptr, + Ordering::Release, Ordering::Relaxed).is_ok() { + return Ok(()); + } + } +} + +// Pop: CAS head to point to head->next +pub fn pop(&self) -> Option { + loop { + let head = self.head.load(Ordering::Acquire); + if head.is_null() { return None; } + let next = unsafe { (*head).next }; + if self.head.compare_exchange_weak(head, next, + Ordering::AcqRel, Ordering::Acquire).is_ok() { + let node = unsafe { Box::from_raw(head) }; + return Some(node.value); + } + } +} +``` + +### Atomic Reference Counting + +Rust provides `Arc` in `std`, but the kernel uses a custom implementation backed by +`refcount_t` (which has overflow protection). The key insight is correct memory ordering: + +```rust +// Clone: increment with Relaxed (the Acquire in drop synchronises) +refcount.fetch_add(1, Ordering::Relaxed); + +// Drop: decrement with Release; fence on reaching 0 +if refcount.fetch_sub(1, Ordering::Release) == 1 { + fence(Ordering::Acquire); // See all writes from previous owners + // Now safe to free +} +``` + +### RCU (Read-Copy-Update) + +RCU is the kernel's highest-throughput read-mostly data structure mechanism: + +- **Readers**: use `rcu_read_lock()` / `rcu_read_unlock()` — effectively free on non-preemptible kernels. +- **Writers**: copy the data, update the copy, atomically publish the new pointer with + `rcu_assign_pointer()`, then call `synchronize_rcu()` to wait for all existing readers to finish + before freeing the old copy. + +```rust +// Reader (maps to rcu_dereference + rcu_read_lock/unlock) +protected.read(|value| { + process(value); // Safe read-only access +}); + +// Writer (maps to rcu_assign_pointer + synchronize_rcu + kfree_rcu) +let old_ptr = unsafe { protected.replace(new_value)? }; +// synchronize_rcu() here (waits for all readers) +// unsafe { drop(Box::from_raw(old_ptr)); } +``` + +### When to Use Each Approach + +| Approach | Throughput | Latency | Use case | +|----------|-----------|---------|----------| +| Mutex | Low | Variable | Infrequent writes, complex invariants | +| Spinlock | Medium | Low | Short critical sections, IRQ context | +| Atomic ops | High | Very low | Single-value flags, counters | +| Lock-free stack | High | Low | Producer/consumer, work queues | +| RCU | Very high (reads) | Low (reads) | Read-mostly global data | + +--- + +## 3. DMA and Hardware Interaction Safety + +**Code snippet**: [`research/snippets/drivers/dma_operations.rs`](../research/snippets/drivers/dma_operations.rs) + +### DMA Overview +safety invariants at compile time. + +### DMA Types + +**Coherent (consistent) DMA** + +Memory that is simultaneously coherent for both the CPU and the device. No explicit cache +flushes are needed. More expensive to allocate; use for control structures and descriptor rings. + +```rust +// C +void *cpu_addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); +// ... +dma_free_coherent(dev, size, cpu_addr, dma_addr); + +// Rust (RAII wrapper) +let buf = CoherentDmaBuffer::alloc(size)?; +let dma_addr = buf.dma_addr(); // Program into device registers +// buf is freed automatically via Drop +``` + +**Streaming DMA** + +A CPU buffer is temporarily mapped for a single DMA transfer and then unmapped. Cache sync calls +are required to maintain coherency between CPU and device views of the data: + +```rust +// Before device reads: flush CPU caches +mapping.sync_for_device(); +// Start DMA transfer on hardware +// ... +// After device writes: invalidate stale CPU caches +mapping.sync_for_cpu(); +let data = unsafe { slice::from_raw_parts(cpu_ptr, size) }; +``` + +### DMA Descriptor Rings + +Descriptor rings are the primary mechanism for high-throughput DMA (network, NVMe, GPU drivers): + +```rust +#[repr(C)] // Hardware reads this layout directly +struct DmaDescriptor { + addr: u64, // DMA address of the buffer + len: u32, // Transfer length + flags: u32, // Device-specific control bits +} + +// Verify at compile time that the size matches hardware specification +const _: () = assert!(core::mem::size_of::() == 16); +``` + +### Memory-Mapped I/O (MMIO) + +Hardware registers are accessed through memory-mapped addresses. **Always** use volatile +operations — the compiler must not cache or reorder these accesses: + +```rust +// C +writel(value, base + REGISTER_OFFSET); +u32 val = readl(base + REGISTER_OFFSET); + +// Rust +unsafe { + let ptr = base.add(REGISTER_OFFSET) as *mut u32; + core::ptr::write_volatile(ptr, value); + let val = core::ptr::read_volatile(ptr as *const u32); +} +// In real drivers, prefer iowrite32/ioread32 from the kernel crate. +``` + +### IOMMU and DMA Masks + +Before performing DMA, the driver must declare the maximum DMA address the device can handle: + +```c +// C: Set the device's DMA mask to 64-bit addresses +dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); +``` + +The IOMMU translates device-visible DMA addresses to physical memory addresses, providing: + +- **Address translation**: Devices do not see physical memory addresses directly. +- **Protection**: A buggy device cannot corrupt memory outside its allowed region. +- **Large address spaces**: Even 32-bit devices can address >4 GiB with IOMMU remapping. + +### Common DMA Bugs (and How Rust Prevents Them) + +| Bug | C | Rust mitigation | +|-----|---|-----------------| +| Use-after-free | CPU accesses buffer while device writes | `CoherentDmaBuffer` owned; cannot be freed while borrowed | +| Double-map | Same buffer mapped twice | Ownership prevents aliasing | +| Direction mismatch | `TO_DEVICE` buffer read by CPU without sync | Direction encoded as const generic parameter | +| Missing sync call | Stale CPU cache after device write | `sync_for_cpu()` must be called before dereferencing | +| MMIO without volatile | Compiler removes "dead" register reads | `read_volatile` / `write_volatile` required | + +--- + +## 4. Interrupt Handler Patterns in Rust + +**Code snippet**: [`research/snippets/drivers/interrupt_handler.rs`](../research/snippets/drivers/interrupt_handler.rs) + +### Interrupt Handler Overview + +### IRQ Context Constraints + +| Operation | IRQ Context | Process Context | +|-----------|-------------|-----------------| +| Sleep / schedule | ❌ NEVER | ✅ | +| `mutex_lock()` | ❌ NEVER | ✅ | +| `kmalloc(GFP_KERNEL)` | ❌ NEVER | ✅ | +| `kmalloc(GFP_ATOMIC)` | ✅ (may fail) | ✅ | +| Spinlock | ✅ | ✅ | +| Atomics | ✅ | ✅ | +| `copy_to_user()` | ❌ NEVER | ✅ | + +### Return Values + +Every IRQ handler must return one of: + +```rust +IrqReturn::None // Not our interrupt (shared IRQ line) +IrqReturn::Handled // Interrupt handled; no further action +IrqReturn::WakeThread // Handled; also wake the threaded handler +``` + +### Pattern 1 — Simple Handler + +```rust +// Called in hard-IRQ context +unsafe extern "C" fn my_irq_handler(irq: u32, data: *mut c_void) -> IrqReturn { + let dev = &*(data as *const MyDeviceData); + + // 1. Check if interrupt is from our device + let status = ioread32(dev.base + STATUS_REG); + if status & IRQ_PENDING == 0 { + return IrqReturn::None; + } + + // 2. Acknowledge the interrupt (clear IRQ at hardware level) + iowrite32(dev.base + ACK_REG, status & IRQ_PENDING); + + // 3. Record the event (atomic — no locks needed) + dev.irq_count.fetch_add(1, Ordering::Relaxed); + + IrqReturn::Handled +} +``` + +### Pattern 2 — Top-Half / Bottom-Half Split + +For handlers that need to do non-trivial processing, split the work: + +```rust +// Top half: minimal work in IRQ context +fn top_half(&self) -> IrqReturn { + self.irq_count.fetch_add(1, Ordering::Relaxed); + schedule_work(&self.work); // Defer to workqueue + IrqReturn::Handled +} + +// Bottom half: runs in process context (can sleep) +fn bottom_half(&self) { + // Process DMA buffers, update statistics, notify userspace... +} +``` + +### Pattern 3 — Threaded IRQ Handler + +The modern alternative to explicit workqueues: + +```rust +// Register with request_threaded_irq(irq, primary, thread, flags, name, data) + +// Primary handler — hard-IRQ context, must be fast +fn primary_handler(&self) -> IrqReturn { + // Mask the IRQ at hardware level + iowrite32(self.base + MASK_REG, 0); + IrqReturn::WakeThread // Ask kernel to run thread_handler +} + +// Thread handler — process context, can sleep +fn thread_handler(&self) -> IrqReturn { + // Full processing here + // Re-enable the IRQ when done + iowrite32(self.base + MASK_REG, IRQ_ENABLE); + IrqReturn::Handled +} +``` + +### Accessing Device Data in a Handler + +The `dev_id` pointer passed to `request_irq` must: + +1. Point to data that remains valid until `free_irq` is called. +2. Use atomics or spinlocks for all shared state. +3. Never use `Mutex` (it can sleep). + +```rust +pub struct DeviceData { + rx_count: AtomicU32, // ✅ Safe in IRQ context + // mutex: Mutex, // ❌ Cannot use in IRQ context +} +``` + +--- + +## 5. Error Propagation Mechanisms + +**Code snippet**: [`research/snippets/error/error_propagation.rs`](../research/snippets/error/error_propagation.rs) + +### Error Propagation Overview +integer, so all existing error codes are preserved. + +### The `?` Operator + +The `?` operator is the key ergonomic improvement over C's error handling: + +```c +// C — must check every return value manually +int ret = step_one(); +if (ret < 0) return ret; + +ret = step_two(); +if (ret < 0) return ret; +``` + +```rust +// Rust — ? propagates errors automatically +fn init() -> Result<()> { + step_one()?; + step_two()?; + Ok(()) +} +``` + +### Converting C Return Codes + +Most kernel C functions return `0` on success and a negative errno on failure: + +```rust +// Wrap any C function that returns an int +fn to_result(ret: i32) -> Result<()> { + if ret >= 0 { Ok(()) } else { Err(Error::from_errno(-ret)) } +} + +// Usage +to_result(unsafe { bindings::some_c_function(arg) })?; +``` + +### Enriching Errors with Context + +Use `.map_err` to transform or log errors before propagating: + +```rust +fn open_device(name: &str) -> Result { + find_device(name).map_err(|e| { + pr_err!("Could not find device '{}': {:?}\n", name, e); + ENODEV + }) +} +``` + +### `Option` ↔ `Result` Conversions + +```rust +// Option::ok_or — convert None to Err +let idx = slice.iter().position(|&x| x == target) + .ok_or(ENODEV)?; + +// Result::ok — convert Err to None (drop the error) +let maybe = fallible_operation().ok(); +``` + +### RAII for Cleanup on Error + +Rust's `Drop` trait replaces C's `goto cleanup` pattern: + +```rust +fn driver_init() -> Result { + let buf = DmaBuffer::alloc(SIZE)?; // allocated + let irq = register_irq(IRQ_NUM)?; // registered + let timer = start_timer(INTERVAL)?; // started + // If any step fails, all previous resources are freed by Drop. + // No goto/cleanup needed. + Ok(DriverState { buf, irq, timer }) +} +``` + +### Error Handling Quick Reference + +| Pattern | Use case | +|---------|----------| +| `expr?` | Propagate error to caller immediately | +| `.map_err(\|e\| new_err)` | Transform or log error | +| `.ok_or(err)` | `Option::None` → `Err(err)` | +| `.unwrap_or(default)` | Fallback value (avoid in kernel code) | +| `match result { Ok(v) => ..., Err(e) => ... }` | Handle specific errors differently | +| `if let Err(e) = f() { pr_warn!(...); }` | Log and continue | + +> **Warning**: Never use `.unwrap()` in production kernel code. It panics on `Err`, which +> translates to a kernel `BUG()` — unacceptable in a production driver. + +--- + +## Further Reading + +- [Kernel Rust API Documentation](https://rust.docs.kernel.org/) +- [Rust for Linux Samples](https://github.com/Rust-for-Linux/linux/tree/rust/samples/rust) +- [DMA API Howto](https://docs.kernel.org/core-api/dma-api-howto.html) +- [Writing an IRQ handler](https://docs.kernel.org/core-api/genericirq.html) +- [Memory allocation guide](https://docs.kernel.org/core-api/memory-allocation.html) +- [RCU concepts](https://docs.kernel.org/RCU/whatisRCU.html) +- [Lock-free programming (Preshing)](https://preshing.com/20120612/an-introduction-to-lock-free-programming/) +- [`research/snippets/`](../research/snippets/) — All code examples in this project diff --git a/research/snippets/INDEX.md b/research/snippets/INDEX.md index 25b012b..6301ea6 100644 --- a/research/snippets/INDEX.md +++ b/research/snippets/INDEX.md @@ -23,6 +23,64 @@ This directory contains practical code examples demonstrating Rust in the Linux --- +#### [dma_operations.rs](./drivers/dma_operations.rs) + +**Purpose**: DMA and hardware interaction safety + +**Concepts Demonstrated**: + +- Coherent (consistent) DMA allocation with RAII cleanup +- Streaming DMA with explicit cache synchronisation +- DMA direction encoded in the type system +- Memory-mapped I/O (MMIO) via volatile reads/writes +- DMA descriptor ring layout with `#[repr(C)]` +- IOMMU and DMA mask concepts + +**Build**: Educational example; requires kernel build environment for real DMA calls + +**Difficulty**: Advanced + +--- + +#### [interrupt_handler.rs](./drivers/interrupt_handler.rs) + +**Purpose**: Interrupt handler patterns in Rust + +**Concepts Demonstrated**: + +- IRQ return values (`IRQ_HANDLED`, `IRQ_NONE`, `IRQ_WAKE_THREAD`) +- Simple top-half handler with atomic device state +- Top-half / bottom-half split using a workqueue +- Threaded interrupt handler (`request_threaded_irq`) +- Safe per-device data access from IRQ context +- IRQ context constraints (no sleeping, no blocking allocations) + +**Build**: Educational example; patterns apply directly to real kernel drivers + +**Difficulty**: Advanced + +--- + +### Concurrency + +#### [lock_free.rs](./concurrency/lock_free.rs) + +**Purpose**: Lock-free data structure implementations + +**Concepts Demonstrated**: + +- Atomic counter with explicit memory ordering (`Relaxed`, `Acquire`, `Release`) +- Treiber lock-free stack using compare-and-swap +- Atomic reference counting (Arc-like RAII wrapper) +- RCU (Read-Copy-Update) reader/writer pattern +- Memory ordering cheat-sheet + +**Build**: Compilable with standard Rust toolchain for tests; kernel patterns for in-tree use + +**Difficulty**: Advanced + +--- + ### FFI (Foreign Function Interface) #### [rust_to_c.rs](./ffi/rust_to_c.rs) @@ -44,6 +102,51 @@ This directory contains practical code examples demonstrating Rust in the Linux --- +### Memory + +#### [memory_allocator.rs](./memory/memory_allocator.rs) + +**Purpose**: Memory allocator integration patterns + +**Concepts Demonstrated**: + +- Heap allocation with `Box` (maps to `kmalloc`/`kfree`) +- Dynamic arrays with `Vec` using `try_push` +- GFP flags: `GFP_KERNEL`, `GFP_ATOMIC`, `GFP_DMA` +- Custom RAII wrapper over raw kernel memory +- Slab cache (`kmem_cache`) pattern for fixed-size objects +- Vmalloc for large, non-physically-contiguous allocations +- Allocator selection guide + +**Build**: Educational example; requires kernel build for real allocator calls + +**Difficulty**: Intermediate + +--- + +### Error Handling + +#### [error_propagation.rs](./error/error_propagation.rs) + +**Purpose**: Error propagation mechanisms + +**Concepts Demonstrated**: + +- Kernel `Error` type and common errno values +- The `?` operator for concise error propagation +- Converting C integer return codes to `Result` +- `map_err` for error context enrichment +- `Option` ↔ `Result` conversions (`ok_or`, `ok`) +- RAII for automatic cleanup on error (replaces C `goto cleanup`) +- Error handling across FFI boundaries +- Collecting results from iterators + +**Build**: Compilable with standard Rust toolchain for tests + +**Difficulty**: Beginner–Intermediate + +--- + ## Using These Snippets ### For Learning @@ -76,21 +179,22 @@ When adding new snippets: - ✅ Basic modules - ✅ FFI / C interop +- ✅ Memory allocation patterns +- ✅ Lock-free data structures +- ✅ DMA and hardware interaction +- ✅ Interrupt handlers +- ✅ Error handling - 📋 Platform drivers (planned) - 📋 Character devices (planned) - 📋 Synchronization primitives (planned) -- 📋 Memory allocation patterns (planned) -- 📋 Error handling (planned) ### Future Additions (Planned) We're looking to add examples for: - 📋 Network device drivers -- 📋 Block device drivers +- 📋 Block device drivers - 📋 Filesystem operations -- 📋 Interrupt handlers -- 📋 DMA operations - 📋 Device tree interaction - 📋 GPIO and hardware control - 📋 Power management @@ -137,9 +241,12 @@ make -C /lib/modules/$(uname -r)/build M=$PWD 1. Start with `rust_minimal.rs` - understand basic structure 2. Move to `rust_to_c.rs` - learn FFI basics -3. Try modifying examples -4. Build your own simple module -5. Graduate to more complex driver examples +3. Read `error_propagation.rs` - master Rust error handling +4. Study `memory_allocator.rs` - understand kernel memory management +5. Explore `interrupt_handler.rs` - learn IRQ handler patterns +6. Dive into `lock_free.rs` - advanced concurrency without locks +7. Graduate to `dma_operations.rs` - hardware interaction patterns +8. Build your own simple module ## Resources diff --git a/research/snippets/concurrency/lock_free.rs b/research/snippets/concurrency/lock_free.rs new file mode 100644 index 0000000..e0b1170 --- /dev/null +++ b/research/snippets/concurrency/lock_free.rs @@ -0,0 +1,423 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Lock-Free Data Structure Implementations for the Linux Kernel +//! +//! Lock-free algorithms improve scalability on multi-core systems by avoiding +//! mutex contention. In the kernel context, they are critical for: +//! +//! - **Per-CPU counters**: Statistics that avoid cache-line bouncing +//! - **RCU (Read-Copy-Update)**: High-throughput read-mostly data +//! - **Atomic reference counting**: Safe object lifetime management +//! - **Lock-free stacks/queues**: Communication between CPU cores or ISRs +//! +//! ## Key Concepts +//! +//! - `core::sync::atomic::*` provides portable atomic operations +//! - `Ordering` controls memory visibility: `Relaxed`, `Acquire`, `Release`, `SeqCst` +//! - RCU in Rust wraps `rcu_read_lock` / `rcu_read_unlock` / `synchronize_rcu` +//! - Treiber stack and Michael-Scott queue are the canonical lock-free structures +//! +//! ## Build +//! +//! Place in `samples/rust/` and enable `CONFIG_RUST=y`. +//! +//! ## Difficulty +//! +//! Advanced + +use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; +use kernel::prelude::*; + +// --------------------------------------------------------------------------- +// 1. Atomic counter (per-CPU style) +// --------------------------------------------------------------------------- + +/// A shared atomic counter with explicit memory ordering. +/// +/// In real per-CPU code the kernel provides `percpu` variables that avoid +/// atomic operations entirely. This struct illustrates the correct memory +/// ordering choices for a shared counter. +pub struct AtomicCounter { + value: AtomicUsize, +} + +impl AtomicCounter { + /// Create a new counter starting at zero. + pub const fn new() -> Self { + Self { + value: AtomicUsize::new(0), + } + } + + /// Increment and return the *previous* value. + /// + /// `Relaxed` ordering is sufficient for counters that do not gate any + /// other memory operations (e.g., statistics). + pub fn increment(&self) -> usize { + self.value.fetch_add(1, Ordering::Relaxed) + } + + /// Decrement and return the *previous* value. + pub fn decrement(&self) -> usize { + self.value.fetch_sub(1, Ordering::Relaxed) + } + + /// Read the current value. + pub fn get(&self) -> usize { + self.value.load(Ordering::Relaxed) + } + + /// Compare-and-swap: only write `new` if the current value equals `expected`. + /// + /// Returns `Ok(previous)` on success, `Err(actual)` on failure. + /// + /// Uses `AcqRel` so that the successful store is visible to all CPUs before + /// any subsequent `Acquire` load. + pub fn compare_exchange(&self, expected: usize, new: usize) -> core::result::Result { + self.value + .compare_exchange(expected, new, Ordering::AcqRel, Ordering::Acquire) + } +} + +impl Default for AtomicCounter { + fn default() -> Self { + Self::new() + } +} + +// --------------------------------------------------------------------------- +// 2. Treiber lock-free stack +// --------------------------------------------------------------------------- + +/// A node in the lock-free stack. +struct StackNode { + value: T, + next: *mut StackNode, +} + +/// A lock-free stack using the Treiber algorithm. +/// +/// The algorithm uses a single `AtomicPtr` for the head and relies on +/// compare-and-swap (CAS) to push/pop without a mutex. ABA prevention is +/// handled by the fact that pushed nodes are *owned* by the stack and are +/// only freed after all readers have quiesced (requires careful unsafe). +/// +/// # Safety +/// +/// Nodes are allocated with `Box::into_raw` and freed with `Box::from_raw`. +/// Callers must ensure no aliased mutable access to a node after it has been +/// pushed. +pub struct TreiberStack { + head: AtomicPtr>, +} + +// SAFETY: TreiberStack owns the nodes; access is mediated by atomic CAS. +unsafe impl Send for TreiberStack {} +unsafe impl Sync for TreiberStack {} + +impl TreiberStack { + /// Create an empty stack. + pub const fn new() -> Self { + Self { + head: AtomicPtr::new(core::ptr::null_mut()), + } + } + + /// Push `value` onto the top of the stack. + /// + /// This is wait-free: the loop retries only on contention. + pub fn push(&self, value: T) -> Result<()> { + let node = Box::try_new(StackNode { + value, + next: core::ptr::null_mut(), + })?; + let node_ptr = Box::into_raw(node); + + loop { + let old_head = self.head.load(Ordering::Relaxed); + // SAFETY: node_ptr is valid; we wrote `next` before the CAS. + unsafe { (*node_ptr).next = old_head }; + + // Release ensures that the node's initialisation is visible + // before another thread observes the new head. + match self.head.compare_exchange_weak( + old_head, + node_ptr, + Ordering::Release, + Ordering::Relaxed, + ) { + Ok(_) => return Ok(()), + Err(_) => continue, // Retry on contention + } + } + } + + /// Pop the top value from the stack, returning `None` if empty. + pub fn pop(&self) -> Option { + loop { + let head = self.head.load(Ordering::Acquire); + if head.is_null() { + return None; + } + + // SAFETY: head is non-null and was set by `push`, which allocated + // a valid StackNode. No other thread frees this node until after + // the CAS below succeeds (or we retry). + let next = unsafe { (*head).next }; + + match self.head.compare_exchange_weak( + head, + next, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => { + // We won the CAS; reconstruct the Box to free it. + // SAFETY: head came from Box::into_raw in push(). + let node = unsafe { Box::from_raw(head) }; + return Some(node.value); + } + Err(_) => continue, // Retry + } + } + } + + /// Return `true` if the stack is empty. + pub fn is_empty(&self) -> bool { + self.head.load(Ordering::Relaxed).is_null() + } +} + +impl Drop for TreiberStack { + fn drop(&mut self) { + // Drain all remaining nodes. + while self.pop().is_some() {} + } +} + +// --------------------------------------------------------------------------- +// 3. Atomic reference counting (Arc-like for kernel objects) +// --------------------------------------------------------------------------- + +/// Reference count wrapper around a heap-allocated kernel object. +/// +/// Similar to `Arc` but using kernel primitives. The kernel's own +/// `refcount_t` is preferred in production (it has overflow protection); +/// this shows the raw pattern. +pub struct KernelArc { + inner: core::ptr::NonNull>, +} + +struct KernelArcInner { + refcount: AtomicUsize, + data: T, +} + +// SAFETY: KernelArc provides exclusive access through reference counting. +unsafe impl Send for KernelArc {} +unsafe impl Sync for KernelArc {} + +impl KernelArc { + /// Create a new `KernelArc` with an initial reference count of 1. + pub fn new(data: T) -> Result { + let inner = Box::try_new(KernelArcInner { + refcount: AtomicUsize::new(1), + data, + })?; + Ok(Self { + inner: unsafe { + core::ptr::NonNull::new_unchecked(Box::into_raw(inner)) + }, + }) + } + + /// Borrow the inner data. + pub fn as_ref(&self) -> &T { + // SAFETY: inner is valid as long as refcount > 0, which is true here. + unsafe { &self.inner.as_ref().data } + } +} + +impl Clone for KernelArc { + fn clone(&self) -> Self { + // Relaxed ordering: the increment only needs to be eventually visible; + // the `Acquire` in `drop` synchronises with this store. + // SAFETY: inner is valid. + unsafe { self.inner.as_ref() } + .refcount + .fetch_add(1, Ordering::Relaxed); + Self { inner: self.inner } + } +} + +impl Drop for KernelArc { + fn drop(&mut self) { + // Release: our writes to `data` must be visible before we potentially + // free the allocation. + let prev = unsafe { self.inner.as_ref() } + .refcount + .fetch_sub(1, Ordering::Release); + + if prev == 1 { + // We were the last owner. Acquire to synchronise with all previous + // Release stores (i.e., see all writes from other owners). + core::sync::atomic::fence(Ordering::Acquire); + // SAFETY: refcount reached 0; no other owner can access `inner`. + unsafe { drop(Box::from_raw(self.inner.as_ptr())) }; + } + } +} + +// --------------------------------------------------------------------------- +// 4. RCU (Read-Copy-Update) pattern +// --------------------------------------------------------------------------- + +/// Demonstrates the RCU read-side critical section pattern. +/// +/// RCU allows concurrent readers without any locking. Writers make a copy, +/// update the copy, atomically publish it, then wait for all existing readers +/// to finish before freeing the old copy. +/// +/// Kernel APIs: +/// - `rcu_read_lock()` / `rcu_read_unlock()` — reader critical section +/// - `rcu_assign_pointer()` — publish a new pointer (Release semantics) +/// - `rcu_dereference()` — safely read an RCU-protected pointer (Acquire) +/// - `synchronize_rcu()` — wait for all pre-existing readers to finish +/// +/// In Rust, the `kernel` crate wraps these; the code below shows the logical +/// pattern as it maps to atomic orderings. +pub struct RcuProtected { + /// The published pointer. Readers use Acquire; writers use Release. + ptr: AtomicPtr, +} + +// SAFETY: RCU ensures readers finish before the old value is freed. +unsafe impl Send for RcuProtected {} +unsafe impl Sync for RcuProtected {} + +impl RcuProtected { + /// Create a new RCU-protected value. + pub fn new(value: T) -> Result { + let boxed = Box::try_new(value)?; + Ok(Self { + ptr: AtomicPtr::new(Box::into_raw(boxed)), + }) + } + + /// **Reader path**: access the protected value inside a closure. + /// + /// The Acquire load corresponds to `rcu_dereference()`. The closure + /// represents the RCU read-side critical section. + /// + /// # Safety + /// + /// The caller must not store the reference beyond the closure's scope. + pub fn read R>(&self, f: F) -> R { + // rcu_read_lock() (no-op on non-preemptible kernels, but required for + // correctness in PREEMPT_RCU configurations) + let ptr = self.ptr.load(Ordering::Acquire); + // SAFETY: ptr is valid as long as we hold the RCU read lock and do + // not cross a `synchronize_rcu()` / `call_rcu()` boundary. + let result = f(unsafe { &*ptr }); + // rcu_read_unlock() + result + } + + /// **Writer path**: replace the protected value and return the old one. + /// + /// The caller is responsible for calling `synchronize_rcu()` before + /// dropping the old value to ensure no readers are still using it. + /// + /// # Safety + /// + /// The caller must call `synchronize_rcu()` (or equivalent) and only then + /// free the returned pointer. + pub unsafe fn replace(&self, new_value: T) -> Result<*mut T> { + let new_boxed = Box::try_new(new_value)?; + let new_ptr = Box::into_raw(new_boxed); + // rcu_assign_pointer() — Release so readers see the new value's init. + let old_ptr = self.ptr.swap(new_ptr, Ordering::Release); + Ok(old_ptr) + } +} + +impl Drop for RcuProtected { + fn drop(&mut self) { + let ptr = self.ptr.load(Ordering::Relaxed); + if !ptr.is_null() { + // SAFETY: We own the allocation; no readers remain after drop. + unsafe { drop(Box::from_raw(ptr)) }; + } + } +} + +// --------------------------------------------------------------------------- +// 5. Memory ordering cheat-sheet (comments) +// --------------------------------------------------------------------------- +// +// Ordering::Relaxed — No synchronisation; only atomicity. Good for +// independent counters and flags that don't gate +// any other memory operations. +// +// Ordering::Acquire — All subsequent memory accesses happen *after* this +// load. Pairs with Release stores. Use on the *read* +// side of producer/consumer protocols. +// +// Ordering::Release — All preceding memory accesses happen *before* this +// store. Pairs with Acquire loads. Use on the *write* +// side of producer/consumer protocols. +// +// Ordering::AcqRel — Combined Acquire+Release for read-modify-write +// operations (fetch_add, CAS, swap). +// +// Ordering::SeqCst — Total sequential consistency. Necessary when multiple +// independent atomic locations must be observed in a +// globally consistent order. Most expensive; avoid in +// hot paths unless the simpler orderings are insufficient. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_atomic_counter() { + let counter = AtomicCounter::new(); + assert_eq!(counter.get(), 0); + counter.increment(); + counter.increment(); + assert_eq!(counter.get(), 2); + counter.decrement(); + assert_eq!(counter.get(), 1); + } + + #[test] + fn test_treiber_stack_empty() { + let stack: TreiberStack = TreiberStack::new(); + assert!(stack.is_empty()); + assert!(stack.pop().is_none()); + } + + #[test] + fn test_treiber_stack_push_pop() { + let stack = TreiberStack::new(); + stack.push(1u32).unwrap(); + stack.push(2u32).unwrap(); + stack.push(3u32).unwrap(); + // LIFO order + assert_eq!(stack.pop(), Some(3)); + assert_eq!(stack.pop(), Some(2)); + assert_eq!(stack.pop(), Some(1)); + assert!(stack.is_empty()); + } + + #[test] + fn test_compare_exchange() { + let counter = AtomicCounter::new(); + // Should succeed: 0 → 10 + assert!(counter.compare_exchange(0, 10).is_ok()); + assert_eq!(counter.get(), 10); + // Should fail: 0 != 10 + assert!(counter.compare_exchange(0, 20).is_err()); + assert_eq!(counter.get(), 10); + } +} diff --git a/research/snippets/drivers/dma_operations.rs b/research/snippets/drivers/dma_operations.rs new file mode 100644 index 0000000..9292082 --- /dev/null +++ b/research/snippets/drivers/dma_operations.rs @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! DMA and Hardware Interaction Safety Patterns +//! +//! Direct Memory Access (DMA) lets hardware peripherals read/write system +//! memory without CPU involvement. Getting DMA right is notoriously +//! difficult in C; Rust's type system can enforce many of the invariants +//! statically. +//! +//! ## Concepts Covered +//! +//! - Coherent (consistent) vs. streaming DMA +//! - IOMMU and address translation +//! - Cache coherency and memory barriers +//! - Safe wrappers that prevent common DMA bugs +//! - Hardware register access via MMIO +//! - Device ownership and lifetime rules +//! +//! ## Common DMA Bugs (that Rust helps prevent) +//! +//! 1. **Use-after-free**: CPU accesses buffer while device is still writing +//! 2. **Double-map**: Same buffer mapped twice with incompatible directions +//! 3. **Direction mismatch**: Buffer mapped for `TO_DEVICE` but CPU writes after sync +//! 4. **Missing sync**: CPU reads stale cached data after device DMA write +//! 5. **Alias**: CPU virtual alias and DMA physical address map different cache lines +//! +//! ## Build +//! +//! Place in `samples/rust/` and enable `CONFIG_RUST=y`. +//! +//! ## Difficulty +//! +//! Advanced + +use kernel::prelude::*; + +// --------------------------------------------------------------------------- +// 1. DMA direction type — enforced at the type level +// --------------------------------------------------------------------------- + +/// Represents the direction of a DMA transfer. +/// +/// Encoding this in the type prevents using a buffer mapped for one direction +/// in an incompatible way (e.g., the device writing into a `ToDevice` buffer +/// and the CPU reading stale data without a sync). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DmaDirection { + /// CPU writes, device reads — `DMA_TO_DEVICE` + ToDevice, + /// Device writes, CPU reads — `DMA_FROM_DEVICE` + FromDevice, + /// Both sides can read and write — `DMA_BIDIRECTIONAL` + Bidirectional, +} + +impl DmaDirection { + /// Map to the kernel's `dma_data_direction` enum value. + pub fn to_kernel_constant(self) -> u32 { + match self { + DmaDirection::ToDevice => 1, // DMA_TO_DEVICE + DmaDirection::FromDevice => 2, // DMA_FROM_DEVICE + DmaDirection::Bidirectional => 0, // DMA_BIDIRECTIONAL + } + } +} + +// --------------------------------------------------------------------------- +// 2. Coherent (consistent) DMA allocation +// --------------------------------------------------------------------------- + +/// A coherent DMA buffer allocated with `dma_alloc_coherent`. +/// +/// Coherent memory is simultaneously accessible by both the CPU and the +/// device without explicit cache flushes. This is the easiest DMA type to +/// use correctly, but it is also the most expensive on platforms that lack +/// hardware coherency (e.g., some ARM SoCs). +/// +/// The kernel allocates a physically contiguous buffer and sets up both: +/// - A CPU virtual address (for kernel access) +/// - A DMA address (for the device's address space, translated by IOMMU) +/// +/// **Ownership rule**: The buffer must NOT be freed while the device can +/// still access it. The `Drop` impl here enforces this by requiring the +/// device to be explicitly detached first. +pub struct CoherentDmaBuffer { + /// CPU-accessible virtual address. + cpu_addr: core::ptr::NonNull, + /// Device-visible DMA address (after IOMMU translation). + dma_addr: u64, + /// Size in bytes. + size: usize, +} + +// SAFETY: DMA buffers can be shared across cores; access is serialised by +// the driver's locking strategy. +unsafe impl Send for CoherentDmaBuffer {} +unsafe impl Sync for CoherentDmaBuffer {} + +impl CoherentDmaBuffer { + /// Allocate a coherent DMA buffer of `size` bytes. + /// + /// In a real driver this calls: + /// ```c + /// cpu_addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + /// ``` + pub fn alloc(size: usize) -> Result { + // Placeholder: use Box to represent the allocation. + // Real code would call bindings::dma_alloc_coherent. + let boxed = Box::try_new_slice(size, 0u8)?; + let cpu_ptr = Box::into_raw(boxed) as *mut u8; + + // SAFETY: Box::into_raw always returns non-null. + let cpu_addr = unsafe { core::ptr::NonNull::new_unchecked(cpu_ptr) }; + let dma_addr = cpu_ptr as u64; // Real code: IOMMU-translated address + + pr_info!("Coherent DMA buffer: cpu={:p}, dma=0x{:x}, size={}\n", + cpu_addr.as_ptr(), dma_addr, size); + + Ok(Self { cpu_addr, dma_addr, size }) + } + + /// Return the DMA address to program into the device's descriptor ring. + pub fn dma_addr(&self) -> u64 { + self.dma_addr + } + + /// Return a CPU-side slice for reading/writing the buffer. + /// + /// # Safety + /// + /// The caller must ensure the device is not currently writing to the + /// buffer. For coherent memory, no explicit flush is needed. + pub unsafe fn as_slice(&self) -> &[u8] { + // SAFETY: cpu_addr is valid for `size` bytes. + core::slice::from_raw_parts(self.cpu_addr.as_ptr(), self.size) + } + + /// Return a mutable CPU-side slice for writing. + /// + /// # Safety + /// + /// The caller must ensure the device is not reading or writing the buffer + /// concurrently. + pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { + // SAFETY: cpu_addr is valid for `size` bytes; we hold &mut self. + core::slice::from_raw_parts_mut(self.cpu_addr.as_ptr(), self.size) + } +} + +impl Drop for CoherentDmaBuffer { + fn drop(&mut self) { + pr_info!("Freeing coherent DMA buffer at dma=0x{:x}\n", self.dma_addr); + // Real code: bindings::dma_free_coherent(dev, size, cpu_addr, dma_addr); + // + // Reconstruct Box to free the placeholder allocation. + // SAFETY: cpu_addr came from Box::into_raw. + unsafe { + let slice_ptr = + core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.size); + drop(Box::from_raw(slice_ptr)); + } + } +} + +// --------------------------------------------------------------------------- +// 3. Streaming DMA (mapped from existing buffer) +// --------------------------------------------------------------------------- + +/// A streaming DMA mapping wrapping an existing CPU buffer. +/// +/// Streaming mappings are created on-the-fly for individual transfers and +/// must be explicitly synchronised between CPU and device accesses: +/// +/// - Before the device reads: call `sync_for_device()` (flush CPU caches) +/// - After the device writes: call `sync_for_cpu()` (invalidate CPU caches) +/// +/// The type parameter `D` encodes the direction at compile time, preventing +/// incorrect sync direction mismatches. +pub struct StreamingDmaMapping { + dma_addr: u64, + size: usize, + /// Tracks whether the mapping is currently "owned" by the device. + device_owned: bool, +} + +impl StreamingDmaMapping { + /// Map `size` bytes starting at `cpu_phys_addr` for DMA. + /// + /// Real code: `dma_map_single(dev, virt_addr, size, direction)` + pub fn map(cpu_phys_addr: usize, size: usize) -> Result { + // Stub: in real code, IOMMU translates cpu_phys_addr → dma_addr. + let dma_addr = cpu_phys_addr as u64; + pr_info!("Streaming DMA map: addr=0x{:x}, size={}, dir={}\n", + dma_addr, size, DIR); + Ok(Self { + dma_addr, + size, + device_owned: false, + }) + } + + /// Transfer ownership to the device: flush CPU caches. + /// + /// After this call, the CPU must not touch the buffer until + /// `sync_for_cpu()` is called. + pub fn sync_for_device(&mut self) { + // Real code: dma_sync_single_for_device(dev, dma_addr, size, dir) + pr_info!("DMA sync for device: addr=0x{:x}\n", self.dma_addr); + self.device_owned = true; + } + + /// Transfer ownership back to the CPU: invalidate stale caches. + /// + /// After this call, the CPU can safely read data written by the device. + pub fn sync_for_cpu(&mut self) { + // Real code: dma_sync_single_for_cpu(dev, dma_addr, size, dir) + pr_info!("DMA sync for CPU: addr=0x{:x}\n", self.dma_addr); + self.device_owned = false; + } + + /// Return the DMA address to program into the hardware descriptor. + pub fn dma_addr(&self) -> u64 { + self.dma_addr + } +} + +impl Drop for StreamingDmaMapping { + fn drop(&mut self) { + // Warn if dropped while device still owns the buffer. + if self.device_owned { + pr_warn!("StreamingDmaMapping dropped while device-owned!\n"); + } + // Real code: dma_unmap_single(dev, dma_addr, size, direction); + pr_info!("Streaming DMA unmap: addr=0x{:x}\n", self.dma_addr); + } +} + +// --------------------------------------------------------------------------- +// 4. Memory-Mapped I/O (MMIO) register access +// --------------------------------------------------------------------------- + +/// A safe, typed wrapper over a hardware register block. +/// +/// MMIO registers must be accessed with volatile operations to prevent the +/// compiler from optimising away or reordering reads/writes. The kernel +/// provides `readl`/`writel` (and variants) which combine: +/// - Volatile memory access +/// - An appropriate memory barrier (DSB / MFENCE on x86) +/// +/// Rust's `core::ptr::read_volatile` / `write_volatile` correspond directly +/// to these, but the kernel's `ioread32`/`iowrite32` are preferred in real +/// drivers because they also handle endianness and I/O vs memory space. +pub struct MmioBlock { + base: core::ptr::NonNull, + size: usize, +} + +// SAFETY: MMIO regions are global hardware state; sharing is safe given +// the driver serialises concurrent accesses. +unsafe impl Send for MmioBlock {} + +impl MmioBlock { + /// Map a hardware register block at `phys_addr` of `size` bytes. + /// + /// Real code: `ioremap(phys_addr, size)` followed by a null check. + /// + /// # Safety + /// + /// `phys_addr` must be a valid MMIO physical address for this system. + /// The size must not exceed the device's register space. + pub unsafe fn new(phys_addr: usize, size: usize) -> Result { + // Stub: cast address to pointer (real code calls ioremap). + if phys_addr == 0 { + return Err(EINVAL); + } + let ptr = phys_addr as *mut u8; + let base = core::ptr::NonNull::new(ptr).ok_or(ENOMEM)?; + pr_info!("MMIO mapped: phys=0x{:x}, size={}\n", phys_addr, size); + Ok(Self { base, size }) + } + + /// Read a 32-bit register at `offset` bytes from the base. + /// + /// # Safety + /// + /// `offset` must be 4-byte aligned and within `[0, size - 4]`. + pub unsafe fn read32(&self, offset: usize) -> u32 { + debug_assert!(offset + 4 <= self.size, "offset out of MMIO range"); + debug_assert!(offset % 4 == 0, "unaligned MMIO read"); + // SAFETY: offset is in range; caller guarantees validity. + let ptr = self.base.as_ptr().add(offset) as *const u32; + core::ptr::read_volatile(ptr) + } + + /// Write a 32-bit value to a register at `offset` bytes from the base. + /// + /// # Safety + /// + /// `offset` must be 4-byte aligned and within `[0, size - 4]`. + pub unsafe fn write32(&self, offset: usize, value: u32) { + debug_assert!(offset + 4 <= self.size, "offset out of MMIO range"); + debug_assert!(offset % 4 == 0, "unaligned MMIO write"); + // SAFETY: offset is in range; caller guarantees validity. + let ptr = self.base.as_ptr().add(offset) as *mut u32; + core::ptr::write_volatile(ptr, value); + } +} + +impl Drop for MmioBlock { + fn drop(&mut self) { + pr_info!("MMIO unmapped\n"); + // Real code: iounmap(self.base.as_ptr()); + } +} + +// --------------------------------------------------------------------------- +// 5. Descriptor ring pattern (used in network/NVMe/GPU drivers) +// --------------------------------------------------------------------------- + +/// A single entry in a DMA descriptor ring. +/// +/// Hardware descriptor rings are the primary mechanism for high-throughput +/// DMA in network, storage, and GPU drivers. Each entry contains the DMA +/// address, length, and control flags for one transfer. +/// +/// `#[repr(C)]` is mandatory because the hardware interprets the layout +/// directly. +#[repr(C)] +pub struct DmaDescriptor { + /// Physical (DMA) address of the buffer. + pub addr: u64, + /// Length of the transfer in bytes. + pub len: u32, + /// Hardware control flags (device-specific). + pub flags: u32, +} + +impl DmaDescriptor { + /// Create a descriptor for a coherent buffer. + pub fn new(buffer: &CoherentDmaBuffer, len: u32, flags: u32) -> Self { + Self { + addr: buffer.dma_addr(), + len, + flags, + } + } +} + +// SAFETY: repr(C) layout is preserved; verify size at compile time. +const _: () = assert!(core::mem::size_of::() == 16); + +// --------------------------------------------------------------------------- +// DMA safety checklist (comments) +// --------------------------------------------------------------------------- +// +// ✅ Always check dma_mapping_error() after dma_map_*() calls. +// ✅ Respect the direction: only sync in the direction specified at map time. +// ✅ Free coherent buffers with dma_free_coherent(), not kfree(). +// ✅ Use dma_set_mask_and_coherent() to configure the device's DMA mask. +// ✅ Never touch streaming buffers between sync_for_device() and sync_for_cpu(). +// ✅ Use IOMMU when available — it provides address translation and protection. +// ✅ For PCIe, prefer 64-bit DMA addresses when the device supports them. +// ✅ Use write_volatile / read_volatile (or iowrite*/ioread*) for MMIO. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dma_direction_constants() { + assert_eq!(DmaDirection::Bidirectional.to_kernel_constant(), 0); + assert_eq!(DmaDirection::ToDevice.to_kernel_constant(), 1); + assert_eq!(DmaDirection::FromDevice.to_kernel_constant(), 2); + } + + #[test] + fn test_descriptor_size() { + assert_eq!(core::mem::size_of::(), 16); + } + + #[test] + fn test_coherent_alloc_and_drop() { + // This test exercises the placeholder allocation path (not real DMA). + let buf = CoherentDmaBuffer::alloc(256); + assert!(buf.is_ok()); + // buf is freed when it goes out of scope. + } +} diff --git a/research/snippets/drivers/interrupt_handler.rs b/research/snippets/drivers/interrupt_handler.rs new file mode 100644 index 0000000..aa7d5a3 --- /dev/null +++ b/research/snippets/drivers/interrupt_handler.rs @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Interrupt Handler Patterns in Rust for the Linux Kernel +//! +//! Interrupt handlers (IRQ handlers) are called by the kernel when hardware +//! signals an event. They run in a special context with severe restrictions: +//! +//! - **No sleeping**: cannot call `schedule()`, `mutex_lock()`, or any +//! blocking allocator (`GFP_KERNEL`). Use `GFP_ATOMIC` when allocation +//! is unavoidable. +//! - **No user-space access**: cannot call `copy_to_user()`. +//! - **Minimal work**: defer heavy processing to a bottom half +//! (tasklet, softirq, workqueue, or threaded IRQ). +//! - **Shared interrupts**: handlers must return `IRQ_NONE` if the interrupt +//! was not from their device. +//! +//! ## Patterns Demonstrated +//! +//! 1. Simple IRQ handler returning `IRQ_HANDLED`/`IRQ_NONE` +//! 2. Top-half / bottom-half split with a workqueue +//! 3. Threaded interrupt handler (`request_threaded_irq`) +//! 4. Per-device private data access in a handler +//! 5. Shared interrupt lines +//! +//! ## Build +//! +//! Place in `samples/rust/` and enable `CONFIG_RUST=y`. +//! +//! ## Difficulty +//! +//! Advanced + +use core::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use kernel::prelude::*; + +// --------------------------------------------------------------------------- +// IRQ return values +// --------------------------------------------------------------------------- + +/// Mirrors the kernel's `irqreturn_t`. +/// +/// An IRQ handler must return one of these values so the kernel knows whether +/// the interrupt was handled. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] +pub enum IrqReturn { + /// This interrupt was not from our device — another handler should try. + None = 0, + /// We handled the interrupt. + Handled = 1, + /// We handled the interrupt and want a threaded handler to run next. + WakeThread = 2, +} + +// --------------------------------------------------------------------------- +// 1. Simple top-half handler +// --------------------------------------------------------------------------- + +/// Per-device state shared between the ISR and the rest of the driver. +/// +/// Only atomics and spinlock-protected data are safe to access from IRQ +/// context. Using `AtomicU32` / `AtomicBool` here avoids any locking. +pub struct SimpleDeviceData { + /// Incremented each time an interrupt fires. + pub irq_count: AtomicU32, + /// Set to `true` when the device signals "data ready". + pub data_ready: AtomicBool, +} + +impl SimpleDeviceData { + /// Create zeroed device data. + pub const fn new() -> Self { + Self { + irq_count: AtomicU32::new(0), + data_ready: AtomicBool::new(false), + } + } +} + +impl Default for SimpleDeviceData { + fn default() -> Self { + Self::new() + } +} + +/// A minimal IRQ handler. +/// +/// In a real driver this function would be registered with +/// `request_irq(irq, handler, flags, name, dev_id)`. +/// +/// # Safety +/// +/// - Must be called only in IRQ context. +/// - `data` must point to a valid, properly aligned `SimpleDeviceData`. +/// - No sleeping, no blocking allocations. +/// +/// # Return +/// +/// - `IrqReturn::Handled` if the interrupt belonged to our device. +/// - `IrqReturn::None` if it did not (shared IRQ line). +pub unsafe extern "C" fn simple_irq_handler( + _irq: u32, + data: *mut core::ffi::c_void, +) -> IrqReturn { + // SAFETY: data is the dev_id pointer we passed to request_irq. + let dev = &*(data as *const SimpleDeviceData); + + // Read a hypothetical hardware status register. + // In real code: let status = ioread32(dev.base + STATUS_REG); + let status: u32 = 0x0001; // simulated "interrupt pending" bit + + if status & 0x0001 == 0 { + // Interrupt was not from our device. + return IrqReturn::None; + } + + // Acknowledge the interrupt to the hardware (clear the pending bit). + // Real code: iowrite32(dev.base + ACK_REG, status); + + dev.irq_count.fetch_add(1, Ordering::Relaxed); + dev.data_ready.store(true, Ordering::Release); + + // Wake up any process waiting for data (would be done via a wait queue + // in real code: wake_up_interruptible(&dev->wq);). + + IrqReturn::Handled +} + +// --------------------------------------------------------------------------- +// 2. Top-half / bottom-half split with a workqueue +// --------------------------------------------------------------------------- + +/// Represents a deferred work item posted by the top-half ISR. +/// +/// Workqueues run in process context, so they *can* sleep. This makes them +/// suitable for: +/// - DMA buffer processing +/// - Filesystem operations +/// - Network stack push +/// - Any I/O that might block +/// +/// The kernel API: +/// ```c +/// INIT_WORK(&dev->work, bottom_half_fn); +/// schedule_work(&dev->work); // called from IRQ handler +/// ``` +pub struct WorkqueueExample { + irq_count: AtomicU32, + work_queued: AtomicBool, +} + +impl WorkqueueExample { + pub const fn new() -> Self { + Self { + irq_count: AtomicU32::new(0), + work_queued: AtomicBool::new(false), + } + } + + /// **Top half** — runs in hard IRQ context. + /// + /// Records the event and schedules the bottom half. + /// Must return quickly; no heavy processing here. + pub fn top_half(&self) -> IrqReturn { + self.irq_count.fetch_add(1, Ordering::Relaxed); + + // Schedule the workqueue item (non-blocking). + // Real code: schedule_work(&self.work); + self.work_queued.store(true, Ordering::Release); + + IrqReturn::Handled + } + + /// **Bottom half** — runs in process context (workqueue thread). + /// + /// Can sleep, allocate with `GFP_KERNEL`, and call blocking APIs. + pub fn bottom_half(&self) { + if !self.work_queued.swap(false, Ordering::AcqRel) { + return; + } + + pr_info!("Bottom half: processing {} interrupts\n", + self.irq_count.load(Ordering::Relaxed)); + + // Perform expensive processing here: + // - Read DMA buffer + // - Parse packet header + // - Submit to network/block layer + // - Update statistics + } +} + +impl Default for WorkqueueExample { + fn default() -> Self { + Self::new() + } +} + +// --------------------------------------------------------------------------- +// 3. Threaded interrupt handler +// --------------------------------------------------------------------------- + +/// Demonstrates the threaded IRQ pattern. +/// +/// `request_threaded_irq()` registers two functions: +/// 1. **Primary handler** — runs in hard IRQ context; must be fast. +/// Returns `IRQ_WAKE_THREAD` to defer to the threaded handler. +/// 2. **Thread handler** — runs in a dedicated kernel thread; can sleep. +/// +/// This is the preferred modern pattern for drivers that need process context +/// but want the simplicity of a single IRQ handler. +pub struct ThreadedIrqDevice { + pending: AtomicBool, + packets_processed: AtomicU32, +} + +impl ThreadedIrqDevice { + pub const fn new() -> Self { + Self { + pending: AtomicBool::new(false), + packets_processed: AtomicU32::new(0), + } + } + + /// **Primary handler** — called in hard-IRQ context. + /// + /// Should only: + /// 1. Check whether the interrupt is ours. + /// 2. Mask the interrupt at the hardware level (prevent re-triggering). + /// 3. Return `IrqReturn::WakeThread`. + pub fn primary_handler(&self) -> IrqReturn { + // Check device status register (simulated). + let our_interrupt = true; // In real code: read status register + + if !our_interrupt { + return IrqReturn::None; + } + + // Disable the interrupt source to prevent flooding while we process. + // Real code: iowrite32(dev.base + IRQ_MASK_REG, 0); + + self.pending.store(true, Ordering::Release); + IrqReturn::WakeThread + } + + /// **Thread handler** — called in process context (kernel thread). + /// + /// Safe to sleep, allocate, and perform heavy work. + pub fn thread_handler(&self) -> IrqReturn { + if !self.pending.swap(false, Ordering::AcqRel) { + return IrqReturn::None; + } + + // Process received data (can sleep here). + pr_info!("Threaded handler: processing IRQ\n"); + self.packets_processed.fetch_add(1, Ordering::Relaxed); + + // Re-enable the interrupt source. + // Real code: iowrite32(dev.base + IRQ_MASK_REG, IRQ_ENABLE); + + IrqReturn::Handled + } +} + +impl Default for ThreadedIrqDevice { + fn default() -> Self { + Self::new() + } +} + +// --------------------------------------------------------------------------- +// 4. Accessing per-device data safely from a handler +// --------------------------------------------------------------------------- + +/// Demonstrates the correct pattern for per-device private data. +/// +/// The kernel passes the `dev_id` pointer (registered with `request_irq`) to +/// the handler. Rust enforces that: +/// - The pointer is cast to the correct type. +/// - All shared state uses appropriate synchronisation (`AtomicXxx` or +/// spinlocks for IRQ-safe access). +/// +/// **Never** use `Mutex` in an IRQ handler — it can sleep. +/// **Always** use `SpinLock` (with IRQ save) or atomics. +pub struct PerDeviceIrqData { + rx_packets: AtomicU32, + tx_packets: AtomicU32, + error_count: AtomicU32, +} + +impl PerDeviceIrqData { + pub const fn new() -> Self { + Self { + rx_packets: AtomicU32::new(0), + tx_packets: AtomicU32::new(0), + error_count: AtomicU32::new(0), + } + } + + /// IRQ-safe statistics accessor. + pub fn on_rx(&self) { + self.rx_packets.fetch_add(1, Ordering::Relaxed); + } + + pub fn on_tx(&self) { + self.tx_packets.fetch_add(1, Ordering::Relaxed); + } + + pub fn on_error(&self) { + self.error_count.fetch_add(1, Ordering::Relaxed); + } + + pub fn stats(&self) -> (u32, u32, u32) { + ( + self.rx_packets.load(Ordering::Relaxed), + self.tx_packets.load(Ordering::Relaxed), + self.error_count.load(Ordering::Relaxed), + ) + } +} + +impl Default for PerDeviceIrqData { + fn default() -> Self { + Self::new() + } +} + +/// Example of casting the `dev_id` void pointer back to our data. +/// +/// # Safety +/// +/// `data` must be the same pointer that was passed to `request_irq`, and it +/// must remain valid until `free_irq` is called. +pub unsafe extern "C" fn per_device_irq_handler( + _irq: u32, + data: *mut core::ffi::c_void, +) -> IrqReturn { + // SAFETY: data is our PerDeviceIrqData, valid for the lifetime of the driver. + let dev = &*(data as *const PerDeviceIrqData); + + // Simulate reading a hardware interrupt cause register. + let cause: u32 = 0b0011; // bits 0=RX, 1=TX, 2=ERR + + if cause == 0 { + return IrqReturn::None; + } + + if cause & 0b0001 != 0 { + dev.on_rx(); + } + if cause & 0b0010 != 0 { + dev.on_tx(); + } + if cause & 0b0100 != 0 { + dev.on_error(); + } + + IrqReturn::Handled +} + +// --------------------------------------------------------------------------- +// IRQ handler rules summary (comments) +// --------------------------------------------------------------------------- +// +// ✅ Return IRQ_NONE when the interrupt is not from your device. +// ✅ Acknowledge (ACK) the interrupt before returning IRQ_HANDLED. +// ✅ Use only atomic ops or spinlocks (with IRQs disabled) for shared state. +// ✅ Defer heavy work to workqueues or threaded handlers. +// ✅ Never call schedule(), msleep(), wait_event(), or mutex_lock(). +// ✅ Never allocate with GFP_KERNEL; use GFP_ATOMIC if you must allocate. +// ✅ Keep the handler as short as possible to minimise interrupt latency. +// ✅ Use IRQF_SHARED flag when sharing an IRQ line with other devices. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_irq_return_values() { + assert_eq!(IrqReturn::None as u32, 0); + assert_eq!(IrqReturn::Handled as u32, 1); + assert_eq!(IrqReturn::WakeThread as u32, 2); + } + + #[test] + fn test_simple_device_data() { + let data = SimpleDeviceData::new(); + assert_eq!(data.irq_count.load(Ordering::Relaxed), 0); + assert!(!data.data_ready.load(Ordering::Relaxed)); + } + + #[test] + fn test_top_half_bottom_half() { + let dev = WorkqueueExample::new(); + let ret = dev.top_half(); + assert_eq!(ret, IrqReturn::Handled); + assert_eq!(dev.irq_count.load(Ordering::Relaxed), 1); + assert!(dev.work_queued.load(Ordering::Relaxed)); + dev.bottom_half(); + assert!(!dev.work_queued.load(Ordering::Relaxed)); + } + + #[test] + fn test_threaded_irq() { + let dev = ThreadedIrqDevice::new(); + let ret = dev.primary_handler(); + assert_eq!(ret, IrqReturn::WakeThread); + let ret = dev.thread_handler(); + assert_eq!(ret, IrqReturn::Handled); + assert_eq!(dev.packets_processed.load(Ordering::Relaxed), 1); + } + + #[test] + fn test_per_device_stats() { + let data = PerDeviceIrqData::new(); + data.on_rx(); + data.on_rx(); + data.on_tx(); + data.on_error(); + let (rx, tx, err) = data.stats(); + assert_eq!(rx, 2); + assert_eq!(tx, 1); + assert_eq!(err, 1); + } +} diff --git a/research/snippets/error/error_propagation.rs b/research/snippets/error/error_propagation.rs new file mode 100644 index 0000000..036cf08 --- /dev/null +++ b/research/snippets/error/error_propagation.rs @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Error Propagation Mechanisms in Rust Kernel Code +//! +//! Error handling is one of the areas where Rust's design most improves over C. +//! The Linux kernel's C convention (negative errno integers, NULL pointers, +//! global `errno`) is replaced with Rust's `Result` type, which: +//! +//! - Makes error paths **explicit** and **statically checked** +//! - Prevents "forgot to check the return value" bugs +//! - Enables the `?` operator for ergonomic propagation +//! - Integrates with the kernel's existing errno values +//! +//! ## Topics Covered +//! +//! 1. The kernel `Error` type and common errno values +//! 2. The `?` operator for concise propagation +//! 3. Converting from C integer return codes +//! 4. Custom error context with `map_err` +//! 5. Error handling in module init / cleanup +//! 6. Working with `Option` alongside `Result` +//! 7. Error handling across FFI boundaries +//! +//! ## Build +//! +//! Place in `samples/rust/` and enable `CONFIG_RUST=y`. +//! +//! ## Difficulty +//! +//! Beginner–Intermediate + +use kernel::prelude::*; + +// --------------------------------------------------------------------------- +// 1. Basic Result usage with kernel errno values +// --------------------------------------------------------------------------- + +/// The kernel crate re-exports `Error` which wraps a Linux errno integer. +/// Common values (all available as constants in the kernel crate): +/// +/// | Constant | errno | Meaning | +/// |----------|-------|-----------------------------------| +/// | ENOMEM | 12 | Out of memory | +/// | EINVAL | 22 | Invalid argument | +/// | ENODEV | 19 | No such device | +/// | EBUSY | 16 | Device or resource busy | +/// | ENOTSUPP | 524 | Operation not supported | +/// | ENOSYS | 38 | Function not implemented | +/// | EIO | 5 | I/O error | +/// | ETIMEDOUT| 110 | Connection timed out | +/// | EACCES | 13 | Permission denied | +/// +/// Returning `Ok(value)` signals success; `Err(errno)` signals failure. +pub fn read_device_register(valid: bool) -> Result { + if valid { + Ok(0xDEAD_BEEF) + } else { + Err(EIO) + } +} + +// --------------------------------------------------------------------------- +// 2. The `?` operator — propagate errors without boilerplate +// --------------------------------------------------------------------------- + +/// Demonstrates chained `?` calls. +/// +/// Without `?`, every call would require an explicit `match` or `if let Err`. +/// With `?`, the error is returned immediately to the caller on failure. +pub fn initialise_device() -> Result<()> { + // Each `?` returns early with the error if any step fails. + let _reg0 = read_device_register(true)?; + let _reg1 = read_device_register(true)?; + + pr_info!("Device initialised successfully\n"); + Ok(()) +} + +/// A more realistic driver init sequence. +pub fn driver_init_sequence() -> Result<()> { + // Step 1: Validate hardware presence. + let _id = read_device_id()?; + + // Step 2: Allocate driver state. + let _state = allocate_driver_state()?; + + // Step 3: Configure hardware. + configure_hardware()?; + + // Step 4: Register with subsystem. + register_with_subsystem()?; + + Ok(()) +} + +fn read_device_id() -> Result { + // Simulated: returns a valid device ID. + Ok(0x1234_5678) +} + +fn allocate_driver_state() -> Result> { + Box::try_new([0u8; 64]).map_err(|_| ENOMEM) +} + +fn configure_hardware() -> Result<()> { + // Simulated hardware configuration. + pr_info!("Hardware configured\n"); + Ok(()) +} + +fn register_with_subsystem() -> Result<()> { + // Simulated subsystem registration. + pr_info!("Registered with subsystem\n"); + Ok(()) +} + +// --------------------------------------------------------------------------- +// 3. Converting C integer return codes to Result +// --------------------------------------------------------------------------- + +/// Wrap a C function that returns 0 on success and a negative errno on error. +/// +/// This pattern is ubiquitous when calling `bindings::*` functions. +pub fn call_c_function_safely() -> Result<()> { + // Simulate a C function call returning 0 (success). + let ret: i32 = 0; // In real code: unsafe { bindings::some_c_function(...) } + + if ret == 0 { + Ok(()) + } else { + // Convert negative errno to kernel Error. + // The kernel crate provides `Error::from_errno` / `to_result`. + Err(EINVAL) // In real code: Err(Error::from_errno(-ret)) + } +} + +/// Helper that converts a C-style `int` return to `Result<()>`. +/// +/// Maps 0 → `Ok(())` and negative values → `Err(errno)`. +pub fn to_result(ret: i32) -> Result<()> { + if ret >= 0 { + Ok(()) + } else { + // In real kernel code: kernel::error::to_result(ret) + Err(EINVAL) + } +} + +/// Helper for C functions that return a pointer (NULL on error). +pub fn ptr_to_result(ptr: *mut T) -> Result> { + core::ptr::NonNull::new(ptr).ok_or(ENOMEM) +} + +// --------------------------------------------------------------------------- +// 4. `map_err` — enrich errors with context +// --------------------------------------------------------------------------- + +/// Wraps errors with additional context using `map_err`. +/// +/// Since the kernel's `Error` type is a single errno integer, enrichment +/// typically means logging before propagating, or choosing a more appropriate +/// errno for the caller. +pub fn open_device_file(name: &str) -> Result { + // Simulate failing to find the device. + find_device(name).map_err(|e| { + pr_err!("Failed to open device '{}': {:?}\n", name, e); + ENODEV // Return ENODEV regardless of the original error + }) +} + +fn find_device(_name: &str) -> Result { + // Simulate device not found. + Err(ENODEV) +} + +// --------------------------------------------------------------------------- +// 5. `Option` ↔ `Result` conversions +// --------------------------------------------------------------------------- + +/// Shows conversions between `Option` and `Result`. +/// +/// `Option::ok_or(err)` converts `None` → `Err(err)`. +/// `Result::ok()` converts `Err(_)` → `None`. +pub fn find_and_validate(haystack: &[u32], needle: u32) -> Result { + // `position` returns Option. + // `.ok_or(ENOENT)` converts None → Err(ENOENT). + haystack + .iter() + .position(|&x| x == needle) + .ok_or(ENODEV) +} + +/// Uses `Option` for nullable pointers and converts to `Result` at the boundary. +pub fn get_optional_resource() -> Result { + let maybe_resource: Option = Some(42); + maybe_resource.ok_or(ENODEV) +} + +// --------------------------------------------------------------------------- +// 6. Error handling across the module init / cleanup boundary +// --------------------------------------------------------------------------- + +/// Demonstrates resource cleanup on partial initialisation failure. +/// +/// In C, this is handled with `goto cleanup` labels. In Rust, RAII handles +/// cleanup automatically: if `?` returns an error, any values already +/// constructed are dropped in reverse order. +pub struct DriverResources { + _buffer: Box<[u8; 128]>, + _device_id: u32, +} + +impl DriverResources { + /// Allocate all driver resources. On failure, already-allocated resources + /// are freed automatically by their `Drop` impls. + pub fn init() -> Result { + // Step 1: allocate DMA buffer. + let buffer = Box::try_new([0u8; 128]).map_err(|_| ENOMEM)?; + + // Step 2: probe device ID. + let device_id = read_device_id()?; + + // Step 3: further initialisation (may fail). + configure_hardware()?; + + Ok(Self { + _buffer: buffer, + _device_id: device_id, + }) + // If configure_hardware() returns Err, `buffer` is already freed here + // because it goes out of scope. No goto/cleanup labels needed. + } +} + +// --------------------------------------------------------------------------- +// 7. Error handling at FFI boundaries +// --------------------------------------------------------------------------- + +/// Pattern for calling an `unsafe` C function and wrapping its return. +/// +/// # Safety +/// +/// The caller must ensure that `data_ptr` is a valid pointer to at least +/// `len` bytes of readable memory. +pub unsafe fn safe_ffi_wrapper(data_ptr: *const u8, len: usize) -> Result { + if data_ptr.is_null() || len == 0 { + return Err(EINVAL); + } + + // Call a hypothetical C function: + // extern "C" { fn process_data(ptr: *const u8, len: usize) -> i32; } + // let ret = unsafe { process_data(data_ptr, len) }; + let ret: i32 = 0; // Simulated return value + + to_result(ret)?; + Ok(len as u32) +} + +// --------------------------------------------------------------------------- +// 8. Propagating errors from closures +// --------------------------------------------------------------------------- + +/// Collects results from a set of operations; first failure short-circuits. +pub fn batch_operations(items: &[u32]) -> Result> { + items + .iter() + .map(|&item| process_item(item)) + .collect() +} + +fn process_item(item: u32) -> Result { + if item == 0 { + Err(EINVAL) + } else { + Ok(item * 2) + } +} + +// --------------------------------------------------------------------------- +// Error propagation cheat-sheet (comments) +// --------------------------------------------------------------------------- +// +// Pattern | Use case +// ---------------------------|------------------------------------------------ +// `expr?` | Propagate error immediately to caller +// `.map_err(|e| new_err)` | Transform error type or add context +// `.ok_or(err)` | Convert Option::None → Err(err) +// `.unwrap_or_default()` | Use default value on error (rarely appropriate) +// `match result { ... }` | Handle specific error variants differently +// `if let Err(e) = result` | Log-and-continue pattern +// +// Never use `.unwrap()` in production kernel code — it panics, which is a BUG(). + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_device_register_ok() { + assert!(read_device_register(true).is_ok()); + assert_eq!(read_device_register(true).unwrap(), 0xDEAD_BEEF); + } + + #[test] + fn test_read_device_register_err() { + assert!(read_device_register(false).is_err()); + } + + #[test] + fn test_find_and_validate_found() { + let data = [1u32, 2, 3, 4, 5]; + assert_eq!(find_and_validate(&data, 3).unwrap(), 2); + } + + #[test] + fn test_find_and_validate_not_found() { + let data = [1u32, 2, 3]; + assert!(find_and_validate(&data, 99).is_err()); + } + + #[test] + fn test_ptr_to_result_null() { + let null: *mut u32 = core::ptr::null_mut(); + assert!(ptr_to_result(null).is_err()); + } + + #[test] + fn test_ptr_to_result_valid() { + let mut val: u32 = 42; + let ptr = &mut val as *mut u32; + assert!(ptr_to_result(ptr).is_ok()); + } + + #[test] + fn test_to_result() { + assert!(to_result(0).is_ok()); + assert!(to_result(-1).is_err()); + assert!(to_result(1).is_ok()); + } + + #[test] + fn test_batch_operations_ok() { + let items = [1u32, 2, 3]; + let result = batch_operations(&items).unwrap(); + assert_eq!(result, vec![2, 4, 6]); + } + + #[test] + fn test_batch_operations_err() { + let items = [1u32, 0, 3]; + assert!(batch_operations(&items).is_err()); + } +} diff --git a/research/snippets/memory/memory_allocator.rs b/research/snippets/memory/memory_allocator.rs new file mode 100644 index 0000000..a90b82f --- /dev/null +++ b/research/snippets/memory/memory_allocator.rs @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Memory Allocator Integration Patterns in the Linux Kernel +//! +//! This module demonstrates how Rust code integrates with the Linux kernel's +//! memory allocation subsystem. The kernel provides several allocators: +//! +//! - **SLUB/SLAB**: The primary kernel object allocator (`kmalloc`/`kfree`) +//! - **Vmalloc**: Virtually contiguous, not physically contiguous (`vmalloc`/`vfree`) +//! - **Page allocator**: Direct page allocation (`alloc_pages`/`free_pages`) +//! - **Percpu allocator**: Per-CPU variable storage +//! +//! ## Key Concepts +//! +//! - GFP flags control allocation behavior (e.g., `GFP_KERNEL`, `GFP_ATOMIC`) +//! - Rust's `Box` maps to `kmalloc`/`kfree` via the global allocator +//! - `KBox` (or `Box`) carries GFP flags in the type system +//! - RAII wrappers ensure memory is freed when values are dropped +//! - `GFP_ATOMIC` must be used in interrupt context (no sleeping allowed) +//! +//! ## Build +//! +//! Place in `samples/rust/` and enable `CONFIG_RUST=y`. +//! +//! ## Difficulty +//! +//! Intermediate + +use kernel::prelude::*; + +// --------------------------------------------------------------------------- +// 1. Basic heap allocation with the kernel allocator +// --------------------------------------------------------------------------- + +/// Wraps a heap-allocated kernel object. +/// +/// Uses `Box` backed by the kernel's `kmalloc`/`kfree` implementation. +/// The allocation happens with `GFP_KERNEL`, which is the standard flag for +/// allocations that can sleep (i.e., process context only). +pub fn basic_heap_allocation() -> Result> { + // Box::new calls the global kernel allocator under the hood. + // If allocation fails, the kernel returns ENOMEM. + let value = Box::try_new(42u32)?; + pr_info!("Allocated value on kernel heap: {}\n", *value); + // `value` is freed automatically when it goes out of scope. + Ok(value) +} + +// --------------------------------------------------------------------------- +// 2. Vec-based dynamic arrays +// --------------------------------------------------------------------------- + +/// Demonstrates dynamic array allocation. +/// +/// `Vec` in the kernel uses `krealloc` under the hood. Always use +/// `try_push` / `Vec::try_with_capacity` so allocation failures are +/// propagated as `ENOMEM` rather than panicking. +pub fn dynamic_array_example() -> Result> { + let mut buffer = Vec::try_with_capacity(64)?; + + for i in 0u8..64 { + buffer.try_push(i)?; + } + + pr_info!("Allocated Vec of {} bytes\n", buffer.len()); + Ok(buffer) +} + +// --------------------------------------------------------------------------- +// 3. GFP flags and allocation contexts +// --------------------------------------------------------------------------- + +/// Allocation flags guide the allocator on *how* to allocate memory. +/// +/// # GFP_KERNEL +/// Standard flag. The allocator may sleep/reclaim memory. Only usable in +/// process context. +/// +/// # GFP_ATOMIC +/// Non-blocking. Must be used in: +/// - Interrupt handlers +/// - Softirqs / tasklets +/// - Any context where sleeping is forbidden +/// Returns `ENOMEM` immediately if no free pages are available. +/// +/// # GFP_NOWAIT +/// Like `GFP_ATOMIC` but with weaker reclaim pressure. Use when you can +/// tolerate failure and will retry later. +/// +/// # GFP_DMA / GFP_DMA32 +/// Restrict allocation to DMA-accessible memory zones. Required for +/// hardware that cannot address all of physical memory. +pub struct AllocationContext; + +impl AllocationContext { + /// Allocate in process context (can sleep). + pub fn allocate_process_context() -> Result> { + // GFP_KERNEL: normal kernel allocation, may reclaim/sleep + let buf = Box::try_new([0u8; 128])?; + Ok(buf) + } + + /// Allocate in interrupt context (must NOT sleep). + /// + /// In real code, pass `GFP_ATOMIC` to the allocator. + /// Illustrated here as a pattern; actual kernel crate API may differ. + pub fn allocate_interrupt_context() -> Result> { + // GFP_ATOMIC: allocation may fail rather than sleeping + // SAFETY: called from a context where sleeping is forbidden + let buf = Box::try_new([0u8; 64])?; + pr_info!("Atomic allocation succeeded\n"); + Ok(buf) + } +} + +// --------------------------------------------------------------------------- +// 4. Custom RAII wrapper over raw kernel memory +// --------------------------------------------------------------------------- + +/// A RAII wrapper that holds a raw pointer to kernel-allocated memory. +/// +/// Demonstrates the pattern of wrapping `kmalloc`/`kfree` when the safe +/// kernel-crate wrappers are not yet available for a particular API. +pub struct KernelBuffer { + ptr: core::ptr::NonNull, + len: usize, +} + +// SAFETY: Kernel memory is not thread-local; sharing across threads is safe +// as long as no data races occur (enforced by &mut access or synchronization). +unsafe impl Send for KernelBuffer {} +unsafe impl Sync for KernelBuffer {} + +impl KernelBuffer { + /// Allocate `len` bytes using the kernel allocator. + /// + /// # Errors + /// + /// Returns `ENOMEM` if the allocation fails. + pub fn new(len: usize) -> Result { + // In real code: bindings::kmalloc(len, bindings::GFP_KERNEL) + // For illustration, we use Box<[u8]> as a stand-in. + let boxed = Box::try_new_slice(len, 0u8)?; + // Leak the Box and keep the raw pointer. + let ptr = Box::into_raw(boxed) as *mut u8; + // SAFETY: Box::into_raw always returns a non-null pointer. + let ptr = unsafe { core::ptr::NonNull::new_unchecked(ptr) }; + Ok(Self { ptr, len }) + } + + /// Write a byte at `offset`. + /// + /// # Panics + /// + /// Panics if `offset >= self.len`. + pub fn write_byte(&mut self, offset: usize, value: u8) { + assert!(offset < self.len, "offset out of bounds"); + // SAFETY: offset is bounds-checked above; ptr is valid for `self.len` bytes. + unsafe { self.ptr.as_ptr().add(offset).write(value) }; + } + + /// Read a byte at `offset`. + /// + /// # Panics + /// + /// Panics if `offset >= self.len`. + pub fn read_byte(&self, offset: usize) -> u8 { + assert!(offset < self.len, "offset out of bounds"); + // SAFETY: offset is bounds-checked above; ptr is valid for `self.len` bytes. + unsafe { self.ptr.as_ptr().add(offset).read() } + } +} + +impl Drop for KernelBuffer { + fn drop(&mut self) { + // Reconstruct the Box so it deallocates properly. + // SAFETY: ptr was created from Box::into_raw with element count `self.len`. + unsafe { + let slice_ptr = + core::ptr::slice_from_raw_parts_mut(self.ptr.as_ptr(), self.len); + drop(Box::from_raw(slice_ptr)); + } + } +} + +// --------------------------------------------------------------------------- +// 5. Memory pools (slab caches) +// --------------------------------------------------------------------------- + +/// Pattern for a fixed-size slab cache (kmem_cache). +/// +/// `kmem_cache` is the preferred allocator when many objects of the same +/// size are created and destroyed frequently. It reduces fragmentation and +/// improves allocation speed through object reuse. +/// +/// The actual `kmem_cache_create` / `kmem_cache_destroy` calls live in the +/// `bindings` crate; this struct shows the RAII ownership pattern. +pub struct SlabCache { + // In real code this would be: ptr: *mut bindings::kmem_cache, + // Using a placeholder type here for illustration. + _name: &'static kernel::str::CStr, + object_size: usize, +} + +impl SlabCache { + /// Create a named slab cache for objects of `object_size` bytes. + pub fn new(name: &'static kernel::str::CStr, object_size: usize) -> Result { + pr_info!("Creating slab cache for {} byte objects\n", object_size); + // Real code: + // let ptr = unsafe { + // bindings::kmem_cache_create(name.as_char_ptr(), object_size, ...) + // }; + // if ptr.is_null() { return Err(ENOMEM); } + Ok(Self { _name: name, object_size }) + } + + /// Allocate one object from the cache. + pub fn alloc(&self) -> Result<*mut u8> { + pr_info!("Allocating {} byte object from slab cache\n", self.object_size); + // Real code: + // let obj = unsafe { bindings::kmem_cache_alloc(self.ptr, bindings::GFP_KERNEL) }; + // if obj.is_null() { return Err(ENOMEM); } + // Ok(obj as *mut u8) + Err(ENOSYS) // Stub: not implemented outside real kernel build + } +} + +impl Drop for SlabCache { + fn drop(&mut self) { + pr_info!("Destroying slab cache\n"); + // Real code: unsafe { bindings::kmem_cache_destroy(self.ptr); } + } +} + +// --------------------------------------------------------------------------- +// 6. Vmalloc for large, virtually-contiguous allocations +// --------------------------------------------------------------------------- + +/// When you need a large buffer that does not need to be *physically* +/// contiguous, `vmalloc` maps multiple non-contiguous pages into a single +/// virtually-contiguous range. +/// +/// Use cases: +/// - Large driver firmware buffers +/// - Kernel module `.text`/`.data` sections +/// - Buffers too large for `kmalloc` (> 4 MiB typically) +/// +/// **Note**: vmalloc memory cannot be used for DMA without an IOMMU. +pub struct VmallocBuffer { + // ptr: *mut c_void — in real kernel code via bindings::vmalloc + len: usize, +} + +impl VmallocBuffer { + /// Allocate `len` bytes of virtually contiguous kernel memory. + pub fn new(len: usize) -> Result { + pr_info!("vmalloc: requesting {} bytes\n", len); + // Real code: + // let ptr = unsafe { bindings::vmalloc(len) }; + // if ptr.is_null() { return Err(ENOMEM); } + Ok(Self { len }) + } +} + +impl Drop for VmallocBuffer { + fn drop(&mut self) { + pr_info!("vmalloc: freeing {} bytes\n", self.len); + // Real code: unsafe { bindings::vfree(self.ptr); } + } +} + +// --------------------------------------------------------------------------- +// Memory allocator pattern summary (comments only) +// --------------------------------------------------------------------------- +// +// Rule of thumb for choosing an allocator: +// +// Object size | Physically contiguous? | DMA? | Allocator +// -------------|------------------------|-------|------------------------ +// <= ~4 MiB | Required | Maybe | kmalloc (GFP_KERNEL/DMA) +// <= ~4 MiB | Not required | No | kmalloc (GFP_KERNEL) +// > ~4 MiB | Not required | No | vmalloc +// Many same-sz | Irrelevant | No | kmem_cache (slab) +// Page aligned | Required | Yes | alloc_pages + dma_map +// +// In Rust, always prefer the safe wrappers in the `kernel` crate when they +// exist. Fall back to `unsafe` + `bindings::*` only when no safe wrapper is +// available, and document the invariants carefully. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kernel_buffer_bounds() { + // This test runs in userspace during development; in-kernel tests + // use the KUnit framework. + // We can only exercise logic that does not call kernel allocators. + assert_eq!(core::mem::size_of::(), 16); + } +} From 376f7bfbcee51b73e9d586fbfee18c3bfe513aff Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:35:34 +0000 Subject: [PATCH 3/3] Address code review feedback: fix doc fragments, improve comments, add tests, use ENOENT Agent-Logs-Url: https://github.com/orgito1015/Rust-in-the-Linux-Kernel/sessions/4c59cfad-a102-4eaa-84c6-cfed7e0725a2 Co-authored-by: orgito1015 <86354243+orgito1015@users.noreply.github.com> --- docs/09-technical-deep-dives.md | 9 +++++ research/snippets/concurrency/lock_free.rs | 9 +++-- .../snippets/drivers/interrupt_handler.rs | 4 +++ research/snippets/error/error_propagation.rs | 4 +-- research/snippets/memory/memory_allocator.rs | 36 ++++++++++++++++--- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/docs/09-technical-deep-dives.md b/docs/09-technical-deep-dives.md index a3bac72..1470929 100644 --- a/docs/09-technical-deep-dives.md +++ b/docs/09-technical-deep-dives.md @@ -97,6 +97,9 @@ let obj_ptr = cache.alloc()?; **Code snippet**: [`research/snippets/concurrency/lock_free.rs`](../research/snippets/concurrency/lock_free.rs) ### Lock-Free Overview + +Lock-free algorithms improve scalability on multi-core systems by eliminating mutex contention. +Rust's `core::sync::atomic` module provides the building blocks: `AtomicBool`, `AtomicU32`, `AtomicUsize`, `AtomicPtr`, and the compare-and-swap (CAS) primitive. ### Memory Ordering @@ -213,6 +216,9 @@ let old_ptr = unsafe { protected.replace(new_value)? }; **Code snippet**: [`research/snippets/drivers/dma_operations.rs`](../research/snippets/drivers/dma_operations.rs) ### DMA Overview + +Direct Memory Access (DMA) is how hardware peripherals read and write system memory without CPU +involvement. Getting DMA right is notoriously difficult, but Rust's type system can enforce key safety invariants at compile time. ### DMA Types @@ -425,6 +431,9 @@ pub struct DeviceData { **Code snippet**: [`research/snippets/error/error_propagation.rs`](../research/snippets/error/error_propagation.rs) ### Error Propagation Overview + +Rust replaces C's ad-hoc error conventions (negative errno, NULL pointers, global `errno`) with +a single, consistent `Result` type. The kernel crate's `Error` type wraps a Linux errno integer, so all existing error codes are preserved. ### The `?` Operator diff --git a/research/snippets/concurrency/lock_free.rs b/research/snippets/concurrency/lock_free.rs index e0b1170..dc4cf46 100644 --- a/research/snippets/concurrency/lock_free.rs +++ b/research/snippets/concurrency/lock_free.rs @@ -240,8 +240,13 @@ impl KernelArc { impl Clone for KernelArc { fn clone(&self) -> Self { - // Relaxed ordering: the increment only needs to be eventually visible; - // the `Acquire` in `drop` synchronises with this store. + // Relaxed ordering on increment: only atomicity is required here. + // There is no ordering relationship between this increment and any + // other memory accesses — we are simply bumping a counter. + // The actual memory synchronisation is established by the + // Release store in `drop` (fetch_sub) and the corresponding + // Acquire fence in the last owner's `drop`, ensuring all writes + // are visible before the allocation is freed. // SAFETY: inner is valid. unsafe { self.inner.as_ref() } .refcount diff --git a/research/snippets/drivers/interrupt_handler.rs b/research/snippets/drivers/interrupt_handler.rs index aa7d5a3..56ebf95 100644 --- a/research/snippets/drivers/interrupt_handler.rs +++ b/research/snippets/drivers/interrupt_handler.rs @@ -103,6 +103,10 @@ pub unsafe extern "C" fn simple_irq_handler( data: *mut core::ffi::c_void, ) -> IrqReturn { // SAFETY: data is the dev_id pointer we passed to request_irq. + // - The pointer is non-null (the kernel guarantees dev_id is passed as-is). + // - SimpleDeviceData was allocated before register_irq and remains valid + // until free_irq is called in the driver's remove path. + // - No other mutable reference exists; all fields use atomic operations. let dev = &*(data as *const SimpleDeviceData); // Read a hypothetical hardware status register. diff --git a/research/snippets/error/error_propagation.rs b/research/snippets/error/error_propagation.rs index 036cf08..c359bd1 100644 --- a/research/snippets/error/error_propagation.rs +++ b/research/snippets/error/error_propagation.rs @@ -183,11 +183,11 @@ fn find_device(_name: &str) -> Result { /// `Result::ok()` converts `Err(_)` → `None`. pub fn find_and_validate(haystack: &[u32], needle: u32) -> Result { // `position` returns Option. - // `.ok_or(ENOENT)` converts None → Err(ENOENT). + // `.ok_or(ENOENT)` converts None → Err(ENOENT) ("no such entry"). haystack .iter() .position(|&x| x == needle) - .ok_or(ENODEV) + .ok_or(ENOENT) } /// Uses `Option` for nullable pointers and converts to `Result` at the boundary. diff --git a/research/snippets/memory/memory_allocator.rs b/research/snippets/memory/memory_allocator.rs index a90b82f..71d0272 100644 --- a/research/snippets/memory/memory_allocator.rs +++ b/research/snippets/memory/memory_allocator.rs @@ -95,7 +95,10 @@ pub struct AllocationContext; impl AllocationContext { /// Allocate in process context (can sleep). pub fn allocate_process_context() -> Result> { - // GFP_KERNEL: normal kernel allocation, may reclaim/sleep + // Box::try_new uses GFP_KERNEL by default in the kernel allocator. + // GFP_KERNEL: normal kernel allocation, may sleep/reclaim memory. + // This flag is baked into the kernel crate's global allocator; + // callers in process context should always use Box/Vec for this reason. let buf = Box::try_new([0u8; 128])?; Ok(buf) } @@ -292,10 +295,33 @@ mod tests { use super::*; #[test] - fn test_kernel_buffer_bounds() { - // This test runs in userspace during development; in-kernel tests - // use the KUnit framework. - // We can only exercise logic that does not call kernel allocators. + fn test_kernel_buffer_size() { + // KernelBuffer holds one NonNull (8 bytes) + one usize (8 bytes). + // This test ensures the struct layout is as expected. + // In-kernel tests would use the KUnit framework instead. assert_eq!(core::mem::size_of::(), 16); } + + #[test] + fn test_kernel_buffer_write_read() { + let mut buf = KernelBuffer::new(64).expect("allocation failed"); + buf.write_byte(0, 0xAB); + buf.write_byte(63, 0xCD); + assert_eq!(buf.read_byte(0), 0xAB); + assert_eq!(buf.read_byte(63), 0xCD); + } + + #[test] + #[should_panic] + fn test_kernel_buffer_out_of_bounds_read() { + let buf = KernelBuffer::new(8).expect("allocation failed"); + let _ = buf.read_byte(8); // must panic + } + + #[test] + #[should_panic] + fn test_kernel_buffer_out_of_bounds_write() { + let mut buf = KernelBuffer::new(8).expect("allocation failed"); + buf.write_byte(8, 0xFF); // must panic + } }