diff --git a/src/arch/aarch64/kernel/mod.rs b/src/arch/aarch64/kernel/mod.rs index 28859b7734..53fd57342c 100644 --- a/src/arch/aarch64/kernel/mod.rs +++ b/src/arch/aarch64/kernel/mod.rs @@ -14,17 +14,16 @@ pub mod serial; mod start; pub mod systemtime; -use alloc::alloc::{Layout, alloc}; use core::arch::global_asm; use core::sync::atomic::{AtomicPtr, AtomicU32, Ordering}; use core::{ptr, str}; - +use hermit_sync::InterruptTicketMutex; use memory_addresses::PhysAddr; use crate::arch::aarch64::kernel::core_local::*; -use crate::arch::aarch64::mm::paging::{BasePageSize, PageSize}; use crate::config::*; use crate::env; +use crate::mm::stack_alloc::{allocate_stack, StackAllocation}; #[repr(align(8))] pub(crate) struct AlignedAtomicU32(AtomicU32); @@ -34,6 +33,7 @@ pub(crate) struct AlignedAtomicU32(AtomicU32); /// It also synchronizes initialization of CPU cores. pub(crate) static CPU_ONLINE: AlignedAtomicU32 = AlignedAtomicU32(AtomicU32::new(0)); +pub static CURRENT_STACK: InterruptTicketMutex> = InterruptTicketMutex::new(None); pub(crate) static CURRENT_STACK_ADDRESS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); #[cfg(target_os = "none")] @@ -105,10 +105,9 @@ fn finish_processor_init() { debug!("Initialized processor {}", core_id()); // Allocate stack for the CPU and pass the addresses. 
- let layout = Layout::from_size_align(KERNEL_STACK_SIZE, BasePageSize::SIZE as usize).unwrap(); - let stack = unsafe { alloc(layout) }; - assert!(!stack.is_null()); - CURRENT_STACK_ADDRESS.store(stack, Ordering::Relaxed); + let stack = allocate_stack(KERNEL_STACK_SIZE); + CURRENT_STACK_ADDRESS.store(stack.stack_start().as_mut_ptr(), Ordering::Relaxed); + let _ = CURRENT_STACK.lock().insert(stack.leak()); } pub fn boot_next_processor() { diff --git a/src/arch/aarch64/kernel/scheduler.rs b/src/arch/aarch64/kernel/scheduler.rs index ca264c0426..bf075203c9 100644 --- a/src/arch/aarch64/kernel/scheduler.rs +++ b/src/arch/aarch64/kernel/scheduler.rs @@ -6,16 +6,16 @@ use core::sync::atomic::Ordering; use aarch64_cpu::asm::barrier::{SY, isb}; use aarch64_cpu::registers::*; use align_address::Align; -use free_list::{PageLayout, PageRange}; -use memory_addresses::{PhysAddr, VirtAddr}; +use memory_addresses::VirtAddr; use crate::arch::aarch64::kernel::CURRENT_STACK_ADDRESS; use crate::arch::aarch64::kernel::core_local::core_scheduler; -use crate::arch::aarch64::mm::paging::{BasePageSize, PageSize, PageTableEntryFlags}; -use crate::mm::{FrameAlloc, PageAlloc, PageRangeAllocator}; +use crate::arch::aarch64::mm::paging::{BasePageSize, PageSize}; use crate::scheduler::PerCoreSchedulerExt; use crate::scheduler::task::{Task, TaskFrame}; use crate::{DEFAULT_STACK_SIZE, KERNEL_STACK_SIZE}; +use crate::arch::kernel::CURRENT_STACK; +use crate::mm::stack_alloc::{allocate_stack, StackAllocation}; #[derive(Debug)] #[repr(C, packed)] @@ -94,156 +94,52 @@ pub(crate) struct State { pub x30: u64, } -pub struct BootStack { - /// Stack for kernel tasks - stack: VirtAddr, -} - -pub struct CommonStack { - /// Start address of allocated virtual memory region - virt_addr: VirtAddr, - /// Start address of allocated virtual memory region - phys_addr: PhysAddr, - /// Total size of all stacks - total_size: usize, -} - -pub enum TaskStacks { - Boot(BootStack), - Common(CommonStack), +pub struct 
TaskStacks { + kernel_stack: StackAllocation, + user_stack: Option, } impl TaskStacks { - /// Size of the debug marker at the very top of each stack. - /// - /// We have a marker at the very top of the stack for debugging (`0xdeadbeef`), which should not be overridden. - pub const MARKER_SIZE: usize = 0x10; - pub fn new(size: usize) -> Self { let user_stack_size = if size < KERNEL_STACK_SIZE { KERNEL_STACK_SIZE } else { size.align_up(BasePageSize::SIZE as usize) }; - let total_size = user_stack_size + DEFAULT_STACK_SIZE; - let layout = PageLayout::from_size(total_size + 3 * BasePageSize::SIZE as usize).unwrap(); - let page_range = PageAlloc::allocate(layout).unwrap(); - let virt_addr = VirtAddr::from(page_range.start()); - let frame_layout = PageLayout::from_size(total_size).unwrap(); - let frame_range = FrameAlloc::allocate(frame_layout) - .expect("Failed to allocate Physical Memory for TaskStacks"); - let phys_addr = PhysAddr::from(frame_range.start()); - - debug!( - "Create stacks at {:p} with a size of {} KB", - virt_addr, - total_size >> 10 - ); - - let mut flags = PageTableEntryFlags::empty(); - flags.normal().writable().execute_disable(); - // map kernel stack into the address space - crate::arch::mm::paging::map::( - virt_addr + BasePageSize::SIZE, - phys_addr, - DEFAULT_STACK_SIZE / BasePageSize::SIZE as usize, - flags, - ); + let kernel_stack = allocate_stack(DEFAULT_STACK_SIZE); + let user_stack = allocate_stack(user_stack_size); - // map user stack into the address space - crate::arch::mm::paging::map::( - virt_addr + DEFAULT_STACK_SIZE + 2 * BasePageSize::SIZE, - phys_addr + DEFAULT_STACK_SIZE, - user_stack_size / BasePageSize::SIZE as usize, - flags, - ); - - // clear user stack - unsafe { - (virt_addr + DEFAULT_STACK_SIZE + 2 * BasePageSize::SIZE) - .as_mut_ptr::() - .write_bytes(0, user_stack_size); + TaskStacks { + kernel_stack, user_stack: Some(user_stack) } - - TaskStacks::Common(CommonStack { - virt_addr, - phys_addr, - total_size, - }) } pub fn 
from_boot_stacks() -> TaskStacks { - let stack = VirtAddr::from_ptr(CURRENT_STACK_ADDRESS.load(Ordering::Relaxed)); - debug!("Using boot stack {stack:p}"); - - TaskStacks::Boot(BootStack { stack }) - } - - pub fn get_user_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => 0, - TaskStacks::Common(stacks) => stacks.total_size - DEFAULT_STACK_SIZE, - } - } + let kernel_stack = if let Some(stack) = CURRENT_STACK.lock().as_ref() { + stack.weak() + } else { + let stack = VirtAddr::from_ptr(CURRENT_STACK_ADDRESS.load(Ordering::Relaxed)); + debug!("Using boot stack {stack:p}"); - pub fn get_user_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(_) => VirtAddr::zero(), - TaskStacks::Common(stacks) => { - stacks.virt_addr + DEFAULT_STACK_SIZE + 2 * BasePageSize::SIZE + unsafe { + StackAllocation::new_external(stack, KERNEL_STACK_SIZE) } - } - } + }; - pub fn get_kernel_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(stacks) => stacks.stack, - TaskStacks::Common(stacks) => stacks.virt_addr + BasePageSize::SIZE, + Self { + kernel_stack, user_stack: None } } - pub fn get_kernel_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => KERNEL_STACK_SIZE, - TaskStacks::Common(_) => DEFAULT_STACK_SIZE, - } + #[inline(always)] + pub fn get_user_stack(&self) -> Option<&StackAllocation> { + self.user_stack.as_ref() } -} - -impl Drop for TaskStacks { - fn drop(&mut self) { - // we should never deallocate a boot stack - match self { - TaskStacks::Boot(_) => {} - TaskStacks::Common(stacks) => { - debug!( - "Deallocating stacks at {:p} with a size of {} KB", - stacks.virt_addr, - stacks.total_size >> 10, - ); - crate::arch::mm::paging::unmap::( - stacks.virt_addr, - stacks.total_size / BasePageSize::SIZE as usize + 3, - ); - let range = PageRange::from_start_len( - stacks.virt_addr.as_usize(), - stacks.total_size + 3 * BasePageSize::SIZE as usize, - ) - .unwrap(); - unsafe { - PageAlloc::deallocate(range); - } - - let range = - 
PageRange::from_start_len(stacks.phys_addr.as_usize(), stacks.total_size) - .unwrap(); - unsafe { - FrameAlloc::deallocate(range); - } - } - } + #[inline(always)] + pub fn get_kernel_stack(&self) -> &StackAllocation { + &self.kernel_stack } } @@ -288,10 +184,7 @@ impl TaskFrame for Task { } unsafe { - // Set a marker for debugging at the very top. - let mut stack = self.stacks.get_kernel_stack() + self.stacks.get_kernel_stack_size() - - TaskStacks::MARKER_SIZE; - *stack.as_mut_ptr::() = 0xdead_beefu64; + let mut stack = self.stacks.get_kernel_stack().top_of_stack(); // Put the State structure expected by the ASM switch() function on the stack. stack -= size_of::(); @@ -314,12 +207,8 @@ impl TaskFrame for Task { // Set the task's stack pointer entry to the stack we have just crafted. self.last_stack_pointer = stack; + self.user_stack_pointer = self.stacks.get_user_stack().unwrap().top_of_stack(); - // initialize user-level stack - self.user_stack_pointer = self.stacks.get_user_stack() - + self.stacks.get_user_stack_size() - - TaskStacks::MARKER_SIZE; - *self.user_stack_pointer.as_mut_ptr::() = 0xdead_beefu64; (*state).sp_el0 = self.user_stack_pointer.as_u64(); } } diff --git a/src/arch/aarch64/kernel/start.rs b/src/arch/aarch64/kernel/start.rs index 6e5bb21f0a..ce93508c7f 100644 --- a/src/arch/aarch64/kernel/start.rs +++ b/src/arch/aarch64/kernel/start.rs @@ -10,8 +10,8 @@ use aarch64_cpu::asm::barrier::{SY, dsb}; use hermit_entry::Entry; use hermit_entry::boot_info::RawBootInfo; -use crate::arch::aarch64::kernel::scheduler::TaskStacks; use crate::{KERNEL_STACK_SIZE, env}; +use crate::mm::stack_alloc; /* * Memory types available. 
@@ -102,7 +102,7 @@ pub unsafe extern "C" fn _start(boot_info: Option<&'static RawBootInfo>, cpu_id: "b {pre_init}", cpu_online = sym super::CPU_ONLINE, - stack_top_offset = const KERNEL_STACK_SIZE - TaskStacks::MARKER_SIZE, + stack_top_offset = const KERNEL_STACK_SIZE - stack_alloc::MARKER_SIZE, current_stack_address = sym super::CURRENT_STACK_ADDRESS, pre_init = sym pre_init, ) @@ -239,7 +239,7 @@ pub(crate) unsafe extern "C" fn smp_start() -> ! { mair_el1 = const mair(0x00, MT_DEVICE_nGnRnE) | mair(0x04, MT_DEVICE_nGnRE) | mair(0x0c, MT_DEVICE_GRE) | mair(0x44, MT_NORMAL_NC) | mair(0xff, MT_NORMAL), tcr_bits = const tcr_size(VA_BITS) | TCR_TG1_4K | TCR_FLAGS, - stack_top_offset = const KERNEL_STACK_SIZE - TaskStacks::MARKER_SIZE, + stack_top_offset = const KERNEL_STACK_SIZE - stack_alloc::MARKER_SIZE, current_stack_address = sym super::CURRENT_STACK_ADDRESS, sctlr_el1 = const SCTLR_EL1, ttbr0 = sym TTBR0, diff --git a/src/arch/aarch64/mm/paging.rs b/src/arch/aarch64/mm/paging.rs index 61316a906a..92161ddeff 100644 --- a/src/arch/aarch64/mm/paging.rs +++ b/src/arch/aarch64/mm/paging.rs @@ -413,8 +413,8 @@ impl PageTableMethods for PageTable { } if flags == PageTableEntryFlags::BLANK { - // in this case we unmap the pages - self.entries[index].set(physical_address, flags); + // We already unmapped the page + return; } else { self.entries[index].set(physical_address, S::MAP_EXTRA_FLAG | flags); } diff --git a/src/arch/riscv64/kernel/core_local.rs b/src/arch/riscv64/kernel/core_local.rs index 7306f2af69..08315b50f2 100644 --- a/src/arch/riscv64/kernel/core_local.rs +++ b/src/arch/riscv64/kernel/core_local.rs @@ -19,7 +19,7 @@ pub struct CoreLocal { core_id: CoreId, /// Scheduler of the current Core. scheduler: Cell<*mut PerCoreScheduler>, - /// Start address of the kernel stack + /// Start address of the kernel stack (appears unused) pub kernel_stack: Cell, /// The core-local async executor. 
ex: StaticLocalExecutor, diff --git a/src/arch/riscv64/kernel/mod.rs b/src/arch/riscv64/kernel/mod.rs index 891e11eaea..55140fa741 100644 --- a/src/arch/riscv64/kernel/mod.rs +++ b/src/arch/riscv64/kernel/mod.rs @@ -15,7 +15,6 @@ use alloc::vec::Vec; use core::ptr; use core::sync::atomic::{AtomicPtr, AtomicU32, AtomicU64, Ordering}; -use free_list::PageLayout; use memory_addresses::PhysAddr; use riscv::register::sstatus; @@ -25,7 +24,7 @@ use crate::arch::riscv64::kernel::processor::lsb; use crate::config::KERNEL_STACK_SIZE; use crate::env; use crate::init_cell::InitCell; -use crate::mm::{FrameAlloc, PageRangeAllocator}; +use crate::mm::stack_alloc::allocate_stack; // Used to store information about available harts. The index of the hart in the vector // represents its CpuId and does not need to match its hart_id @@ -148,10 +147,9 @@ pub fn boot_next_processor() { { debug!("Allocating stack for hard_id {next_hart_id}"); - let frame_layout = PageLayout::from_size(KERNEL_STACK_SIZE).unwrap(); - let frame_range = - FrameAlloc::allocate(frame_layout).expect("Failed to allocate boot stack for new core"); - let stack = ptr::with_exposed_provenance_mut(frame_range.start()); + + let stack = allocate_stack(KERNEL_STACK_SIZE).leak(); + let stack = ptr::with_exposed_provenance_mut(stack.stack_start().as_usize()); CURRENT_STACK_ADDRESS.store(stack, Ordering::Relaxed); } diff --git a/src/arch/riscv64/kernel/scheduler.rs b/src/arch/riscv64/kernel/scheduler.rs index dac25973a8..f585202a80 100644 --- a/src/arch/riscv64/kernel/scheduler.rs +++ b/src/arch/riscv64/kernel/scheduler.rs @@ -1,13 +1,11 @@ use align_address::Align; -use free_list::{PageLayout, PageRange}; -use memory_addresses::{PhysAddr, VirtAddr}; use crate::arch::riscv64::kernel::core_local::core_scheduler; -use crate::arch::riscv64::mm::paging::{BasePageSize, PageSize, PageTableEntryFlags}; -use crate::mm::{FrameAlloc, PageAlloc, PageRangeAllocator}; +use crate::arch::riscv64::mm::paging::{BasePageSize, PageSize}; use 
crate::scheduler::task::{Task, TaskFrame}; use crate::scheduler::{PerCoreSchedulerExt, timer_interrupts}; use crate::{DEFAULT_STACK_SIZE, KERNEL_STACK_SIZE}; +use crate::mm::stack_alloc::{allocate_stack, StackAllocation}; /// For details, see [RISC-V Calling Conventions]. /// @@ -79,186 +77,53 @@ pub struct State { t6: usize, } -pub struct BootStack { - /// Stack for kernel tasks - stack: VirtAddr, -} - -pub struct CommonStack { - /// Start address of allocated virtual memory region - virt_addr: VirtAddr, - /// Start address of allocated virtual memory region - phys_addr: PhysAddr, - /// Total size of all stacks - total_size: usize, -} - -pub enum TaskStacks { - Boot(BootStack), - Common(CommonStack), +pub struct TaskStacks { + kernel_stack: StackAllocation, + user_stack: Option, } impl TaskStacks { - /// Size of the debug marker at the very top of each stack. - /// - /// We have a marker at the very top of the stack for debugging (`0xdeadbeef`), which should not be overridden. - pub const MARKER_SIZE: usize = 0x10; - pub fn new(size: usize) -> Self { let user_stack_size = if size < KERNEL_STACK_SIZE { KERNEL_STACK_SIZE } else { size.align_up(BasePageSize::SIZE as usize) }; - let total_size = user_stack_size + DEFAULT_STACK_SIZE + KERNEL_STACK_SIZE; - let layout = PageLayout::from_size(total_size + 4 * BasePageSize::SIZE as usize).unwrap(); - let page_range = PageAlloc::allocate(layout).unwrap(); - let virt_addr = VirtAddr::from(page_range.start()); - let frame_layout = PageLayout::from_size(total_size).unwrap(); - let frame_range = FrameAlloc::allocate(frame_layout) - .expect("Failed to allocate Physical Memory for TaskStacks"); - let phys_addr = PhysAddr::from(frame_range.start()); - - debug!( - "Create stacks at {:#X} with a size of {} KB", - virt_addr, - total_size >> 10 - ); - - let mut flags = PageTableEntryFlags::empty(); - flags.normal().writable().execute_disable(); - - // map IST0 into the address space - crate::arch::mm::paging::map::( - virt_addr + 
BasePageSize::SIZE, - //virt_addr, - phys_addr, - KERNEL_STACK_SIZE / BasePageSize::SIZE as usize, - flags, - ); - // map kernel stack into the address space - crate::arch::mm::paging::map::( - virt_addr + KERNEL_STACK_SIZE + 2 * BasePageSize::SIZE, - //virt_addr + KERNEL_STACK_SIZE, - phys_addr + KERNEL_STACK_SIZE, - DEFAULT_STACK_SIZE / BasePageSize::SIZE as usize, - flags, - ); + let kernel_stack = allocate_stack(DEFAULT_STACK_SIZE); + let user_stack = allocate_stack(user_stack_size); - // map user stack into the address space - crate::arch::mm::paging::map::( - virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE, - //virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE, - phys_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE, - user_stack_size / BasePageSize::SIZE as usize, - flags, - ); - - // clear user stack - debug!("Clearing user stack..."); - unsafe { - (virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE) - .as_mut_ptr::() - .write_bytes(0, user_stack_size); + TaskStacks { + kernel_stack, user_stack: Some(user_stack) } - - debug!("Creating stacks finished"); - - TaskStacks::Common(CommonStack { - virt_addr, - phys_addr, - total_size, - }) } pub fn from_boot_stacks() -> TaskStacks { - TaskStacks::Boot(BootStack { - stack: VirtAddr::zero(), - }) - } - - pub fn get_user_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => 0, - TaskStacks::Common(stacks) => { - stacks.total_size - DEFAULT_STACK_SIZE - KERNEL_STACK_SIZE - } + TaskStacks { + kernel_stack: unsafe { + StackAllocation::new_bootstack(KERNEL_STACK_SIZE) + }, + user_stack: None, } } - pub fn get_user_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(_) => VirtAddr::zero(), - TaskStacks::Common(stacks) => { - stacks.virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE - //stacks.virt_addr + KERNEL_STACK_SIZE + DEFAULT_STACK_SIZE - } - } - } - - pub fn get_kernel_stack(&self) -> VirtAddr { - match self { - 
TaskStacks::Boot(stacks) => stacks.stack, - TaskStacks::Common(stacks) => { - stacks.virt_addr + KERNEL_STACK_SIZE + 2 * BasePageSize::SIZE - //stacks.virt_addr + KERNEL_STACK_SIZE - } - } + #[inline(always)] + pub fn get_user_stack(&self) -> Option<&StackAllocation> { + self.user_stack.as_ref() } - pub fn get_kernel_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => KERNEL_STACK_SIZE, - TaskStacks::Common(_) => DEFAULT_STACK_SIZE, - } + #[inline(always)] + pub fn get_kernel_stack(&self) -> &StackAllocation { + &self.kernel_stack } } impl Clone for TaskStacks { fn clone(&self) -> TaskStacks { - match self { - TaskStacks::Boot(_) => TaskStacks::new(0), - TaskStacks::Common(stacks) => { - TaskStacks::new(stacks.total_size - DEFAULT_STACK_SIZE - KERNEL_STACK_SIZE) - } - } - } -} - -impl Drop for TaskStacks { - fn drop(&mut self) { - // we should never deallocate a boot stack - match self { - TaskStacks::Boot(_) => {} - TaskStacks::Common(stacks) => { - debug!( - "Deallocating stacks at {:#X} with a size of {} KB", - stacks.virt_addr, - stacks.total_size >> 10, - ); - - crate::arch::mm::paging::unmap::( - stacks.virt_addr, - stacks.total_size / BasePageSize::SIZE as usize + 4, - //stacks.total_size / BasePageSize::SIZE as usize, - ); - let range = PageRange::from_start_len( - stacks.virt_addr.as_usize(), - stacks.total_size + 4 * BasePageSize::SIZE as usize, - ) - .unwrap(); - unsafe { - PageAlloc::deallocate(range); - } - - let range = - PageRange::from_start_len(stacks.phys_addr.as_usize(), stacks.total_size) - .unwrap(); - unsafe { - FrameAlloc::deallocate(range); - } - } + if let Some(user_task) = self.user_stack.as_ref() { + TaskStacks::new(user_task.stack_size()) + } else { + TaskStacks::from_boot_stacks() } } } @@ -284,10 +149,7 @@ impl TaskFrame for Task { } unsafe { - // Set a marker for debugging at the very top. 
- let mut stack = - self.stacks.get_kernel_stack() + self.stacks.get_kernel_stack_size() - 0x10u64; - *stack.as_mut_ptr::() = 0xdead_beefu64; + let mut stack = self.stacks.get_kernel_stack().top_of_stack(); // Put the State structure expected by the ASM switch() function on the stack. stack -= size_of::(); @@ -303,8 +165,7 @@ impl TaskFrame for Task { // Set the task's stack pointer entry to the stack we have just crafted. self.last_stack_pointer = stack; - self.user_stack_pointer = - self.stacks.get_user_stack() + self.stacks.get_user_stack_size() - 0x10u64; + self.user_stack_pointer = self.stacks.get_user_stack().unwrap().top_of_stack(); (*state).sp = self.last_stack_pointer.as_usize(); (*state).a2 = self.user_stack_pointer.as_usize() - size_of::(); diff --git a/src/arch/riscv64/kernel/start.rs b/src/arch/riscv64/kernel/start.rs index 137578efc4..3d8cc65a11 100644 --- a/src/arch/riscv64/kernel/start.rs +++ b/src/arch/riscv64/kernel/start.rs @@ -6,6 +6,7 @@ use hermit_entry::boot_info::RawBootInfo; use super::{CPU_ONLINE, CURRENT_BOOT_ID, HART_MASK, NUM_CPUS}; use crate::arch::riscv64::kernel::CURRENT_STACK_ADDRESS; +use crate::arch::riscv64::mm::paging::SATP_VALUE; #[cfg(not(feature = "smp"))] use crate::arch::riscv64::kernel::processor; use crate::{KERNEL_STACK_SIZE, env}; @@ -30,6 +31,13 @@ pub unsafe extern "C" fn _start(hart_id: usize, boot_info: Option<&'static RawBo } naked_asm!( + // Load page table if set + // Required in order to be able to change the stack pointer just after + "ld t0, {satp_value}", + "beqz t0, 1f", + "csrrw t0, satp, t0", + + "1:", // Use stack pointer from `CURRENT_STACK_ADDRESS` if set "ld t0, {current_stack_pointer}", "beqz t0, 2f", @@ -39,6 +47,7 @@ pub unsafe extern "C" fn _start(hart_id: usize, boot_info: Option<&'static RawBo "2:", "j {pre_init}", + satp_value = sym SATP_VALUE, current_stack_pointer = sym CURRENT_STACK_ADDRESS, top_offset = const KERNEL_STACK_SIZE, pre_init = sym pre_init, diff --git 
a/src/arch/riscv64/mm/paging.rs b/src/arch/riscv64/mm/paging.rs index 167f1099da..7eac093a2e 100644 --- a/src/arch/riscv64/mm/paging.rs +++ b/src/arch/riscv64/mm/paging.rs @@ -1,6 +1,6 @@ use core::marker::PhantomData; use core::ptr; - +use core::sync::atomic::{AtomicUsize, Ordering}; use align_address::Align; use free_list::PageLayout; use hermit_sync::SpinMutex; @@ -11,6 +11,7 @@ use riscv::register::satp; use crate::mm::{FrameAlloc, PageRangeAllocator}; static ROOT_PAGETABLE: SpinMutex> = SpinMutex::new(PageTable::new()); +pub(crate) static SATP_VALUE: AtomicUsize = AtomicUsize::new(0); /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). const PAGE_BITS: usize = 12; @@ -135,6 +136,11 @@ impl PageTableEntry { (self.physical_address_and_flags & PageTableEntryFlags::EXECUTABLE.bits()) != 0 } + /// Mark this as an invalid (not present) entry + fn unset(&mut self) { + self.physical_address_and_flags = PhysAddr::zero(); + } + /// Mark this as a valid (present) entry and set address translation and flags. 
/// /// # Arguments @@ -377,10 +383,15 @@ impl PageTableMethods for PageTable { let index = page.table_index::(); let flush = self.entries[index].is_present(); - self.entries[index].set( - physical_address, - S::MAP_EXTRA_FLAG | PageTableEntryFlags::ACCESSED | PageTableEntryFlags::DIRTY | flags, - ); + if physical_address.is_null() && flags == PageTableEntryFlags::BLANK { + // Clear PTE + self.entries[index].unset(); + } else { + self.entries[index].set( + physical_address, + S::MAP_EXTRA_FLAG | PageTableEntryFlags::ACCESSED | PageTableEntryFlags::DIRTY | flags, + ); + } if flush { page.flush_from_tlb(); @@ -650,4 +661,9 @@ pub unsafe fn enable_page_table() { satp::set(mode, asid, ppn); asm::sfence_vma_all(); } + + SATP_VALUE.store( + ((mode as usize) << 60) | (asid << 44) | ppn, + Ordering::Relaxed + ); } diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index a5fefe4fbd..57c5d9de64 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -1,6 +1,4 @@ -use alloc::alloc::alloc; use alloc::vec::Vec; -use core::alloc::Layout; #[cfg(feature = "smp")] use core::arch::x86_64::_mm_mfence; #[cfg(feature = "acpi")] @@ -21,7 +19,7 @@ use x86_64::registers::control::Cr3; use x86_64::registers::model_specific::Msr; use super::interrupts::IDT; -use crate::arch::x86_64::kernel::CURRENT_STACK_ADDRESS; +use crate::arch::x86_64::kernel::{CURRENT_STACK, CURRENT_STACK_ADDRESS}; #[cfg(feature = "acpi")] use crate::arch::x86_64::kernel::acpi; use crate::arch::x86_64::mm::paging; @@ -33,6 +31,7 @@ use crate::config::*; use crate::mm::{PageAlloc, PageBox, PageRangeAllocator}; use crate::scheduler::CoreId; use crate::{arch, env, scheduler}; +use crate::mm::stack_alloc::allocate_stack; /// APIC Location and Status (R/W) See Table 35-2. See Section 10.4.4, Local APIC Status and Location. 
const IA32_APIC_BASE: Msr = Msr::new(0x1b); @@ -731,10 +730,9 @@ pub fn init_x2apic() { /// Initialize the required _start variables for the next CPU to be booted. pub fn init_next_processor_variables() { // Allocate stack for the CPU and pass the addresses. - let layout = Layout::from_size_align(KERNEL_STACK_SIZE, BasePageSize::SIZE as usize).unwrap(); - let stack = unsafe { alloc(layout) }; - assert!(!stack.is_null()); - CURRENT_STACK_ADDRESS.store(stack, Ordering::Relaxed); + let stack = allocate_stack(KERNEL_STACK_SIZE); + CURRENT_STACK_ADDRESS.store(stack.stack_start().as_mut_ptr(), Ordering::Relaxed); + let _ = CURRENT_STACK.lock().insert(stack); } /// Boot all Application Processors diff --git a/src/arch/x86_64/kernel/core_local.rs b/src/arch/x86_64/kernel/core_local.rs index 9b89a7659b..9320a39705 100644 --- a/src/arch/x86_64/kernel/core_local.rs +++ b/src/arch/x86_64/kernel/core_local.rs @@ -1,6 +1,6 @@ use alloc::boxed::Box; use core::arch::asm; -use core::cell::Cell; +use core::cell::{Cell, RefCell}; #[cfg(feature = "smp")] use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; @@ -13,7 +13,8 @@ use hermit_sync::{RawRwSpinLock, RawSpinMutex}; use x86_64::VirtAddr; use x86_64::registers::model_specific::GsBase; use x86_64::structures::tss::TaskStateSegment; - +use crate::arch::interrupts::IST_ENTRIES; +use crate::mm::stack_alloc::StackAllocation; use super::CPU_ONLINE; use super::interrupts::{IRQ_COUNTERS, IrqStatistics}; #[cfg(feature = "smp")] @@ -29,7 +30,8 @@ pub(crate) struct CoreLocal { /// Task State Segment (TSS) allocated for this CPU Core. pub tss: Cell<*mut TaskStateSegment>, /// Start address of the kernel stack - pub kernel_stack: Cell<*mut u8>, + pub kernel_stack: RefCell>, + pub interrupt_stack_allocs: [RefCell>; IST_ENTRIES], /// Interface to the interrupt counters irq_statistics: &'static IrqStatistics, /// The core-local async executor. 
@@ -59,7 +61,8 @@ impl CoreLocal { core_id, scheduler: Cell::new(ptr::null_mut()), tss: Cell::new(ptr::null_mut()), - kernel_stack: Cell::new(ptr::null_mut()), + kernel_stack: RefCell::new(None), + interrupt_stack_allocs: [const { RefCell::new(None) }; IST_ENTRIES], irq_statistics, ex: StaticLocalExecutor::new(), #[cfg(feature = "smp")] @@ -110,6 +113,10 @@ pub(crate) fn core_scheduler() -> &'static mut PerCoreScheduler { unsafe { CoreLocal::get().scheduler.get().as_mut().unwrap() } } +pub(crate) fn is_kernel_task() -> bool { + unsafe { CoreLocal::get().scheduler.get().as_mut().is_none_or(|v| v.is_idle()) } +} + pub(crate) fn ex() -> &'static StaticLocalExecutor { &CoreLocal::get().ex } diff --git a/src/arch/x86_64/kernel/gdt.rs b/src/arch/x86_64/kernel/gdt.rs index c924a72aa0..cb4c045197 100644 --- a/src/arch/x86_64/kernel/gdt.rs +++ b/src/arch/x86_64/kernel/gdt.rs @@ -1,22 +1,17 @@ -use alloc::alloc::alloc; use alloc::boxed::Box; -use core::alloc::Layout; -use core::sync::atomic::Ordering; - -use x86_64::VirtAddr; use x86_64::instructions::tables; use x86_64::registers::segmentation::{CS, DS, ES, SS, Segment}; #[cfg(feature = "common-os")] use x86_64::structures::gdt::DescriptorFlags; use x86_64::structures::gdt::{Descriptor, GlobalDescriptorTable}; +use x86_64::structures::paging::PageSize; use x86_64::structures::tss::TaskStateSegment; -use super::CURRENT_STACK_ADDRESS; use super::interrupts::{IST_ENTRIES, IST_SIZE}; -use super::scheduler::TaskStacks; +use crate::arch::BasePageSize; +use crate::arch::kernel::CURRENT_STACK; use crate::arch::x86_64::kernel::core_local::{CoreLocal, core_scheduler}; -use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; -use crate::config::KERNEL_STACK_SIZE; +use crate::mm::stack_alloc::allocate_stack; pub fn add_current_core() { let gdt: &mut GlobalDescriptorTable = Box::leak(Box::new(GlobalDescriptorTable::new())); @@ -35,10 +30,12 @@ pub fn add_current_core() { // Every task later gets its own stack, so this boot stack 
is only used by the Idle task on each core. // When switching to another task on this core, this entry is replaced. - let rsp = CURRENT_STACK_ADDRESS.load(Ordering::Relaxed); - let rsp = unsafe { rsp.add(KERNEL_STACK_SIZE - TaskStacks::MARKER_SIZE) }; - tss.privilege_stack_table[0] = VirtAddr::from_ptr(rsp); - CoreLocal::get().kernel_stack.set(rsp); + let rsp = CURRENT_STACK + .lock() + .take() + .expect("no pre-reserved stack for kernel"); + tss.privilege_stack_table[0] = rsp.stack_end().into(); + drop(CoreLocal::get().kernel_stack.replace(Some(rsp.leak()))); // Allocate all ISTs for this core. // Every task later gets its own IST, so the IST allocated here is only used by the Idle task. @@ -49,11 +46,9 @@ pub fn add_current_core() { BasePageSize::SIZE as usize }; - let layout = Layout::from_size_align(size, BasePageSize::SIZE as usize).unwrap(); - let ist = unsafe { alloc(layout) }; - assert!(!ist.is_null()); - let ist_start = unsafe { ist.add(size - TaskStacks::MARKER_SIZE) }; - tss.interrupt_stack_table[i] = VirtAddr::from_ptr(ist_start); + let stack = allocate_stack(size); + tss.interrupt_stack_table[i] = stack.stack_end().into(); + drop(CoreLocal::get().interrupt_stack_allocs[i].replace(Some(stack.leak()))); } CoreLocal::get().tss.set(tss); diff --git a/src/arch/x86_64/kernel/kernel_stack.rs b/src/arch/x86_64/kernel/kernel_stack.rs index eb5f80e428..8f469cbd92 100644 --- a/src/arch/x86_64/kernel/kernel_stack.rs +++ b/src/arch/x86_64/kernel/kernel_stack.rs @@ -58,7 +58,7 @@ macro_rules! 
kernel_function_impl { let $z = Reg::uninit(); )* - let kernel_stack = CoreLocal::get().kernel_stack.get().cast(); + let kernel_stack = CoreLocal::get().kernel_stack.borrow().as_ref().unwrap().stack_end().as_mut_ptr(); call_with_stack( $($arg,)* diff --git a/src/arch/x86_64/kernel/mod.rs b/src/arch/x86_64/kernel/mod.rs index b7adf3cda7..cf9405ea8c 100644 --- a/src/arch/x86_64/kernel/mod.rs +++ b/src/arch/x86_64/kernel/mod.rs @@ -1,16 +1,19 @@ #[cfg(feature = "common-os")] use core::arch::asm; -use core::ptr; -#[cfg(feature = "common-os")] -use core::slice; +use core::arch::naked_asm; +use core::{ptr, slice}; use core::sync::atomic::{AtomicPtr, AtomicU32, Ordering}; use hermit_entry::boot_info::{PlatformInfo, RawBootInfo}; +use hermit_sync::InterruptTicketMutex; use memory_addresses::PhysAddr; use x86_64::registers::control::{Cr0, Cr4}; use crate::arch::x86_64::kernel::core_local::*; +use crate::config::KERNEL_STACK_SIZE; use crate::env::{self, is_uhyve}; +use crate::mm::stack_alloc; +use crate::mm::stack_alloc::{allocate_stack, StackAllocation}; #[cfg(feature = "acpi")] pub mod acpi; @@ -81,6 +84,55 @@ pub fn args() -> Option<&'static str> { } } +/// Allocates a new stack, copies the current stack to it +fn allocate_copy_jump_stack() { + // Get top of stack + let top_of_stack = CURRENT_STACK_ADDRESS.load(Ordering::Relaxed).addr(); + let top_of_stack = top_of_stack + KERNEL_STACK_SIZE - stack_alloc::MARKER_SIZE; + + // Allocate and set stack + let stack = allocate_stack(KERNEL_STACK_SIZE).leak(); + let _ = CURRENT_STACK.lock().insert(stack.weak()); + CURRENT_STACK_ADDRESS.store(stack.top_of_stack().as_mut_ptr(), Ordering::Relaxed); + + unsafe { + clone_relocate_stack_raw(stack.top_of_stack().as_usize(), top_of_stack); + } +} + +#[unsafe(naked)] +unsafe extern "sysv64" fn clone_relocate_stack_raw(new_stack_top: usize, old_stack_top: usize) { + // Strategy: clone everything up to RSP, then set the new stack pointer (coming from RAX) and + // return + // This is 
dangerous + naked_asm!( + "mov rdx, rsp", + "call {clone_relocate_stack}", + // Set stack pointer to new returned stack pointer, and hope + "mov rsp, rax", + "ret", + + clone_relocate_stack = sym clone_relocate_stack, + ) +} + +unsafe fn clone_relocate_stack(new_stack_top: usize, old_stack_top: usize, old_stack_base: usize) -> usize { + assert!(old_stack_base < old_stack_top); + + let stack_len = old_stack_top - old_stack_base; + let old_stack = unsafe { + slice::from_raw_parts::(ptr::with_exposed_provenance(old_stack_base), stack_len) + }; + + let new_stack_base = new_stack_top - stack_len; + let new_stack = unsafe { + slice::from_raw_parts_mut::(ptr::with_exposed_provenance_mut(new_stack_base), stack_len) + }; + + new_stack.copy_from_slice(old_stack); + new_stack_base +} + /// Real Boot Processor initialization as soon as we have put the first Welcome message on the screen. #[cfg(target_os = "none")] pub fn boot_processor_init() { @@ -94,6 +146,9 @@ pub fn boot_processor_init() { crate::mm::init(); crate::mm::print_information(); + + allocate_copy_jump_stack(); + CoreLocal::get().add_irq_counter(); gdt::add_current_core(); interrupts::load_idt(); @@ -177,6 +232,7 @@ pub fn print_statistics() { /// It also synchronizes initialization of CPU cores. 
pub static CPU_ONLINE: AtomicU32 = AtomicU32::new(0); +pub static CURRENT_STACK: InterruptTicketMutex> = InterruptTicketMutex::new(None); pub static CURRENT_STACK_ADDRESS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); #[cfg(target_os = "none")] @@ -296,8 +352,6 @@ pub unsafe fn jump_to_user_land(entry_point: usize, code_size: usize, arg: &[&st use align_address::Align; use x86_64::structures::paging::{PageSize, Size4KiB as BasePageSize}; - use crate::arch::x86_64::kernel::scheduler::TaskStacks; - info!("Create new file descriptor table"); core_scheduler().recreate_objmap().unwrap(); @@ -339,7 +393,7 @@ pub unsafe fn jump_to_user_land(entry_point: usize, code_size: usize, arg: &[&st "mov rdi, {6}", "mov rsi, {7}", "iretq", - const u64::MAX - (TaskStacks::MARKER_SIZE as u64 - 1), + const u64::MAX - (stack_alloc::MARKER_SIZE as u64 - 1), const 0x23usize, in(reg) stack_pointer, const 0x1202u64, diff --git a/src/arch/x86_64/kernel/scheduler.rs b/src/arch/x86_64/kernel/scheduler.rs index 1a31956dd0..e72bb9fe3e 100644 --- a/src/arch/x86_64/kernel/scheduler.rs +++ b/src/arch/x86_64/kernel/scheduler.rs @@ -1,22 +1,17 @@ //! 
Architecture dependent interface to initialize a task use core::arch::naked_asm; - -use align_address::Align; -use free_list::{PageLayout, PageRange}; -use memory_addresses::{PhysAddr, VirtAddr}; - +use core::mem::MaybeUninit; use super::interrupts::{IDT, IST_SIZE}; +use crate::arch::interrupts::IST_ENTRIES; use crate::arch::x86_64::kernel::core_local::*; use crate::arch::x86_64::kernel::{apic, interrupts}; -use crate::arch::x86_64::mm::paging::{ - BasePageSize, PageSize, PageTableEntryFlags, PageTableEntryFlagsExt, -}; +use crate::arch::x86_64::mm::paging::{BasePageSize, PageSize}; use crate::config::*; -use crate::env; -use crate::mm::{FrameAlloc, PageAlloc, PageRangeAllocator}; +use crate::mm::stack_alloc::{allocate_stack, StackAllocation}; use crate::scheduler::task::{Task, TaskFrame}; -use crate::scheduler::{PerCoreSchedulerExt, timer_interrupts}; +use crate::scheduler::{timer_interrupts, PerCoreSchedulerExt}; +use align_address::Align; #[repr(C, packed)] struct State { @@ -61,188 +56,71 @@ struct State { rip: extern "C" fn(extern "C" fn(usize), usize, u64) -> !, } -pub struct BootStack { - /// Stack for kernel tasks - stack: VirtAddr, - /// Stack to handle interrupts - ist1: VirtAddr, -} - -pub struct CommonStack { - /// Start address of allocated virtual memory region - virt_addr: VirtAddr, - /// Start address of allocated virtual memory region - phys_addr: PhysAddr, - /// Total size of all stacks - total_size: usize, -} - -pub enum TaskStacks { - Boot(BootStack), - Common(CommonStack), +pub struct TaskStacks { + ist_stacks: [StackAllocation; IST_ENTRIES], + kernel_stack: StackAllocation, + user_stack: Option, } impl TaskStacks { - /// Size of the debug marker at the very top of each stack. - /// - /// We have a marker at the very top of the stack for debugging (`0xdeadbeef`), which should not be overridden. 
- pub const MARKER_SIZE: usize = 0x10; - pub fn new(size: usize) -> TaskStacks { let user_stack_size = if size < KERNEL_STACK_SIZE { KERNEL_STACK_SIZE } else { size.align_up(BasePageSize::SIZE as usize) }; - let total_size = user_stack_size + DEFAULT_STACK_SIZE + IST_SIZE; - let layout = PageLayout::from_size(total_size + 4 * BasePageSize::SIZE as usize).unwrap(); - let page_range = PageAlloc::allocate(layout).unwrap(); - let virt_addr = VirtAddr::from(page_range.start()); - - let frame_layout = PageLayout::from_size(total_size).unwrap(); - let frame_range = FrameAlloc::allocate(frame_layout) - .expect("Failed to allocate Physical Memory for TaskStacks"); - let phys_addr = PhysAddr::from(frame_range.start()); - - debug!( - "Create stacks at {:p} with a size of {} KB", - virt_addr, - total_size >> 10 - ); - - let mut flags = PageTableEntryFlags::empty(); - flags.normal().writable().execute_disable(); // map IST1 into the address space - crate::arch::mm::paging::map::( - virt_addr + BasePageSize::SIZE, - phys_addr, - IST_SIZE / BasePageSize::SIZE as usize, - flags, - ); - - // map kernel stack into the address space - crate::arch::mm::paging::map::( - virt_addr + IST_SIZE + 2 * BasePageSize::SIZE, - phys_addr + IST_SIZE, - DEFAULT_STACK_SIZE / BasePageSize::SIZE as usize, - flags, - ); - - // map user stack into the address space - crate::arch::mm::paging::map::( - virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE, - phys_addr + IST_SIZE + DEFAULT_STACK_SIZE, - user_stack_size / BasePageSize::SIZE as usize, - flags, - ); - - // clear user stack - unsafe { - (virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE) - .as_mut_ptr::() - .write_bytes(0, user_stack_size); + let mut ist_stacks = [const { MaybeUninit::::uninit() }; IST_ENTRIES]; + #[allow(clippy::needless_range_loop)] + for i in 0..IST_ENTRIES { + let size = if i == 0 { + IST_SIZE + } else { + BasePageSize::SIZE as usize + }; + + let stack = allocate_stack(size); + ist_stacks[i] 
= MaybeUninit::new(stack); } + let ist_stacks: MaybeUninit<[StackAllocation; IST_ENTRIES]> = ist_stacks.into(); - TaskStacks::Common(CommonStack { - virt_addr, - phys_addr, - total_size, - }) - } - - pub fn from_boot_stacks() -> TaskStacks { - let tss = unsafe { &*CoreLocal::get().tss.get() }; - let stack = VirtAddr::new( - tss.privilege_stack_table[0].as_u64() + Self::MARKER_SIZE as u64 - - KERNEL_STACK_SIZE as u64, - ); - debug!("Using boot stack {stack:p}"); - let ist1 = VirtAddr::new( - tss.interrupt_stack_table[0].as_u64() + Self::MARKER_SIZE as u64 - IST_SIZE as u64, - ); - debug!("IST1 is located at {ist1:p}"); - - TaskStacks::Boot(BootStack { stack, ist1 }) - } + let kernel_stack = allocate_stack(DEFAULT_STACK_SIZE); + let user_stack = allocate_stack(user_stack_size); - pub fn get_user_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => 0, - TaskStacks::Common(stacks) => stacks.total_size - DEFAULT_STACK_SIZE - IST_SIZE, + TaskStacks { + // SAFETY: the loop above wrote every one of the IST_ENTRIES elements + ist_stacks: unsafe { ist_stacks.assume_init() }, kernel_stack, user_stack: Some(user_stack) } } - pub fn get_user_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(_) => VirtAddr::zero(), - TaskStacks::Common(stacks) => { - stacks.virt_addr + IST_SIZE + DEFAULT_STACK_SIZE + 3 * BasePageSize::SIZE - } - } - } + pub fn from_boot_stacks() -> TaskStacks { + let core_local = CoreLocal::get(); + let kernel = core_local.kernel_stack.borrow().as_ref().expect("no kernel stack").weak(); - pub fn get_kernel_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(stacks) => stacks.stack, - TaskStacks::Common(stacks) => stacks.virt_addr + IST_SIZE + 2 * BasePageSize::SIZE, + let mut ist_stacks = [const { MaybeUninit::::uninit() }; IST_ENTRIES]; + for (i, stack) in core_local.interrupt_stack_allocs.iter().enumerate() { + ist_stacks[i] = MaybeUninit::new(stack.borrow().as_ref().expect("no ist stack").weak()); } - } + let ist_stacks: MaybeUninit<[StackAllocation; IST_ENTRIES]> = ist_stacks.into(); - pub fn 
get_kernel_stack_size(&self) -> usize { - match self { - TaskStacks::Boot(_) => KERNEL_STACK_SIZE, - TaskStacks::Common(_) => DEFAULT_STACK_SIZE, + TaskStacks { + ist_stacks: unsafe { ist_stacks.assume_init() }, kernel_stack: kernel, user_stack: None } } - pub fn get_interrupt_stack(&self) -> VirtAddr { - match self { - TaskStacks::Boot(stacks) => stacks.ist1, - TaskStacks::Common(stacks) => stacks.virt_addr + BasePageSize::SIZE, - } + #[inline(always)] + pub fn get_user_stack(&self) -> Option<&StackAllocation> { + self.user_stack.as_ref() } - pub fn get_interrupt_stack_size(&self) -> usize { - IST_SIZE + #[inline(always)] + pub fn get_kernel_stack(&self) -> &StackAllocation { + &self.kernel_stack } -} -impl Drop for TaskStacks { - fn drop(&mut self) { - // we should never deallocate a boot stack - match self { - TaskStacks::Boot(_) => {} - TaskStacks::Common(stacks) => { - debug!( - "Deallocating stacks at {:p} with a size of {} KB", - stacks.virt_addr, - stacks.total_size >> 10, - ); - - if !env::is_uefi() { - crate::arch::mm::paging::unmap::( - stacks.virt_addr, - stacks.total_size / BasePageSize::SIZE as usize + 4, - ); - } - let range = PageRange::from_start_len( - stacks.virt_addr.as_usize(), - stacks.total_size + 4 * BasePageSize::SIZE as usize, - ) - .unwrap(); - unsafe { - PageAlloc::deallocate(range); - } - - let range = - PageRange::from_start_len(stacks.phys_addr.as_usize(), stacks.total_size) - .unwrap(); - unsafe { - FrameAlloc::deallocate(range); - } - } - } + #[inline(always)] + pub fn get_interrupt_stacks(&self) -> &[StackAllocation; IST_ENTRIES] { + &self.ist_stacks } } @@ -280,10 +158,7 @@ impl TaskFrame for Task { } unsafe { - // Set a marker for debugging at the very top. 
- let mut stack = self.stacks.get_kernel_stack() + self.stacks.get_kernel_stack_size() - - TaskStacks::MARKER_SIZE; - *stack.as_mut_ptr::() = 0xdead_beefu64; + let mut stack = self.stacks.get_kernel_stack().top_of_stack(); // Put the State structure expected by the ASM switch() function on the stack. stack -= size_of::(); @@ -302,9 +177,7 @@ impl TaskFrame for Task { // Set the task's stack pointer entry to the stack we have just crafted. self.last_stack_pointer = stack; - self.user_stack_pointer = self.stacks.get_user_stack() - + self.stacks.get_user_stack_size() - - TaskStacks::MARKER_SIZE; + self.user_stack_pointer = self.stacks.get_user_stack().unwrap().top_of_stack(); // rdx is required to initialize the stack (*state).rdx = self.user_stack_pointer.as_u64() - size_of::() as u64; diff --git a/src/arch/x86_64/kernel/start.rs b/src/arch/x86_64/kernel/start.rs index 269725e53f..0634e925ab 100644 --- a/src/arch/x86_64/kernel/start.rs +++ b/src/arch/x86_64/kernel/start.rs @@ -5,7 +5,7 @@ use hermit_entry::boot_info::RawBootInfo; use crate::KERNEL_STACK_SIZE; use crate::kernel::pre_init; -use crate::kernel::scheduler::TaskStacks; +use crate::mm::stack_alloc; #[unsafe(no_mangle)] #[unsafe(naked)] @@ -58,7 +58,7 @@ pub unsafe extern "C" fn _start(_boot_info: Option<&'static RawBootInfo>, cpu_id cpu_online = sym super::CPU_ONLINE, current_stack_address = sym super::CURRENT_STACK_ADDRESS, - stack_top_offset = const KERNEL_STACK_SIZE - TaskStacks::MARKER_SIZE, + stack_top_offset = const KERNEL_STACK_SIZE - stack_alloc::MARKER_SIZE, pre_init = sym pre_init, ) } diff --git a/src/arch/x86_64/mm/paging.rs b/src/arch/x86_64/mm/paging.rs index aa56d2f599..0720eac47a 100644 --- a/src/arch/x86_64/mm/paging.rs +++ b/src/arch/x86_64/mm/paging.rs @@ -16,7 +16,7 @@ use x86_64::structures::paging::{ use crate::arch::x86_64::kernel::processor; use crate::arch::x86_64::mm::{PhysAddr, VirtAddr}; -use crate::mm::{FrameAlloc, PageRangeAllocator}; +use crate::mm::{stack_alloc, FrameAlloc, 
PageRangeAllocator}; use crate::{env, scheduler}; unsafe impl FrameAllocator for FrameAlloc { @@ -103,6 +103,7 @@ impl PageTableEntryFlagsExt for PageTableEntryFlags { pub use x86_64::structures::paging::{ PageSize, Size1GiB as HugePageSize, Size2MiB as LargePageSize, Size4KiB as BasePageSize, }; +use crate::arch::core_local::is_kernel_task; /// Returns a mapping of the physical memory where physical address is equal to the virtual address (no offset) pub unsafe fn identity_mapped_page_table() -> OffsetPageTable<'static> { @@ -275,13 +276,30 @@ where } } -#[cfg(not(feature = "common-os"))] -pub(crate) extern "x86-interrupt" fn page_fault_handler( - stack_frame: ExceptionStackFrame, - error_code: PageFaultErrorCode, -) { - error!("Page fault (#PF)!"); - error!("page_fault_linear_address = {:p}", Cr2::read().unwrap()); +fn handle_page_fault(stack_frame: ExceptionStackFrame, error_code: PageFaultErrorCode) { + let address = Cr2::read().unwrap(); + let is_kernel_task = is_kernel_task(); + + if is_kernel_task { + panic_println!("page_fault_linear_address = {address:p}"); + panic_println!("error_code = {error_code:?}"); + panic_println!("fs = {:#X}", processor::readfs()); + panic_println!("gs = {:#X}", processor::readgs()); + panic_println!("stack_frame = {stack_frame:#?}"); + + if stack_alloc::is_in_stack_range(address) { + panic!("Probable Stack Overflow in kernel (#PF)!"); + } else { + panic!("Page fault (#PF) in kernel!"); + } + } + + if stack_alloc::is_in_stack_range(address) { + error!("Probable Stack Overflow (#PF)!"); + } else { + error!("Page fault (#PF)!"); + } + error!("page_fault_linear_address = {address:p}"); error!("error_code = {error_code:?}"); error!("fs = {:#X}", processor::readfs()); error!("gs = {:#X}", processor::readgs()); @@ -289,6 +307,14 @@ pub(crate) extern "x86-interrupt" fn page_fault_handler( scheduler::abort(); } +#[cfg(not(feature = "common-os"))] +pub(crate) extern "x86-interrupt" fn page_fault_handler( + stack_frame: ExceptionStackFrame, 
+ error_code: PageFaultErrorCode, +) { + handle_page_fault(stack_frame, error_code); +} + #[cfg(feature = "common-os")] pub(crate) extern "x86-interrupt" fn page_fault_handler( mut stack_frame: ExceptionStackFrame, @@ -299,13 +325,8 @@ pub(crate) extern "x86-interrupt" fn page_fault_handler( core::arch::asm!("swapgs", options(nostack)); } } - error!("Page fault (#PF)!"); - error!("page_fault_linear_address = {:p}", Cr2::read().unwrap()); - error!("error_code = {error_code:?}"); - error!("fs = {:#X}", processor::readfs()); - error!("gs = {:#X}", processor::readgs()); - error!("stack_frame = {stack_frame:#?}"); - scheduler::abort(); + + handle_page_fault(stack_frame, error_code); } pub fn init() { diff --git a/src/mm/mod.rs b/src/mm/mod.rs index 9c5ace4ac6..6106156400 100644 --- a/src/mm/mod.rs +++ b/src/mm/mod.rs @@ -43,6 +43,7 @@ pub(crate) mod device_alloc; mod page_range_alloc; mod physicalmem; +pub mod stack_alloc; mod virtualmem; use core::alloc::Layout; diff --git a/src/mm/stack_alloc.rs b/src/mm/stack_alloc.rs new file mode 100644 index 0000000000..ffeb626fba --- /dev/null +++ b/src/mm/stack_alloc.rs @@ -0,0 +1,248 @@ +use core::fmt::{Display, Formatter}; +use core::ops::Add; + +use align_address::Align; +use free_list::{FreeList, PageLayout, PageRange}; +use hermit_sync::{InterruptTicketMutex, Lazy}; +use memory_addresses::{PhysAddr, VirtAddr}; + +use crate::arch::mm::paging; +#[cfg(target_arch = "x86_64")] +use crate::arch::mm::paging::PageTableEntryFlagsExt; +use crate::arch::mm::paging::{BasePageSize, HugePageSize, PageSize, PageTableEntryFlags}; +use crate::mm::{FrameAlloc, PageAlloc, PageRangeAllocator, virtualmem}; + +static MAX_STACK_SIZE: usize = HugePageSize::SIZE as usize; +/// End of the stack. 
Ideally, we'd take the heap end, but in x86_64 this causes ptr computation +/// problems +static STACK_REGION_END: usize = virtualmem::kernel_heap_end().as_usize() + 1 - MAX_STACK_SIZE; +static STACK_REGION_START: usize = STACK_REGION_END - MAX_STACK_SIZE; +static STACK_FREE_LIST: Lazy>> = Lazy::new(|| { + // Remove all mappings in the stack region range + let start = VirtAddr::new(STACK_REGION_START as u64); + let count = MAX_STACK_SIZE / HugePageSize::SIZE as usize; + let range = PageRange::new(STACK_REGION_START, STACK_REGION_END).unwrap(); + + // Take the pages from the allocator + PageAlloc::allocate_at(range).expect("failed to reserve stack area"); + + paging::unmap::(start, count); + + let mut free_list = FreeList::new(); + unsafe { + free_list + .deallocate(PageRange::new(STACK_REGION_START, STACK_REGION_END).unwrap()) + .expect("failed to deallocate stack range"); + } + + InterruptTicketMutex::new(free_list) +}); + +const _: () = { + assert!(MAX_STACK_SIZE.is_multiple_of(HugePageSize::SIZE as usize)); +}; + +/// Size of the debug marker at the very top of each stack. +/// +/// We have a marker at the very top of the stack for debugging (`0xdeadbeef`), which should not be overridden. 
+pub const MARKER_SIZE: usize = 0x10; +pub const MARKER: u64 = 0xdead_beef; +pub const GUARD_PAGE_MARKER: u64 = 0xdead_cafe; + +pub fn allocate_stack(requested_size: usize) -> StackAllocation { + // Determine basic number of pages + let num_pages = + requested_size.align_up(BasePageSize::SIZE as usize) / BasePageSize::SIZE as usize; + + let pages_with_guard = num_pages + 1; + let size = pages_with_guard * BasePageSize::SIZE as usize; + let layout = PageLayout::from_size_align(size, BasePageSize::SIZE as usize).unwrap(); + + // Allocate virtual memory for this stack + let page_range = STACK_FREE_LIST + .lock() + .allocate(layout) + .expect("failed to allocate virtual memory space for stack"); + let stack_start = page_range.start(); + + // Allocate physical pages for this stack + let frame_range = FrameAlloc::allocate(layout).expect("failed to allocate frames for stack"); + let phys_addr_start = PhysAddr::new(frame_range.start() as u64); + let virt_addr_start = VirtAddr::new(stack_start as u64); + + // Map first page to a disabled page full of a marker, then unmap it + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().execute_disable(); + paging::map::(virt_addr_start, phys_addr_start, 1, flags); + unsafe { + let marker_pos = virt_addr_start.add(BasePageSize::SIZE as usize - size_of::()); + *marker_pos.as_mut_ptr::() = GUARD_PAGE_MARKER; + } + paging::unmap::(virt_addr_start, 1); + + // Map rest correctly + let virt_addr_stack_start = virt_addr_start.add(BasePageSize::SIZE); + let phys_addr_stack_start = phys_addr_start.add(BasePageSize::SIZE); + + let mut flags = PageTableEntryFlags::empty(); + flags.normal().writable().execute_disable(); + paging::map::( + virt_addr_stack_start, + phys_addr_stack_start, + num_pages, + flags, + ); + + // Clear the stack + unsafe { + virt_addr_stack_start + .as_mut_ptr::() + .write_bytes(0, num_pages * BasePageSize::SIZE as usize); + }; + + // Insert marker on top + let marker_addr = virt_addr_start + size - 
MARKER_SIZE; + unsafe { + marker_addr.as_mut_ptr::().write(MARKER); + } + + StackAllocation { + virt_addr: virt_addr_start, + phys_addr: phys_addr_start, + stack_size: size, + weak: false, + } +} + +pub struct StackAllocation { + /// Start address of the allocated virtual memory region + virt_addr: VirtAddr, + /// Start address of the allocated physical memory region + phys_addr: PhysAddr, + /// Total size of this allocation in bytes, including the guard page + stack_size: usize, + + /// If true, this is a weak reference to a stack that should not be freed + weak: bool, +} + +impl Drop for StackAllocation { + /// Unmaps the stack and gives its virtual range and physical frames back, unless the + /// allocation is weak or has a null physical address. + fn drop(&mut self) { + if self.weak || self.phys_addr.is_null() { + return; + } + + assert!(self.stack_size.is_multiple_of(BasePageSize::SIZE as usize)); + + paging::unmap::( + self.virt_addr, + self.stack_size / BasePageSize::SIZE as usize, + ); + + // Release the virtual range first, then the physical frames + let virt_range = + PageRange::from_start_len(self.virt_addr.as_usize(), self.stack_size).unwrap(); + unsafe { + STACK_FREE_LIST + .lock() + .deallocate(virt_range) + .expect("failed to free stack memory"); + } + + let phys_range = + PageRange::from_start_len(self.phys_addr.as_usize(), self.stack_size).unwrap(); + unsafe { + FrameAlloc::deallocate(phys_range); + } + } +} + +impl StackAllocation { + /// Returns the available stack size in bytes (end to start, without marker or guard) + pub fn stack_size(&self) -> usize { + self.stack_size - BasePageSize::SIZE as usize - MARKER_SIZE + } + + /// Returns the address one past the last byte of the allocation + pub fn stack_end(&self) -> VirtAddr { + self.virt_addr.add(self.stack_size) + } + + /// Returns the top of the stack, that is [Self::stack_end] with the marker excluded + pub fn top_of_stack(&self) -> VirtAddr { + self.stack_end() - MARKER_SIZE + } + + /// Returns the first address usable in the stack + pub fn stack_start(&self) -> VirtAddr { + self.virt_addr.add(BasePageSize::SIZE) + } + + /// Returns the stack guard page (excluded from [Self::stack_end] / [Self::stack_start] / 
[Self::stack_size]) + pub fn stack_guard_start(&self) -> VirtAddr { + self.virt_addr + } + + /// Returns a weak copy of this stack allocation: dropping the copy does not unmap or free + /// the stack memory + pub fn weak(&self) -> StackAllocation { + StackAllocation { + weak: true, + stack_size: self.stack_size, + phys_addr: self.phys_addr, + virt_addr: self.virt_addr, + } + } + + /// Consumes this allocation and returns it marked as weak, so dropping it never frees the + /// stack memory + pub fn leak(mut self) -> Self { + self.weak = true; + self + } + + /// Create a stack alloc for an externally created stack. + /// No stack protection will be available for that stack. + /// + /// # Safety + /// + /// The address must be mapped and correspond to a stack of the provided size + #[cfg(target_arch = "aarch64")] + pub unsafe fn new_external(top_of_stack: VirtAddr, size: usize) -> Self { + let phys = PhysAddr::zero(); + + Self { + virt_addr: top_of_stack - size, + phys_addr: phys, + stack_size: size, + weak: true, + } + } + + /// Creates a weak placeholder allocation of `size` bytes whose virtual and physical + /// addresses are both zero. + /// NOTE(review): the safety contract for callers is not visible here — TODO document + #[cfg(target_arch = "riscv64")] + pub unsafe fn new_bootstack(size: usize) -> Self { + Self { + virt_addr: VirtAddr::zero(), + phys_addr: PhysAddr::zero(), + stack_size: size, + weak: true, + } + } +} + +impl Display for StackAllocation { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!( + f, + "Stack Allocation: usable range {:x}..{:x}, guard page: {:x?}", + self.stack_start().as_usize(), + self.top_of_stack().as_usize(), + self.stack_guard_start() + ) + } +} + +#[inline(always)] +#[cfg(target_arch = "x86_64")] +pub fn is_in_stack_range(address: x86_64::addr::VirtAddr) -> bool { + let address_usize = address.as_u64() as usize; + address_usize < STACK_REGION_END && address_usize >= STACK_REGION_START +} diff --git a/src/mm/virtualmem.rs b/src/mm/virtualmem.rs index ba313bb16d..8edff28c08 100644 --- a/src/mm/virtualmem.rs +++ b/src/mm/virtualmem.rs @@ -64,7 +64,7 @@ unsafe fn init() { /// End of the virtual memory address space reserved for kernel memory (inclusive). 
/// The virtual memory address space reserved for the task heap starts after this. #[inline] -pub fn kernel_heap_end() -> VirtAddr { +pub const fn kernel_heap_end() -> VirtAddr { cfg_select! { target_arch = "aarch64" => { // maximum address, which can be supported by TTBR0 @@ -75,17 +75,13 @@ pub fn kernel_heap_end() -> VirtAddr { VirtAddr::new(0x0040_0000_0000 - 1) } target_arch = "x86_64" => { - use x86_64::structures::paging::PageTableIndex; - let p4_index = if cfg!(feature = "common-os") { - PageTableIndex::new(1) + 1u64 } else { - PageTableIndex::new(256) + 256u64 }; - let addr = u64::from(p4_index) << 39; - assert_eq!(VirtAddr::new_truncate(addr).p4_index(), p4_index); - + let addr = p4_index << 39; VirtAddr::new_truncate(addr - 1) } } diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 8731482ace..9915d51b31 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -327,7 +327,13 @@ impl PerCoreScheduler { arg, prio: current_task_borrowed.prio, core_id, - stacks: TaskStacks::new(current_task_borrowed.stacks.get_user_stack_size()), + stacks: TaskStacks::new( + current_task_borrowed + .stacks + .get_user_stack() + .map(|s| s.stack_size()) + .unwrap_or_default(), + ), object_map: current_task_borrowed.object_map.clone(), }; @@ -441,6 +447,12 @@ impl PerCoreScheduler { }) } + /// Returns true if the current task is the idle task + #[allow(unused)] + pub fn is_idle(&self) -> bool { + without_interrupts(|| self.current_task.borrow().id == self.idle_task.borrow().id) + } + #[inline] pub fn get_current_task_id(&self) -> TaskId { without_interrupts(|| self.current_task.borrow().id) @@ -594,15 +606,21 @@ impl PerCoreScheduler { let current_task_borrowed = self.current_task.borrow(); let tss = unsafe { &mut *CoreLocal::get().tss.get() }; - let rsp = current_task_borrowed.stacks.get_kernel_stack() - + current_task_borrowed.stacks.get_kernel_stack_size() as u64 - - TaskStacks::MARKER_SIZE as u64; + let rsp = current_task_borrowed + .stacks + 
.get_kernel_stack() + .top_of_stack(); tss.privilege_stack_table[0] = rsp.into(); - CoreLocal::get().kernel_stack.set(rsp.as_mut_ptr()); - let ist_start = current_task_borrowed.stacks.get_interrupt_stack() - + current_task_borrowed.stacks.get_interrupt_stack_size() as u64 - - TaskStacks::MARKER_SIZE as u64; - tss.interrupt_stack_table[0] = ist_start.into(); + + let _ = CoreLocal::get() + .kernel_stack + .borrow_mut() + .insert(current_task_borrowed.stacks.get_kernel_stack().weak()); + + let interrupt_stacks = current_task_borrowed.stacks.get_interrupt_stacks(); + for (i, stack) in interrupt_stacks.iter().enumerate() { + tss.interrupt_stack_table[i] = stack.top_of_stack().into(); + } } pub fn set_current_task_priority(&mut self, prio: Priority) { @@ -640,9 +658,10 @@ impl PerCoreScheduler { pub fn set_current_kernel_stack(&self) { let current_task_borrowed = self.current_task.borrow(); - let stack = (current_task_borrowed.stacks.get_kernel_stack() - + current_task_borrowed.stacks.get_kernel_stack_size() as u64 - - TaskStacks::MARKER_SIZE as u64) + // The stack grows downwards, so publish its top (marker excluded), not its lowest address + let stack = current_task_borrowed + .stacks + .get_kernel_stack() + .top_of_stack() .as_u64(); CoreLocal::get().kernel_stack.set(stack); }