diff --git a/etc/syscalls_linux_aarch64.md b/etc/syscalls_linux_aarch64.md index 82371995..b5456251 100644 --- a/etc/syscalls_linux_aarch64.md +++ b/etc/syscalls_linux_aarch64.md @@ -21,9 +21,9 @@ | 0x10 (16) | fremovexattr | (int fd, const char *name) | __arm64_sys_fremovexattr | true | | 0x11 (17) | getcwd | (char *buf, unsigned long size) | __arm64_sys_getcwd | true | | 0x13 (19) | eventfd2 | (unsigned int count, int flags) | __arm64_sys_eventfd2 | false | -| 0x14 (20) | epoll_create1 | (int flags) | __arm64_sys_epoll_create1 | false | -| 0x15 (21) | epoll_ctl | (int epfd, int op, int fd, struct epoll_event *event) | __arm64_sys_epoll_ctl | false | -| 0x16 (22) | epoll_pwait | (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask, size_t sigsetsize) | __arm64_sys_epoll_pwait | false | +| 0x14 (20) | epoll_create1 | (int flags) | __arm64_sys_epoll_create1 | true | +| 0x15 (21) | epoll_ctl | (int epfd, int op, int fd, struct epoll_event *event) | __arm64_sys_epoll_ctl | true | +| 0x16 (22) | epoll_pwait | (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask, size_t sigsetsize) | __arm64_sys_epoll_pwait | partially | | 0x17 (23) | dup | (unsigned int fildes) | __arm64_sys_dup | true | | 0x18 (24) | dup3 | (unsigned int oldfd, unsigned int newfd, int flags) | __arm64_sys_dup3 | true | | 0x19 (25) | fcntl | (unsigned int fd, unsigned int cmd, unsigned long arg) | __arm64_sys_fcntl | true | @@ -41,7 +41,7 @@ | 0x25 (37) | linkat | (int olddfd, const char *oldname, int newdfd, const char *newname, int flags) | __arm64_sys_linkat | true | | 0x26 (38) | renameat | (int olddfd, const char *oldname, int newdfd, const char *newname) | __arm64_sys_renameat | true | | 0x27 (39) | umount | (char *name, int flags) | __arm64_sys_umount | false | -| 0x28 (40) | mount | (char *dev_name, char *dir_name, char *type, unsigned long flags, void *data) | __arm64_sys_mount | false | +| 0x28 (40) | mount | (char 
*dev_name, char *dir_name, char *type, unsigned long flags, void *data) | __arm64_sys_mount | partial | | 0x29 (41) | pivot_root | (const char *new_root, const char *put_old) | __arm64_sys_pivot_root | false | | 0x2b (43) | statfs | (const char *pathname, struct statfs *buf) | __arm64_sys_statfs | partial | | 0x2c (44) | fstatfs | (unsigned int fd, struct statfs *buf) | __arm64_sys_fstatfs | partial | @@ -289,7 +289,7 @@ | 0x1af (431) | fsconfig | (int fd, unsigned int cmd, const char *_key, const void *_value, int aux) | __arm64_sys_fsconfig | false | | 0x1b0 (432) | fsmount | (int fs_fd, unsigned int flags, unsigned int attr_flags) | __arm64_sys_fsmount | false | | 0x1b1 (433) | fspick | (int dfd, const char *path, unsigned int flags) | __arm64_sys_fspick | false | -| 0x1b2 (434) | pidfd_open | (pid_t pid, unsigned int flags) | __arm64_sys_pidfd_open | false | +| 0x1b2 (434) | pidfd_open | (pid_t pid, unsigned int flags) | __arm64_sys_pidfd_open | true | | 0x1b3 (435) | clone3 | (struct clone_args *uargs, size_t size) | __arm64_sys_clone3 | false | | 0x1b4 (436) | close_range | (unsigned int fd, unsigned int max_fd, unsigned int flags) | __arm64_sys_close_range | partially | | 0x1b5 (437) | openat2 | (int dfd, const char *filename, struct open_how *how, size_t usize) | __arm64_sys_openat2 | false | diff --git a/libkernel/src/sync/mutex.rs b/libkernel/src/sync/mutex.rs index 23a47e96..532c4615 100644 --- a/libkernel/src/sync/mutex.rs +++ b/libkernel/src/sync/mutex.rs @@ -132,6 +132,12 @@ impl DerefMut for AsyncMutexGuard<'_, T, CPU> { unsafe impl Send for Mutex {} unsafe impl Sync for Mutex {} +impl Default for Mutex { + fn default() -> Self { + Self::new(T::default()) + } +} + impl Mutex<(), CPU> { /// Acquires the mutex lock without caring about the data. 
pub(crate) fn acquire(&self) -> MutexAcquireFuture<'_, CPU> { diff --git a/scripts/qemu-runner.sh b/scripts/qemu-runner.sh index eae48c52..add84b51 100755 --- a/scripts/qemu-runner.sh +++ b/scripts/qemu-runner.sh @@ -23,4 +23,4 @@ bin="${elf%.elf}.bin" # Convert to binary format aarch64-none-elf-objcopy -O binary "$elf" "$bin" -qemu-system-aarch64 -M virt,gic-version=3 -initrd moss.img -cpu cortex-a72 -m 2G -smp 4 -nographic -s -kernel "$bin" -append "$append_args --rootfs=ext4fs --automount=/dev,devfs --automount=/tmp,tmpfs --automount=/proc,procfs --automount=/sys,sysfs" +qemu-system-aarch64 -M virt,gic-version=3 -initrd arch.img -cpu cortex-a72 -m 2G -smp 4 -nographic -s -kernel "$bin" -append "$append_args --rootfs=ext4fs --automount=/dev,devfs --automount=/tmp,tmpfs --automount=/proc,procfs --automount=/sys,sysfs" diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index 2a7fd08a..252797a1 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -34,6 +34,7 @@ use crate::{ ioctl::sys_ioctl, iov::{sys_preadv, sys_preadv2, sys_pwritev, sys_pwritev2, sys_readv, sys_writev}, listxattr::{sys_flistxattr, sys_listxattr, sys_llistxattr}, + mount::sys_mount, removexattr::{sys_fremovexattr, sys_lremovexattr, sys_removexattr}, rw::{sys_pread64, sys_pwrite64, sys_read, sys_write}, seek::sys_lseek, @@ -56,7 +57,7 @@ use crate::{ process_vm::sys_process_vm_readv, }, process::{ - TaskState, + TaskState, Tid, caps::{sys_capget, sys_capset}, clone::sys_clone, creds::{ @@ -67,11 +68,14 @@ use crate::{ exit::{sys_exit, sys_exit_group}, fd_table::{ dup::{sys_dup, sys_dup3}, + epoll::{sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait}, fcntl::sys_fcntl, select::{sys_ppoll, sys_pselect6}, }, + pidfd::sys_pidfd_open, prctl::sys_prctl, ptrace::{TracePoint, ptrace_stop, sys_ptrace}, + signalfd::sys_signalfd4, sleep::{sys_clock_nanosleep, sys_nanosleep}, thread_group::{ Pgid, @@ -197,10 +201,33 @@ pub async fn 
handle_syscall() { 0xf => sys_lremovexattr(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await, 0x10 => sys_fremovexattr(arg1.into(), TUA::from_value(arg2 as _)).await, 0x11 => sys_getcwd(TUA::from_value(arg1 as _), arg2 as _).await, + 0x14 => sys_epoll_create1(arg1 as _), + 0x15 => { + sys_epoll_ctl( + arg1.into(), + arg2 as _, + arg3.into(), + TUA::from_value(arg4 as _), + ) + .await + } + 0x16 => { + sys_epoll_pwait( + arg1.into(), + TUA::from_value(arg2 as _), + arg3 as _, + arg4 as _, + TUA::from_value(arg5 as _), + arg6 as _, + ) + .await + } 0x17 => sys_dup(arg1.into()), 0x18 => sys_dup3(arg1.into(), arg2.into(), arg3 as _), 0x19 => sys_fcntl(arg1.into(), arg2 as _, arg3 as _).await, 0x1d => sys_ioctl(arg1.into(), arg2 as _, arg3 as _).await, + 0x20 => Ok(0), + 0x21 => Err(KernelError::NotSupported), 0x22 => sys_mkdirat(arg1.into(), TUA::from_value(arg2 as _), arg3 as _).await, 0x23 => sys_unlinkat(arg1.into(), TUA::from_value(arg2 as _), arg3 as _).await, 0x24 => { @@ -230,6 +257,16 @@ pub async fn handle_syscall() { ) .await } + 0x28 => { + sys_mount( + TUA::from_value(arg1 as _), + TUA::from_value(arg2 as _), + TUA::from_value(arg3 as _), + arg4 as _, + TUA::from_value(arg5 as _), + ) + .await + } 0x2b => sys_statfs(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await, 0x2c => sys_fstatfs(arg1.into(), TUA::from_value(arg2 as _)).await, 0x2d => sys_truncate(TUA::from_value(arg1 as _), arg2 as _).await, @@ -342,6 +379,7 @@ pub async fn handle_syscall() { ) .await } + 0x4a => sys_signalfd4(arg1.into(), TUA::from_value(arg2 as _), arg3 as _).await, 0x4e => { sys_readlinkat( arg1.into(), @@ -623,6 +661,8 @@ pub async fn handle_syscall() { .await } 0x125 => Err(KernelError::NotSupported), + 0x1ae => Err(KernelError::NotSupported), + 0x1b2 => sys_pidfd_open(Tid(arg1 as _), arg2 as _).await, 0x1b4 => sys_close_range(arg1.into(), arg2.into(), arg3 as _).await, 0x1b7 => { sys_faccessat2( diff --git a/src/console/tty.rs b/src/console/tty.rs index 
a9359d77..92d21d77 100644 --- a/src/console/tty.rs +++ b/src/console/tty.rs @@ -12,6 +12,7 @@ use crate::{ use alloc::{boxed::Box, sync::Arc}; use async_trait::async_trait; use cooker::TtyInputCooker; +use core::any::Any; use core::{cmp::min, pin::Pin}; use futures::{ future::{Either, select}, @@ -86,6 +87,14 @@ impl Tty { #[async_trait] impl FileOps for Tty { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn read(&mut self, _ctx: &mut FileCtx, usr_buf: UA, count: usize) -> Result { self.readat(usr_buf, count, 0).await } diff --git a/src/drivers/fs/dev.rs b/src/drivers/fs/dev.rs index 3f8ed7f5..4d4c359c 100644 --- a/src/drivers/fs/dev.rs +++ b/src/drivers/fs/dev.rs @@ -24,6 +24,17 @@ pub struct DevFs { impl DevFs { pub fn new() -> Arc { + let shm = DevFsINode { + id: InodeId::from_fsid_and_inodeid(DEVFS_ID, 1), + attr: SpinLock::new(FileAttr { + file_type: FileType::Directory, + mode: FilePermissions::from_bits_retain(0o755), + ..FileAttr::default() + }), + kind: InodeKind::Directory(SpinLock::new(BTreeMap::new())), + }; + let mut root_children = BTreeMap::new(); + root_children.insert("shm".to_string(), Arc::new(shm)); let root_inode = Arc::new(DevFsINode { id: InodeId::from_fsid_and_inodeid(DEVFS_ID, 0), attr: SpinLock::new(FileAttr { @@ -31,12 +42,12 @@ impl DevFs { mode: FilePermissions::from_bits_retain(0o755), ..FileAttr::default() }), - kind: InodeKind::Directory(SpinLock::new(BTreeMap::new())), + kind: InodeKind::Directory(SpinLock::new(root_children)), }); Arc::new(Self { root: root_inode, - next_inode_id: AtomicU64::new(1), + next_inode_id: AtomicU64::new(2), }) } diff --git a/src/drivers/null.rs b/src/drivers/null.rs index da6785bc..bd2c572e 100644 --- a/src/drivers/null.rs +++ b/src/drivers/null.rs @@ -9,6 +9,7 @@ use crate::{ use alloc::string::ToString; use alloc::{boxed::Box, sync::Arc}; use async_trait::async_trait; +use core::any::Any; use core::{future::Future, pin::Pin}; use 
libkernel::{ driver::CharDevDescriptor, @@ -22,6 +23,14 @@ struct NullFileOps; #[async_trait] impl FileOps for NullFileOps { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn readat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result { // EOF Ok(0) diff --git a/src/drivers/zero.rs b/src/drivers/zero.rs index db4bc667..938c67ca 100644 --- a/src/drivers/zero.rs +++ b/src/drivers/zero.rs @@ -9,6 +9,7 @@ use crate::{ }; use alloc::{boxed::Box, string::ToString, sync::Arc}; use async_trait::async_trait; +use core::any::Any; use core::{cmp::min, future::Future, pin::Pin}; use libkernel::{ driver::CharDevDescriptor, @@ -25,6 +26,14 @@ struct ZeroFileOps; #[async_trait] impl FileOps for ZeroFileOps { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn read(&mut self, _ctx: &mut FileCtx, buf: UA, count: usize) -> Result { self.readat(buf, count, 0).await } diff --git a/src/fs/dir.rs b/src/fs/dir.rs index c6b16d28..387710e4 100644 --- a/src/fs/dir.rs +++ b/src/fs/dir.rs @@ -2,6 +2,7 @@ use alloc::boxed::Box; use alloc::ffi::CString; use async_trait::async_trait; use core::alloc::Layout; +use core::any::Any; use libkernel::{ error::{FsError, KernelError, Result}, fs::{DirStream, Dirent, FileType, Inode}, @@ -60,6 +61,14 @@ impl DirFile { #[async_trait] impl FileOps for DirFile { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn read(&mut self, _ctx: &mut FileCtx, _buf: UA, _count: usize) -> Result { Err(FsError::IsADirectory.into()) } diff --git a/src/fs/fops.rs b/src/fs/fops.rs index 6388cd0b..24726a53 100644 --- a/src/fs/fops.rs +++ b/src/fs/fops.rs @@ -1,3 +1,4 @@ +use core::any::Any; use core::pin::Pin; use alloc::boxed::Box; @@ -39,7 +40,10 @@ macro_rules! 
process_iovec { } #[async_trait] -pub trait FileOps: Send + Sync { +pub trait FileOps: Send + Sync + Any { + fn as_any(&self) -> &dyn Any; + fn as_any_mut(&mut self) -> &mut dyn Any; + /// Reads data from the current file position into `buf`. /// The file's cursor is advanced by the number of bytes read. async fn read(&mut self, ctx: &mut FileCtx, buf: UA, count: usize) -> Result { @@ -137,4 +141,8 @@ pub trait FileOps: Send + Sync { ) -> Result { Err(KernelError::InvalidValue) } + + fn to_epoll(&self) -> Option<&crate::process::fd_table::epoll::EpollFileOps> { + None + } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index cd5438fd..867f5ada 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -79,6 +79,13 @@ impl VfsState { self.mounts.insert(mount_point_id, mount); } + /// Removes a mount point by its inode ID. + fn remove_mount(&mut self, mount_point_id: &InodeId) -> Option<()> { + let mount = self.mounts.remove(mount_point_id)?; + self.filesystems.remove(&mount.fs.id())?; + Some(()) + } + /// Checks if an inode is a mount point and returns the root inode of the /// mounted filesystem if it is. fn get_mount_root(&self, inode_id: &InodeId) -> Option> { @@ -177,6 +184,19 @@ impl VFS { Ok(()) } + #[expect(unused)] + pub async fn unmount(&self, mount_point: Arc) -> Result<()> { + let mount_point_id = mount_point.id(); + + // Lock the state and remove the mount. 
+ self.state + .lock_save_irq() + .remove_mount(&mount_point_id) + .ok_or(FsError::NotFound)?; + + Ok(()) + } + pub async fn get_fs(&self, inode: Arc) -> Result> { self.state .lock_save_irq() diff --git a/src/fs/pipe.rs b/src/fs/pipe.rs index 14cab70b..d64f56ec 100644 --- a/src/fs/pipe.rs +++ b/src/fs/pipe.rs @@ -11,6 +11,7 @@ use crate::{ }; use alloc::{boxed::Box, sync::Arc}; use async_trait::async_trait; +use core::any::Any; use core::{ future, pin::pin, @@ -115,6 +116,14 @@ impl PipeReader { #[async_trait] impl FileOps for PipeReader { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn read(&mut self, _ctx: &mut FileCtx, u_buf: UA, count: usize) -> Result { self.readat(u_buf, count, 0).await } @@ -194,6 +203,14 @@ impl PipeWriter { #[async_trait] impl FileOps for PipeWriter { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + async fn read(&mut self, _ctx: &mut FileCtx, _buf: UA, _count: usize) -> Result { Err(KernelError::BadFd) } diff --git a/src/fs/reg.rs b/src/fs/reg.rs index fba4c74c..469757af 100644 --- a/src/fs/reg.rs +++ b/src/fs/reg.rs @@ -8,6 +8,7 @@ use crate::{ }; use alloc::{boxed::Box, sync::Arc}; use async_trait::async_trait; +use core::any::Any; use core::{cmp::min, pin::Pin}; use libkernel::{ error::Result, @@ -29,6 +30,14 @@ impl RegFile { #[async_trait] impl FileOps for RegFile { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + /// Reads data from the current file position into `buf`. The file's cursor /// is advanced by the number of bytes read. 
async fn readat( diff --git a/src/fs/syscalls/mod.rs b/src/fs/syscalls/mod.rs index 1cd8a2d5..1cc4c3c9 100644 --- a/src/fs/syscalls/mod.rs +++ b/src/fs/syscalls/mod.rs @@ -8,6 +8,7 @@ pub mod getxattr; pub mod ioctl; pub mod iov; pub mod listxattr; +pub mod mount; pub mod open; pub mod removexattr; pub mod rw;
diff --git a/src/fs/syscalls/mount.rs b/src/fs/syscalls/mount.rs
new file mode 100644
index 00000000..b89ebabe
--- /dev/null
+++ b/src/fs/syscalls/mount.rs
@@ -0,0 +1,115 @@
+use crate::fs::VFS;
+use crate::memory::uaccess::cstr::UserCStr;
+use crate::sched::current::current_task_shared;
+use bitflags::bitflags;
+use core::ffi::c_char;
+use libkernel::error::{KernelError, Result};
+use libkernel::fs::path::Path;
+use libkernel::memory::address::{TUA, UA};
+
+bitflags! {
+    /// Flag bits accepted in the `flags` argument of `mount(2)`.
+    #[derive(Debug)]
+    pub struct MountFlags: u64 {
+        const MS_RDONLY = 1;
+        const MS_NOSUID = 2;
+        const MS_NODEV = 4;
+        const MS_NOEXEC = 8;
+        const MS_SYNCHRONOUS = 16;
+        const MS_REMOUNT = 32;
+        const MS_MANDLOCK = 64;
+        const MS_DIRSYNC = 128;
+        const NOSYMFOLLOW = 256;
+        const MS_NOATIME = 1024;
+        const MS_NODIRATIME = 2048;
+        const MS_BIND = 4096;
+        const MS_MOVE = 8192;
+        const MS_REC = 16384;
+        const MS_VERBOSE = 32768;
+        // MS_SILENT is an alias of the deprecated MS_VERBOSE (bit 15);
+        // bit 16 belongs to MS_POSIXACL.
+        const MS_SILENT = 32768;
+        const MS_POSIXACL = 1 << 16;
+        const MS_UNBINDABLE = 1 << 17;
+        const MS_PRIVATE = 1 << 18;
+        const MS_SLAVE = 1 << 19;
+        const MS_SHARED = 1 << 20;
+        const MS_RELATIME = 1 << 21;
+        const MS_KERNMOUNT = 1 << 22;
+        const MS_I_VERSION = 1 << 23;
+        const MS_STRICTATIME = 1 << 24;
+        const MS_LAZYTIME = 1 << 25;
+        const MS_SUBMOUNT = 1 << 26;
+        const MS_NOREMOTELOCK = 1 << 27;
+        const MS_NOSEC = 1 << 28;
+        const MS_BORN = 1 << 29;
+        const MS_ACTIVE = 1 << 30;
+        const MS_NOUSER = 1 << 31;
+    }
+}
+
+/// Handles `mount(2)`: mounts a filesystem on the directory `dir_name`.
+///
+/// Only name-based mounts of filesystems known to `VFS` are implemented.
+/// The driver name is taken from `type_` when userspace supplied one and
+/// from `dev_name` otherwise; a few Linux names are mapped to this kernel's
+/// driver names. `_data` is ignored and `flags` are only inspected for
+/// `MS_REC`, which is currently accepted as a no-op.
+///
+/// # Errors
+///
+/// Propagates failures from copying `dir_name`, resolving the mount point
+/// and `VFS::mount`; returns `KernelError::NotSupported` when neither
+/// `type_` nor `dev_name` could be read from userspace.
+pub async fn sys_mount(
+    dev_name: TUA<c_char>,
+    dir_name: TUA<c_char>,
+    type_: TUA<c_char>,
+    flags: i64,
+    _data: UA,
+) -> Result<usize> {
+    let flags = MountFlags::from_bits_truncate(flags as u64);
+    if flags.contains(MountFlags::MS_REC) {
+        // TODO: Handle later
+        return Ok(0);
+    }
+
+    let mut dev_buf = [0u8; 1024];
+    let dev_name = UserCStr::from_ptr(dev_name)
+        .copy_from_user(&mut dev_buf)
+        .await
+        .ok();
+
+    let mut dir_buf = [0u8; 1024];
+    let dir_name = UserCStr::from_ptr(dir_name)
+        .copy_from_user(&mut dir_buf)
+        .await?;
+    let mount_point = VFS
+        .resolve_path(
+            Path::new(dir_name),
+            VFS.root_inode(),
+            &current_task_shared(),
+        )
+        .await?;
+
+    let mut type_buf = [0u8; 1024];
+    let fs_type = UserCStr::from_ptr(type_)
+        .copy_from_user(&mut type_buf)
+        .await
+        .ok()
+        .filter(|name| !name.is_empty());
+
+    // `type_` names the filesystem; fall back to `dev_name` for callers that
+    // pass the filesystem name as the source and leave the type unset.
+    let Some(fs_name) = fs_type.or(dev_name) else {
+        return Err(KernelError::NotSupported);
+    };
+    let fs_name = match fs_name {
+        "proc" => "procfs",
+        "devtmpfs" => "devfs",
+        "cgroup2" => "cgroupfs",
+        other => other,
+    };
+    VFS.mount(mount_point, fs_name, None).await?;
+    Ok(0)
+}
diff --git a/src/kernel/hostname.rs b/src/kernel/hostname.rs index 6c0abbe0..236b2b11 100644 --- a/src/kernel/hostname.rs +++ b/src/kernel/hostname.rs @@ -1,4 +1,4 @@ -use crate::memory::uaccess::cstr::UserCStr; +use crate::memory::uaccess::copy_from_user_slice; use crate::sched::current::current_task_shared; use crate::sync::OnceLock; use crate::sync::SpinLock; @@ -30,9 +30,10 @@ pub async fn sys_sethostname(name_ptr: TUA, name_len: usize) -> Result) -> Result { let nodename = CString::from_str(&hostname().lock_save_irq()).unwrap(); copy_str_to_c_char_arr(&mut uts.nodename, nodename.as_c_str().to_bytes_with_nul()); - let release = c"4.2.3".to_bytes_with_nul(); + let release = c"5.7.1".to_bytes_with_nul(); copy_str_to_c_char_arr(&mut uts.release, release); #[cfg(feature = "smp")] diff --git a/src/process/fd_table.rs b/src/process/fd_table.rs index a8234962..98e2367a 100644 --- a/src/process/fd_table.rs +++ b/src/process/fd_table.rs @@ -3,6 +3,7 @@ use alloc::{sync::Arc, vec::Vec}; use libkernel::error::{FsError, Result}; pub mod dup; +pub mod epoll; pub mod fcntl; pub mod select; @@ -189,6 +190,14 @@ impl FileDescriptorTable { } } + pub fn iter(&self) -> impl Iterator)> { + self.entries.iter().enumerate().filter_map(|(i, entry)| { + entry 
+ .as_ref() + .map(|entry| (Fd(i as i32), entry.file.clone())) + }) + } + /// Number of file descriptors in use. pub fn len(&self) -> usize { self.entries.iter().filter(|e| e.is_some()).count() diff --git a/src/process/fd_table/epoll.rs b/src/process/fd_table/epoll.rs new file mode 100644 index 00000000..9ec59220 --- /dev/null +++ b/src/process/fd_table/epoll.rs @@ -0,0 +1,323 @@ +use alloc::{boxed::Box, collections::BTreeMap, sync::Arc, vec::Vec}; +use core::{future::Future, pin::Pin, task::Poll, time::Duration}; +use core::any::Any; +use crate::{ + drivers::timer::sleep, + fs::{ + fops::FileOps, + open_file::{FileCtx, OpenFile} + }, + memory::uaccess::{UserCopyable, copy_from_user, copy_objs_to_user}, + process::fd_table::{Fd, select::PollFlags}, + sched::current::current_task_shared, + sync::Mutex, +}; +use libkernel::{ + error::{FsError, KernelError, Result}, + fs::OpenFlags, + memory::address::{TUA, UA}, +}; +use async_trait::async_trait; + +pub const EPOLL_CTL_ADD: i32 = 1; +pub const EPOLL_CTL_DEL: i32 = 2; +pub const EPOLL_CTL_MOD: i32 = 3; + +pub const EPOLLIN: u32 = 0x001; +pub const EPOLLOUT: u32 = 0x004; +pub const EPOLLERR: u32 = 0x008; +pub const EPOLLHUP: u32 = 0x010; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default)] +pub struct EpollEvent { + pub events: u32, + pub data: u64, +} + +unsafe impl UserCopyable for EpollEvent {} + +/// Entry inside an [`EpollInstance`]. +#[derive(Clone)] +struct EpItem { + file: Arc, + flags: PollFlags, + data: u64, +} + +/// The kernel object backing an epoll file-descriptor. +#[derive(Default)] +pub struct EpollInstance { + /// Registered interest list (keyed by raw file-descriptor number). + inner: Mutex>, +} + +impl EpollInstance { + fn new() -> Arc { + Arc::new(Self { + inner: Mutex::new(BTreeMap::new()), + }) + } + + /// Convert an epoll event bitmask into the internal [`PollFlags`]. 
+ fn ep_to_poll(mask: u32) -> PollFlags { + let mut pf = PollFlags::empty(); + if mask & EPOLLIN != 0 { + pf.insert(PollFlags::POLLIN); + } + if mask & EPOLLOUT != 0 { + pf.insert(PollFlags::POLLOUT); + } + if mask & EPOLLERR != 0 { + pf.insert(PollFlags::POLLERR); + } + if mask & EPOLLHUP != 0 { + pf.insert(PollFlags::POLLHUP); + } + pf + } + + fn poll_to_ep(pf: PollFlags) -> u32 { + let mut ev = 0; + if pf.contains(PollFlags::POLLIN) { + ev |= EPOLLIN; + } + if pf.contains(PollFlags::POLLOUT) { + ev |= EPOLLOUT; + } + if pf.contains(PollFlags::POLLERR) { + ev |= EPOLLERR; + } + if pf.contains(PollFlags::POLLHUP) { + ev |= EPOLLHUP; + } + ev + } +} + +pub struct EpollFileOps { + epi: Arc, +} + +impl EpollFileOps { + fn new(epi: Arc) -> Self { + Self { epi } + } +} + +#[async_trait] +impl FileOps for EpollFileOps { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + async fn readat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result { + Err(KernelError::BadFd) + } + + async fn writeat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result { + Err(KernelError::BadFd) + } + + fn poll_read_ready(&self) -> Pin> + Send>> { + Box::pin(async { Err(KernelError::NotSupported) }) + } + + fn poll_write_ready(&self) -> Pin> + Send>> { + Box::pin(async { Err(KernelError::NotSupported) }) + } + + async fn release(&mut self, _ctx: &FileCtx) -> Result<()> { + Ok(()) + } + + fn to_epoll(&self) -> Option<&EpollFileOps> { + Some(self) + } +} + +pub fn sys_epoll_create1(flags: i32) -> Result { + const EPOLL_CLOEXEC: i32 = 0x80000; + + let cloexec = (flags & EPOLL_CLOEXEC) != 0; + if flags & !EPOLL_CLOEXEC != 0 { + return Err(KernelError::InvalidValue); + } + + // Allocate kernel object + let epi = EpollInstance::new(); + + // Wrap inside an OpenFile + let mut oflags = OpenFlags::empty(); + if cloexec { + oflags |= OpenFlags::O_CLOEXEC; + } + let file = OpenFile::new(Box::new(EpollFileOps::new(epi)), oflags); + + // 
Insert into current task's FD-table
+    let fd = {
+        let task = current_task_shared();
+        let mut fdt = task.fd_table.lock_save_irq();
+        fdt.insert(Arc::new(file))?
+    };
+
+    Ok(fd.as_raw() as _)
+}
+
+/// Handles `epoll_ctl(2)`: adds, modifies or removes the interest-list entry
+/// for `fd` on the epoll instance referred to by `epfd`.
+///
+/// `event` is read from userspace for `EPOLL_CTL_ADD` and `EPOLL_CTL_MOD`
+/// only. An unknown `op` yields `KernelError::InvalidValue`; adding an fd
+/// twice yields `FsError::AlreadyExists`; modifying or deleting an fd that
+/// is not registered yields `FsError::NotFound`.
+pub async fn sys_epoll_ctl(epfd: Fd, op: i32, fd: Fd, event: TUA<EpollEvent>) -> Result<usize> {
+    // Retrieve the epoll instance.
+    let epi = get_instance(epfd).await?;
+
+    match op {
+        EPOLL_CTL_ADD => {
+            let ev: EpollEvent = copy_from_user(event).await?;
+            let task = current_task_shared();
+            let target_file = task
+                .fd_table
+                .lock_save_irq()
+                .get(fd)
+                .ok_or(KernelError::BadFd)?;
+
+            let mut map = epi.inner.lock().await;
+            if map.contains_key(&fd.as_raw()) {
+                return Err(FsError::AlreadyExists.into());
+            }
+            map.insert(
+                fd.as_raw(),
+                EpItem {
+                    file: target_file,
+                    flags: EpollInstance::ep_to_poll(ev.events),
+                    data: ev.data,
+                },
+            );
+        }
+        EPOLL_CTL_MOD => {
+            let ev: EpollEvent = copy_from_user(event).await?;
+            let mut map = epi.inner.lock().await;
+            let entry = map.get_mut(&fd.as_raw()).ok_or(FsError::NotFound)?;
+
+            entry.flags = EpollInstance::ep_to_poll(ev.events);
+            entry.data = ev.data;
+        }
+        EPOLL_CTL_DEL => {
+            let mut map = epi.inner.lock().await;
+            map.remove(&fd.as_raw()).ok_or(FsError::NotFound)?;
+        }
+        _ => return Err(KernelError::InvalidValue),
+    }
+
+    Ok(0)
+}
+
+/// Handles `epoll_pwait(2)`: waits until at least one registered file is
+/// ready or the timeout expires, then copies up to `maxevents` events to
+/// the userspace array `events`.
+///
+/// A negative `timeout_ms` waits without a timeout. `_sigmask` and
+/// `_sigsetsize` are accepted but ignored. Returns the number of events
+/// written, which is 0 when the timeout expired first.
+///
+/// # Errors
+///
+/// `KernelError::InvalidValue` if `maxevents <= 0`, `KernelError::BadFd` if
+/// `epfd` is not an epoll instance, plus any error reported by a polled
+/// file or by the copy to userspace.
+pub async fn sys_epoll_pwait(
+    epfd: Fd,
+    events: TUA<EpollEvent>,
+    maxevents: i32,
+    timeout_ms: i32,
+    _sigmask: TUA<()>,
+    _sigsetsize: usize,
+) -> Result<usize> {
+    if maxevents <= 0 {
+        return Err(KernelError::InvalidValue);
+    }
+    // Positive, checked above.
+    let max_events = maxevents as usize;
+
+    let epi = get_instance(epfd).await?;
+
+    // Snapshot of current interest list.
+    let items: Vec<EpItem> = {
+        let map = epi.inner.lock().await;
+        map.values().cloned().collect()
+    };
+
+    // Build one readiness future per registered file.
+    let mut futs: Vec<Pin<Box<dyn Future<Output = Result<PollFlags>> + Send>>> =
+        Vec::with_capacity(items.len());
+    for item in &items {
+        let poll_fut = item.file.poll(item.flags).await;
+        futs.push(Box::pin(poll_fut));
+    }
+
+    // Result of every future that has already completed. A future must not
+    // be polled again once it has returned `Poll::Ready`, so finished slots
+    // are skipped on later wake-ups and reused when building the output.
+    let mut results: Vec<Option<PollFlags>> = futs.iter().map(|_| None).collect();
+
+    // Optional relative timeout
+    let mut timeout_fut = if timeout_ms < 0 {
+        None
+    } else {
+        Some(Box::pin(sleep(Duration::from_millis(timeout_ms as _)))
+            as Pin<Box<dyn Future<Output = ()> + Send>>)
+    };
+
+    // Await readiness
+    core::future::poll_fn(|cx| {
+        let mut num_ready = 0usize;
+        let mut num_pending = 0usize;
+
+        for (fut, slot) in futs.iter_mut().zip(results.iter_mut()) {
+            if slot.is_none() {
+                match fut.as_mut().poll(cx) {
+                    Poll::Ready(Ok(pf)) => *slot = Some(pf),
+                    Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
+                    Poll::Pending => num_pending += 1,
+                }
+            }
+            if matches!(slot, Some(pf) if !pf.is_empty()) {
+                num_ready += 1;
+            }
+        }
+
+        if num_ready != 0 {
+            return Poll::Ready(Ok(()));
+        }
+
+        // Every registered future finished without reporting an event, so
+        // nothing is left that could wake this task: report zero events
+        // instead of sleeping with no waker registered.
+        if num_pending == 0 && !futs.is_empty() {
+            return Poll::Ready(Ok(()));
+        }
+
+        // Check timeout
+        if let Some(to) = timeout_fut.as_mut() {
+            if to.as_mut().poll(cx).is_ready() {
+                return Poll::Ready(Ok(()));
+            }
+        }
+
+        // No ready events yet, continue waiting.
+        Poll::Pending
+    })
+    .await?;
+
+    // Copy up to `maxevents` ready entries to userspace, using the flags
+    // recorded while waiting (no future is awaited a second time).
+    let user_events: Vec<EpollEvent> = items
+        .iter()
+        .zip(results)
+        .filter_map(|(item, flags)| {
+            let flags = flags.filter(|pf| !pf.is_empty())?;
+            Some(EpollEvent {
+                events: EpollInstance::poll_to_ep(flags),
+                data: item.data,
+            })
+        })
+        .take(max_events)
+        .collect();
+
+    copy_objs_to_user(&user_events[..], events).await?;
+    Ok(user_events.len())
+}
+
+/// Looks up `epfd` in the current task's fd table and returns the epoll
+/// instance behind it, or `KernelError::BadFd` if the descriptor is not
+/// open or is not an epoll file.
+async fn get_instance(epfd: Fd) -> Result<Arc<EpollInstance>> {
+    let task = current_task_shared();
+    let file = task
+        .fd_table
+        .lock_save_irq()
+        .get(epfd)
+        .ok_or(KernelError::BadFd)?;
+
+    let (ops, _) = &*file.lock().await;
+
+    ops.to_epoll()
+        .map(|ep_ops| ep_ops.epi.clone())
+        .ok_or(KernelError::BadFd)
+}
diff --git a/src/process/mod.rs b/src/process/mod.rs index 5a0af63d..ce4aa4e6 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -39,8 +39,10 @@ pub mod exec; pub mod exit; pub mod fd_table; pub mod owned; +pub mod pidfd; pub mod prctl; pub mod ptrace; +pub mod signalfd; pub mod sleep; pub mod thread_group; pub mod threading; diff --git a/src/process/pidfd.rs b/src/process/pidfd.rs new file mode 100644 index 00000000..8d11d6fa --- /dev/null +++ b/src/process/pidfd.rs @@ -0,0 +1,75 @@ +use crate::fs::fops::FileOps; +use crate::fs::open_file::OpenFile; +use crate::process::thread_group::Tgid; +use crate::process::{TaskDescriptor, Tid, find_task_by_descriptor}; +use crate::sched::current::current_task_shared; +use alloc::boxed::Box; +use alloc::sync::Arc; +use core::any::Any; +use async_trait::async_trait; +use bitflags::bitflags; +use libkernel::error::{KernelError, Result}; +use libkernel::fs::OpenFlags; +use libkernel::memory::address::UA; + +bitflags! 
{
+    /// Flags accepted by `pidfd_open(2)`; each bit is defined from the
+    /// matching `OpenFlags` constant.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+    pub struct PidfdFlags: u32 {
+        const PIDFD_NONBLOCK = OpenFlags::O_NONBLOCK.bits();
+        const PIDFD_THREAD = OpenFlags::O_EXCL.bits();
+    }
+}
+
+/// File object behind a pidfd: records the target id and the flags it was
+/// opened with.
+pub struct PidFile {
+    pid: Tid,
+    flags: PidfdFlags,
+}
+
+impl PidFile {
+    /// Creates the bare file object.
+    pub fn new(pid: Tid, flags: PidfdFlags) -> Self {
+        Self { pid, flags }
+    }
+
+    /// Creates a [`PidFile`] and wraps it in a shareable [`OpenFile`] whose
+    /// open flags carry the same bits as `flags`.
+    pub fn new_open_file(pid: Tid, flags: PidfdFlags) -> Arc<OpenFile> {
+        let file = PidFile::new(pid, flags);
+        Arc::new(OpenFile::new(
+            Box::new(file),
+            // Every `PidfdFlags` bit is defined from an `OpenFlags` bit, so
+            // nothing is dropped by the truncating conversion.
+            OpenFlags::from_bits_truncate(flags.bits()),
+        ))
+    }
+}
+
+#[async_trait]
+impl FileOps for PidFile {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Reading from a pidfd is rejected.
+    async fn readat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result<usize> {
+        Err(KernelError::InvalidValue)
+    }
+
+    /// Writing to a pidfd is rejected.
+    async fn writeat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result<usize> {
+        Err(KernelError::InvalidValue)
+    }
+}
+
+/// Handles `pidfd_open(2)`: installs a new pidfd referring to `pid` in the
+/// current task's fd table and returns its number.
+///
+/// # Errors
+///
+/// `KernelError::InvalidValue` for unknown flag bits; an error (instead of
+/// a kernel panic) when `PIDFD_THREAD` is not set and no thread-group
+/// leader with this id exists; any error from inserting into the fd table.
+pub async fn sys_pidfd_open(pid: Tid, flags: u32) -> Result<usize> {
+    let flags = PidfdFlags::from_bits(flags).ok_or(KernelError::InvalidValue)?;
+    if !flags.contains(PidfdFlags::PIDFD_THREAD) {
+        // Ensure the pid exists and is a thread group leader. The pid comes
+        // straight from userspace, so a failed lookup must not panic.
+        // NOTE(review): assumes `find_task_by_descriptor` returns `Option`
+        // and that `KernelError::NoProcess` is the crate's ESRCH variant.
+        find_task_by_descriptor(&TaskDescriptor::from_tgid_tid(Tgid(pid.value()), pid))
+            .ok_or(KernelError::NoProcess)?;
+    }
+    let task = current_task_shared();
+
+    let file = PidFile::new_open_file(pid, flags);
+
+    let fd = task.fd_table.lock_save_irq().insert(file)?;
+
+    Ok(fd.as_raw() as _)
+}
diff --git a/src/process/signalfd.rs b/src/process/signalfd.rs new file mode 100644 index 00000000..0fed7eaf --- /dev/null +++ b/src/process/signalfd.rs @@ -0,0 +1,222 @@ +use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use async_trait::async_trait; +use core::any::Any; +use core::{future::Future, pin::Pin}; +use libkernel::{ + error::{KernelError, Result}, + fs::{OpenFlags, SeekFrom}, + memory::address::{TUA, UA}, + sync::condvar::WakeupType, +}; + +use crate::memory::uaccess::copy_from_user; + +use crate::{ + fs::{fops::FileOps, open_file::FileCtx, open_file::OpenFile}, + process::{ + Task, + fd_table::Fd, + thread_group::signal::{SigId, SigSet}, + }, + sched::current::current_task_shared, + sync::{CondVar, Mutex}, +}; + +pub const SFD_CLOEXEC: i32 = 0x0008_0000; +pub const SFD_NONBLOCK: i32 = 0x0000_0800; + +/// Kernel object backing one signalfd file-descriptor. +struct SignalFd { + mask: SigSet, + queue: Mutex>, + cv: CondVar, + nonblock: bool, +} + +impl SignalFd { + fn new(mask: SigSet, nonblock: bool) -> Self { + Self { + mask, + queue: Mutex::new(Vec::new()), + cv: CondVar::new(false), + nonblock, + } + } + + /// Enqueue a newly delivered signal if it matches the interest mask. 
+    async fn notify_signal(&self, sig: SigId) {
+        if self.mask.contains(sig.into()) && sig != SigId::SIGKILL && sig != SigId::SIGSTOP {
+            let mut q = self.queue.lock().await;
+            q.push(sig.user_id() as u32);
+            // Raise the flag while the queue lock is still held so this
+            // cannot interleave with `pop_signal` clearing it for a queue
+            // that has just become non-empty again.
+            self.cv.update(|flag| {
+                *flag = true;
+                WakeupType::All
+            });
+        }
+    }
+
+    /// Removes and returns the oldest queued signal number, or `None` when
+    /// the queue is empty. Clears the readiness flag when the queue drains.
+    async fn pop_signal(&self) -> Option<u32> {
+        let mut q = self.queue.lock().await;
+        if q.is_empty() {
+            return None;
+        }
+        let sig = q.remove(0);
+        if q.is_empty() {
+            // Cleared under the queue lock; see `notify_signal`.
+            self.cv.update(|flag| {
+                *flag = false;
+                WakeupType::None
+            });
+        }
+        Some(sig)
+    }
+}
+
+#[async_trait]
+impl FileOps for SignalFd {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Reads one pending signal number as a native-endian `u32`.
+    ///
+    /// Blocks until a signal is queued unless the descriptor is
+    /// non-blocking, in which case `KernelError::TryAgain` is returned.
+    async fn readat(&mut self, buf: UA, count: usize, _offset: u64) -> Result<usize> {
+        // Each read returns exactly one u32 signal number, so buffers
+        // shorter than four bytes are rejected.
+        if count < size_of::<u32>() {
+            return Err(KernelError::InvalidValue);
+        }
+
+        loop {
+            // Attempt to get one pending signal.
+            if let Some(sig) = self.pop_signal().await {
+                let bytes = sig.to_ne_bytes();
+                crate::memory::uaccess::copy_to_user_slice(&bytes, buf).await?;
+                return Ok(size_of::<u32>());
+            }
+            if self.nonblock {
+                return Err(KernelError::TryAgain);
+            }
+            // Wait until a signal arrives, then retry: another reader may
+            // have taken it in the meantime.
+            self.cv
+                .wait_until(|s| if *s { Some(()) } else { None })
+                .await;
+        }
+    }
+
+    async fn writeat(&mut self, _buf: UA, _count: usize, _offset: u64) -> Result<usize> {
+        Err(KernelError::BadFd)
+    }
+
+    /// Resolves once the readiness flag is set, i.e. a signal is queued.
+    fn poll_read_ready(&self) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> {
+        let cv = self.cv.clone();
+        Box::pin(async move {
+            cv.wait_until(|flag| if *flag { Some(()) } else { None })
+                .await;
+            Ok(())
+        })
+    }
+
+    fn poll_write_ready(&self) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> {
+        Box::pin(async { Err(KernelError::NotSupported) })
+    }
+
+    async fn seek(&mut self, _ctx: &mut FileCtx, _pos: SeekFrom) -> Result<u64> {
+        Err(KernelError::InvalidValue)
+    }
+
+    async fn release(&mut self, _ctx: &FileCtx) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Handles `signalfd4(2)`.
+///
+/// With `fd == -1` a new signalfd is created for the mask read from
+/// `mask_ptr` and its descriptor number is returned. Otherwise `fd` must
+/// refer to an existing signalfd, whose mask and flags are replaced;
+/// `KernelError::InvalidValue` is returned if it is some other kind of file.
+pub async fn sys_signalfd4(fd: Fd, mask_ptr: TUA<SigSet>, flags: i32) -> Result<usize> {
+    // SIGKILL/SIGSTOP must be silently ignored – clear them from the mask.
+    let mut mask: SigSet = copy_from_user(mask_ptr).await?;
+    mask.remove(SigId::SIGKILL.into());
+    mask.remove(SigId::SIGSTOP.into());
+
+    let nonblock = (flags & SFD_NONBLOCK) != 0;
+    let cloexec = (flags & SFD_CLOEXEC) != 0;
+
+    let task = current_task_shared();
+
+    if fd.as_raw() == -1 {
+        // Create a brand-new signalfd.
+        let mut oflags = OpenFlags::empty();
+        oflags.set(OpenFlags::O_NONBLOCK, nonblock);
+        oflags.set(OpenFlags::O_CLOEXEC, cloexec);
+
+        let sfd = Arc::new(OpenFile::new(
+            Box::new(SignalFd::new(mask, nonblock)),
+            oflags,
+        ));
+        let new_fd = task.fd_table.lock_save_irq().insert(sfd)?;
+        return Ok(new_fd.as_raw() as _);
+    }
+
+    // Modify an existing one.
+    let file = {
+        let fdt = task.fd_table.lock_save_irq();
+        fdt.get(fd).ok_or(KernelError::BadFd)?
+    };
+
+    // Verify this really is a signalfd instance.
+    {
+        let (ops, _) = &mut *file.lock().await;
+        let Some(sigops) = ops.as_any_mut().downcast_mut::<SignalFd>() else {
+            return Err(KernelError::InvalidValue);
+        };
+        sigops.mask = mask;
+        sigops.nonblock = nonblock;
+    }
+
+    let mut new_flags = file.flags().await;
+    new_flags.set(OpenFlags::O_NONBLOCK, nonblock);
+    new_flags.set(OpenFlags::O_CLOEXEC, cloexec);
+    file.set_flags(new_flags).await;
+
+    Ok(fd.as_raw() as _)
+}
+
+/// Notify all signalfd instances in the given task's FD table about a newly
+/// delivered signal.
+pub async fn broadcast_to_signalfds(task: Arc<Task>, signal: SigId) {
+    // Snapshot the open files first so the fd-table lock is released before
+    // any of the `.await`s below. `iter` visits exactly the occupied slots,
+    // so no arbitrary upper bound on descriptor numbers is needed.
+    let files: Vec<_> = task
+        .fd_table
+        .lock_save_irq()
+        .iter()
+        .map(|(_, file)| file)
+        .collect();
+
+    for file in files {
+        let (ops, _) = &*file.lock().await;
+        if let Some(sigops) = ops.as_any().downcast_ref::<SignalFd>() {
+            sigops.notify_signal(signal).await;
+        }
+    }
+}
diff --git a/src/process/thread_group.rs b/src/process/thread_group.rs index 56edf9cb..8283fc9f 100644 --- a/src/process/thread_group.rs +++ b/src/process/thread_group.rs @@ -1,5 +1,10 @@ use super::{Task, TaskState, Tid}; -use crate::{memory::uaccess::UserCopyable, sched::waker::create_waker, sync::SpinLock}; +use crate::{ + memory::uaccess::UserCopyable, + process::signalfd::broadcast_to_signalfds, + sched::{spawn_kernel_work, waker::create_waker}, + sync::SpinLock, +}; use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, @@ -172,6 +177,8 @@ impl ThreadGroup { ) { create_waker(task.descriptor()).wake(); + // Notify any signalfd instances that are listening in this task. 
+                        spawn_kernel_work(broadcast_to_signalfds(task.clone(), signal));
                     }
                 }
             }
@@ -186,6 +193,8 @@ impl ThreadGroup {
                         TaskState::Runnable | TaskState::Running
                     ) {
+                        // Notify signalfd listeners on this runnable task.
+                        spawn_kernel_work(broadcast_to_signalfds(task.clone(), signal));
                         // Signal delivered. This task will eventually be
                         // dispatched again by the uspc_ret code and the
                         // signal picked up.
@@ -196,6 +205,8 @@ impl ThreadGroup {
         // No task will pick up the signal. Wake one up.
         for task in self.tasks.lock_save_irq().values() {
             if let Some(task) = task.upgrade() {
+                // Wake the task and broadcast the signal to any signalfd fds.
+                spawn_kernel_work(broadcast_to_signalfds(task.clone(), signal));
                 create_waker(task.descriptor()).wake();
                 return;
             }
diff --git a/usertest/src/epoll.rs b/usertest/src/epoll.rs
new file mode 100644
index 00000000..37089d44
--- /dev/null
+++ b/usertest/src/epoll.rs
@@ -0,0 +1,37 @@
+use crate::register_test;
+
+fn test_epoll() {
+    unsafe {
+        // Create the epoll instance; EPOLL_CLOEXEC is the only flag passed.
+        let epfd = libc::epoll_create1(libc::EPOLL_CLOEXEC);
+        assert!(epfd >= 0, "epoll_create1 failed");
+
+        // Create a pipe: fds[0] is registered for reading below, fds[1] is written to.
+        let mut fds = [0; 2];
+        assert_eq!(libc::pipe(fds.as_mut_ptr()), 0, "pipe failed");
+
+        // Register the pipe's read end (fds[0]) with the epoll instance for EPOLLIN.
+        let mut ev = libc::epoll_event {
+            events: libc::EPOLLIN as u32,
+            u64: fds[0] as u64, // user data: the watched fd itself
+        };
+        assert_eq!(
+            libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[0], &mut ev as *mut _),
+            0,
+            "epoll_ctl ADD failed"
+        );
+
+        // Write one byte to fds[1] so that fds[0] becomes readable.
+        let msg = b"x";
+        let written = libc::write(fds[1], msg.as_ptr() as *const _, msg.len());
+        assert_eq!(written, msg.len() as isize, "write failed");
+
+        // Wait (timeout argument 100) for at most one event; it must report EPOLLIN.
+        let mut out: libc::epoll_event = std::mem::zeroed();
+        let n = libc::epoll_wait(epfd, &mut out as *mut _, 1, 100);
+        assert_eq!(n, 1, "epoll_wait did not return 1");
+        assert!((out.events & libc::EPOLLIN as u32) != 0, "EPOLLIN not set");
+    }
+}
+
+register_test!(test_epoll);
diff --git a/usertest/src/main.rs b/usertest/src/main.rs
index a13581b2..9ef48df4 100644
--- a/usertest/src/main.rs
+++ b/usertest/src/main.rs
@@ -8,6 +8,7 @@ use std::{
 mod fs;
 mod futex;
 mod signals;
+mod epoll;
 
 pub struct Test {
     pub test_text: &'static str,