diff --git a/etc/syscalls_linux_aarch64.md b/etc/syscalls_linux_aarch64.md index bfd3028b..c23fd5d4 100644 --- a/etc/syscalls_linux_aarch64.md +++ b/etc/syscalls_linux_aarch64.md @@ -269,7 +269,7 @@ | 0x11a (282) | userfaultfd | (int flags) | __arm64_sys_userfaultfd | false | | 0x11b (283) | membarrier | (int cmd, unsigned int flags, int cpu_id) | __arm64_sys_membarrier | false | | 0x11c (284) | mlock2 | (unsigned long start, size_t len, int flags) | __arm64_sys_mlock2 | false | -| 0x11d (285) | copy_file_range | (int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) | __arm64_sys_copy_file_range | false | +| 0x11d (285) | copy_file_range | (int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) | __arm64_sys_copy_file_range | true | | 0x11e (286) | preadv2 | (unsigned long fd, const struct iovec *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, rwf_t flags) | __arm64_sys_preadv2 | true | | 0x11f (287) | pwritev2 | (unsigned long fd, const struct iovec *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, rwf_t flags) | __arm64_sys_pwritev2 | true | | 0x120 (288) | pkey_mprotect | (unsigned long start, size_t len, unsigned long prot, int pkey) | __arm64_sys_pkey_mprotect | false | diff --git a/libkernel/src/memory/kbuf.rs b/libkernel/src/memory/kbuf.rs index b03a3b44..48e036cd 100644 --- a/libkernel/src/memory/kbuf.rs +++ b/libkernel/src/memory/kbuf.rs @@ -8,6 +8,7 @@ use crate::{ }, }; use alloc::sync::Arc; +use core::num::NonZeroUsize; use core::{cmp::min, future, mem::MaybeUninit, task::Poll}; use ringbuf::{ SharedRb, @@ -113,6 +114,10 @@ impl, C: CpuOps> KBufCore { res } + + pub fn capacity(&self) -> NonZeroUsize { + self.inner.lock_save_irq().buf.capacity() + } } impl, C: CpuOps> KBufCore { diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index ec6e1dbe..b1910cad 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -28,6 +28,7 @@ use crate::{ chmod::sys_fchmod, chown::sys_fchown, close::{sys_close, sys_close_range}, + copy_file_range::sys_copy_file_range, getxattr::{sys_fgetxattr, sys_getxattr, sys_lgetxattr}, ioctl::sys_ioctl, iov::{sys_preadv, sys_preadv2, sys_pwritev, sys_pwritev2, sys_readv, sys_writev}, @@ -578,6 +579,17 @@ pub async fn handle_syscall() { .await } 0x116 => sys_getrandom(TUA::from_value(arg1 as _), arg2 as _, arg3 as _).await, + 0x11d => { + sys_copy_file_range( + arg1.into(), + TUA::from_value(arg2 as _), + arg3.into(), + TUA::from_value(arg4 as _), + arg5 as _, + arg6 as _, + ) + .await + } 0x11e => { sys_preadv2( arg1.into(), diff --git a/src/fs/syscalls/copy_file_range.rs b/src/fs/syscalls/copy_file_range.rs new file mode 100644 index 00000000..ce1cb1d5 --- /dev/null +++ b/src/fs/syscalls/copy_file_range.rs @@ -0,0 +1,248 @@ +use alloc::sync::Arc; +use libkernel::error::KernelError; +use libkernel::memory::address::TUA; + +use crate::kernel::kpipe::KPipe; +use crate::memory::uaccess::{copy_from_user, copy_to_user}; +use crate::process::fd_table::Fd; +use crate::sched::current::current_task; + +pub async fn sys_copy_file_range( + fd_in: Fd, + off_in: TUA, + fd_out: Fd, + off_out: TUA, + size: usize, + flags: u32, +) -> libkernel::error::Result { + if flags != 0 { + return Err(KernelError::InvalidValue); + } + + if size == 0 { + return Ok(0); + } + + let mut in_off: u64 = if off_in.is_null() { + 0 + } else { + let v = copy_from_user(off_in).await? as i64; + if v < 0 { + return Err(KernelError::InvalidValue); + } + v as u64 + }; + + let mut out_off: u64 = if off_out.is_null() { + 0 + } else { + let v = copy_from_user(off_out).await? as i64; + if v < 0 { + return Err(KernelError::InvalidValue); + } + v as u64 + }; + + let (reader, writer) = { + let task = current_task(); + let fds = task.fd_table.lock_save_irq(); + + let reader = fds.get(fd_in).ok_or(KernelError::BadFd)?; + let writer = fds.get(fd_out).ok_or(KernelError::BadFd)?; + + (reader, writer) + }; + + if Arc::ptr_eq(&reader, &writer) { + return Err(KernelError::InvalidValue); + } + + // Fast path: both offsets are NULL, so we can splice using each file's + // internal cursor. + if in_off == 0 && out_off == 0 { + let kbuf = KPipe::new()?; + + let (reader_ops, reader_ctx) = &mut *reader.lock().await; + let (writer_ops, writer_ctx) = &mut *writer.lock().await; + + let mut remaining = size; + let mut total_written = 0; + + while remaining > 0 { + let read = match reader_ops.splice_into(reader_ctx, &kbuf, remaining).await { + Ok(v) => v, + Err(e) => { + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + + if read == 0 { + return Ok(total_written); + } + + let mut to_write = read; + + while to_write > 0 { + let written = match writer_ops.splice_from(writer_ctx, &kbuf, to_write).await { + Ok(v) => v, + Err(e) => { + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + to_write -= written; + total_written += written; + } + + remaining -= read; + } + + return Ok(total_written); + } + + // Offset path: at least one of the offsets was provided. + let kpipe = KPipe::new()?; + + let (reader_ops, reader_ctx) = &mut *reader.lock().await; + let (writer_ops, writer_ctx) = &mut *writer.lock().await; + + // If an offset pointer is NULL, we use (and update) the file cursor in that direction. + if off_in.is_null() { + in_off = reader_ctx.pos; + } + if off_out.is_null() { + out_off = writer_ctx.pos; + } + + let mut remaining = size; + let mut total_written = 0usize; + + while remaining > 0 { + let chunk_sz = core::cmp::min(kpipe.capacity().get(), remaining); + + // Read into the pipe using cursor-based splice, but with a temporary seek when + // explicit offsets are requested. + if !off_in.is_null() { + let saved = reader_ctx.pos; + reader_ctx.pos = in_off; + let read = match reader_ops.splice_into(reader_ctx, &kpipe, chunk_sz).await { + Ok(v) => v, + Err(e) => { + reader_ctx.pos = saved; + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + reader_ctx.pos = saved; + + if read == 0 { + break; + } + in_off = in_off.saturating_add(read as u64); + + // Write from the pipe similarly, using temporary seek for explicit out offsets. + let mut to_write = read; + while to_write > 0 { + let saved_out = writer_ctx.pos; + if !off_out.is_null() { + writer_ctx.pos = out_off; + } + + let written = match writer_ops.splice_from(writer_ctx, &kpipe, to_write).await { + Ok(v) => v, + Err(e) => { + writer_ctx.pos = saved_out; + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + + if !off_out.is_null() { + writer_ctx.pos = saved_out; + out_off = out_off.saturating_add(written as u64); + } + + to_write -= written; + total_written += written; + } + + remaining -= read; + } else { + // Input uses file cursor. We can splice directly. + let read = match reader_ops.splice_into(reader_ctx, &kpipe, chunk_sz).await { + Ok(v) => v, + Err(e) => { + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + + if read == 0 { + break; + } + + let mut to_write = read; + while to_write > 0 { + // Output might use explicit offset; use temporary seek if so. + let saved_out = writer_ctx.pos; + if !off_out.is_null() { + writer_ctx.pos = out_off; + } + + let written = match writer_ops.splice_from(writer_ctx, &kpipe, to_write).await { + Ok(v) => v, + Err(e) => { + writer_ctx.pos = saved_out; + return if total_written > 0 { + Ok(total_written) + } else { + Err(e) + }; + } + }; + + if !off_out.is_null() { + writer_ctx.pos = saved_out; + out_off = out_off.saturating_add(written as u64); + } + + to_write -= written; + total_written += written; + } + + remaining -= read; + } + + // Update user offsets if provided. + if !off_in.is_null() { + copy_to_user(off_in, in_off as i32).await?; + } else { + // Track cursor for correctness if we modified it (we didn't), but keep in_off in sync. + in_off = reader_ctx.pos; + } + + if !off_out.is_null() { + copy_to_user(off_out, out_off as i32).await?; + } else { + out_off = writer_ctx.pos; + } + } + + Ok(total_written) +} diff --git a/src/fs/syscalls/mod.rs b/src/fs/syscalls/mod.rs index 19085927..1cd8a2d5 100644 --- a/src/fs/syscalls/mod.rs +++ b/src/fs/syscalls/mod.rs @@ -3,6 +3,7 @@ pub mod chdir; pub mod chmod; pub mod chown; pub mod close; +pub mod copy_file_range; pub mod getxattr; pub mod ioctl; pub mod iov; diff --git a/src/kernel/kpipe.rs b/src/kernel/kpipe.rs index 84c1acf9..9ab10588 100644 --- a/src/kernel/kpipe.rs +++ b/src/kernel/kpipe.rs @@ -7,6 +7,7 @@ use crate::{ uaccess::{copy_from_user_slice, copy_to_user_slice}, }, }; +use core::num::NonZeroUsize; use core::{cmp::min, marker::PhantomData, ops::Deref}; use libkernel::{ error::Result, @@ -95,4 +96,8 @@ impl KPipe { pub async fn splice_from(&self, source: &KPipe, count: usize) -> usize { self.inner.splice_from(&source.inner, count).await } + + pub fn capacity(&self) -> NonZeroUsize { + self.inner.capacity() + } }