From 6afa5278f711e9543981fcc05125092cf63f0370 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Andersen Date: Tue, 7 Apr 2026 14:49:02 +0200 Subject: [PATCH 1/2] Various fixes * Use proper C types for ccalls; this fixes UB and is more maintainable * Add fast path for append!(::Vector, ::MemoryView) * Add 5-arg copyto! to avoid generic fallback * Improve precision of @static VERSION checks * Simplify impl of old parentindices before memoryindex was public * Fix a couple of pointer retentions that was UB * Amend AGENTS.md --- AGENTS.md | 1 - examples/find.jl | 2 +- src/MemoryViews.jl | 9 +++++-- src/base_arrays.jl | 1 + src/basic.jl | 59 ++++++++++++++++++++++++++-------------------- test/runtests.jl | 15 ++++++++++++ 6 files changed, 57 insertions(+), 30 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4c8c9f8..909ab81 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,7 +29,6 @@ Set `--check-bounds=yes` to force boundschecking when running experiments. **Source files**: - `construction.jl` — constructors from Array, Memory, String, SubArray, CodeUnits - `basic.jl` — indexing, slicing (returns views, not copies), copying, find operations with memchr/memrchr C calls, comparison via memcmp -- `experimental.jl` — `split_first`, `split_last`, `split_at`, `split_unaligned` - `delimited.jl` — `split_each` delimiter iterator - `base_arrays.jl` — Vector/Memory conversion, append - `io.jl` — `readbytes!` diff --git a/examples/find.jl b/examples/find.jl index 46ecbea..714059c 100644 --- a/examples/find.jl +++ b/examples/find.jl @@ -74,7 +74,7 @@ function find_next_byte(needle::UInt8, haystack::ImmutableMemoryView{UInt8}, i:: ulen = len % UInt GC.@preserve haystack begin ptr = pointer(haystack, i) - p = @ccall memchr(ptr::Ptr{UInt8}, needle::UInt8, ulen::UInt)::Ptr{Nothing} + p = @ccall memchr(ptr::Ptr{Cvoid}, needle::Cint, ulen::Csize_t)::Ptr{Cvoid} end return p == C_NULL ? nothing : (p - ptr + i) % Int end diff --git a/src/MemoryViews.jl b/src/MemoryViews.jl index 60219ce..014b23a 100644 --- a/src/MemoryViews.jl +++ b/src/MemoryViews.jl @@ -31,7 +31,10 @@ struct Immutable end """ MemoryView{T, M} <: DenseVector{T} -View into a `Memory{T}`. +`MemoryView` is the common representation of dense, contiguous, Julia-owned +data. `String`, `Vector`, `Memory` and other memory-backed types should have +a fast `MemoryView` constructor. + Construct from memory-backed values `x` with `MemoryView(x)`. `MemoryView`s are guaranteed to point to contiguous, valid CPU memory, @@ -102,9 +105,11 @@ Create a mutable memory view from its parts. * All indices `i in 1:len` are valid for `ref` (i.e. `memoryref(ref, i)` would not throw) * If `ref` is derived from immutable memory, it is the caller's responsibility - to ensure that mutating the memory does not result in undefined behaviour. + to ensure that the resulting view is not mutated. For example, `ref` may be derived from a `String`, and mutating `String`s in Julia may result in undefined behaviour. + In these cases, the caller may convert to `ImmutableMemoryView` immediately + after calling this function. # Examples ```jldoctest diff --git a/src/base_arrays.jl b/src/base_arrays.jl index d1b5e44..7b42804 100644 --- a/src/base_arrays.jl +++ b/src/base_arrays.jl @@ -26,6 +26,7 @@ function Base.copy!(A::Union{Memory{T}, Array{T}}, mem::MemoryView{T}) where {T} end function Base.append!(v::Vector{T}, mem::MemoryView{T}) where {T} + isempty(mem) && return v old_len = length(v) resize!(v, length(v) + length(mem)) dst = @inbounds MemoryView(v)[(old_len + 1):end] diff --git a/src/basic.jl b/src/basic.jl index bb4e944..bb3985a 100644 --- a/src/basic.jl +++ b/src/basic.jl @@ -8,7 +8,7 @@ end # The parent method for memoryref was added in 1.12. In versions before that, # it can be accessed by reaching into internals. -@static if VERSION < v"1.12" +@static if VERSION < v"1.12.0-DEV.966" Base.parent(v::MemoryView) = v.ref.mem else Base.parent(v::MemoryView) = parent(v.ref) @@ -23,17 +23,10 @@ function Base.iterate(x::MemoryView, i::Int = 1) end # Base.memoryindex exists in Julia 1.13 onwards. -@static if VERSION < v"1.13" +@static if VERSION < v"1.13.0-DEV.1289" function Base.parentindices(x::MemoryView) - elz = Base.elsize(x) - return if iszero(elz) - offset = Int(x.ref.ptr_or_offset) - ((1 + offset):(x.len + offset),) - else - byte_offset = pointer(x.ref) - pointer(x.ref.mem) - elem_offset = div(byte_offset % UInt, elz % UInt) % Int - ((elem_offset + 1):(elem_offset + x.len),) - end + start = Core.memoryrefoffset(x.ref) + return (start:(start + length(x) - 1),) end else function Base.parentindices(x::MemoryView) @@ -85,12 +78,14 @@ function Base.mightalias(a::MemoryView{T}, b::MemoryView{T}) where {T} (isempty(a) | isempty(b)) && return false # We can't compare the underlying Memory with === to add a fast path here, # because users can create aliasing, but distinct Memory using unsafe_wrap. - (p1, p2) = (pointer(a), pointer(b)) - elz = Base.elsize(a) - return if p1 < p2 - p1 + length(a) * elz > p2 - else - p2 + length(b) * elz > p1 + GC.@preserve a b begin + (p1, p2) = (pointer(a), pointer(b)) + elz = Base.elsize(a) + return if p1 < p2 + p1 + length(a) * elz > p2 + else + p2 + length(b) * elz > p1 + end end end @@ -183,12 +178,23 @@ function Base.copyto!(dst::MutableMemoryView{T}, src::MemoryView{T}) where {T} return unsafe_copyto!(dst, src) end +# This function is kind of bad API, and users should not use it. However, without this overload, +# the fallback definition is used instead which is even worse. +function Base.copyto!(dst::MutableMemoryView, di::Integer, src::MemoryView{T}, si::Integer, N::Integer) where {T} + di = Int(di)::Int + si = Int(si)::Int + N = Int(N)::Int + dst = dst[di:(di + N - 1)] + src = src[si:(si + N - 1)] + copyto!(dst, src) +end + function Base.fill!(v::MutableMemoryView{UInt8}, x::Integer) xT = convert(UInt8, x)::UInt8 isempty(v) && return v GC.@preserve v @ccall memset( - pointer(v)::Ptr{Nothing}, - Int32(xT)::Cint, + pointer(v)::Ptr{Cvoid}, + Cint(xT)::Cint, (length(v) % UInt)::Csize_t )::Cvoid return v @@ -252,7 +258,7 @@ function memchr(mem::ImmutableMemoryView{T}, byte::T) where {T <: Union{Int8, UI GC.@preserve mem begin ptr = Ptr{UInt8}(pointer(mem)) p = @ccall memchr( - ptr::Ptr{UInt8}, + ptr::Ptr{Cvoid}, (byte % UInt8)::Cint, length(mem)::Csize_t, )::Ptr{Cvoid} @@ -311,7 +317,7 @@ function memrchr(mem::ImmutableMemoryView{T}, byte::T) where {T <: Union{Int8, U GC.@preserve mem begin ptr = Ptr{UInt8}(pointer(mem)) p = @ccall memrchr( - ptr::Ptr{UInt8}, + ptr::Ptr{Cvoid}, (byte % UInt8)::Cint, length(mem)::Csize_t, )::Ptr{Cvoid} @@ -338,7 +344,7 @@ function Base.:(==)(a::Mem, b::Mem) where {Mem <: BitMemory} GC.@preserve a b begin aptr = Ptr{Nothing}(pointer(a)) bptr = Ptr{Nothing}(pointer(b)) - y = @ccall memcmp(aptr::Ptr{Nothing}, bptr::Ptr{Nothing}, nbytes::Csize_t)::Cint + y = @ccall memcmp(aptr::Ptr{Cvoid}, bptr::Ptr{Cvoid}, nbytes::Csize_t)::Cint end return iszero(y) end @@ -349,9 +355,9 @@ function Base.cmp(a::MemoryView{UInt8}, b::MemoryView{UInt8}) aptr = Ptr{Nothing}(pointer(a)) bptr = Ptr{Nothing}(pointer(b)) @ccall memcmp( - aptr::Ptr{Nothing}, - bptr::Ptr{Nothing}, - min(length(a), length(b))::Int, + aptr::Ptr{Cvoid}, + bptr::Ptr{Cvoid}, + min(length(a), length(b))::Csize_t, )::Cint end else @@ -518,7 +524,8 @@ function split_unaligned(v::MemoryView{T, M}, ::Val{A}) where {A, T, M} # Early return here to avoid division by zero: Size sz is statically known, # this will be compiled away iszero(sz) && return (unsafe_new_memoryview(M, v.ref, 0), v) - unaligned_bytes = ((alignment - (UInt(pointer(v)) & mask)) & mask) + ptr_int = GC.@preserve v UInt(pointer(v)) + unaligned_bytes = ((alignment - (ptr_int & mask)) & mask) n_elements = min(length(v), div(unaligned_bytes, sz % UInt) % Int) return @inbounds split_at(v, n_elements + 1) end diff --git a/test/runtests.jl b/test/runtests.jl index 789c6f4..88ee261 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -436,6 +436,21 @@ end v2 = rand(Int, 4) unsafe_copyto!(MemoryView(v1), MemoryView(v2)) @test v2 == v1 + + # 5-argument copyto! should also stay on LightBoundsError rather than + # falling back to Base's BoundsError-based array path. + mem = MemoryView([1, 2, 3]) + @test_throws LightBoundsError copyto!(mem, UInt(1), mem, UInt(4), UInt(1)) + @test_throws LightBoundsError copyto!(mem, 4, mem, 1, 1) + @test_throws LightBoundsError copyto!(mem, 1, mem, 1, 4) + + mem = MemoryView([1, 2, 3, 4]) + @test copyto!(mem, 2, mem, 3, 2) == [3, 4] + @test mem == [1, 3, 4, 4] + + mem = MemoryView([1, 2, 3, 4]) + @test copyto!(mem, Int32(1), mem, Int32(3), Int32(2)) == [3, 4] + @test mem == [3, 4, 3, 4] end @testset "Reverse and reverse!" begin From 8ea40e16382c83c5ca6ce1de86194d4e26130a89 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Andersen Date: Tue, 7 Apr 2026 15:29:52 +0200 Subject: [PATCH 2/2] Fixup: Format --- src/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic.jl b/src/basic.jl index bb3985a..44e8bb0 100644 --- a/src/basic.jl +++ b/src/basic.jl @@ -186,7 +186,7 @@ function Base.copyto!(dst::MutableMemoryView, di::Integer, src::MemoryView{T}, s N = Int(N)::Int dst = dst[di:(di + N - 1)] src = src[si:(si + N - 1)] - copyto!(dst, src) + return copyto!(dst, src) end function Base.fill!(v::MutableMemoryView{UInt8}, x::Integer)