diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba39cc5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/Project.toml b/Project.toml index 3fe4bd6..f512105 100644 --- a/Project.toml +++ b/Project.toml @@ -1,15 +1,17 @@ name = "GroupedArrays" uuid = "6407cd72-fade-4a84-8a1e-56e431fc1533" authors = ["matthieugomez "] -version = "0.3.4" +version = "0.3.5" [deps] DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" [compat] DataAPI = "1" Missings = "1" +PrecompileTools = "1" julia = "1.4" [extras] diff --git a/src/GroupedArrays.jl b/src/GroupedArrays.jl index 2b0776c..aa1c7ab 100644 --- a/src/GroupedArrays.jl +++ b/src/GroupedArrays.jl @@ -2,6 +2,8 @@ module GroupedArrays using Missings using DataAPI using Base.Threads +using PrecompileTools + include("spawn.jl") include("utils.jl") @@ -70,7 +72,7 @@ Construct a `GroupedArray` taking on distinct values for the groups formed by el * `args...`: `AbstractArrays` of same sizes. ### Keyword arguments -* `coalesce::Bool`: should missing values considered as distinct grotups indicators? +* `coalesce::Bool`: should missing values be considered as distinct group indicators? * `sort::Union{Bool, Nothing}`: should the order of the groups be the sort order? Set to `nothing` for best performance. ### Examples @@ -103,7 +105,6 @@ function GroupedArray(args...; coalesce = false, sort = true) end # Find index of representative row for each group -# now in fillfirst! function find_index(g::GroupedArray) groups, ngroups = g.groups, g.ngroups idx = Vector{Int}(undef, ngroups) @@ -132,7 +133,7 @@ function Base.convert(::Type{GroupedArray{Union{Int, Missing},N}}, g::GroupedArr return GroupedArray{Union{Int, Missing},N}(g.groups, g.ngroups) end function Base.convert(::Type{GroupedArray{Int, N}}, g::GroupedArray{Union{Int, Missing}, N}) where {N} - @assert all(x > 0 for x in g.groups) + all(x > 0 for x in g.groups) || throw(InexactError(:convert, GroupedArray{Int,N}, g)) return GroupedArray{Int,N}(g.groups, g.ngroups) end @@ -191,17 +192,32 @@ end @inline Base.haskey(x::GroupedInvRefPool{T}, ::Missing) where {T} = T >: Missing @inline Base.haskey(x::GroupedInvRefPool, v::Integer) = 1 <= v <= x.ngroups @inline function Base.getindex(x::GroupedInvRefPool{T}, ::Missing) where {T} - @boundscheck T >: Missing + @boundscheck T >: Missing || throw(KeyError(missing)) 0 end @inline function Base.getindex(x::GroupedInvRefPool, i::Integer) - @boundscheck 1 <= i <= x.ngroups + @boundscheck 1 <= i <= x.ngroups || throw(KeyError(i)) i end @inline Base.get(x::GroupedInvRefPool{T}, ::Missing, default) where {T} = T >: Missing ? 0 : default -@inline Base.get(x::GroupedInvRefPool, i::Integer, default) = 1 <= v <= x.ngroups ? i : default +@inline Base.get(x::GroupedInvRefPool, i::Integer, default) = 1 <= i <= x.ngroups ? i : default DataAPI.invrefpool(g::GroupedArray{T}) where {T} = GroupedInvRefPool{T}(g.ngroups) +@compile_workload begin + p1 = [1, 2, 3, 2] + p2 = [1, 1, 2, 2] + GroupedArray(p1) + GroupedArray(p1; sort = nothing) + GroupedArray(p1, p2) + GroupedArray(p1, p2; sort = nothing) + p3 = ["a", "b", "c", "c"] + GroupedArray(p1, p3) + p4 = Union{Int,Missing}[1, 2, missing, 2] + GroupedArray(p4) + GroupedArray(p4; coalesce = true) + p5 = Union{String,Missing}["a", "b", missing, "c"] + GroupedArray(p5) +end export GroupedArray, GroupedVector, GroupedMatrix end # module diff --git a/test/runtests.jl b/test/runtests.jl index 96dcbfc..4517789 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -112,6 +112,35 @@ invrefpools = DataAPI.invrefpool(g) g = GroupedArray([missing, missing, missing]) @test all(ismissing(x) for x in g) - +# sort=nothing and sort=false +g_sort = GroupedArray(p1_missing; sort = true) +g_nosort = GroupedArray(p1_missing; sort = nothing) +g_false = GroupedArray(p1_missing; sort = false) +@test g_nosort.ngroups == g_sort.ngroups +@test g_false.ngroups == g_sort.ngroups + +# empty arrays +g = GroupedArray(Int[]) +@test length(g) == 0 +@test g.ngroups == 0 + +# convert methods +g = GroupedArray([1, 2, 3, 1]) +g_missing = convert(GroupedArray{Union{Int, Missing}, 1}, g) +@test eltype(g_missing) == Union{Int, Missing} +@test all(g .== g_missing) +g_back = convert(GroupedArray{Int, 1}, g_missing) +@test eltype(g_back) == Int +@test all(g .== g_back) +# convert with missing should error +g_with_missing = GroupedArray([1, missing, 2]) +@test_throws InexactError convert(GroupedArray{Int, 1}, g_with_missing) + +# Base.get for GroupedInvRefPool with Integer +g = GroupedArray(PooledArray(p1_missing), p2) +invrefpools = DataAPI.invrefpool(g) +@test get(invrefpools, 1, -1) == 1 +@test get(invrefpools, g.ngroups, -1) == g.ngroups +@test get(invrefpools, g.ngroups + 1, -1) == -1