Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ steps:
command: |
julia -e 'println("--- :julia: Developing CUDA")
using Pkg
Pkg.add(url="https://github.com/christiangnrd/CUDA.jl", rev="intrinsics")'
Pkg.add(url="https://github.com/christiangnrd/CUDA.jl", rev="intrinsicsnew")'
julia -e 'println("--- :julia: Instantiating project")
using Pkg
Pkg.develop(; path=pwd())' || exit 3
Expand Down Expand Up @@ -76,7 +76,7 @@ steps:
command: |
julia -e 'println("--- :julia: Developing Metal")
using Pkg
Pkg.add(url="https://github.com/JuliaGPU/Metal.jl", rev="kaintr")'
Pkg.add(url="https://github.com/JuliaGPU/Metal.jl", rev="kaintrnew")'
julia -e 'println("--- :julia: Instantiating project")
using Pkg
Pkg.develop(; path=pwd())' || exit 3
Expand Down Expand Up @@ -108,9 +108,8 @@ steps:
command: |
julia -e 'println("--- :julia: Developing oneAPI")
using Pkg
Pkg.add(url="https://github.com/christiangnrd/oneAPI.jl", rev="intrinsics")
Pkg.develop(; name="AcceleratedKernels")'
sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml
Pkg.add(url="https://github.com/christiangnrd/oneAPI.jl", rev="intrinsicsnew")
Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")'
julia -e 'println("--- :julia: Instantiating project")
using Pkg
Pkg.develop(; path=pwd())' || exit 3
Expand Down Expand Up @@ -141,11 +140,10 @@ steps:
command: |
julia -e 'println("--- :julia: Developing AMDGPU")
using Pkg
Pkg.develop(; name="AcceleratedKernels")'
sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.9, 0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml
Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")'
julia -e '
using Pkg
Pkg.add(url="https://github.com/christiangnrd/AMDGPU.jl", rev="intrinsics")
Pkg.add(url="https://github.com/christiangnrd/AMDGPU.jl", rev="intrinsicsnew")
println("--- :julia: Instantiating project")
Pkg.develop(; path=pwd())' || exit 3

Expand Down Expand Up @@ -176,7 +174,7 @@ steps:
command: |
julia -e 'println("--- :julia: Developing OpenCL")
using Pkg
Pkg.add(url="https://github.com/christiangnrd/OpenCL.jl", rev="intrinsics")
Pkg.add(url="https://github.com/christiangnrd/OpenCL.jl", rev="intrinsicsnew")
Pkg.develop(; name="SPIRVIntrinsics")'
julia -e 'println("--- :julia: Instantiating project")
using Pkg
Expand Down
2 changes: 1 addition & 1 deletion src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ include("macros.jl")
###

function Scratchpad end
SharedMemory(t::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(t, dims)
SharedMemory(::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(T, dims)

function __synchronize()
    # Thin forwarding wrapper around `KI.barrier`.
    return KI.barrier()
end

Expand Down
147 changes: 143 additions & 4 deletions src/intrinsics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,119 @@ Returns the unique group ID.
function get_group_id end

"""
localmemory(T, dims)
get_sub_group_size()::UInt32

Returns the number of work-items in the sub-group.

!!! note
Backend implementations **must** implement:
```
@device_override get_sub_group_size()::UInt32
```
"""
function get_sub_group_size end # method-less declaration; backends add the method via @device_override

"""
get_max_sub_group_size()::UInt32

Returns the maximum sub-group size for sub-groups in the current workgroup.

!!! note
Backend implementations **must** implement:
```
@device_override get_max_sub_group_size()::UInt32
```
"""
function get_max_sub_group_size end # method-less declaration; backends add the method via @device_override

"""
get_num_sub_groups()::UInt32

Returns the number of sub-groups in the current workgroup.

!!! note
Backend implementations **must** implement:
```
@device_override get_num_sub_groups()::UInt32
```
"""
function get_num_sub_groups end # method-less declaration; backends add the method via @device_override

"""
get_sub_group_id()::UInt32

Returns the sub-group ID within the work-group.

!!! note
1-based.

!!! note
Backend implementations **must** implement:
```
@device_override get_sub_group_id()::UInt32
```
"""
function get_sub_group_id end # method-less declaration; backends add the (1-based) method via @device_override

"""
get_sub_group_local_id()::UInt32

Returns the work-item ID within the current sub-group.

!!! note
1-based.

!!! note
Backend implementations **must** implement:
```
@device_override get_sub_group_local_id()::UInt32
```
"""
function get_sub_group_local_id end # method-less declaration; backends add the (1-based) method via @device_override


"""
localmemory(::Type{T}, dims)

Declare memory that is local to a workgroup.

!!! note
Backend implementations **must** implement:
```
@device_override localmemory(T::DataType, ::Val{Dims}) where {T, Dims}
@device_override localmemory(::Type{T}, ::Val{Dims}) where {T, Dims}
```
As well as the on-device functionality.
"""
function localmemory(::Type{T}, dims) where {T}
    # Wrap `dims` in a `Val` and forward to the `Val`-based method that
    # backends are required to provide.
    static_dims = Val(dims)
    return localmemory(T, static_dims)
end

"""
shfl_down(val::T, offset::Integer) where T

Read `val` from the lane whose id is `offset` higher than the calling lane's.
Kernels that use this function should assume that it performs no
synchronization.

!!! note
Backend implementations **must** implement:
```
@device_override shfl_down(val::T, offset::Integer) where T
```
As well as the on-device functionality.
"""
function shfl_down end # method-less declaration; backends add the method via @device_override

"""
shfl_down_types(::Backend)::Vector{DataType}

Returns a vector of the `DataType`s for which `shfl_down` is supported on the given backend.

!!! note
Backend implementations **must** implement this function
only if they support `shfl_down` for any types.
"""
function shfl_down_types(::Backend)
    # Fallback for any backend without its own method: an empty list of types.
    return DataType[]
end


"""
barrier()

Expand All @@ -139,6 +239,29 @@ function barrier()
error("Group barrier used outside kernel or not captured")
end

"""
sub_group_barrier()

After a `sub_group_barrier()` call, all reads and writes to global and local memory
from each thread in the sub-group are visible to all other threads in the
sub-group.

This does **not** guarantee that a write from a thread in a certain sub-group will
be visible to a thread in a different sub-group.

!!! note
`sub_group_barrier()` must be encountered by all workitems of a sub-group executing the kernel or by none at all.

!!! note
Backend implementations **must** implement:
```
@device_override sub_group_barrier()
```
"""
function sub_group_barrier()
    # Generic fallback: always raises, reporting that the barrier was used
    # outside a kernel or was not captured.
    msg = "Sub-group barrier used outside kernel or not captured"
    return error(msg)
end

"""
_print(args...)

Expand Down Expand Up @@ -174,7 +297,7 @@ kernel on the host.

Backends must also implement the on-device kernel launch functionality.
"""
struct Kernel{B, Kern}
struct Kernel{B,Kern}
backend::B
kern::Kern
end
Expand Down Expand Up @@ -220,6 +343,22 @@ kernel launch with too big a workgroup is attempted.
"""
function max_work_group_size end

"""
sub_group_size(backend)::Int

Returns a reasonable sub-group size supported by the currently
active device for the specified backend. This would typically
be 32, or 64 for devices that don't support 32.

!!! note
Backend implementations **must** implement:
```
sub_group_size(backend::NewBackend)::Int
```
As well as the on-device functionality.
"""
function sub_group_size end # method-less declaration; each backend adds sub_group_size(backend::ItsBackend)::Int

"""
multiprocessor_count(backend::NewBackend)::Int

Expand Down Expand Up @@ -299,7 +438,7 @@ There are a few keyword arguments that influence the behavior of `KI.@kernel`:
"""
macro kernel(backend, ex...)
call = ex[end]
kwargs = map(ex[1:(end - 1)]) do kwarg
kwargs = map(ex[1:(end-1)]) do kwarg
if kwarg isa Symbol
:($kwarg = $kwarg)
elseif Meta.isexpr(kwarg, :(=))
Expand Down
Loading
Loading