1 change: 1 addition & 0 deletions .buildkite/pipeline.yml

@@ -33,6 +33,7 @@ steps:
           - "1.10"
           - "1.11"
           - "1.12"
+          - "1.13"
           - "nightly"
         adjustments:
           - with:
28 changes: 12 additions & 16 deletions lib/nvml/NVML.jl

@@ -12,21 +12,17 @@ import Libdl

 export has_nvml

-function libnvml()
-    @memoize begin
-        if Sys.iswindows()
-            # the NVSMI dir isn't added to PATH by the installer
-            nvsmi = joinpath(ENV["ProgramFiles"], "NVIDIA Corporation", "NVSMI")
-            if isdir(nvsmi)
-                joinpath(nvsmi, "nvml.dll")
-            else
-                # let's just hope for the best
-                "nvml"
-            end
-        else
-            "libnvidia-ml.so.1"
-        end
-    end::String
-end
+const libnvml::String = if Sys.iswindows()
+    # the NVSMI dir isn't added to PATH by the installer
+    nvsmi = joinpath(ENV["ProgramFiles"], "NVIDIA Corporation", "NVSMI")
+    if isdir(nvsmi)
+        joinpath(nvsmi, "nvml.dll")
+    else
+        # let's just hope for the best
+        "nvml"
+    end
+else
+    "libnvidia-ml.so.1"
+end

 function has_nvml()

@@ -37,7 +33,7 @@ function has_nvml()
         return false
     end

-    if Libdl.dlopen(libnvml(); throw_error=false) === nothing
+    if Libdl.dlopen(libnvml; throw_error=false) === nothing
         return false
     end
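For context, a minimal sketch of what this change means at a call site, assuming the generated wrappers follow the same pattern (`nvmlInit_v2` is a real NVML entry point, used here purely for illustration):

# Illustrative only; the actual wrappers live in lib/nvml/libnvml.jl.
# Before: the library name was a function call, evaluated on every invocation.
ccall((:nvmlInit_v2, libnvml()), Cint, ())

# After: `libnvml` is a `const` global binding that `ccall` (and the
# `Libdl.dlopen` call in `has_nvml` above) can reference directly.
ccall((:nvmlInit_v2, libnvml), Cint, ())

One consequence of the `const` form is that the library path is computed once when the module is loaded or precompiled rather than memoized on first call, which is presumably why the `@memoize` machinery is no longer needed here.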
796 changes: 398 additions & 398 deletions lib/nvml/libnvml.jl

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion res/wrap/nvml.toml

@@ -1,5 +1,5 @@
 [general]
-library_name = "libnvml()"
+library_name = "libnvml"
 output_file_path = "../../lib/nvml/libnvml.jl"
 prologue_file_path = "./libnvml_prologue.jl"
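The `library_name` value is spliced verbatim into the bindings emitted by the wrapper generator, so it has to match the new constant; this is what drives the 796-line regeneration of lib/nvml/libnvml.jl collapsed above. A hypothetical wrapper of the shape this setting produces (`nvmlShutdown` and `nvmlReturn_t` follow the NVML API; the exact generated code is not shown in this diff):

# Hypothetical shape of one regenerated binding.
function nvmlShutdown()
    ccall((:nvmlShutdown, libnvml), nvmlReturn_t, ())
end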
56 changes: 28 additions & 28 deletions src/device/intrinsics/indexing.jl

@@ -63,54 +63,54 @@ end

 @device_functions begin

-"""
-    gridDim()::NamedTuple
+@doc """
+    threadIdx()::NamedTuple

-Returns the dimensions of the grid.
-"""
-@inline gridDim() = (x=gridDim_x(), y=gridDim_y(), z=gridDim_z())
+Returns the thread index within the block.
+""" threadIdx
+@inline threadIdx() = (x=threadIdx_x(), y=threadIdx_y(), z=threadIdx_z())

-"""
-    blockIdx()::NamedTuple
+@doc """
+    blockDim()::NamedTuple

-Returns the block index within the grid.
-"""
-@inline blockIdx() = (x=blockIdx_x(), y=blockIdx_y(), z=blockIdx_z())
+Returns the dimensions (in threads) of the block.
+""" blockDim
+@inline blockDim() = (x=blockDim_x(), y=blockDim_y(), z=blockDim_z())

-"""
-    blockDim()::NamedTuple
+@doc """
+    blockIdx()::NamedTuple

-Returns the dimensions of the block.
-"""
-@inline blockDim() = (x=blockDim_x(), y=blockDim_y(), z=blockDim_z())
+Returns the block index within the grid.
+""" blockIdx
+@inline blockIdx() = (x=blockIdx_x(), y=blockIdx_y(), z=blockIdx_z())

-"""
-    threadIdx()::NamedTuple
+@doc """
+    gridDim()::NamedTuple

-Returns the thread index within the block.
-"""
-@inline threadIdx() = (x=threadIdx_x(), y=threadIdx_y(), z=threadIdx_z())
+Returns the dimensions (in blocks) of the grid.
+""" gridDim
+@inline gridDim() = (x=gridDim_x(), y=gridDim_y(), z=gridDim_z())

-"""
+@doc """
     warpsize()::Int32

 Returns the warp size (in threads).
-"""
+""" warpsize
 @inline warpsize() = ccall("llvm.nvvm.read.ptx.sreg.warpsize", llvmcall, Int32, ())

-"""
+@doc """
     laneid()::Int32

 Returns the thread's lane within the warp.
-"""
+""" laneid
 @inline laneid() = ccall("llvm.nvvm.read.ptx.sreg.laneid", llvmcall, Int32, ()) + 1i32

-"""
+@doc """
     lanemask(pred)::UInt32

 Returns a 32-bit mask indicating which threads in a warp satisfy the given predicate.
 Supported predicates are `==`, `<`, `<=`, `>=`, and `>`.
-"""
+""" lanemask
 @inline function lanemask(pred::F) where F
     if pred === Base.:(==)
         ccall("llvm.nvvm.read.ptx.sreg.lanemask.eq", llvmcall, UInt32, ())

@@ -127,12 +127,12 @@ Supported predicates are `==`, `<`, `<=`, `>=`, and `>`.
     end
 end

-"""
+@doc """
     active_mask()

Returns a 32-bit mask indicating which threads in a warp are active with the current
executing thread.
-"""
+""" active_mask
 @inline active_mask() = @asmcall("activemask.b32 \$0;", "=r", false, UInt32, Tuple{})

 end
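The switch from implicit docstrings to the explicit `@doc str name` form binds each docstring to the name itself, so it stays attached when the adjacent definition is rewritten by `@device_functions`. A minimal standalone sketch of that form, using a hypothetical function `f`:

# `@doc` attaches the string to the binding `f`, independent of how the
# definition itself is (re)written by surrounding macros.
f() = 42

@doc """
    f()

Returns 42. (Hypothetical example.)
""" f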
8 changes: 6 additions & 2 deletions src/device/utils.jl

@@ -52,8 +52,12 @@ macro device_functions(ex)
             # descend in blocks
             push!(out.args, rewrite(arg))
         elseif Meta.isexpr(arg, [:function, :(=)])
-            # rewrite function definitions
-            push!(out.args, :(@device_function $arg))
+            # capture temp variable for Julia 1.13 and rewrite function definitions
+            if Meta.isexpr(arg, :(=)) && isa(arg.args[1], Symbol) && Meta.isexpr(arg.args[2], [:function, :(=)])
+                push!(out.args, Expr(:(=), arg.args[1], :(@device_function $(arg.args[2]))))
+            else
+                push!(out.args, :(@device_function $arg))
+            end
         else
             # preserve all the rest
             push!(out.args, arg)
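A sketch of the expression shape the new branch is meant to catch, assuming Julia 1.13 captures a definition's value into a temporary assignment like `tmp = f() = ...`; the rewrite keeps the outer assignment and wraps only the right-hand side in `@device_function`:

# An assignment whose LHS is a bare symbol and whose RHS is itself a
# definition (hypothetical names, mirroring the predicate above):
ex = :(tmp = f() = 1)
Meta.isexpr(ex, :(=)) && ex.args[1] isa Symbol &&
    Meta.isexpr(ex.args[2], [:function, :(=)])   # true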
5 changes: 5 additions & 0 deletions test/base/texture.jl

@@ -1,3 +1,6 @@
+# Broken on LLVM 20, see JuliaGPU/CUDA.jl#3037
+if !(v"20-" <= Base.libllvm_version < v"21-")
+
 using Interpolations

 @inline function calcpoint(blockIdx, blockDim, threadIdx, size)

@@ -241,3 +244,5 @@ end
     end
 end
 end
+
+end
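The guard uses `v"20-"` and `v"21-"` as prerelease lower bounds, so prerelease builds of LLVM 20 are excluded as well; for example:

# A version literal ending in `-` sorts below every release of that version:
v"20-" <= v"20.1.0" < v"21-"   # true  -> the file's tests are skipped
v"20-" <= v"19.1.7"            # false -> LLVM 19 still runs them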
10 changes: 8 additions & 2 deletions test/core/device/ldg.jl

@@ -1,6 +1,12 @@
 @testset "ldg" begin
-    ir = sprint(io->CUDA.code_llvm(io, CUDA.pointerref_ldg, Tuple{Core.LLVMPtr{Int,AS.Global},Int,Val{1}}))
-    @test occursin("@llvm.nvvm.ldg", ir)
+    ir = sprint(io->CUDA.code_llvm(io, CUDA.pointerref_ldg, Tuple{Core.LLVMPtr{Int,AS.Global},Int,Val{1}}; raw=true))
+    if Base.libllvm_version >= v"20"
+        # `@llvm.nvvm.ldg` was removed in LLVM 20; the auto-upgrade
+        # replaces it with a load bearing `!invariant.load` metadata
+        @test occursin("!invariant.load", ir)
+    else
+        @test occursin("@llvm.nvvm.ldg", ir)
+    end
 end
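The `raw=true` is presumably needed because the default cleaned-up `code_llvm` output strips metadata such as `!invariant.load`. An illustrative snippet of what the new branch expects to find; the operand names are made up, and only the metadata substring matters to the test:

# Illustrative line of post-upgrade IR on LLVM >= 20 (operands hypothetical):
ir_sample = "%val = load i64, ptr addrspace(1) %ptr, align 8, !invariant.load !0"
@assert occursin("!invariant.load", ir_sample)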