diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index c944641..1f9e012 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -13,7 +13,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.1'
+          - 'min'
           - '1'
 #          - 'nightly'
         os:
@@ -22,7 +22,7 @@ jobs:
           - x64
     steps:
       - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
@@ -41,6 +41,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - uses: codecov/codecov-action@v5
         with:
           file: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index 9880dd0..aabc045 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -28,8 +28,5 @@ jobs:
     steps:
       - uses: JuliaRegistries/TagBot@v1
         with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          # Edit the following line to reflect the actual name of the GitHub Secret containing your private key
-          ssh: ${{ secrets.DOCUMENTER_KEY }}
-          # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }}
+          token: ${{ secrets.TAGBOT_TOKEN }}
           registry: HolyLab/HolyLabRegistry
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index f6c4e53..af92025 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,9 +15,9 @@ SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
 CUDA = "3,4,5"
 ImageCore = "0.8.1, 0.9, 0.10"
 Primes = "0.4, 0.5"
-RegisterCore = "0.2"
-RegisterMismatchCommon = "0.2.2"
-julia = "1"
+RegisterCore = "0.2, 1"
+RegisterMismatchCommon = "0.2.2, 1"
+julia = "1.10"
 
 [extras]
 CenterIndexedArrays = "46a7138f-0d70-54e1-8ada-fb8296f91f24"
diff --git a/src/RegisterMismatchCuda.jl b/src/RegisterMismatchCuda.jl
index d4abd85..db93d65 100644
--- a/src/RegisterMismatchCuda.jl
+++ b/src/RegisterMismatchCuda.jl
@@ -35,38 +35,38 @@ The major types and functions exported are:
 """
 RegisterMismatchCuda
 
-mutable struct CuRCpair{T<:AbstractFloat,N}
-    R::CuArray{T,N}
-    C::CuArray{Complex{T},N}
+mutable struct CuRCpair{T <: AbstractFloat, N}
+    R::CuArray{T, N}
+    C::CuArray{Complex{T}, N}
     rng::NTuple{N}
 end
 
-function CuRCpair{T}(::UndefInitializer, realsize::Dims{N}) where {T<:AbstractFloat,N}
+function CuRCpair{T}(::UndefInitializer, realsize::Dims{N}) where {T <: AbstractFloat, N}
     csize = [realsize...]
-    csize[1] = realsize[1]>>1 + 1
-    C = CuArray{Complex{T},N}(undef, csize...)
+    csize[1] = realsize[1] >> 1 + 1
+    C = CuArray{Complex{T}, N}(undef, csize...)
     csize[1] *= 2
     R = reshape(reinterpret(T, vec(C)), (csize...,)::Dims{N})
-    rng = map(n->1:n,realsize)
-    CuRCpair{T,N}(R, C, rng)
+    rng = map(n -> 1:n, realsize)
+    return CuRCpair{T, N}(R, C, rng)
 end
 
-function CuRCpair(A::Array{T}) where {T<:AbstractFloat}
+function CuRCpair(A::Array{T}) where {T <: AbstractFloat}
     P = CuRCpair(eltype(A), size(A))
     copyto!(P.R, A)
-    P
+    return P
 end
 
-function plan_fft_pair(P::CuRCpair{T,N}) where {T,N}
+function plan_fft_pair(P::CuRCpair{T, N}) where {T, N}
     fwd = CUFFT.plan_rfft(P.R[P.rng...])
     inv = CUFFT.plan_inv(fwd)
-    fwd, inv
+    return fwd, inv
 end
 
-mutable struct NanCorrFFTs{T<:AbstractFloat,N}
-    I0::CuRCpair{T,N}
-    I1::CuRCpair{T,N}
-    I2::CuRCpair{T,N}
+mutable struct NanCorrFFTs{T <: AbstractFloat, N}
+    I0::CuRCpair{T, N}
+    I1::CuRCpair{T, N}
+    I2::CuRCpair{T, N}
 end
 
 """
@@ -75,51 +75,51 @@ end
 Prepare for FFT-based mismatch computations over domains of size `aperture_width`, computing the
 mismatch up to shifts of size `maxshift`.
 """
-mutable struct CMStorage{T<:AbstractFloat,N}
+mutable struct CMStorage{T <: AbstractFloat, N}
     aperture_width::Vector{Float64}
     maxshift::Vector{Int}
     getindexes::Vector{UnitRange{Int}}   # indexes for pulling padded data, in source-coordinates
     setindexes::Vector{UnitRange{Int}}   # indexes for pushing fixed data, in source-coordinates
-    fixed::NanCorrFFTs{T,N}
-    moving::NanCorrFFTs{T,N}
-    num::CuRCpair{T,N}
-    denom::CuRCpair{T,N}
-    numhost::Array{T,N}
-    denomhost::Array{T,N}
+    fixed::NanCorrFFTs{T, N}
+    moving::NanCorrFFTs{T, N}
+    num::CuRCpair{T, N}
+    denom::CuRCpair{T, N}
+    numhost::Array{T, N}
+    denomhost::Array{T, N}
     # the next two store the result of calling plan_fft! and plan_ifft!
     fftfunc::Any
     ifftfunc::Any
     shiftindices::Vector{Vector{Int}} # indices for performing fftshift & snipping from -maxshift:maxshift
 end
 
-function CMStorage{T,N}(::UndefInitializer, aperture_width::NTuple{N,<:Real}, maxshift::Dims{N}) where {T<:AbstractFloat,N}
-    blocksize = map(x->ceil(Int,x), aperture_width)
+function CMStorage{T, N}(::UndefInitializer, aperture_width::NTuple{N, <:Real}, maxshift::Dims{N}) where {T <: AbstractFloat, N}
+    blocksize = map(x -> ceil(Int, x), aperture_width)
     padsz = padsize(blocksize, maxshift)
     getindexes = padranges(blocksize, maxshift)
-    setindexes = UnitRange{Int}[(1:blocksize[i]).+maxshift[i] for i = 1:length(blocksize)]
-    fixed  = NanCorrFFTs(CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz))
+    setindexes = UnitRange{Int}[(1:blocksize[i]) .+ maxshift[i] for i in 1:length(blocksize)]
+    fixed = NanCorrFFTs(CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz))
     moving = NanCorrFFTs(CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz), CuRCpair{T}(undef, padsz))
     num = CuRCpair{T}(undef, padsz)
     denom = CuRCpair{T}(undef, padsz)
-    mmsz = map(x->2x+1, (maxshift...,))
+    mmsz = map(x -> 2x + 1, (maxshift...,))
     numhost, denomhost = Array{T}(undef, mmsz), Array{T}(undef, mmsz)
     fftfunc, ifftfunc = plan_fft_pair(num)
     maxshiftv = [maxshift...]
-    shiftindices = Vector{Int}[ [padsz[i].+(-maxshift[i]+1:0); 1:maxshift[i]+1] for i = 1:length(maxshift) ]
-    CMStorage{T,N}(Float64[aperture_width...], maxshiftv, getindexes, setindexes, fixed, moving, num, denom, numhost, denomhost, fftfunc, ifftfunc, shiftindices)
+    shiftindices = Vector{Int}[ [padsz[i] .+ ((-maxshift[i] + 1):0); 1:(maxshift[i] + 1)] for i in 1:length(maxshift) ]
+    return CMStorage{T, N}(Float64[aperture_width...], maxshiftv, getindexes, setindexes, fixed, moving, num, denom, numhost, denomhost, fftfunc, ifftfunc, shiftindices)
 end
 
-CMStorage{T}(::UndefInitializer, aperture_width::NTuple{N,<:Real}, maxshift::Dims{N}) where {T<:AbstractFloat,N} =
-    CMStorage{T,N}(undef, aperture_width, maxshift)
+CMStorage{T}(::UndefInitializer, aperture_width::NTuple{N, <:Real}, maxshift::Dims{N}) where {T <: AbstractFloat, N} =
+    CMStorage{T, N}(undef, aperture_width, maxshift)
 
 # Note: display doesn't do anything, but here for compatibility with the CPU version
-CMStorage{T}(::UndefInitializer, blocksize::NTuple{N,<:Real}, maxshift::Dims{N}; display=false) where {T<:Real,N} = CMStorage{T}(undef, blocksize, maxshift)
-CMStorage{T,N}(::UndefInitializer, blocksize::NTuple{N,<:Real}, maxshift::Dims{N}; display=false) where {T<:Real,N} = CMStorage{T,N}(undef, blocksize, maxshift)
+CMStorage{T}(::UndefInitializer, blocksize::NTuple{N, <:Real}, maxshift::Dims{N}; display = false) where {T <: Real, N} = CMStorage{T}(undef, blocksize, maxshift)
+CMStorage{T, N}(::UndefInitializer, blocksize::NTuple{N, <:Real}, maxshift::Dims{N}; display = false) where {T <: Real, N} = CMStorage{T, N}(undef, blocksize, maxshift)
 
 CUDA.context(cms::CMStorage) = context(cms.num.C)
 
-eltype(cms::CMStorage{T,N}) where {T,N} = T
- ndims(cms::CMStorage{T,N}) where {T,N} = N
+eltype(cms::CMStorage{T, N}) where {T, N} = T
+ndims(cms::CMStorage{T, N}) where {T, N} = N
 
 # Some tools from Images
 sdims(A::CuArray) = ndims(A)
@@ -140,22 +140,22 @@ normalization scheme (`:intensity` or `:pixels`).
 
 This operation is synchronous with respect to the host.
 """
-function mismatch(::Type{T}, fixed::AbstractArray, moving::AbstractArray, maxshift::DimsLike; normalization = :intensity) where T<:Real
+function mismatch(::Type{T}, fixed::AbstractArray, moving::AbstractArray, maxshift::DimsLike; normalization = :intensity) where {T <: Real}
     assertsamesize(fixed, moving)
-    d_fixed  = CuArray{T}(fixed)
+    d_fixed = CuArray{T}(fixed)
     d_moving = CuArray{T}(moving)
-    mm = mismatch(d_fixed, d_moving, maxshift, normalization=normalization)
-    mm
+    mm = mismatch(d_fixed, d_moving, maxshift, normalization = normalization)
+    return mm
 end
 
-function mismatch(fixed::CuArray{T}, moving::CuArray{T}, maxshift::DimsLike; normalization = :intensity) where T
+function mismatch(fixed::CuArray{T}, moving::CuArray{T}, maxshift::DimsLike; normalization = :intensity) where {T}
     assertsamesize(fixed, moving)
     nd = ndims(fixed)
     cms = CMStorage{T}(undef, size(fixed), maxshift)
     mm = MismatchArray(T, (2 .* maxshift .+ 1)...)
     fillfixed!(cms, fixed)
-    mismatch!(mm, cms, moving, normalization=normalization)
-    mm
+    mismatch!(mm, cms, moving, normalization = normalization)
+    return mm
 end
 
 """
@@ -183,39 +183,43 @@ in a rectangular grid, you can use an `N`-dimensional array-of-tuples
 (or array-of-vectors) or an `N+1`-dimensional array with the center
 positions specified along the first dimension. See `aperture_grid`.
 """
-function mismatch_apertures(::Type{T},
-                            fixed::AbstractArray,
-                            moving::AbstractArray,
-                            aperture_centers::AbstractArray,
-                            aperture_width::WidthLike,
-                            maxshift::DimsLike;
-                            kwargs...) where T
+function mismatch_apertures(
+        ::Type{T},
+        fixed::AbstractArray,
+        moving::AbstractArray,
+        aperture_centers::AbstractArray,
+        aperture_width::WidthLike,
+        maxshift::DimsLike;
+        kwargs...
+    ) where {T}
     assertsamesize(fixed, moving)
-    d_fixed  = CuArray{T}(sdata(fixed))
+    d_fixed = CuArray{T}(sdata(fixed))
     d_moving = CuArray{T}(moving)
     mms = mismatch_apertures(d_fixed, d_moving, aperture_centers, aperture_width, maxshift; kwargs...)
-    mms
+    return mms
 end
 
 # only difference here relative to RegisterMismatch is the lack of the
 # FFTW keywords
-function mismatch_apertures(fixed::CuArray{T},
-                            moving::CuArray,
-                            aperture_centers::AbstractArray,
-                            aperture_width::WidthLike,
-                            maxshift::DimsLike;
-                            normalization = :pixels,
-                            kwargs...) where T
+function mismatch_apertures(
+        fixed::CuArray{T},
+        moving::CuArray,
+        aperture_centers::AbstractArray,
+        aperture_width::WidthLike,
+        maxshift::DimsLike;
+        normalization = :pixels,
+        kwargs...
+    ) where {T}
     nd = sdims(fixed)
-    assertsamesize(fixed,moving)
+    assertsamesize(fixed, moving)
     (length(aperture_width) == nd && length(maxshift) == nd) || error("Dimensionality mismatch")
     mms = allocate_mmarrays(T, aperture_centers, maxshift)
     cms = CMStorage{T}(undef, aperture_width, maxshift; kwargs...)
-    mismatch_apertures!(mms, fixed, moving, aperture_centers, cms; normalization=normalization)
-    mms
+    mismatch_apertures!(mms, fixed, moving, aperture_centers, cms; normalization = normalization)
+    return mms
 end
 
-function fillfixed!(cms::CMStorage{T}, fixed::CuArray; f_indexes = ntuple(i->1:size(fixed,i), ndims(fixed))) where T
+function fillfixed!(cms::CMStorage{T}, fixed::CuArray; f_indexes = ntuple(i -> 1:size(fixed, i), ndims(fixed))) where {T}
     ctx = context(cms)
     context(fixed) == ctx || error("Fixed and cms must be on the same context")
     nd = ndims(cms)
@@ -227,17 +231,17 @@ function fillfixed!(cms::CMStorage{T}, fixed::CuArray; f_indexes = ntuple(i->1:s
     fill!(paddedf, NaN)
     dstindexes = Array{UnitRange{Int}}(undef, nd)
     srcindexes = Array{UnitRange{Int}}(undef, nd)
-    for idim = 1:nd
+    for idim in 1:nd
         tmp = f_indexes[idim]
-        i1 = first(tmp) >= 1 ? 1 : 2-first(tmp)
-        i2 = last(tmp) <= size(fixed, idim) ? length(tmp) : length(tmp)-(last(tmp)-size(fixed, idim))
+        i1 = first(tmp) >= 1 ? 1 : 2 - first(tmp)
+        i2 = last(tmp) <= size(fixed, idim) ? length(tmp) : length(tmp) - (last(tmp) - size(fixed, idim))
         srcindexes[idim] = tmp[i1]:tmp[i2]
         dstindexes[idim] = cms.setindexes[idim][i1]:cms.setindexes[idim][i2]
     end
     copyto!(paddedf, CartesianIndices(Tuple(dstindexes)), fixed, CartesianIndices(Tuple(srcindexes))) #This conversion may be inefficient.
     # Prepare the components of the convolution
-    threadspb = calculate_threads(size(paddedf), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)÷2)
-    nblocks = ceil.(Int, size(paddedf)./ threadspb)
+    threadspb = calculate_threads(size(paddedf), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) ÷ 2)
+    nblocks = ceil.(Int, size(paddedf) ./ threadspb)
     @cuda blocks = nblocks threads = threadspb kernel_conv_components!(paddedf, cms.fixed.I2.R, cms.fixed.I0.R)
     synchronize()
     # Compute FFTs
@@ -245,7 +249,7 @@ function fillfixed!(cms::CMStorage{T}, fixed::CuArray; f_indexes = ntuple(i->1:s
     for item in (obj.I0, obj.I1, obj.I2)
         copyto!(item.C, cms.fftfunc * item.R[item.rng...])
     end
-    obj
+    return obj
 end
 
 """
@@ -253,7 +257,7 @@ end
 computes the mismatch as a function of shift, storing the result in
 `mm`. The `fixed` image has been prepared in `cms`, a `CMStorage` object.
 """
-function mismatch!(mm::MismatchArray, cms::CMStorage{T}, moving::CuArray; normalization = :intensity, m_offset = ntuple(i->0, ndims(cms))) where T
+function mismatch!(mm::MismatchArray, cms::CMStorage{T}, moving::CuArray; normalization = :intensity, m_offset = ntuple(i -> 0, ndims(cms))) where {T}
     ctx = context(cms)
     context(moving) == ctx || error("Fixed and cms must be on the same context")
     activate(ctx)
@@ -261,10 +265,10 @@ function mismatch!(mm::MismatchArray, cms::CMStorage{T}, moving::CuArray; normal
     checksize_maxshift(mm, cms.maxshift)
     nd = ndims(cms)
     paddedm = cms.moving.I1.R
-    get!(paddedm, moving, ntuple(d->cms.getindexes[d].+m_offset[d], nd), T(NaN))
+    get!(paddedm, moving, ntuple(d -> cms.getindexes[d] .+ m_offset[d], nd), T(NaN))
     # Prepare the components of the convolution
-    threadspb = calculate_threads(size(paddedm), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)÷2)
-    nblocks = ceil.(Int, size(paddedm)./ threadspb)
+    threadspb = calculate_threads(size(paddedm), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) ÷ 2)
+    nblocks = ceil.(Int, size(paddedm) ./ threadspb)
     @cuda blocks = nblocks threads = threadspb kernel_conv_components!(paddedm, cms.moving.I2.R, cms.moving.I0.R)
     synchronize()
     # Compute FFTs
@@ -275,11 +279,13 @@ function mismatch!(mm::MismatchArray, cms::CMStorage{T}, moving::CuArray; normal
     # Perform the convolution in fourier space
     d_numC = cms.num.C
     d_denomC = cms.denom.C
-    args = (cms.fixed.I1.C,  cms.fixed.I2.C,  cms.fixed.I0.C,
-            cms.moving.I1.C, cms.moving.I2.C, cms.moving.I0.C,
-            cms.num.C, cms.denom.C)
-    threadspb = calculate_threads(size(d_numC), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)÷2)
-    nblocks = ceil.(Int, size(d_numC)./ threadspb)
+    args = (
+        cms.fixed.I1.C, cms.fixed.I2.C, cms.fixed.I0.C,
+        cms.moving.I1.C, cms.moving.I2.C, cms.moving.I0.C,
+        cms.num.C, cms.denom.C,
+    )
+    threadspb = calculate_threads(size(d_numC), attribute(dev, CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) ÷ 2)
+    nblocks = ceil.(Int, size(d_numC) ./ threadspb)
     if normalization == :intensity
         @cuda blocks = nblocks threads = threadspb kernel_calcNumDenom_intensity!(args...)
     elseif normalization == :pixels
@@ -292,11 +298,11 @@ function mismatch!(mm::MismatchArray, cms::CMStorage{T}, moving::CuArray; normal
     # Compute the IFFTs
     d_num = cms.num.R
     d_denom = cms.denom.R
-    rng = CartesianIndices(cms.num.rng);
+    rng = CartesianIndices(cms.num.rng)
     copyto!(d_num, rng, cms.ifftfunc * d_numC, rng)
     copyto!(d_denom, rng, cms.ifftfunc * d_denomC, rng)
     # Copy result to host
-    copyto!(mm, (view(Array(d_num), cms.shiftindices...), view(Array(d_denom), cms.shiftindices...)))
+    return copyto!(mm, (view(Array(d_num), cms.shiftindices...), view(Array(d_denom), cms.shiftindices...)))
 end
 
 """
@@ -308,36 +314,36 @@ in `cms`, a `CMStorage` object. The results are stored in `mms`, an
 Array-of-MismatchArrays which must have length equal to the number of
 aperture centers.
 """
-function mismatch_apertures!(mms, fixed, moving, aperture_centers, cms; normalization=:pixels)
+function mismatch_apertures!(mms, fixed, moving, aperture_centers, cms; normalization = :pixels)
     assertsamesize(fixed, moving)
     N = ndims(cms)
-    for (mm,center) in zip(mms, each_point(aperture_centers))
+    for (mm, center) in zip(mms, each_point(aperture_centers))
         rng = aperture_range(center, cms.aperture_width)
-        fillfixed!(cms, fixed; f_indexes=rng)
-        offset = [first(rng[d])-1 for d = 1:N]
-        mismatch!(mm, cms, moving; normalization=normalization, m_offset=offset)
+        fillfixed!(cms, fixed; f_indexes = rng)
+        offset = [first(rng[d]) - 1 for d in 1:N]
+        mismatch!(mm, cms, moving; normalization = normalization, m_offset = offset)
     end
     synchronize()
-    mms
+    return mms
 end
 
 
 ### Utilities
 
 function assertsamesize(A::AbstractArray, B::AbstractArray)
-    size(A,1) == size(B,1) && size(A,2) == size(B,2) && size(A,3) == size(B,3) || error("Arrays are not the same size")
+    return size(A, 1) == size(B, 1) && size(A, 2) == size(B, 2) && size(A, 3) == size(B, 3) || error("Arrays are not the same size")
 end
 
 ### Deprecations
 
-@deprecate CuRCpair(realtype::Type{T}, realsize) where {T<:AbstractFloat} CuRCpair{T}(undef, realsize)
+@deprecate CuRCpair(realtype::Type{T}, realsize) where {T <: AbstractFloat} CuRCpair{T}(undef, realsize)
 
-function CMStorage{T}(::UndefInitializer, aperture_width::WidthLike, maxshift::DimsLike; kwargs...) where {T<:Real}
+function CMStorage{T}(::UndefInitializer, aperture_width::WidthLike, maxshift::DimsLike; kwargs...) where {T <: Real}
     Base.depwarn("CMStorage with aperture_width::$(typeof(aperture_width)) and maxshift::$(typeof(maxshift)) is deprecated, use tuples instead", :CMStorage)
     (N = length(aperture_width)) == length(maxshift) || error("Dimensionality mismatch")
-    return CMStorage{T,N}(undef, (aperture_width...,), (maxshift...,); kwargs...)
+    return CMStorage{T, N}(undef, (aperture_width...,), (maxshift...,); kwargs...)
 end
 
-@deprecate CMStorage(::Type{T}, aperture_width::WidthLike, maxshift::DimsLike; kwargs...) where {T<:AbstractFloat}  CMStorage{T}(undef, aperture_width, maxshift; kwargs...)
+@deprecate CMStorage(::Type{T}, aperture_width::WidthLike, maxshift::DimsLike; kwargs...) where {T <: AbstractFloat}  CMStorage{T}(undef, aperture_width, maxshift; kwargs...)
 
 end # module
diff --git a/src/kernels.jl b/src/kernels.jl
index 2af71ee..e650269 100644
--- a/src/kernels.jl
+++ b/src/kernels.jl
@@ -1,17 +1,21 @@
-cucartesianindex(a::AbstractVector) = CartesianIndex((blockIdx().x-1) * blockDim().x + threadIdx().x)
-cucartesianindex(a::AbstractMatrix) = CartesianIndex((blockIdx().x-1) * blockDim().x + threadIdx().x,
-                                                     (blockIdx().y-1) * blockDim().y + threadIdx().y)
-cucartesianindex(a::AbstractArray{<:Any,3}) = CartesianIndex((blockIdx().x-1) * blockDim().x + threadIdx().x,
-                                                             (blockIdx().y-1) * blockDim().y + threadIdx().y,
-                                                             (blockIdx().z-1) * blockDim().z + threadIdx().z)
+cucartesianindex(a::AbstractVector) = CartesianIndex((blockIdx().x - 1) * blockDim().x + threadIdx().x)
+cucartesianindex(a::AbstractMatrix) = CartesianIndex(
+    (blockIdx().x - 1) * blockDim().x + threadIdx().x,
+    (blockIdx().y - 1) * blockDim().y + threadIdx().y
+)
+cucartesianindex(a::AbstractArray{<:Any, 3}) = CartesianIndex(
+    (blockIdx().x - 1) * blockDim().x + threadIdx().x,
+    (blockIdx().y - 1) * blockDim().y + threadIdx().y,
+    (blockIdx().z - 1) * blockDim().z + threadIdx().z
+)
 
-function calculate_threads(sz::Dims{N}, maxthreadspb) where N
+function calculate_threads(sz::Dims{N}, maxthreadspb) where {N}
     threads = ones(Int, N)
     L = prod(sz)
     iterdim = Iterators.cycle(1:N)
     dim, state = iterate(iterdim)
     for (b, p) in factor(maxthreadspb)
-        for i = 1:p
+        for i in 1:p
             P = prod(threads)
             P >= L && return (threads...,)::Dims{N}
             threads[dim] *= b
@@ -25,7 +29,7 @@ end
 Pitch must be expressed in elements, not bytes!
 width (x) corresponds to the fastest dimension (the same as pitch), depth (z) to the slowest
 The array-dimension parameters must be the same for all 3 arrays. =#
-function kernel_conv_components!(A::T, A2::T, thetaA::T) where T
+function kernel_conv_components!(A::T, A2::T, thetaA::T) where {T}
     Tel = eltype(T)
     i = cucartesianindex(A)
     if checkbounds(Bool, A, i)
@@ -36,15 +40,17 @@ function kernel_conv_components!(A::T, A2::T, thetaA::T) where T
             A[i] = local_A
         end
         thetaA[i] = local_thetaA ? Tel(0) : Tel(1)
-        A2[i] = local_A * local_A;
+        A2[i] = local_A * local_A
     end
     return nothing
 end
 
 # CUDA kernel_calcNumDenom: case INTENSITY, compute numerator and denominator before ifftn
-function kernel_calcNumDenom_intensity!(f_fft::T, f2_fft::T, thetaf_fft::T,
-                                        m_fft::T, m2_fft::T, thetam_fft::T,
-                                        numerator_fft::T, denominator_fft::T) where T
+function kernel_calcNumDenom_intensity!(
+        f_fft::T, f2_fft::T, thetaf_fft::T,
+        m_fft::T, m2_fft::T, thetam_fft::T,
+        numerator_fft::T, denominator_fft::T
+    ) where {T}
     i = cucartesianindex(f_fft)
     if checkbounds(Bool, f_fft, i)
         c1 = f2_fft[i]' * thetam_fft[i]
@@ -59,18 +65,20 @@ function kernel_calcNumDenom_intensity!(f_fft::T, f2_fft::T, thetaf_fft::T,
 end
 
 # CUDA kernel_calcNumDenom: case PIXELS, compute numerator and denominator before ifftn
-function kernel_calcNumDenom_pixels!(f_fft::T, f2_fft::T, thetaf_fft::T,
-                                        m_fft::T, m2_fft::T, thetam_fft::T,
-                                        numerator_fft::T, denominator_fft::T) where T
+function kernel_calcNumDenom_pixels!(
+        f_fft::T, f2_fft::T, thetaf_fft::T,
+        m_fft::T, m2_fft::T, thetam_fft::T,
+        numerator_fft::T, denominator_fft::T
+    ) where {T}
     i = cucartesianindex(f_fft)
     if checkbounds(Bool, f_fft, i)
-         c1 = -2 * f_fft[i]'
-         c1 = c1 * m_fft[i]
-         c2 = thetaf_fft[i]' * m2_fft[i]
-         c1 = c1 + c2
-         c2 = f2_fft[i]' * thetam_fft[i]
-         numerator_fft[i] = c1 + c2;
-         denominator_fft[i] = thetaf_fft[i]' * thetam_fft[i]
+        c1 = -2 * f_fft[i]'
+        c1 = c1 * m_fft[i]
+        c2 = thetaf_fft[i]' * m2_fft[i]
+        c1 = c1 + c2
+        c2 = f2_fft[i]' * thetam_fft[i]
+        numerator_fft[i] = c1 + c2
+        denominator_fft[i] = thetaf_fft[i]' * thetam_fft[i]
     end
     return nothing
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index c6b816b..2fcde5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,12 +3,18 @@ using CUDA, RegisterCore, CenterIndexedArrays
 import RegisterMismatchCuda
 RM = RegisterMismatchCuda
 
-accuracy = 1e-5
+accuracy = 1.0e-5
+
+if !CUDA.functional()
+    @warn "No functional CUDA device found; skipping tests."
+    exit(0)
+end
 
 devlist = CuDevice[]
-map(dev->capability(dev) >= v"2.0" ? push!(devlist,dev) : nothing, devices())
+map(dev -> capability(dev) >= v"2.0" ? push!(devlist, dev) : nothing, devices())
 if isempty(devlist)
-    error("There is no CUDA device having capability bigger than version 2.0.")
+    @warn "No CUDA device with capability >= 2.0 found; skipping tests."
+    exit(0)
 end
 
 @testset "kernel_conv_components" begin
@@ -45,16 +51,16 @@ end
 end
 
 @testset "kernel_calcNumDenom" begin
-    function run_cpu(f::Function, f_fft, f2_fft, thetaf_fft, m_fft,m2_fft, thetam_fft)
+    function run_cpu(f::Function, f_fft, f2_fft, thetaf_fft, m_fft, m2_fft, thetam_fft)
         numerator_fft = similar(f_fft)
         denominator_fft = similar(f_fft)
         for i in CartesianIndices(f_fft)
-            f(i, f_fft, f2_fft, thetaf_fft, m_fft,m2_fft, thetam_fft, numerator_fft, denominator_fft)
+            f(i, f_fft, f2_fft, thetaf_fft, m_fft, m2_fft, thetam_fft, numerator_fft, denominator_fft)
         end
         numerator_fft, denominator_fft
     end
 
-    function run_gpu(kernel::Function, f_fft, f2_fft, thetaf_fft, m_fft,m2_fft, thetam_fft)
+    function run_gpu(kernel::Function, f_fft, f2_fft, thetaf_fft, m_fft, m2_fft, thetam_fft)
         d_f = CuArray(f_fft)
         d_f2 = CuArray(f2_fft)
         d_tf = CuArray(thetaf_fft)
@@ -73,7 +79,7 @@ end
         Array(d_numerator), Array(d_denominator)
     end
 
-    function calcNumDenom_intensity!(i, f_fft, f2_fft, thetaf_fft, m_fft,m2_fft, thetam_fft, numerator_fft, denominator_fft)
+    function calcNumDenom_intensity!(i, f_fft, f2_fft, thetaf_fft, m_fft, m2_fft, thetam_fft, numerator_fft, denominator_fft)
         if checkbounds(Bool, f_fft, i)
             c1 = f2_fft[i]' * thetam_fft[i]
             c2 = thetaf_fft[i]' * m2_fft[i]
@@ -85,34 +91,34 @@ end
         end
     end
 
-    function calcNumDenom_pixels!(i, f_fft, f2_fft, thetaf_fft, m_fft,m2_fft, thetam_fft, numerator_fft, denominator_fft)
+    function calcNumDenom_pixels!(i, f_fft, f2_fft, thetaf_fft, m_fft, m2_fft, thetam_fft, numerator_fft, denominator_fft)
         if checkbounds(Bool, f_fft, i)
             c1 = -2 * f_fft[i]'
             c1 = c1 * m_fft[i]
             c2 = thetaf_fft[i]' * m2_fft[i]
             c1 = c1 + c2
             c2 = f2_fft[i]' * thetam_fft[i]
-            numerator_fft[i] = c1 + c2;
+            numerator_fft[i] = c1 + c2
             denominator_fft[i] = thetaf_fft[i]' * thetam_fft[i]
         end
     end
 
     for dev in devlist
         device!(dev) do
-            A = rand(Complex{Float64},3,3)
-            B = rand(Complex{Float64},3,3)
-            C = rand(Complex{Float64},3,3)
-            D = rand(Complex{Float64},3,3)
-            E = rand(Complex{Float64},3,3)
-            F = rand(Complex{Float64},3,3)
-            num_cpu, denom_cpu = run_cpu(calcNumDenom_intensity!,A,B,C,D,E,F)
-            num_gpu, denom_gpu = run_gpu(RM.kernel_calcNumDenom_intensity!,A,B,C,D,E,F)
-            @test ≈(num_cpu, num_gpu, atol=accuracy)
-            @test ≈(denom_cpu, denom_gpu, atol=accuracy)
-            num_cpu, denom_cpu = run_cpu(calcNumDenom_pixels!,A,B,C,D,E,F)
-            num_gpu, denom_gpu = run_gpu(RM.kernel_calcNumDenom_pixels!,A,B,C,D,E,F)
-            @test ≈(num_cpu, num_gpu, atol=accuracy)
-            @test ≈(denom_cpu, denom_gpu, atol=accuracy)
+            A = rand(Complex{Float64}, 3, 3)
+            B = rand(Complex{Float64}, 3, 3)
+            C = rand(Complex{Float64}, 3, 3)
+            D = rand(Complex{Float64}, 3, 3)
+            E = rand(Complex{Float64}, 3, 3)
+            F = rand(Complex{Float64}, 3, 3)
+            num_cpu, denom_cpu = run_cpu(calcNumDenom_intensity!, A, B, C, D, E, F)
+            num_gpu, denom_gpu = run_gpu(RM.kernel_calcNumDenom_intensity!, A, B, C, D, E, F)
+            @test ≈(num_cpu, num_gpu, atol = accuracy)
+            @test ≈(denom_cpu, denom_gpu, atol = accuracy)
+            num_cpu, denom_cpu = run_cpu(calcNumDenom_pixels!, A, B, C, D, E, F)
+            num_gpu, denom_gpu = run_gpu(RM.kernel_calcNumDenom_pixels!, A, B, C, D, E, F)
+            @test ≈(num_cpu, num_gpu, atol = accuracy)
+            @test ≈(denom_cpu, denom_gpu, atol = accuracy)
         end
     end
 end
@@ -120,18 +126,18 @@ end
 @testset "Mismatch 2D test" begin
     for dev in devlist
         device!(dev) do
-            A = zeros(5,5)
-            A[3,3] = 3
-            B = zeros(5,5)
-            B[4,5] = 3
-            maxshift = (2,3)
+            A = zeros(5, 5)
+            A[3, 3] = 3
+            B = zeros(5, 5)
+            B[4, 5] = 3
+            maxshift = (2, 3)
             mm = RM.mismatch(A, A, maxshift)
             num, denom = RM.separate(mm)
             RM.truncatenoise!(mm, 0.01)
-            @test indmin_mismatch(mm, 0.01) == CartesianIndex((0,0))
+            @test argmin_mismatch(mm, 0.01) == CartesianIndex((0, 0))
             mm = RM.mismatch(A, B, maxshift)
             RM.truncatenoise!(mm, 0.01)
-            @test indmin_mismatch(mm, 0.01) == CartesianIndex((1,2))
+            @test argmin_mismatch(mm, 0.01) == CartesianIndex((1, 2))
 
             # Testing on more complex objects
             # img = rand(map(UInt8,0:255), 256, 256)
@@ -139,13 +145,13 @@ end
             # fixed = map(Float32, img[rng...])
             # moving = map(Float32, img[rng[1].+13, rng[2].-8])
             # maxshift = (20, 20)
-            img = rand(map(UInt8,0:255), 60, 60)
+            img = rand(map(UInt8, 0:255), 60, 60)
             rng = Any[1:40, 10:50]
             fixed = map(Float32, img[rng...])
-            moving = map(Float32, img[rng[1].+6, rng[2].-8])
+            moving = map(Float32, img[rng[1] .+ 6, rng[2] .- 8])
             maxshift = (10, 10)
             mm = RM.mismatch(fixed, moving, maxshift)
-            @test indmin_mismatch(mm, 0.01) == CartesianIndex((-6,8))
+            @test argmin_mismatch(mm, 0.01) == CartesianIndex((-6, 8))
         end
     end
 end
@@ -154,19 +160,19 @@ end
     for dev in devlist
         device!(dev) do
             # Test 3d similarly
-            Apad = parent(padarray(reshape(1:80*6, 10, 8, 6), Fill(0, (4,3,2))))
-            Bpad = parent(padarray(rand(1:80*6, 10, 8, 6), Fill(0, (4,3,2))))
-            mm = RM.mismatch(Apad, Bpad, (4,3,2))
+            Apad = parent(padarray(reshape(1:(80 * 6), 10, 8, 6), Fill(0, (4, 3, 2))))
+            Bpad = parent(padarray(rand(1:(80 * 6), 10, 8, 6), Fill(0, (4, 3, 2))))
+            mm = RM.mismatch(Apad, Bpad, (4, 3, 2))
             num, denom = RegisterCore.separate(mm)
             mmref = CenterIndexedArray{Float64}(undef, 9, 7, 5)
-            for k=-2:2, j = -3:3, i = -4:4
-                Bshift = circshift(Bpad,-[i,j,k])
-                df = Apad-Bshift
-                mmref[i,j,k] = sum(df.^2)
+            for k in -2:2, j in -3:3, i in -4:4
+                Bshift = circshift(Bpad, -[i, j, k])
+                df = Apad - Bshift
+                mmref[i, j, k] = sum(df .^ 2)
             end
-            nrm = sum(Apad.^2)+sum(Bpad.^2)
-            @test ≈(mmref.data, num.data, atol=accuracy*nrm)
-            @test ≈(fill(nrm,size(denom)), denom.data, atol=accuracy*nrm)
+            nrm = sum(Apad .^ 2) + sum(Bpad .^ 2)
+            @test ≈(mmref.data, num.data, atol = accuracy * nrm)
+            @test ≈(fill(nrm, size(denom)), denom.data, atol = accuracy * nrm)
         end
     end
 end
@@ -174,31 +180,31 @@ end
 @testset "Mismatch apertures 2D test" begin
     for dev in devlist
         device!(dev) do
-            for imsz in ((15,16), (14,17))
-                for maxshift in ((4,3), (3,2))
-                    for gridsize in ((3,3), (2,1), (2,3), (2,2), (1,3))
+            for imsz in ((15, 16), (14, 17))
+                for maxshift in ((4, 3), (3, 2))
+                    for gridsize in ((3, 3), (2, 1), (2, 3), (2, 2), (1, 3))
                         Apad = parent(padarray(reshape(1:prod(imsz), imsz[1], imsz[2]), Fill(0, maxshift, maxshift)))
                         Bpad = parent(padarray(rand(1:20, imsz[1], imsz[2]), Fill(0, maxshift, maxshift)))
                         # intensity normalization
-                        mms = RM.mismatch_apertures(Float64, Apad, Bpad, gridsize, maxshift, normalization=:intensity, display=false)
+                        mms = RM.mismatch_apertures(Float64, Apad, Bpad, gridsize, maxshift, normalization = :intensity, display = false)
                         nums, denoms = RegisterCore.separate(mms)
                         num = sum(nums)
                         denom = sum(denoms)
                         mm = CenterIndexedArray{Float64}(undef, (2 .* maxshift .+ 1)...)
-                        for j = -maxshift[2]:maxshift[2], i = -maxshift[1]:maxshift[1]
-                            Bshift = circshift(Bpad,-[i,j])
-                            df = Apad-Bshift
-                            mm[i,j] = sum(df.^2)
+                        for j in -maxshift[2]:maxshift[2], i in -maxshift[1]:maxshift[1]
+                            Bshift = circshift(Bpad, -[i, j])
+                            df = Apad - Bshift
+                            mm[i, j] = sum(df .^ 2)
                         end
-                        nrm = sum(Apad.^2)+sum(Bpad.^2)
-                        @test ≈(mm.data, num.data, atol=accuracy*nrm)
-                        @test ≈(fill(nrm,size(denom)), denom.data, atol=accuracy*nrm)
+                        nrm = sum(Apad .^ 2) + sum(Bpad .^ 2)
+                        @test ≈(mm.data, num.data, atol = accuracy * nrm)
+                        @test ≈(fill(nrm, size(denom)), denom.data, atol = accuracy * nrm)
                         # pixel normalization
-                        mms = RM.mismatch_apertures(Float64, Apad, Bpad, gridsize, maxshift, normalization=:pixels, display=false)
+                        mms = RM.mismatch_apertures(Float64, Apad, Bpad, gridsize, maxshift, normalization = :pixels, display = false)
                         _, denoms = RegisterCore.separate(mms)
                         denom = sum(denoms)
-                        n = Vector{Int}[size(Apad,i).-abs.(-maxshift[i]:maxshift[i]) for i = 1:2]
-                        @test ≈(denom.data, n[1].*n[2]', atol=accuracy*maximum(denom))
+                        n = Vector{Int}[size(Apad, i) .- abs.(-maxshift[i]:maxshift[i]) for i in 1:2]
+                        @test ≈(denom.data, n[1] .* n[2]', atol = accuracy * maximum(denom))
                     end
                 end
             end
@@ -210,21 +216,21 @@ end
     for dev in devlist
         device!(dev) do
             # Test 3d similarly
-            Apad = parent(padarray(reshape(1:80*6, 10, 8, 6), Fill(0, (4,3,2))))
-            Bpad = parent(padarray(rand(1:80*6, 10, 8, 6), Fill(0, (4,3,2))))
-            mms = RM.mismatch_apertures(Apad, Bpad, (2,3,2), (4,3,2), normalization=:intensity, display=false)
+            Apad = parent(padarray(reshape(1:(80 * 6), 10, 8, 6), Fill(0, (4, 3, 2))))
+            Bpad = parent(padarray(rand(1:(80 * 6), 10, 8, 6), Fill(0, (4, 3, 2))))
+            mms = RM.mismatch_apertures(Apad, Bpad, (2, 3, 2), (4, 3, 2), normalization = :intensity, display = false)
             nums, denoms = RegisterCore.separate(mms)
             num = sum(nums)
             denom = sum(denoms)
             mmref = CenterIndexedArray{Float64}(undef, 9, 7, 5)
-            for k=-2:2, j = -3:3, i = -4:4
-                Bshift = circshift(Bpad,-[i,j,k])
-                df = Apad-Bshift
-                mmref[i,j,k] = sum(df.^2)
+            for k in -2:2, j in -3:3, i in -4:4
+                Bshift = circshift(Bpad, -[i, j, k])
+                df = Apad - Bshift
+                mmref[i, j, k] = sum(df .^ 2)
             end
-            nrm = sum(Apad.^2)+sum(Bpad.^2)
-            @test ≈(mmref.data, num.data, atol=accuracy*nrm)
-            @test ≈(fill(sum(Apad.^2)+sum(Bpad.^2),size(denom)), denom.data, atol=accuracy*nrm)
+            nrm = sum(Apad .^ 2) + sum(Bpad .^ 2)
+            @test ≈(mmref.data, num.data, atol = accuracy * nrm)
+            @test ≈(fill(sum(Apad .^ 2) + sum(Bpad .^ 2), size(denom)), denom.data, atol = accuracy * nrm)
         end
     end
 end