Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ uuid = "8646bddf-ab1c-4fa7-9c51-ba187d647618"
version = "0.2.8"

[deps]
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand Down
51 changes: 51 additions & 0 deletions src/hmatrix.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using DataFlowTasks

"""
mutable struct HMatrix{R,T} <: AbstractMatrix{T}

Expand Down Expand Up @@ -156,6 +158,7 @@ function _show(io, hmat, allow_empty = false)
@printf(io, "\n\t min number of elements per leaf: %i", minimum(points_per_leaf))
@printf(io, "\n\t max number of elements per leaf: %i", maximum(points_per_leaf))
depth_per_leaf = map(depth, leaves_)
# TODO: the depth of tree is always zero because all leafs are roots
@printf(io, "\n\t depth of tree: %i", maximum(depth_per_leaf))
@printf(io, "\n\t compression ratio: %f\n", compression_ratio(hmat))
return io
Expand Down Expand Up @@ -467,6 +470,54 @@ function compress!(H::HMatrix, comp)
return H
end

"""
ancestors(H::HMatrix)

Return all ancestors of `H`.
"""
function ancestors(H::HMatrix)
ancestors = []
W = H
while W !== parentnode(W)
W = parentnode(W)
push!(ancestors, W)
end
return ancestors
end

"""
issubmatrix(A::HMatrix, B::HMatrix)

Return `true` if B is a submatrix of A.
"""
function issubmatrix(A::HMatrix, B::HMatrix)
for ancB in ancestors(B)
if A === ancB
return true
end
end
return false
end

function DataFlowTasks.memory_overlap(A::HMatrix, B::HMatrix)
# TODO: compare leaves in more efficient way.
if A === B
return true
elseif issubmatrix(A, B) || issubmatrix(B, A)
return true
end
chdA = leaves(A)
chdB = leaves(B)
for i in eachindex(chdA)
for j in eachindex(chdB)
if data(chdA[i]) === data(chdB[j])
return true
Comment on lines +502 to +514
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for most use cases, we can simply:

  1. Check if A and B have a common root
  2. Check the intersection of the row and column ranges of A and B

When they don't have a common root, we can (probably?) assume they don't overlap?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If they don't have a common root it is not nesseccery they don't overlap. For example, we can have this situation:
M = Matrix(...)
H = HMatrix(M, ...)
H1 = HMatrix(M, ...)
Then even we check intersections of H and H1 we need to compare their data anyway. But we can assume that the H and H1 are built correctly(without using one data matrix for more than one HMatrix) and then remove the loops altogether.

end
end
end
return false
end

############################################################################################
# Recipes
############################################################################################
Expand Down
65 changes: 60 additions & 5 deletions src/lu.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using DataFlowTasks

const NOPIVOT = VERSION >= v"1.7" ? NoPivot : Val{false}

const HLU = LU{<:Any,<:HMatrix}
Expand Down Expand Up @@ -30,9 +32,16 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads())
nt = Threads.nthreads()
chn = Channel{ACABuffer{T}}(nt)
foreach(i -> put!(chn, ACABuffer(T)), 1:nt)
_lu!(M, compressor, threads, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
if (threads)
_lu_threads!(M, compressor, chn)
# wrap the result in the LU structure
d = @dspawn LU(@R(M), Int[], 0) label = "result"
return fetch(d)
else
_lu!(M, compressor, chn)
# wrap the result in the LU structure
return LU(M, Int[], 0)
end
end

"""
Expand All @@ -59,7 +68,7 @@ Hierarchical LU factorization. See [`lu!`](@ref) for the available options.
"""
LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...)

function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
function _lu!(M::HMatrix, compressor, bufs = nothing)
if isleaf(M)
d = data(M)
@assert d isa Matrix
Expand All @@ -69,7 +78,7 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu!(chdM[i, i], compressor, threads, bufs)
_lu!(chdM[i, i], compressor, bufs)
for j in (i+1):n
ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor, bufs)
rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor, bufs)
Expand All @@ -84,6 +93,52 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing)
return M
end

function _lu_threads!(M::HMatrix, compressor, bufs = nothing, level = 0, parent = (0, 0))
if isleaf(M)
@dspawn begin
@RW(M)
d = data(M)
@assert d isa Matrix
lu!(d, NOPIVOT())
end label = "lu($(parent[1]),$(parent[2]))\nlevel=$(level)"
else
@assert !hasdata(M)
chdM = children(M)
m, n = size(chdM)
for i in 1:m
_lu_threads!(chdM[i, i], compressor, bufs, level + 1, (i, i))
for j in (i+1):n
@dspawn ldiv!(
UnitLowerTriangular(@R(chdM[i, i])),
@RW(chdM[i, j]),
compressor,
bufs,
) label = "ldiv($i,$j)\nlevel=$(level+1)"
@dspawn rdiv!(
@RW(chdM[j, i]),
UpperTriangular(@R(chdM[i, i])),
compressor,
bufs,
) label = "rdiv($j,$i)\nlevel=$(level+1)"
end
for j in (i+1):m
for k in (i+1):n
@dspawn hmul!(
@RW(chdM[j, k]),
@R(chdM[j, i]),
@R(chdM[i, k]),
-1,
1,
compressor,
bufs,
) label = "hmul($j,$k)\nlevel=$(level+1)"
end
end
end
end
return M
end
Comment on lines +96 to +140
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems fine as a first implementation, but this level of granularity is not going to be sufficient for a good parallel scaling (I think). You probably need to spawn task a finer scale inside the hmul! and ldiv!/rdiv! functions...

We should probably start by looking at the TaskGraph for the current implementation. Could you post one e.g. on the PR?

Copy link
Copy Markdown
Collaborator Author

@AZEY4 AZEY4 Jun 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are right. I am planning to use higher level of granularity and to spawn tasks inside ldiv!, rdiv! and hmul!.


function LinearAlgebra.ldiv!(A::HLU, y::AbstractVector; global_index = true)
p = A.factors # underlying data
ctree = coltree(p)
Expand Down
2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function leaves(tree)
end

"""
leaves(tree)
nodes(tree)

Return a vector containing all the nodes of `tree`.
"""
Expand Down
3 changes: 3 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[deps]
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
DataFlowTasks = "d1549cb6-e9f4-42f8-98cc-ffc8d067ff5b"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand Down
30 changes: 30 additions & 0 deletions test/hmatrix_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ using StaticArrays
using HMatrices
using LinearAlgebra
using SparseArrays
using DataFlowTasks

include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))

Expand Down Expand Up @@ -58,3 +59,32 @@ end
Hnew = axpy!(true, S, deepcopy(H))
@test Matrix(Hnew) == (H_full + Matrix(S))
end

@testset "Memory overlap" begin
m = 1000
T = Float64

X = points_on_sphere(m)
Y = X
K = laplace_matrix(X, X)

X1 = points_on_sphere(m)
Y1 = X1
K1 = laplace_matrix(X1, X1)

splitter = CardinalitySplitter(; nmax = 50)
Xclt = ClusterTree(X, splitter)
Yclt = ClusterTree(Y, splitter)
X1clt = ClusterTree(X1, splitter)
Y1clt = ClusterTree(Y1, splitter)
adm = StrongAdmissibilityStd(3)
comp = PartialACA(; atol = 1e-10)

H = assemble_hmatrix(K, Xclt, Yclt; adm, comp, threads = false, distributed = false)
H1 = assemble_hmatrix(K1, X1clt, Y1clt; adm, comp, threads = false, distributed = false)

# Test memory overlap function
@test DataFlowTasks.memory_overlap(H, H) == true
@test DataFlowTasks.memory_overlap(H, H1) == false
@test DataFlowTasks.memory_overlap(H, H.children[1]) == true
end
37 changes: 37 additions & 0 deletions test/lu_plots/create_lu_plots.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
@echo off

:: First argument: path to HMatrices project
:: Second argument: path where to store plots

if "%1"=="" (
set JULIA_PROJECT_PATH=%~dp0\..\..
) else (
set JULIA_PROJECT_PATH=%1
)

if "%2"=="" (
set PLOTS_PATH=%~dp0
) else (
set PLOTS_PATH=%2
)

:: Path to the Julia project environment
set JULIA_ENVIROMENT_PATH=%JULIA_PROJECT_PATH%\test

:: Paths to the scripts
set RUN_LU_PATH=%JULIA_PROJECT_PATH%\test\lu_plots\run_lu.jl
set CREATE_PLOTS_PATH=%JULIA_PROJECT_PATH%\test\lu_plots\create_plots.jl

:: Space-separated list of thread counts to use
set THREAD_COUNTS=1 2 4 6 8

:: Loop over each thread count
for %%t in (%THREAD_COUNTS%) do (
echo Running LU with %%t threads...
set JULIA_NUM_THREADS=%%t
julia --project=%JULIA_ENVIROMENT_PATH% %RUN_LU_PATH% %PLOTS_PATH%
)

:: Create the plots
echo Creating plots...
julia --project=%JULIA_ENVIROMENT_PATH% %CREATE_PLOTS_PATH% %PLOTS_PATH%
37 changes: 37 additions & 0 deletions test/lu_plots/create_lu_plots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash

# First argument: path to HMatrices project
# Second argument: path where to store plots

if [ -z "$1" ]; then
JULIA_PROJECT_PATH="$(dirname "$(realpath "$0")")/../.."
else
JULIA_PROJECT_PATH="$1"
fi

if [ -z "$2" ]; then
PLOTS_PATH="$(dirname "$(realpath "$0")")"
else
PLOTS_PATH="$2"
fi

# Path to the Julia project environment
JULIA_ENVIRONMENT_PATH="$JULIA_PROJECT_PATH/test"

# Paths to the scripts
RUN_LU_PATH="$JULIA_PROJECT_PATH/test/lu_plots/run_lu.jl"
CREATE_PLOTS_PATH="$JULIA_PROJECT_PATH/test/lu_plots/create_plots.jl"

# Space-separated list of thread counts to use
THREAD_COUNTS="1 2 4 6 8"

# Loop over each thread count
for t in $THREAD_COUNTS; do
echo "Running LU with $t threads..."
export JULIA_NUM_THREADS=$t
julia --project="$JULIA_ENVIRONMENT_PATH" "$RUN_LU_PATH" "$PLOTS_PATH"
done

# Create the plots
echo "Creating plots..."
julia --project="$JULIA_ENVIRONMENT_PATH" "$CREATE_PLOTS_PATH" "$PLOTS_PATH"
58 changes: 58 additions & 0 deletions test/lu_plots/create_plots.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
using HMatrices
using LinearAlgebra
using StaticArrays
using JLD2

using CairoMakie

const FILE_PATH = joinpath(ARGS[1], "lu_results.jld2")
const PLOT_PATH = joinpath(ARGS[1], "nthreads_speedup_plot.svg")

results = load(FILE_PATH)["results"]
signequal_result = results["1"]
info = Dict{Int,Dict{Int,Float64}}()

for (nthreads, result) in results
for (size, time) in result
if !haskey(info, size)
info[size] = Dict{Int,Float64}()
end
speedup = signequal_result[size] / time
info[size][parse(Int64, nthreads)] = speedup
end
end

f = Figure()
ax = Axis(
f[1, 1];
title = "Speedup of HLU depending on number of threads",
xlabel = "Number of threads",
ylabel = "Speedup",
)

for (size, speedup_info) in info
sorted_info = sort(collect(pairs(speedup_info)))
nthreads = [x[1] for x in sorted_info]
speedups = [x[2] for x in sorted_info]
lines!(ax, nthreads, speedups; label = "$size")
end

perfect_speedup = collect(keys(info[minimum(keys(info))])) # Assuming the smallest size has all thread counts
lines!(
ax,
perfect_speedup,
perfect_speedup;
linestyle = :dash,
linewidth = 2,
color = :red,
label = "Perfect speedup",
)

ax.yticks = (minimum(perfect_speedup):1:maximum(perfect_speedup))
ax.xticks = (minimum(perfect_speedup):1:maximum(perfect_speedup))

f[1, 2] = Legend(f, ax, "Size"; framevisible = false)

save(PLOT_PATH, f)

f
Loading