-
Notifications
You must be signed in to change notification settings - Fork 7
Implementation of LU decomposition using data flow tasks #65
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e7daf13
186e0de
88f38e2
78e26b9
4bfb148
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| using DataFlowTasks | ||
|
|
||
| const NOPIVOT = VERSION >= v"1.7" ? NoPivot : Val{false} | ||
|
|
||
| const HLU = LU{<:Any,<:HMatrix} | ||
|
|
@@ -30,9 +32,16 @@ function LinearAlgebra.lu!(M::HMatrix, compressor; threads = use_threads()) | |
| nt = Threads.nthreads() | ||
| chn = Channel{ACABuffer{T}}(nt) | ||
| foreach(i -> put!(chn, ACABuffer(T)), 1:nt) | ||
| _lu!(M, compressor, threads, chn) | ||
| # wrap the result in the LU structure | ||
| return LU(M, Int[], 0) | ||
| if (threads) | ||
| _lu_threads!(M, compressor, chn) | ||
| # wrap the result in the LU structure | ||
| d = @dspawn LU(@R(M), Int[], 0) label = "result" | ||
| return fetch(d) | ||
| else | ||
| _lu!(M, compressor, chn) | ||
| # wrap the result in the LU structure | ||
| return LU(M, Int[], 0) | ||
| end | ||
| end | ||
|
|
||
| """ | ||
|
|
@@ -59,7 +68,7 @@ Hierarchical LU factorization. See [`lu!`](@ref) for the available options. | |
| """ | ||
| LinearAlgebra.lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...) | ||
|
|
||
| function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | ||
| function _lu!(M::HMatrix, compressor, bufs = nothing) | ||
| if isleaf(M) | ||
| d = data(M) | ||
| @assert d isa Matrix | ||
|
|
@@ -69,7 +78,7 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | |
| chdM = children(M) | ||
| m, n = size(chdM) | ||
| for i in 1:m | ||
| _lu!(chdM[i, i], compressor, threads, bufs) | ||
| _lu!(chdM[i, i], compressor, bufs) | ||
| for j in (i+1):n | ||
| ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor, bufs) | ||
| rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor, bufs) | ||
|
|
@@ -84,6 +93,52 @@ function _lu!(M::HMatrix, compressor, threads, bufs = nothing) | |
| return M | ||
| end | ||
|
|
||
| function _lu_threads!(M::HMatrix, compressor, bufs = nothing, level = 0, parent = (0, 0)) | ||
| if isleaf(M) | ||
| @dspawn begin | ||
| @RW(M) | ||
| d = data(M) | ||
| @assert d isa Matrix | ||
| lu!(d, NOPIVOT()) | ||
| end label = "lu($(parent[1]),$(parent[2]))\nlevel=$(level)" | ||
| else | ||
| @assert !hasdata(M) | ||
| chdM = children(M) | ||
| m, n = size(chdM) | ||
| for i in 1:m | ||
| _lu_threads!(chdM[i, i], compressor, bufs, level + 1, (i, i)) | ||
| for j in (i+1):n | ||
| @dspawn ldiv!( | ||
| UnitLowerTriangular(@R(chdM[i, i])), | ||
| @RW(chdM[i, j]), | ||
| compressor, | ||
| bufs, | ||
| ) label = "ldiv($i,$j)\nlevel=$(level+1)" | ||
| @dspawn rdiv!( | ||
| @RW(chdM[j, i]), | ||
| UpperTriangular(@R(chdM[i, i])), | ||
| compressor, | ||
| bufs, | ||
| ) label = "rdiv($j,$i)\nlevel=$(level+1)" | ||
| end | ||
| for j in (i+1):m | ||
| for k in (i+1):n | ||
| @dspawn hmul!( | ||
| @RW(chdM[j, k]), | ||
| @R(chdM[j, i]), | ||
| @R(chdM[i, k]), | ||
| -1, | ||
| 1, | ||
| compressor, | ||
| bufs, | ||
| ) label = "hmul($j,$k)\nlevel=$(level+1)" | ||
| end | ||
| end | ||
| end | ||
| end | ||
| return M | ||
| end | ||
|
Comment on lines
+96
to
+140
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems fine as a first implementation, but this level of granularity is not going to be sufficient for a good parallel scaling (I think). You probably need to spawn task a finer scale inside the We should probably start by looking at the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, you are right. I am planning to use higher level of granularity and to spawn tasks inside ldiv!, rdiv! and hmul!. |
||
|
|
||
| function LinearAlgebra.ldiv!(A::HLU, y::AbstractVector; global_index = true) | ||
| p = A.factors # underlying data | ||
| ctree = coltree(p) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| @echo off | ||
|
|
||
| :: First argument: path to HMatrices project | ||
| :: Second argument: path where to store plots | ||
|
|
||
| if "%1"=="" ( | ||
| set JULIA_PROJECT_PATH=%~dp0\..\.. | ||
| ) else ( | ||
| set JULIA_PROJECT_PATH=%1 | ||
| ) | ||
|
|
||
| if "%2"=="" ( | ||
| set PLOTS_PATH=%~dp0 | ||
| ) else ( | ||
| set PLOTS_PATH=%2 | ||
| ) | ||
|
|
||
| :: Path to the Julia project environment | ||
| set JULIA_ENVIROMENT_PATH=%JULIA_PROJECT_PATH%\test | ||
|
|
||
| :: Paths to the scripts | ||
| set RUN_LU_PATH=%JULIA_PROJECT_PATH%\test\lu_plots\run_lu.jl | ||
| set CREATE_PLOTS_PATH=%JULIA_PROJECT_PATH%\test\lu_plots\create_plots.jl | ||
|
|
||
| :: Space-separated list of thread counts to use | ||
| set THREAD_COUNTS=1 2 4 6 8 | ||
|
|
||
| :: Loop over each thread count | ||
| for %%t in (%THREAD_COUNTS%) do ( | ||
| echo Running LU with %%t threads... | ||
| set JULIA_NUM_THREADS=%%t | ||
| julia --project=%JULIA_ENVIROMENT_PATH% %RUN_LU_PATH% %PLOTS_PATH% | ||
| ) | ||
|
|
||
| :: Create the plots | ||
| echo Creating plots... | ||
| julia --project=%JULIA_ENVIROMENT_PATH% %CREATE_PLOTS_PATH% %PLOTS_PATH% |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| #!/bin/bash | ||
|
|
||
| # First argument: path to HMatrices project | ||
| # Second argument: path where to store plots | ||
|
|
||
| if [ -z "$1" ]; then | ||
| JULIA_PROJECT_PATH="$(dirname "$(realpath "$0")")/../.." | ||
| else | ||
| JULIA_PROJECT_PATH="$1" | ||
| fi | ||
|
|
||
| if [ -z "$2" ]; then | ||
| PLOTS_PATH="$(dirname "$(realpath "$0")")" | ||
| else | ||
| PLOTS_PATH="$2" | ||
| fi | ||
|
|
||
| # Path to the Julia project environment | ||
| JULIA_ENVIRONMENT_PATH="$JULIA_PROJECT_PATH/test" | ||
|
|
||
| # Paths to the scripts | ||
| RUN_LU_PATH="$JULIA_PROJECT_PATH/test/lu_plots/run_lu.jl" | ||
| CREATE_PLOTS_PATH="$JULIA_PROJECT_PATH/test/lu_plots/create_plots.jl" | ||
|
|
||
| # Space-separated list of thread counts to use | ||
| THREAD_COUNTS="1 2 4 6 8" | ||
|
|
||
| # Loop over each thread count | ||
| for t in $THREAD_COUNTS; do | ||
| echo "Running LU with $t threads..." | ||
| export JULIA_NUM_THREADS=$t | ||
| julia --project="$JULIA_ENVIRONMENT_PATH" "$RUN_LU_PATH" "$PLOTS_PATH" | ||
| done | ||
|
|
||
| # Create the plots | ||
| echo "Creating plots..." | ||
| julia --project="$JULIA_ENVIRONMENT_PATH" "$CREATE_PLOTS_PATH" "$PLOTS_PATH" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| using HMatrices | ||
| using LinearAlgebra | ||
| using StaticArrays | ||
| using JLD2 | ||
|
|
||
| using CairoMakie | ||
|
|
||
| const FILE_PATH = joinpath(ARGS[1], "lu_results.jld2") | ||
| const PLOT_PATH = joinpath(ARGS[1], "nthreads_speedup_plot.svg") | ||
|
|
||
| results = load(FILE_PATH)["results"] | ||
| signequal_result = results["1"] | ||
| info = Dict{Int,Dict{Int,Float64}}() | ||
|
|
||
| for (nthreads, result) in results | ||
| for (size, time) in result | ||
| if !haskey(info, size) | ||
| info[size] = Dict{Int,Float64}() | ||
| end | ||
| speedup = signequal_result[size] / time | ||
| info[size][parse(Int64, nthreads)] = speedup | ||
| end | ||
| end | ||
|
|
||
| f = Figure() | ||
| ax = Axis( | ||
| f[1, 1]; | ||
| title = "Speedup of HLU depending on number of threads", | ||
| xlabel = "Number of threads", | ||
| ylabel = "Speedup", | ||
| ) | ||
|
|
||
| for (size, speedup_info) in info | ||
| sorted_info = sort(collect(pairs(speedup_info))) | ||
| nthreads = [x[1] for x in sorted_info] | ||
| speedups = [x[2] for x in sorted_info] | ||
| lines!(ax, nthreads, speedups; label = "$size") | ||
| end | ||
|
|
||
| perfect_speedup = collect(keys(info[minimum(keys(info))])) # Assuming the smallest size has all thread counts | ||
| lines!( | ||
| ax, | ||
| perfect_speedup, | ||
| perfect_speedup; | ||
| linestyle = :dash, | ||
| linewidth = 2, | ||
| color = :red, | ||
| label = "Perfect speedup", | ||
| ) | ||
|
|
||
| ax.yticks = (minimum(perfect_speedup):1:maximum(perfect_speedup)) | ||
| ax.xticks = (minimum(perfect_speedup):1:maximum(perfect_speedup)) | ||
|
|
||
| f[1, 2] = Legend(f, ax, "Size"; framevisible = false) | ||
|
|
||
| save(PLOT_PATH, f) | ||
|
|
||
| f |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think for most use cases, we can simply:
AandBhave a common rootAandBWhen they don't have a common root, we can (probably?) assume they don't overlap?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If they don't have a common root it is not nesseccery they don't overlap. For example, we can have this situation:
M = Matrix(...)
H = HMatrix(M, ...)
H1 = HMatrix(M, ...)
Then even we check intersections of H and H1 we need to compare their data anyway. But we can assume that the H and H1 are built correctly(without using one data matrix for more than one HMatrix) and then remove the loops altogether.