From 767684285fa404c602aa800236799d30615db652 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Fri, 30 Aug 2024 18:02:00 +0100 Subject: [PATCH 01/10] move everything over --- Project.toml | 20 ++- examples/convlayer.jl | 65 ++++++++++ examples/heatequation.jl | 2 +- examples/neuralnet.jl | 45 +++++++ examples/newtonpendulum.jl | 41 ++++-- src/DualArrays.jl | 251 ++++++++++++++++++++++++++++++++----- test/runtests.jl | 124 +++++++++++++----- 7 files changed, 472 insertions(+), 76 deletions(-) create mode 100644 examples/convlayer.jl create mode 100644 examples/neuralnet.jl diff --git a/Project.toml b/Project.toml index 0375150..a78c37d 100644 --- a/Project.toml +++ b/Project.toml @@ -6,10 +6,26 @@ version = "0.1.0" [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" -DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" +ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[compat] +ArrayLayouts = "1" +ForwardDiff = "0.10.36" +Lux = "0.5.62" +Plots = "1.40.5" + +[extras] +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Lux", "ForwardDiff", "Test"] diff --git a/examples/convlayer.jl b/examples/convlayer.jl new file mode 100644 index 0000000..2e16d64 --- /dev/null +++ b/examples/convlayer.jl @@ -0,0 +1,65 @@ +using LinearAlgebra, MLDatasets, Plots, DualArrays, Random, FillArrays + +#GOAL: Implement and differentiate a convolutional neural network layer +function convlayer(img, ker, xstride = 1, ystride = 1) + n, m = size(ker) + t = eltype(ker) + + n2, m2 = size(img) + n3, m3 = div(n2-n+1,xstride), div(m2-m+1,ystride) + fmap = zeros(promote_type(eltype(img), t), n3, m3) + + #Apply kernel to section of image + for i= 1:xstride:n3,j = 1:ystride:m3 + ft = img[i:i+n-1,j:j+m-1] .* ker + fmap[i,j] = sum(ft) + end + + fmap +end + +function softmax(x) + s = sum(exp.(x)) + exp.(x) / s +end + +function dense_layer(f, W, b, x) + f(W*x + b) +end + +function cross_entropy(x, y) + -sum(y .* log.(x)) +end + +function model_loss(x, y, w) + ker = reshape(w[1:9], 3, 3) + weights = reshape(w[10:6769], 10, 676) + biases = w[6770:6779] + l1 = vec(DualMatrix(convlayer(x, ker))) + l2 = dense_layer(softmax, weights, biases, l1) + target = OneElement(1, y+1, 10) + loss = cross_entropy(l2, target) + loss.value, loss.partials +end + +#TODO: Implement remaining layers upon more support for DualMatrix + +function train_model() + p = rand(6779) + epochs = 1000 + lr = 0.02 + dataset = MNIST(:train) + + for i = 1:epochs + train, test = dataset[i] + d = DualVector(p, I(6779)) + + loss, grads = model_loss(train, test, d) + println(loss) + p = p - lr * grads + end +end + +train_model() + + diff --git a/examples/heatequation.jl b/examples/heatequation.jl index e4a61b3..09b4542 100644 --- a/examples/heatequation.jl +++ b/examples/heatequation.jl @@ -9,7 +9,7 @@ # Compare performance and results. ## -using LinearAlgebra, BandedMatrices, DifferentialEquations, Plots, +using LinearAlgebra, BandedMatrices, OrdinaryDiffEqs, Plots, ForwardDiff U = 500 diff --git a/examples/neuralnet.jl b/examples/neuralnet.jl new file mode 100644 index 0000000..ff4e403 --- /dev/null +++ b/examples/neuralnet.jl @@ -0,0 +1,45 @@ +import Lux: relu +using DualArrays, LinearAlgebra, Plots, FillArrays, Test + +#GOAL: Learn exp() using simple one-layer relu neural network. + +N = 20 +d = 0.1 + +#Domain over which to learn exp +data = collect(d:d:N*d) + +function model(a, b, x) + return relu.(a .* x + b) +end + +function model_loss(w) + sum((model(w[1:N], w[(N+1):end], data) - exp.(data)) .^ 2) +end + +function gradient_descent_sparse(n, lr = 0.01) + weights = ones(2 * N) + for i = 1:n + dw = DualVector(weights, Eye{Float64}(2*N)) + grads = model_loss(dw).partials + weights -= lr * grads + end + model(weights[1:N], weights[(N + 1):end], data) +end + +function gradient_descent_dense(n, lr = 0.01) + weights = ones(2 * N) + for i = 1:n + dw = DualVector(weights, Matrix(I, 2*N, 2*N)) + grads = model_loss(dw).partials + weights -= lr * grads + end + model(weights[1:N], weights[(N + 1):end], data) +end + +@time densesol = gradient_descent_dense(500) +@time sparsesol = gradient_descent_sparse(500) +@test densesol == sparsesol +@test sparsesol ≈ exp.(data) rtol = 1e-2 + + diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl index 81d727b..1d753b6 100644 --- a/examples/newtonpendulum.jl +++ b/examples/newtonpendulum.jl @@ -6,24 +6,36 @@ # via discretisation and Newton's method. ## -using LinearAlgebra, ForwardDiff, Plots, DualArrays + +using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays #Boundary Conditions a = 0.1 b = 0.0 #Time step, Time period and number of x for discretisation. -ts = 0.001 +ts = 0.1 + Tmax = 5.0 N = Int(Tmax/ts) - 1 #LHS of ode function f(x) n = length(x) - D = Tridiagonal([ones(Float64, n) / ts ; 0.0], [1.0; -2ones(Float64, n) / ts; 1.0], [0.0; ones(Float64, n) / ts]) + D = Tridiagonal([ones(n) / ts ; 0.0], [1.0; -2ones(n) / ts; 1.0], [0.0; ones(n) / ts]) (D * [a; x; b])[2:end-1] + sin.(x) end + +function f(u, a, b, Tmax) + h = Tmax/(length(u)-1) + [u[1] - a; + (u[1:end-2] - 2u[2:end-1] + u[3:end])/h^2 + sin.(u[2:end-1]); + u[end] - b] +end + + + #Newtons method using ForwardDiff.jl function newton_method_forwarddiff(f, x0, n) x = x0 @@ -38,18 +50,33 @@ function newton_method_dualvector(f, x0, n) x = x0 l = length(x0) for i = 1:n - ∇f = f(DualVector(x, Matrix(I, l, l))).jacobian + ∇f = f(DualVector(x, Eye(l))).jacobian + x = x - ∇f \ f(x) + end + x +end + +function newton_method_dualvector2(f, x0, n) + x = x0 + l = length(x0) + for i = 1:n + ∇f = f(DualVector(x, Eye(l)), a, b, Tmax).jacobian x = x - ∇f \ f(x) end x end #Initial guess -x0 = zeros(Float64, N) +x0 = zeros(N) #Solve and plot both solution and LHS ('deviation' from system) -@time sol = newton_method_forwarddiff(f, x0, 100) -@time sol = newton_method_dualvector(f, x0, 100) +@time sol1 = newton_method_forwarddiff(f, x0, 100); +@time sol2 = newton_method_dualvector(f, x0, 100); +@test sol1 ≈ sol2 +@time sol = newton_method_dualvector2(f, x0, 100); + +x0 = zeros(N); x0[1] = a; x0[end] = b; + plot(0:ts:Tmax, [a; sol; b]) plot!(0:ts:Tmax, [0; f(sol); 0]) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index e4074d5..56d705d 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -1,80 +1,236 @@ module DualArrays -export DualVector +export DualVector, DualArray, DualMatrix +export dropzeros import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum, - vcat, convert, * -using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays + vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero, + reshape +using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays +import ChainRules: frule, ZeroTangent -struct Dual{T, Partials <: AbstractVector{T}} <: Real +# TODO: support non-banded +sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...) + +# Should Partials necessarily be a vector? +# For example, if we index a DualMatrix, should partials retain the shape +# of the perturbations, thereby giving a matrix of partials? +struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T partials::Partials end +zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1)) +promote_rule(::Type{Dual{T}},::Type{S}) where {T,S} = Dual{promote_type(T,S)} + ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials +for op in (:+, :-) + @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials) +end +for op in (:*, :/) + @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials,k)) +end + +-(x::Dual) = Dual(-1 * x.value, -1 * x.partials) + +*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials) +/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2 sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1)) """ -reprents a vector of duals given by +reprents an array of duals given by - values + jacobian * [ε_1,…,ε_n]. + values + jacobian * 𝛜. -For now the entries just return the values. +where 𝛜 is an array with dual parts corresponding to each entry of values. """ -struct DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} - value::Vector{T} - jacobian::M - function DualVector(value::Vector{T},jacobian::M) where {T, M <: AbstractMatrix{T}} - if(size(jacobian)[1] != length(value)) - x,y = length(value),size(jacobian)[1] - throw(ArgumentError("vector length must match number of rows in jacobian.\n - vector length: $x \n - no. of jacobian rows: $y")) +#Helper function to check valid dimensions of DualArray +function _checkdims(val, jac) + s1, s2 = size(val), size(jac) + if length(s1) > length(s2) + return false + end + for (i, x) in enumerate(s1) + if(x != s2[i]) + return false end - new{T,M}(value,jacobian) end + return true end -function DualVector(value::AbstractVector, jacobian::AbstractMatrix) - T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) +struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N} + value::AbstractArray{T, N} + jacobian::J + function DualArray(value::AbstractArray{T,N},jacobian::AbstractArray{T, M}) where {T, N, M} + if(!_checkdims(value,jacobian)) + s1, s2 = size(value), size(jacobian) + throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian. \n\nGot $s1, $s2")) + end + new{T,N,2*N,typeof(jacobian)}(value,jacobian) + end +end + +function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M} + t = promote_type(T, S) + DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t, M}, jacobian)) end +#Aliases and Constructors +const DualVector{T, M} = DualArray{T, 1, 2, M} where {T, M <: AbstractMatrix{T}} +const DualMatrix{T, M} = DualArray{T, 2, 4, M} where {T, M <: AbstractArray{T, 4}} + +DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j) +DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j) +DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j) +function DualMatrix(x::AbstractMatrix{Dual{T}}) where {T} + val = zeros(T, size(x)...) + jac = zeros(T, size(x)..., size(x[1,1].partials)...) + n, m = size(x) + for i = 1:n, j = 1:m + val[i,j] = x[i,j].value + jac[i,j,:,:] = x[i,j].partials + end + DualMatrix(val, jac) +end function getindex(x::DualVector, y::Int) Dual(x.value[y], sparse_getindex(x.jacobian,y,:)) end - function getindex(x::DualVector, y::UnitRange) newval = x.value[y] newjac = sparse_getindex(x.jacobian,y,:) + newjac = layout_getindex(x.jacobian,y,:) DualVector(newval, newjac) end -size(x::DualVector) = length(x.value) -axes(x::DualVector) = axes(x.value) +getindex(x::DualVector, ::Colon) = x + +function getindex(x::DualArray, y::Int...) + idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x)) + Dual(x.value[y...], x.jacobian[idx...]) +end + +for op in (:size, :axes, :ndims) + @eval $op(x::DualArray) = $op(x.value) +end + +(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian) +-(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian) ++(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian) +-(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian) ++(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian) +-(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian) *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials') +/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian/ y) +/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2 -broadcasted(::typeof(sin),x::DualVector) = DualVector(sin.(x.value),Diagonal(cos.(x.value))*x.jacobian) +## +# Note about 4-tensor by matrix multiplication: +# We can consider matrix-vector multiplication as follows: +# +# (Ax)[i] = A[i,:] . x +# +# This can be generalised to higher dimensions (B a 4-tensor, X a matrix): +# +# (BX)[i,j] = B[i,j,:,:] . X +# +# This is useful since if we consider 4-tensors dimension n × m × s × t +# equivalent to linear maps from n × m matrices to s × t matrices, this +# gives us multiplication analogous to applying the linear transform on +# a matrix in the same way that matrix-vector multiplication applies the +# linear transformation to the vector. This also gives us a way to +# 'expand' the Dual part of a DualMatrix. +## + +# Assumes that the total number of epsilons is equal, i.e the array of +# perturbations are reshapes of each other. +function *(x::DualMatrix, y::DualVector) + val = x.value * y.value + t = promote_type(eltype(x.value), eltype(y.value)) + jac = zeros(t, length(val), size(y.jacobian, 2)) + n, m = size(jac, 1), size(x.value, 2) + for i = 1:n + new_row = zeros(size(jac, 2)) + for j = 1:m + new_row += x.value[i,j] * y.jacobian[j,:] + y.value[j] * vec(x.jacobian[i, j, :, :]) + end + jac[i,:] = new_row + end + DualVector(val, jac) +end + +function *(x::DualMatrix, y::AbstractVector) + val = x.value * y + t = promote_type(eltype(x.value), eltype(y)) + jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4)) + n, m = size(jac, 1), size(x.value, 2) + for i = 1:n + new_row = zeros(size(jac, 2)) + for j = 1:m + new_row += y[j] * vec(x.jacobian[i, j, :, :]) + end + jac[i,:] = new_row + end + DualVector(val, jac) +end + +function broadcasted(f::Function,d::DualVector) + jvals = zeros(eltype(d.value), length(d.value)) + for (i, x) = enumerate(d.value) + _, df = frule((ZeroTangent(), 1),f, x) + jvals[i] = df + end + DualVector(f.(d.value), Diagonal(jvals)*d.jacobian) +end function broadcasted(::typeof(*),x::DualVector,y::DualVector) newval = x.value .* y.value newjac = x.value .* y.jacobian + y.value .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::AbstractArray,y::DualArray) + newval = x .* y.value + newjac = x .* y.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::DualArray,y::AbstractArray) + newval = x.value .* y + newjac = y .* x.jacobian DualVector(newval,newjac) end +function broadcasted(::typeof(^),x::DualVector,n::Int) + newval = x.value .^ n + newjac = n * x.value .^ (n - 1) .* x.jacobian + DualVector(newval,newjac) +end + +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n) + +### +# sum (higher-dimensional case assuming that shape of perturbations is preserved) +### function sum(x::DualVector) - n = length(x.value) Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) end +function sum(x::DualArray) + #Drop redundant leading dimensions of 1 in jacobian + idx = ntuple(i -> i > ndims(x) ? Colon() : 1, 2*ndims(x)) + Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))[idx...]) +end + _jacobian(d::Dual) = permutedims(d.partials) +_jacobian(d::DualVector) = d.jacobian _jacobian(d::DualVector, ::Int) = d.jacobian -_jacobian(x::Number, N::Int) = zeros(typeof(x), 1, N) +_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N) _value(d::DualVector) = d.value _value(x::Number) = x @@ -88,16 +244,45 @@ function vcat(x::Union{Dual, DualVector}...) DualVector(value,jacobian) end -_size(x::Real) = 1 -_size(x::DualVector) = size(x) +function vcat(a::Dual ,x::DualVector, b::Dual) + val = vcat(a.value, x.value, b.value) + jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b)) + DualVector(val, jac) +end + +function vcat(a::Real ,x::DualVector, b::Real) + cols = size(x.jacobian,2) + val = vcat(a, x.value, b) + jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols)) + DualVector(val, jac) +end + +### +# +# enable converting between DualVector and DualMatrix. +# the 4D jacobian can be thought of as a generalisation of the 2D jacobian: +# +# n × m 2D jacobian: J[i,j] = ∂fᵢ / ∂xⱼ +# n × m × s × t 4D jacobian: J[i, j, k, l] = ∂f[i, j] / δx[k, l] +# +### + +# reshape does not preserve shape of perturbations (since jacobian can have +# any number of columns) +function reshape(x::DualVector,dims::Vararg{Int}) + val = reshape(x.value, dims...) + jac = reshape(x.jacobian, dims..., size(x.jacobian, 2), 1) + DualMatrix(val, jac) +end -function vcat(x::Union{Real, DualVector}...) - cols = max((_size(i) for i in x)...) - val = vcat((_value(i) for i in x)...) - jac = vcat((_jacobian(i, cols) for i in x)...) +function vec(x::DualArray) + val = vec(x.value) + jac = reshape(x.jacobian, length(val), prod(size(x.jacobian)[ndims(x)+1:end])) DualVector(val, jac) end +show(io::IO,::MIME"text/plain", x::DualArray) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) +show(io::IO,::MIME"text/plain", x::Dual) = (print(io,x.value); print(io," + "); print(io,x.partials);print("𝛜")) end -# module DualArrays +# module DualArrays \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d1cba3c..9f10864 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,42 +1,100 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, ComponentArrays using DualArrays: Dual +using Lux: relu @testset "DualArrays" begin - @test_throws ArgumentError DualVector([1,2],I(3)) - v = DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - @test v[1] isa Dual - @test v[1] == Dual(1,[1,2,3]) - @test v[2] == Dual(2,[4,5,6]) - @test v[3] == Dual(3,[7,8,9]) - @test_throws BoundsError v[4] - @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - - w = v + v - @test w == DualVector([2,4,6],[2 4 6;8 10 12;14 16 18]) - @test w.jacobian == 2v.jacobian - - @test sin.(v) isa DualVector - @test sin.(v).value == [sin(1), sin(2), sin(3)] - @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian - - x,y = v[1:2],v[2:3] - @test x == DualVector([1,2],[1 2 3;4 5 6]) - @test y == DualVector([2,3],[4 5 6;7 8 9]) - @test x .* y == DualVector([2,6],[6 9 12;26 31 36]) + v = DualArray([1.,2, 3], [1 2 3; 4 5 6;7 8 9]) + @testset "constructors" begin + @test v isa DualVector + @test eltype(v) == DualArrays.Dual{Float64} + w = DualArray(ones(2,2),ones(2,2,2,2)) + @test w isa DualMatrix + @test_throws TypeError DualArray([1,2], [1,2]) + @test_throws ArgumentError DualVector([1,2],I(3)) + end + + @testset "indexing" begin + @test v[1] isa Dual + @test v[1] == Dual(1,[1,2,3]) + @test v[2] == Dual(2,[4,5,6]) + @test v[3] == Dual(3,[7,8,9]) + @test_throws BoundsError v[4] + @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) + + x,y = v[1:2],v[2:3] + @test x == DualVector([1,2],[1 2 3;4 5 6]) + @test y == DualVector([2,3],[4 5 6;7 8 9]) + + n = 10 + w = DualVector(1:n, I(n)) + @test w[2:end].jacobian isa BandedMatrix + end + + @testset "basic_operations" begin + w = v + v + @test w == DualVector([2.,4.,6.],[2. 4 6;8 10 12;14 16 18]) + @test w.jacobian == 2v.jacobian + + u = v - w + @test u == -v + + x = v.value + v - v.value + @test x == v + + @test v .^ 2 == DualVector([1, 4, 9], [2 4 6;16 20 24; 42 48 54]) + + x,y = v[1:2],v[2:3] + @test x .* y == DualVector([2,6],[6 9 12;26 31 36]) + @test sum(x .* y) isa Dual + @test sum(x .* y) == Dual(8,[32,40,48]) + end + + @testset "broadcasting" begin + @test sin.(v) isa DualVector + @test sin.(v).value == [sin(1), sin(2), sin(3)] + @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian - @test sum(x .* y) isa Dual - @test sum(x .* y) == Dual(8,[32,40,48]) + @test exp.(v) isa DualVector + @test exp.(v).value == [exp(1), exp(2), exp(3)] + @test exp.(v).jacobian == Diagonal(exp.(v.value)) * v.jacobian + + @test relu.(v) isa DualVector + @test relu.(v).value == [relu(1), relu(2), relu(3)] + @test relu.(v).jacobian == Diagonal( 0.5 * (sign.(v.value) .+ 1)) * v.jacobian + end + + @testset "misc" begin + n = 10 + v = DualVector(1:n, I(n)) + @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) + end - n = 10 - v = DualVector(1:n, I(n)) - @test v[2:end].jacobian isa BandedMatrix + @testset "vcat" begin + x = Dual(1, [1, 2, 3]) + y = DualVector([2, 3], [4 5 6;7 8 9]) + @test vcat(x) == x + @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) + @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + @test vcat(1, y, 2) == DualVector([1,2,3,2], [0 0 0; 4 5 6; 7 8 9; 0 0 0]) + end - @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) + @testset "reshape" begin + x = DualVector([1,2,3,4], I(4)) + @test vec(reshape(x, 2, 2)) == x + end - x = Dual(1, [1, 2, 3]) - y = DualVector([2, 3], [4 5 6;7 8 9]) - @test vcat(x) == x - @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) - @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + @testset "DualMatrix" begin + x = DualVector(ones(6), I(6)) + y = reshape(x[1:4], 2, 2) + z = x[5:6] + @test (y*z).value == [2,2] + @test (y*z).jacobian == [1 0 1 0 1 1; 0 1 0 1 1 1] + for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2,6))) + x = DualVector(t[1], I(6)) + y = reshape(x[1:4], 2, 2) + z = x[5:6] + @test relu.(y*[1,-1] + z).jacobian == t[2] + end + end end \ No newline at end of file From b8e7fc8c1f1ba4747d679f125f1ee58448108ebc Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Wed, 25 Sep 2024 20:08:28 +0100 Subject: [PATCH 02/10] initial (slow) implementation of blockmatrixtensor --- Project.toml | 4 + examples/convlayer.jl | 15 ++- src/BlockMatrixTensor.jl | 83 ++++++++++++ src/DualArray.jl | 281 ++++++++++++++++++++++++++++++++++++++ src/DualArrays.jl | 282 +-------------------------------------- test/runtests.jl | 4 + 6 files changed, 387 insertions(+), 282 deletions(-) create mode 100644 src/BlockMatrixTensor.jl create mode 100644 src/DualArray.jl diff --git a/Project.toml b/Project.toml index a78c37d..0cf0764 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,8 @@ version = "0.1.0" [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" +BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" +BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" @@ -14,6 +16,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] diff --git a/examples/convlayer.jl b/examples/convlayer.jl index 2e16d64..538fd57 100644 --- a/examples/convlayer.jl +++ b/examples/convlayer.jl @@ -8,13 +8,11 @@ function convlayer(img, ker, xstride = 1, ystride = 1) n2, m2 = size(img) n3, m3 = div(n2-n+1,xstride), div(m2-m+1,ystride) fmap = zeros(promote_type(eltype(img), t), n3, m3) - #Apply kernel to section of image for i= 1:xstride:n3,j = 1:ystride:m3 ft = img[i:i+n-1,j:j+m-1] .* ker fmap[i,j] = sum(ft) end - fmap end @@ -23,8 +21,12 @@ function softmax(x) exp.(x) / s end -function dense_layer(f, W, b, x) - f(W*x + b) +function dense_layer(W, b, x, f::Function = identity) + ret = W*x + println("Multiplication complete") + ret += b + println("Addition Complete") + f(ret) end function cross_entropy(x, y) @@ -36,9 +38,12 @@ function model_loss(x, y, w) weights = reshape(w[10:6769], 10, 676) biases = w[6770:6779] l1 = vec(DualMatrix(convlayer(x, ker))) - l2 = dense_layer(softmax, weights, biases, l1) + println("Conv layer complete") + l2 = dense_layer(weights, biases, l1, softmax) + println("Dense Layer complete") target = OneElement(1, y+1, 10) loss = cross_entropy(l2, target) + println("Loss complete") loss.value, loss.partials end diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl new file mode 100644 index 0000000..4bb693b --- /dev/null +++ b/src/BlockMatrixTensor.jl @@ -0,0 +1,83 @@ +#4-Tensor with data stored in a BlockArray +#Enables sparsity for DualMatrix jacobian. +using BlockArrays, SparseArrays + +struct BlockMatrixTensor{T} <: AbstractArray{T, 4} + data::AbstractBlockMatrix{T} +end + +function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} + n, m, s, t = size(x) + data = BlockArray(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) + for i = 1:n, j = 1:m + data[Block(i,j)] = x[i,j,:,:] + end + BlockMatrixTensor(data) +end + +size(x::BlockMatrixTensor) = (blocksize(x.data)..., size(x.data[Block(1), Block(1)])...) + +getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = sparse_getindex(x.data[Block(a), Block(b)], c, d) +getindex(x::BlockMatrixTensor, a::UnitRange, b::UnitRange, ::Colon, ::Colon) = BlockMatrixTensor(x.data[Block.(a),Block.(b)]) + +function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) + print("BlockMatrixTensor containing: \n") + show(io,m,x.data) +end +show(io::IO,x::BlockMatrixTensor) = show(io, x.data) + +#Blockwise broadcast +function broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractMatrix) + z = undef + if size(y, 1) == 1 + z = repeat(y, blocksize(x.data, 1)) + elseif size(y, 2) == 1 + z = repeat(y, 1, blocksize(x.data, 2)) + else + z = copy(y) + end + n, m = size(z) + blocked = mortar([fill(z[i,j], size(x.data[Block(1,1)])) for i = 1:n, j = 1:m]) + BlockMatrixTensor(f.(blocked, x.data)) +end + +function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor) + z = undef + if size(x, 1) == 1 + z = repeat(x, blocksize(y.data, 1)) + elseif size(x, 2) == 1 + z = repeat(y, 1, blocksize(y.data, 2)) + else + z = copy(x) + end + n, m = size(z) + blocked = mortar([fill(z[i,j], size(y.data[Block(1,1)])) for i = 1:n, j = 1:m]) + BlockMatrixTensor(f.(blocked, y.data)) +end + +function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} + if dims == 1:2 + n, m = size(x, 3), size(x, 4) + ret = zeros(T, n, m) + for i = 1:size(x, 1), j = 1:size(x, 2) + ret += x.data[Block(i, j)] + end + sparse(ret) + elseif dims == 2 + n, m, s, t = size(x) + ret = BlockArray(zeros(T, n*s, t), fill(s, n), fill(t, 1)) + for i = 1:n, j = 1:m + ret[Block(i,1)] += x.data[Block(i,j)] + end + BlockMatrixTensor(ret) + end +end + +function reshape(x::BlockMatrixTensor, dims::Vararg{Int64}) + #Blockwise reshape + if dims[3] == size(x, 3) && dims[4] == size(x, 4) + mortar(reshape(blocks(x.data), dims[1], dims[2])) + end +end + +vec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) diff --git a/src/DualArray.jl b/src/DualArray.jl new file mode 100644 index 0000000..53f0282 --- /dev/null +++ b/src/DualArray.jl @@ -0,0 +1,281 @@ +# TODO: support non-banded +sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...) + +# Should Partials necessarily be a vector? +# For example, if we index a DualMatrix, should partials retain the shape +# of the perturbations, thereby giving a matrix of partials? +struct Dual{T, Partials <: AbstractArray{T}} <: Real + value::T + partials::Partials +end + +zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1)) +promote_rule(::Type{Dual{T}},::Type{S}) where {T,S} = Dual{promote_type(T,S)} + +==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials +for op in (:+, :-) + @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials) +end +for op in (:*, :/) + @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials,k)) +end + +-(x::Dual) = Dual(-1 * x.value, -1 * x.partials) + +*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials) +/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2 + +sparse_getindex(a...) = layout_getindex(a...) +sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2)) +sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1)) +sparse_getindex(s::AbstractSparseArray, idx...) = s[idx...] + +sparse_getindex(b::BandedMatrix, k::Integer, ::Colon) = sparsevec( + rowrange(b, k), b[k, rowrange(b, k)], size(b, 2) + ) +sparse_getindex(b::BandedMatrix, ::Colon, k::Integer) = sparsevec( + colrange(b, k), b[colrange(b, k), k], size(b, 1) + ) + +sparse_transpose(s::SparseVector) = sparse(ones(length(s.nzind)),s.nzind,s.nzval, 1, length(s)) +sparse_transpose(o::OneElement{T,1}) where {T} = reshape(o, 1, :) +sparse_transpose(x::AbstractVector) = x' + +""" +reprents an array of duals given by + + values + jacobian * 𝛜. + +where 𝛜 is an array with dual parts corresponding to each entry of values. +""" + +#Helper function to check valid dimensions of DualArray +function _checkdims(val, jac) + s1, s2 = size(val), size(jac) + if length(s1) > length(s2) + return false + end + for (i, x) in enumerate(s1) + if(x != s2[i]) + return false + end + end + return true +end + +struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N} + value::AbstractArray{T, N} + jacobian::J + function DualArray(value::AbstractArray{T,N},jacobian::AbstractArray{T, M}) where {T, N, M} + if(!_checkdims(value,jacobian)) + s1, s2 = size(value), size(jacobian) + throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian. \n\nGot $s1, $s2")) + end + new{T,N,2*N,typeof(jacobian)}(value,jacobian) + end +end + +function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M} + t = promote_type(T, S) + DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t, M}, jacobian)) +end + +#Aliases and Constructors +const DualVector{T, M} = DualArray{T, 1, 2, M} where {T, M <: AbstractMatrix{T}} +const DualMatrix{T, M} = DualArray{T, 2, 4, M} where {T, M <: AbstractArray{T, 4}} + +DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j) +DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j) +DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j) + +function DualMatrix(x::AbstractMatrix) + val = [y.value for y in x] + n, m = size(x) + jac = mortar([x[i,j].partials for i = 1:n, j = 1:m]) + DualMatrix(sparse(val), BlockMatrixTensor(jac)) +end + +function getindex(x::DualVector, y::Int) + Dual(x.value[y], sparse_getindex(x.jacobian,y,:)) +end +function getindex(x::DualVector, y::UnitRange) + newval = x.value[y] + newjac = sparse_getindex(x.jacobian,y,:) + #newjac = layout_getindex(x.jacobian,y,:) + DualVector(newval, newjac) +end +getindex(x::DualVector, ::Colon) = x + +function getindex(x::DualArray, y::Int...) + idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x)) + Dual(x.value[y...], x.jacobian[idx...]) +end + +for op in (:size, :axes, :ndims) + @eval $op(x::DualArray) = $op(x.value) +end + ++(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian) +-(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian) ++(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian) +-(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian) ++(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian) +-(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian) +*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials') +/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian/ y) +/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2 + +## +# Note about 4-tensor by matrix multiplication: +# We can consider matrix-vector multiplication as follows: +# +# (Ax)[i] = A[i,:] . x +# +# This can be generalised to higher dimensions (B a 4-tensor, X a matrix): +# +# (BX)[i,j] = B[i,j,:,:] . X +# +# This is useful since if we consider 4-tensors dimension n × m × s × t +# equivalent to linear maps from n × m matrices to s × t matrices, this +# gives us multiplication analogous to applying the linear transform on +# a matrix in the same way that matrix-vector multiplication applies the +# linear transformation to the vector. This also gives us a way to +# 'expand' the Dual part of a DualMatrix. +## + +# Assumes that the total number of epsilons is equal, i.e the array of +# perturbations are reshapes of each other. +function *(x::DualMatrix, y::DualVector) + val = x.value * y.value + jac = x.value * y.jacobian + vcat(blocks(sum(x.jacobian .* y.value'; dims = 2).data)...) + DualVector(val, jac) +end + +function *(x::DualMatrix, y::AbstractVector) + val = x.value * y + t = promote_type(eltype(x.value), eltype(y)) + jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4)) + n, m = size(jac, 1), size(x.value, 2) + for i = 1:n + new_row = zeros(size(jac, 2)) + for j = 1:m + new_row += y[j] * vec(x.jacobian[i, j, :, :]) + end + jac[i,:] = new_row + end + DualVector(val, jac) +end + +function broadcasted(f::Function,d::DualVector) + jvals = zeros(eltype(d.value), length(d.value)) + for (i, x) = enumerate(d.value) + _, df = frule((ZeroTangent(), 1),f, x) + jvals[i] = df + end + DualVector(f.(d.value), Diagonal(jvals)*d.jacobian) +end + +function broadcasted(::typeof(*),x::DualVector,y::DualVector) + newval = x.value .* y.value + newjac = x.value .* y.jacobian + y.value .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::AbstractArray,y::DualArray) + newval = x .* y.value + newjac = x .* y.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::DualArray,y::AbstractArray) + newval = x.value .* y + newjac = y .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(^),x::DualVector,n::Int) + newval = x.value .^ n + newjac = n * x.value .^ (n - 1) .* x.jacobian + DualVector(newval,newjac) +end + +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n) + +### +# sum (higher-dimensional case assuming that shape of perturbations is preserved) +### +function sum(x::DualVector) + Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) +end + +function sum(x::DualArray) + Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))) +end + +_jacobian(d::Dual) = permutedims(d.partials) +_jacobian(d::DualVector) = d.jacobian +_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N) + +_value(d::DualVector) = d.value +_value(x::Number) = x + +function vcat(x::Union{Dual, DualVector}...) + if length(x) == 1 + return x[1] + end + value = vcat((d.value for d in x)...) + jacobian = vcat((_jacobian(d) for d in x)...) + DualVector(value,jacobian) +end + +function vcat(a::Dual ,x::DualVector, b::Dual) + val = vcat(a.value, x.value, b.value) + jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b)) + DualVector(val, jac) +end + +function vcat(a::Real ,x::DualVector, b::Real) + cols = size(x.jacobian,2) + val = vcat(a, x.value, b) + jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols)) + DualVector(val, jac) +end + +### +# +# enable converting between DualVector and DualMatrix. +# the 4D jacobian can be thought of as a generalisation of the 2D jacobian: +# +# n × m 2D jacobian: J[i,j] = ∂fᵢ / ∂xⱼ +# n × m × s × t 4D jacobian: J[i, j, k, l] = ∂f[i, j] / δx[k, l] +# +### + +# reshape does not preserve shape of perturbations (since jacobian can have +# any number of columns) + +#Should a PR be made into reshape() +_tomatrix(v::AbstractVector) = reshape(v, :, 1) + +function reshape(x::DualVector,dims::Vararg{Int}) + val = reshape(x.value, dims...) + jac_blockmat = [sparse_transpose(sparse_getindex(x.jacobian, i, :)) for i = 1:size(x.jacobian, 1)] + jac = BlockMatrixTensor(mortar(reshape(jac_blockmat, dims...))) + DualMatrix(val, jac) +end + +_blockvec(x::AbstractArray{T,4}) where {T} = reshape(x, size(x, 1) * size(x, 2), size(x, 3) * size(x, 4)) +_blockvec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) + +function vec(x::DualArray) + val = vec(x.value) + jac = _blockvec(x.jacobian) + DualVector(val, jac) +end + +show(io::IO,::MIME"text/plain", x::DualArray) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) +show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) +show(io::IO,::MIME"text/plain", x::Dual) = (print(io,x.value); print(io," + "); print(io,x.partials);print("𝛜")) \ No newline at end of file diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 56d705d..830ba8f 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -1,288 +1,16 @@ module DualArrays -export DualVector, DualArray, DualMatrix + +export DualVector, DualArray, DualMatrix, BlockMatrixTensor export dropzeros import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum, vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero, reshape -using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays +using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays, SparseArrays import ChainRules: frule, ZeroTangent -# TODO: support non-banded -sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...) - -# Should Partials necessarily be a vector? -# For example, if we index a DualMatrix, should partials retain the shape -# of the perturbations, thereby giving a matrix of partials? -struct Dual{T, Partials <: AbstractArray{T}} <: Real - value::T - partials::Partials -end - -zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1)) -promote_rule(::Type{Dual{T}},::Type{S}) where {T,S} = Dual{promote_type(T,S)} - -==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials -for op in (:+, :-) - @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials) -end -for op in (:*, :/) - @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials,k)) -end - --(x::Dual) = Dual(-1 * x.value, -1 * x.partials) - -*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials) -/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2 - -sparse_getindex(a...) = layout_getindex(a...) -sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2)) -sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1)) - -""" -reprents an array of duals given by - - values + jacobian * 𝛜. - -where 𝛜 is an array with dual parts corresponding to each entry of values. -""" - -#Helper function to check valid dimensions of DualArray -function _checkdims(val, jac) - s1, s2 = size(val), size(jac) - if length(s1) > length(s2) - return false - end - for (i, x) in enumerate(s1) - if(x != s2[i]) - return false - end - end - return true -end - -struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N} - value::AbstractArray{T, N} - jacobian::J - function DualArray(value::AbstractArray{T,N},jacobian::AbstractArray{T, M}) where {T, N, M} - if(!_checkdims(value,jacobian)) - s1, s2 = size(value), size(jacobian) - throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian. \n\nGot $s1, $s2")) - end - new{T,N,2*N,typeof(jacobian)}(value,jacobian) - end -end - -function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M} - t = promote_type(T, S) - DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t, M}, jacobian)) -end - -#Aliases and Constructors -const DualVector{T, M} = DualArray{T, 1, 2, M} where {T, M <: AbstractMatrix{T}} -const DualMatrix{T, M} = DualArray{T, 2, 4, M} where {T, M <: AbstractArray{T, 4}} - -DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j) -DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j) -DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j) - -function DualMatrix(x::AbstractMatrix{Dual{T}}) where {T} - val = zeros(T, size(x)...) - jac = zeros(T, size(x)..., size(x[1,1].partials)...) - n, m = size(x) - for i = 1:n, j = 1:m - val[i,j] = x[i,j].value - jac[i,j,:,:] = x[i,j].partials - end - DualMatrix(val, jac) -end - -function getindex(x::DualVector, y::Int) - Dual(x.value[y], sparse_getindex(x.jacobian,y,:)) -end -function getindex(x::DualVector, y::UnitRange) - newval = x.value[y] - newjac = sparse_getindex(x.jacobian,y,:) - newjac = layout_getindex(x.jacobian,y,:) - DualVector(newval, newjac) -end -getindex(x::DualVector, ::Colon) = x - -function getindex(x::DualArray, y::Int...) - idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x)) - Dual(x.value[y...], x.jacobian[idx...]) -end - -for op in (:size, :axes, :ndims) - @eval $op(x::DualArray) = $op(x.value) -end - -+(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian) --(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian) -+(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian) --(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian) -+(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian) --(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian) -*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) -*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian) -*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials') -/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian/ y) -/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2 - -## -# Note about 4-tensor by matrix multiplication: -# We can consider matrix-vector multiplication as follows: -# -# (Ax)[i] = A[i,:] . x -# -# This can be generalised to higher dimensions (B a 4-tensor, X a matrix): -# -# (BX)[i,j] = B[i,j,:,:] . X -# -# This is useful since if we consider 4-tensors dimension n × m × s × t -# equivalent to linear maps from n × m matrices to s × t matrices, this -# gives us multiplication analogous to applying the linear transform on -# a matrix in the same way that matrix-vector multiplication applies the -# linear transformation to the vector. This also gives us a way to -# 'expand' the Dual part of a DualMatrix. -## - -# Assumes that the total number of epsilons is equal, i.e the array of -# perturbations are reshapes of each other. -function *(x::DualMatrix, y::DualVector) - val = x.value * y.value - t = promote_type(eltype(x.value), eltype(y.value)) - jac = zeros(t, length(val), size(y.jacobian, 2)) - n, m = size(jac, 1), size(x.value, 2) - for i = 1:n - new_row = zeros(size(jac, 2)) - for j = 1:m - new_row += x.value[i,j] * y.jacobian[j,:] + y.value[j] * vec(x.jacobian[i, j, :, :]) - end - jac[i,:] = new_row - end - DualVector(val, jac) -end - -function *(x::DualMatrix, y::AbstractVector) - val = x.value * y - t = promote_type(eltype(x.value), eltype(y)) - jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4)) - n, m = size(jac, 1), size(x.value, 2) - for i = 1:n - new_row = zeros(size(jac, 2)) - for j = 1:m - new_row += y[j] * vec(x.jacobian[i, j, :, :]) - end - jac[i,:] = new_row - end - DualVector(val, jac) -end - -function broadcasted(f::Function,d::DualVector) - jvals = zeros(eltype(d.value), length(d.value)) - for (i, x) = enumerate(d.value) - _, df = frule((ZeroTangent(), 1),f, x) - jvals[i] = df - end - DualVector(f.(d.value), Diagonal(jvals)*d.jacobian) -end - -function broadcasted(::typeof(*),x::DualVector,y::DualVector) - newval = x.value .* y.value - newjac = x.value .* y.jacobian + y.value .* x.jacobian - DualArray(newval,newjac) -end - -function broadcasted(::typeof(*),x::AbstractArray,y::DualArray) - newval = x .* y.value - newjac = x .* y.jacobian - DualArray(newval,newjac) -end - -function broadcasted(::typeof(*),x::DualArray,y::AbstractArray) - newval = x.value .* y - newjac = y .* x.jacobian - DualVector(newval,newjac) -end - -function broadcasted(::typeof(^),x::DualVector,n::Int) - newval = x.value .^ n - newjac = n * x.value .^ (n - 1) .* x.jacobian - DualVector(newval,newjac) -end - -broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n) - -### -# sum (higher-dimensional case assuming that shape of perturbations is preserved) -### -function sum(x::DualVector) - Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) -end - -function sum(x::DualArray) - #Drop redundant leading dimensions of 1 in jacobian - idx = ntuple(i -> i > ndims(x) ? Colon() : 1, 2*ndims(x)) - Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))[idx...]) -end - -_jacobian(d::Dual) = permutedims(d.partials) -_jacobian(d::DualVector) = d.jacobian -_jacobian(d::DualVector, ::Int) = d.jacobian -_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N) - -_value(d::DualVector) = d.value -_value(x::Number) = x - -function vcat(x::Union{Dual, DualVector}...) - if length(x) == 1 - return x[1] - end - value = vcat((d.value for d in x)...) - jacobian = vcat((_jacobian(d) for d in x)...) - DualVector(value,jacobian) -end - -function vcat(a::Dual ,x::DualVector, b::Dual) - val = vcat(a.value, x.value, b.value) - jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b)) - DualVector(val, jac) -end - -function vcat(a::Real ,x::DualVector, b::Real) - cols = size(x.jacobian,2) - val = vcat(a, x.value, b) - jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols)) - DualVector(val, jac) -end - -### -# -# enable converting between DualVector and DualMatrix. -# the 4D jacobian can be thought of as a generalisation of the 2D jacobian: -# -# n × m 2D jacobian: J[i,j] = ∂fᵢ / ∂xⱼ -# n × m × s × t 4D jacobian: J[i, j, k, l] = ∂f[i, j] / δx[k, l] -# -### - -# reshape does not preserve shape of perturbations (since jacobian can have -# any number of columns) -function reshape(x::DualVector,dims::Vararg{Int}) - val = reshape(x.value, dims...) - jac = reshape(x.jacobian, dims..., size(x.jacobian, 2), 1) - DualMatrix(val, jac) -end - -function vec(x::DualArray) - val = vec(x.value) - jac = reshape(x.jacobian, length(val), prod(size(x.jacobian)[ndims(x)+1:end])) - DualVector(val, jac) -end +include("BlockMatrixTensor.jl") +include("DualArray.jl") -show(io::IO,::MIME"text/plain", x::DualArray) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) -show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) -show(io::IO,::MIME"text/plain", x::Dual) = (print(io,x.value); print(io," + "); print(io,x.partials);print("𝛜")) end # module DualArrays \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9f10864..fea6117 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -97,4 +97,8 @@ using Lux: relu @test relu.(y*[1,-1] + z).jacobian == t[2] end end + + @testset "BlockMatrixTensor" begin + + end end \ No newline at end of file From da6fa4992c8f88f0c5703b86df38b1c7697f55d0 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Wed, 25 Sep 2024 22:20:01 +0100 Subject: [PATCH 03/10] add better broadcasting + unit tests --- src/BlockMatrixTensor.jl | 71 ++++++++++++++++++++++++++++------------ test/runtests.jl | 24 ++++++++++++-- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 4bb693b..17fdcbf 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -26,35 +26,64 @@ function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) end show(io::IO,x::BlockMatrixTensor) = show(io, x.data) -#Blockwise broadcast -function broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractMatrix) - z = undef - if size(y, 1) == 1 - z = repeat(y, blocksize(x.data, 1)) - elseif size(y, 2) == 1 - z = repeat(y, 1, blocksize(x.data, 2)) +# Blockwise broadcast +for op in (:*, :/) + @eval $op(x::BlockMatrixTensor, y::Number) = BlockMatrixTensor($op(x.data, y)) + @eval $op(x::Number, y::BlockMatrixTensor) = BlockMatrixTensor($op(x, y.data)) +end + +function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) where {T} + ret = BlockArray{T}(undef, blocksizes(x.data)...) + n, m = blocksize(x.data) + if blocksize(x.data) == size(y) + for i = 1:n, j = 1:m + ret[Block(i,j)] = f.(y[i,j], x.data[Block(i,j)]) + end + elseif size(y) == (1, 1) + f.(y[1,1], x.data) + elseif size(y) == (1, m) + for j = 1:m + ret[:, Block(j)] = f.(y[1,j], x.data[:, Block(j)]) + end + elseif size(y) == (n, 1) + for i = 1:n + ret[Block(i), :] = f.(y[i, 1], x.data[Block(i), :]) + end else - z = copy(y) + a = size(x) + b = size(y) + throw(DimensionMismatch("Invalid dimensions for broadcasting. Got $a, $b.")) end - n, m = size(z) - blocked = mortar([fill(z[i,j], size(x.data[Block(1,1)])) for i = 1:n, j = 1:m]) - BlockMatrixTensor(f.(blocked, x.data)) + BlockMatrixTensor(ret) end -function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor) - z = undef - if size(x, 1) == 1 - z = repeat(x, blocksize(y.data, 1)) - elseif size(x, 2) == 1 - z = repeat(y, 1, blocksize(y.data, 2)) +function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) where {T} + ret = BlockArray{T}(undef, blocksizes(y.data)...) + n, m = blocksize(y.data) + if blocksize(y.data) == size(x) + for i = 1:n, j = 1:m + ret[Block(i,j)] = f.(x[i,j], y.data[Block(i,j)]) + end + elseif size(x) == (1, 1) + f.(x[1,1], y.data) + elseif size(x) == (1, m) + for j = 1:m + ret[:, Block(j)] = f.(x[1,j], y.data[:, Block(j)]) + end + elseif size(x) == (n, 1) + for i = 1:n + ret[Block(i), :] = f.(x[i, 1], y.data[Block(i), :]) + end else - z = copy(x) + a = size(y) + b = size(x) + throw(DimensionMismatch("Invalid dimensions for broadcasting. Got $a, $b.")) end - n, m = size(z) - blocked = mortar([fill(z[i,j], size(y.data[Block(1,1)])) for i = 1:n, j = 1:m]) - BlockMatrixTensor(f.(blocked, y.data)) + BlockMatrixTensor(ret) end + + function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} if dims == 1:2 n, m = size(x, 3), size(x, 4) diff --git a/test/runtests.jl b/test/runtests.jl index fea6117..8671332 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ -using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, ComponentArrays +using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, +BlockArrays using DualArrays: Dual using Lux: relu @@ -99,6 +100,25 @@ using Lux: relu end @testset "BlockMatrixTensor" begin - + @testset "constructors" begin + x = BlockMatrixTensor(BlockArray(ones(4, 4), [2, 2], [2, 2])) + y = BlockMatrixTensor(ones(2, 2, 2, 2)) + @test x isa BlockMatrixTensor + @test y isa BlockMatrixTensor + @test x == y + end + @testset "broadcasting" begin + y = BlockMatrixTensor(ones(2, 2, 2, 2)) + z = BlockArray( + [fill(1,2,2) fill(2,2,2); fill(3,2,2) fill(4,2,2)], + [2, 2], + [2, 2] + ) + @test ([1 2;3 4] .* y).data == z + @test reshape([3], 1, 1) .* y == 3 * y + @test ([1 2] .* y).data == BlockArray([fill(1,4,2) fill(2,4,2)], [2, 2], [2, 2]) + @test ([1 2]' .* y).data == BlockArray([fill(1,2,4); fill(2,2,4)], [2, 2], [2, 2]) + @test_throws DimensionMismatch [1 2;3 4;5 6] .* y + end end end \ No newline at end of file From 92167601f8967cb5acb31ff3ecc6c9477f5787d0 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Thu, 26 Sep 2024 14:17:07 +0100 Subject: [PATCH 04/10] slightly better broadcasting --- src/BlockMatrixTensor.jl | 10 +++++----- src/DualArray.jl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 17fdcbf..d92dee4 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -3,7 +3,7 @@ using BlockArrays, SparseArrays struct BlockMatrixTensor{T} <: AbstractArray{T, 4} - data::AbstractBlockMatrix{T} + data::BlockMatrix{T} end function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} @@ -33,7 +33,7 @@ for op in (:*, :/) end function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) where {T} - ret = BlockArray{T}(undef, blocksizes(x.data)...) + ret = copy(x.data) n, m = blocksize(x.data) if blocksize(x.data) == size(y) for i = 1:n, j = 1:m @@ -58,7 +58,7 @@ function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) wh end function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) where {T} - ret = BlockArray{T}(undef, blocksizes(y.data)...) + ret = copy(y.data) n, m = blocksize(y.data) if blocksize(y.data) == size(x) for i = 1:n, j = 1:m @@ -87,11 +87,11 @@ end function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} if dims == 1:2 n, m = size(x, 3), size(x, 4) - ret = zeros(T, n, m) + ret = similar(x.data[Block(1, 1)]) * 0 for i = 1:size(x, 1), j = 1:size(x, 2) ret += x.data[Block(i, j)] end - sparse(ret) + ret elseif dims == 2 n, m, s, t = size(x) ret = BlockArray(zeros(T, n*s, t), fill(s, n), fill(t, 1)) diff --git a/src/DualArray.jl b/src/DualArray.jl index 53f0282..9396b8a 100644 --- a/src/DualArray.jl +++ b/src/DualArray.jl @@ -92,7 +92,7 @@ function DualMatrix(x::AbstractMatrix) val = [y.value for y in x] n, m = size(x) jac = mortar([x[i,j].partials for i = 1:n, j = 1:m]) - DualMatrix(sparse(val), BlockMatrixTensor(jac)) + DualMatrix(val, BlockMatrixTensor(jac)) end function getindex(x::DualVector, y::Int) From d3d8727e773f2bf5137c3c0ad7d95016f316eb95 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Thu, 26 Sep 2024 22:08:37 +0100 Subject: [PATCH 05/10] remake with views --- examples/convlayer.jl | 1 + src/BlockMatrixTensor.jl | 28 +++++++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/convlayer.jl b/examples/convlayer.jl index 538fd57..2ca3636 100644 --- a/examples/convlayer.jl +++ b/examples/convlayer.jl @@ -37,6 +37,7 @@ function model_loss(x, y, w) ker = reshape(w[1:9], 3, 3) weights = reshape(w[10:6769], 10, 676) biases = w[6770:6779] + println("Reshape Complete") l1 = vec(DualMatrix(convlayer(x, ker))) println("Conv layer complete") l2 = dense_layer(weights, biases, l1, softmax) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index d92dee4..773ba55 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -10,14 +10,14 @@ function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} n, m, s, t = size(x) data = BlockArray(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) for i = 1:n, j = 1:m - data[Block(i,j)] = x[i,j,:,:] + view(data, Block(i,j)) .= x[i,j,:,:] end BlockMatrixTensor(data) end -size(x::BlockMatrixTensor) = (blocksize(x.data)..., size(x.data[Block(1), Block(1)])...) +size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data, 1)...) -getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = sparse_getindex(x.data[Block(a), Block(b)], c, d) +getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = sparse_getindex(x.data[Block(a, b)], c, d) getindex(x::BlockMatrixTensor, a::UnitRange, b::UnitRange, ::Colon, ::Colon) = BlockMatrixTensor(x.data[Block.(a),Block.(b)]) function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) @@ -37,17 +37,17 @@ function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) wh n, m = blocksize(x.data) if blocksize(x.data) == size(y) for i = 1:n, j = 1:m - ret[Block(i,j)] = f.(y[i,j], x.data[Block(i,j)]) + view(ret, Block(i,j)) .= f.(y[i,j], x.data[Block(i,j)]) end elseif size(y) == (1, 1) - f.(y[1,1], x.data) + f.(y[1,1], ret) elseif size(y) == (1, m) for j = 1:m - ret[:, Block(j)] = f.(y[1,j], x.data[:, Block(j)]) + view(ret, :, Block(j)) .= f.(y[1,j], x.data[:, Block(j)]) end elseif size(y) == (n, 1) for i = 1:n - ret[Block(i), :] = f.(y[i, 1], x.data[Block(i), :]) + view(ret, Block(i), :) .= f.(y[i, 1], x.data[Block(i), :]) end else a = size(x) @@ -62,17 +62,17 @@ function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) wh n, m = blocksize(y.data) if blocksize(y.data) == size(x) for i = 1:n, j = 1:m - ret[Block(i,j)] = f.(x[i,j], y.data[Block(i,j)]) + view(ret, Block(i,j)) .= f.(x[i,j], y.data[Block(i,j)]) end elseif size(x) == (1, 1) - f.(x[1,1], y.data) + f.(x[1,1], ret) elseif size(x) == (1, m) for j = 1:m - ret[:, Block(j)] = f.(x[1,j], y.data[:, Block(j)]) + view(ret, :, Block(j)) .= f.(x[1,j], y.data[:, Block(j)]) end elseif size(x) == (n, 1) for i = 1:n - ret[Block(i), :] = f.(x[i, 1], y.data[Block(i), :]) + view(ret, Block(i), :) .= f.(x[i, 1], y.data[Block(i), :]) end else a = size(y) @@ -82,8 +82,6 @@ function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) wh BlockMatrixTensor(ret) end - - function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} if dims == 1:2 n, m = size(x, 3), size(x, 4) @@ -94,9 +92,9 @@ function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} ret elseif dims == 2 n, m, s, t = size(x) - ret = BlockArray(zeros(T, n*s, t), fill(s, n), fill(t, 1)) + ret = x.data[Block.(1:n), Block(1)] * 0 for i = 1:n, j = 1:m - ret[Block(i,1)] += x.data[Block(i,j)] + view(ret, Block(i,1)) .+= x.data[Block(i,j)] end BlockMatrixTensor(ret) end From 70c77a26dba90b31600307c83381d2dabe8d8683 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Fri, 27 Sep 2024 20:08:34 +0100 Subject: [PATCH 06/10] temporarily fix sparsity issue with sum() --- src/BlockMatrixTensor.jl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 773ba55..963584e 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -1,4 +1,4 @@ -#4-Tensor with data stored in a BlockArray +#4-Tensor with data stored in a BlockMatrix #Enables sparsity for DualMatrix jacobian. using BlockArrays, SparseArrays @@ -10,21 +10,21 @@ function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} n, m, s, t = size(x) data = BlockArray(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) for i = 1:n, j = 1:m - view(data, Block(i,j)) .= x[i,j,:,:] + view(data, Block(i, j)) .= x[i, j, :, :] end BlockMatrixTensor(data) end -size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data, 1)...) +size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...) getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = sparse_getindex(x.data[Block(a, b)], c, d) -getindex(x::BlockMatrixTensor, a::UnitRange, b::UnitRange, ::Colon, ::Colon) = BlockMatrixTensor(x.data[Block.(a),Block.(b)]) +getindex(x::BlockMatrixTensor, a::UnitRange, b::UnitRange, ::Colon, ::Colon) = BlockMatrixTensor(x.data[Block.(a), Block.(b)]) function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) print("BlockMatrixTensor containing: \n") - show(io,m,x.data) + show(io,m, x.data) end -show(io::IO,x::BlockMatrixTensor) = show(io, x.data) +show(io::IO, x::BlockMatrixTensor) = show(io, x.data) # Blockwise broadcast for op in (:*, :/) @@ -37,13 +37,13 @@ function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) wh n, m = blocksize(x.data) if blocksize(x.data) == size(y) for i = 1:n, j = 1:m - view(ret, Block(i,j)) .= f.(y[i,j], x.data[Block(i,j)]) + view(ret, Block(i, j)) .= f.(y[i, j], x.data[Block(i, j)]) end elseif size(y) == (1, 1) - f.(y[1,1], ret) + f.(y[1, 1], ret) elseif size(y) == (1, m) for j = 1:m - view(ret, :, Block(j)) .= f.(y[1,j], x.data[:, Block(j)]) + view(ret, :, Block(j)) .= f.(y[1, j], x.data[:, Block(j)]) end elseif size(y) == (n, 1) for i = 1:n @@ -62,13 +62,13 @@ function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) wh n, m = blocksize(y.data) if blocksize(y.data) == size(x) for i = 1:n, j = 1:m - view(ret, Block(i,j)) .= f.(x[i,j], y.data[Block(i,j)]) + view(ret, Block(i, j)) .= f.(x[i, j], y.data[Block(i, j)]) end elseif size(x) == (1, 1) - f.(x[1,1], ret) + f.(x[1, 1], ret) elseif size(x) == (1, m) for j = 1:m - view(ret, :, Block(j)) .= f.(x[1,j], y.data[:, Block(j)]) + view(ret, :, Block(j)) .= f.(x[1, j], y.data[:, Block(j)]) end elseif size(x) == (n, 1) for i = 1:n @@ -92,9 +92,10 @@ function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} ret elseif dims == 2 n, m, s, t = size(x) - ret = x.data[Block.(1:n), Block(1)] * 0 + # Indexing [:, Block(j)] does not preserve density + ret = BlockArray(spzeros(n * s, t), fill(s, n), [t]) for i = 1:n, j = 1:m - view(ret, Block(i,1)) .+= x.data[Block(i,j)] + view(ret, Block(i, 1)) .+= x.data[Block(i, j)] end BlockMatrixTensor(ret) end From 83cadf0a8f2a66c141f6c6595f062256b14f2af8 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Sat, 28 Sep 2024 00:49:34 +0100 Subject: [PATCH 07/10] rework with BlockedMatrix --- examples/convlayer.jl | 2 +- src/BlockMatrixTensor.jl | 54 ++++++++++++++++++++++++++++++---------- src/DualArray.jl | 17 ++++++------- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/examples/convlayer.jl b/examples/convlayer.jl index 2ca3636..90ec72c 100644 --- a/examples/convlayer.jl +++ b/examples/convlayer.jl @@ -41,7 +41,7 @@ function model_loss(x, y, w) l1 = vec(DualMatrix(convlayer(x, ker))) println("Conv layer complete") l2 = dense_layer(weights, biases, l1, softmax) - println("Dense Layer complete") + println(l2.jacobian) target = OneElement(1, y+1, 10) loss = cross_entropy(l2, target) println("Loss complete") diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 963584e..0e069ae 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -3,22 +3,36 @@ using BlockArrays, SparseArrays struct BlockMatrixTensor{T} <: AbstractArray{T, 4} - data::BlockMatrix{T} + data::BlockedMatrix{T} end +#Construct BlockMatrixTensor from 4-tensor. Useful for testing purposes. function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} n, m, s, t = size(x) - data = BlockArray(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) + data = BlockedMatrix(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) for i = 1:n, j = 1:m view(data, Block(i, j)) .= x[i, j, :, :] end BlockMatrixTensor(data) end +#Construct from matrix of blocks (similar to mortar()) +function BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} + n, m = size(x) + s, t = size(x[1, 1]) + mat = hcat((vcat(x[i,:]...) for i = 1:n)...) + blockmat = BlockedMatrix(mat, fill(s, n), fill(t, m)) + BlockMatrixTensor(blockmat) +end + size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...) -getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = sparse_getindex(x.data[Block(a, b)], c, d) -getindex(x::BlockMatrixTensor, a::UnitRange, b::UnitRange, ::Colon, ::Colon) = BlockMatrixTensor(x.data[Block.(a), Block.(b)]) +function getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) + s, t = size(x, 3), size(x, 4) + mat = parent(x.data) + sub = mat[((a - 1)* s + 1):(a * s),((b - 1)* t + 1):(b * t)] + sub[c, d] +end function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) print("BlockMatrixTensor containing: \n") @@ -82,18 +96,18 @@ function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) wh BlockMatrixTensor(ret) end -function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} +function sum(x::BlockMatrixTensor; dims = Colon()) if dims == 1:2 n, m = size(x, 3), size(x, 4) - ret = similar(x.data[Block(1, 1)]) * 0 + ret = similarzeros(x[1,1,:,:], n, m) for i = 1:size(x, 1), j = 1:size(x, 2) - ret += x.data[Block(i, j)] + ret += x[i,j,:,:] end ret elseif dims == 2 n, m, s, t = size(x) # Indexing [:, Block(j)] does not preserve density - ret = BlockArray(spzeros(n * s, t), fill(s, n), [t]) + ret = BlockedArray(similarzeros(x[1,1,:,:], n * s, t), fill(s, n), [t]) for i = 1:n, j = 1:m view(ret, Block(i, 1)) .+= x.data[Block(i, j)] end @@ -101,11 +115,25 @@ function sum(x::BlockMatrixTensor{T}; dims = Colon()) where {T} end end -function reshape(x::BlockMatrixTensor, dims::Vararg{Int64}) - #Blockwise reshape - if dims[3] == size(x, 3) && dims[4] == size(x, 4) - mortar(reshape(blocks(x.data), dims[1], dims[2])) +#Helper function for initialising a BlockedMatrix to be populated while preserving sparsity. +similarzeros(x::Matrix, dims...) = zeros(eltype(x), dims...) +similarzeros(x::AbstractMatrix, dims...) = spzeros(eltype(x), dims...) + +function reshape(x::BlockMatrixTensor, dims::Vararg{Union{Colon, Int}, 4}) + n, m, s, t = size(x) + #Block-wise reshape + if dims[3] isa Colon && dims[4] isa Colon + ret = BlockedMatrix(similarzeros(x[1, 1, :, :], s * dims[1], t * dims[2]), fill(s, dims[1]), fill(t, dims[2])) + for j = 1:m, i = 1:n + c = (j - 1) * n + (i - 1) + a, b = c % dims[1] + 1, div(c, dims[1]) + 1 + ret[Block(a,b)] = x.data[Block(i,j)] + end + BlockMatrixTensor(ret) end end -vec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) +#'Flatten': converts 4-Tensor to matrix. For BlockMatrixTensors calls parent(). +# Mimics the reshape achieved by this for general 4-Tensors +flatten(x::BlockMatrixTensor) = parent(x.data) +flatten(x::AbstractArray{T, 4}) where {T} = reshape(x, size(x, 1) * size(x, 3), size(x, 2) * size(x, 4)) diff --git a/src/DualArray.jl b/src/DualArray.jl index 9396b8a..ccec2c3 100644 --- a/src/DualArray.jl +++ b/src/DualArray.jl @@ -90,9 +90,8 @@ DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j) function DualMatrix(x::AbstractMatrix) val = [y.value for y in x] - n, m = size(x) - jac = mortar([x[i,j].partials for i = 1:n, j = 1:m]) - DualMatrix(val, BlockMatrixTensor(jac)) + jac_blocks = [y.partials for y in x] + DualMatrix(val, BlockMatrixTensor(jac_blocks)) end function getindex(x::DualVector, y::Int) @@ -149,7 +148,7 @@ end # perturbations are reshapes of each other. function *(x::DualMatrix, y::DualVector) val = x.value * y.value - jac = x.value * y.jacobian + vcat(blocks(sum(x.jacobian .* y.value'; dims = 2).data)...) + jac = x.value * y.jacobian + flatten(sum(x.jacobian .* y.value'; dims = 2)) DualVector(val, jac) end @@ -256,14 +255,12 @@ end # reshape does not preserve shape of perturbations (since jacobian can have # any number of columns) - -#Should a PR be made into reshape() _tomatrix(v::AbstractVector) = reshape(v, :, 1) -function reshape(x::DualVector,dims::Vararg{Int}) +function reshape(x::DualVector,dims::Vararg{Int, 2}) val = reshape(x.value, dims...) - jac_blockmat = [sparse_transpose(sparse_getindex(x.jacobian, i, :)) for i = 1:size(x.jacobian, 1)] - jac = BlockMatrixTensor(mortar(reshape(jac_blockmat, dims...))) + blocked_jac = BlockedMatrix(x.jacobian, fill(1, length(x)), [size(x.jacobian, 2)]) + jac = reshape(BlockMatrixTensor(blocked_jac), dims..., :, :) DualMatrix(val, jac) end @@ -272,7 +269,7 @@ _blockvec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) function vec(x::DualArray) val = vec(x.value) - jac = _blockvec(x.jacobian) + jac = parent(reshape(x.jacobian, length(val), 1, :, :).data) DualVector(val, jac) end From 9e707dd8809b913c7edf4d462dd909c100a65541 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Mon, 30 Sep 2024 11:05:34 +0100 Subject: [PATCH 08/10] unit tests, and back to BlockMatrix --- examples/convlayer.jl | 4 +- src/BlockMatrixTensor.jl | 124 ++++++++++----------------------------- src/DualArray.jl | 4 +- src/DualArrays.jl | 2 +- test/runtests.jl | 27 +++++++-- 5 files changed, 56 insertions(+), 105 deletions(-) diff --git a/examples/convlayer.jl b/examples/convlayer.jl index 90ec72c..390cc75 100644 --- a/examples/convlayer.jl +++ b/examples/convlayer.jl @@ -41,15 +41,13 @@ function model_loss(x, y, w) l1 = vec(DualMatrix(convlayer(x, ker))) println("Conv layer complete") l2 = dense_layer(weights, biases, l1, softmax) - println(l2.jacobian) + println("Dense Layer Complete") target = OneElement(1, y+1, 10) loss = cross_entropy(l2, target) println("Loss complete") loss.value, loss.partials end -#TODO: Implement remaining layers upon more support for DualMatrix - function train_model() p = rand(6779) epochs = 1000 diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 0e069ae..27aad6a 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -3,35 +3,34 @@ using BlockArrays, SparseArrays struct BlockMatrixTensor{T} <: AbstractArray{T, 4} - data::BlockedMatrix{T} + data::BlockMatrix{T} end #Construct BlockMatrixTensor from 4-tensor. Useful for testing purposes. function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T} n, m, s, t = size(x) - data = BlockedMatrix(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) + data = BlockMatrix(zeros(T, n * s, m * t), fill(s, n), fill(t, m)) for i = 1:n, j = 1:m view(data, Block(i, j)) .= x[i, j, :, :] end BlockMatrixTensor(data) end -#Construct from matrix of blocks (similar to mortar()) -function BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} - n, m = size(x) - s, t = size(x[1, 1]) - mat = hcat((vcat(x[i,:]...) for i = 1:n)...) - blockmat = BlockedMatrix(mat, fill(s, n), fill(t, m)) - BlockMatrixTensor(blockmat) -end +# Useful for creating a BlockMatrixTensor from blocks() +BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} = BlockMatrixTensor(mortar(x)) size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...) -function getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) - s, t = size(x, 3), size(x, 4) - mat = parent(x.data) - sub = mat[((a - 1)* s + 1):(a * s),((b - 1)* t + 1):(b * t)] - sub[c, d] +#Indexing entire blocks +getindex(x::BlockMatrixTensor, a::Int, b::Int, ::Colon, ::Colon) = blocks(x.data)[a, b] +getindex(x::BlockMatrixTensor, a::Int, b, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], 1, :)) +getindex(x::BlockMatrixTensor, a, b::Int, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], :, 1)) +getindex(x::BlockMatrixTensor, a, b, ::Colon, ::Colon) = BlockMatrixTensor(blocks(x.data)[a,b]) + + +# For populating a BlockMatrixTensor +function setindex!(A::BlockMatrixTensor, value, a::Int, b::Int, ::Colon, ::Colon) + blocks(A.data)[a, b] = value end function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) @@ -40,100 +39,37 @@ function show(io::IO,m::MIME"text/plain", x::BlockMatrixTensor) end show(io::IO, x::BlockMatrixTensor) = show(io, x.data) -# Blockwise broadcast for op in (:*, :/) @eval $op(x::BlockMatrixTensor, y::Number) = BlockMatrixTensor($op(x.data, y)) @eval $op(x::Number, y::BlockMatrixTensor) = BlockMatrixTensor($op(x, y.data)) end -function broadcasted(f::Function, x::BlockMatrixTensor{T}, y::AbstractMatrix) where {T} - ret = copy(x.data) - n, m = blocksize(x.data) - if blocksize(x.data) == size(y) - for i = 1:n, j = 1:m - view(ret, Block(i, j)) .= f.(y[i, j], x.data[Block(i, j)]) - end - elseif size(y) == (1, 1) - f.(y[1, 1], ret) - elseif size(y) == (1, m) - for j = 1:m - view(ret, :, Block(j)) .= f.(y[1, j], x.data[:, Block(j)]) - end - elseif size(y) == (n, 1) - for i = 1:n - view(ret, Block(i), :) .= f.(y[i, 1], x.data[Block(i), :]) - end - else - a = size(x) - b = size(y) - throw(DimensionMismatch("Invalid dimensions for broadcasting. Got $a, $b.")) - end - BlockMatrixTensor(ret) -end - -function broadcasted(f::Function, x::AbstractMatrix, y::BlockMatrixTensor{T}) where {T} - ret = copy(y.data) - n, m = blocksize(y.data) - if blocksize(y.data) == size(x) - for i = 1:n, j = 1:m - view(ret, Block(i, j)) .= f.(x[i, j], y.data[Block(i, j)]) - end - elseif size(x) == (1, 1) - f.(x[1, 1], ret) - elseif size(x) == (1, m) - for j = 1:m - view(ret, :, Block(j)) .= f.(x[1, j], y.data[:, Block(j)]) - end - elseif size(x) == (n, 1) - for i = 1:n - view(ret, Block(i), :) .= f.(x[i, 1], y.data[Block(i), :]) - end - else - a = size(y) - b = size(x) - throw(DimensionMismatch("Invalid dimensions for broadcasting. Got $a, $b.")) - end - BlockMatrixTensor(ret) -end +#Block-wise broadcast +broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractMatrix) = BlockMatrixTensor(f.(blocks(x.data), y)) +broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractVector) = BlockMatrixTensor(f.(x, reshape(y, :, 1))) +broadcasted(f::Function, x::AbstractVecOrMat, y::BlockMatrixTensor) = f.(y, x) function sum(x::BlockMatrixTensor; dims = Colon()) + # Blockwise sum if dims == 1:2 - n, m = size(x, 3), size(x, 4) - ret = similarzeros(x[1,1,:,:], n, m) - for i = 1:size(x, 1), j = 1:size(x, 2) - ret += x[i,j,:,:] - end - ret - elseif dims == 2 - n, m, s, t = size(x) - # Indexing [:, Block(j)] does not preserve density - ret = BlockedArray(similarzeros(x[1,1,:,:], n * s, t), fill(s, n), [t]) - for i = 1:n, j = 1:m - view(ret, Block(i, 1)) .+= x.data[Block(i, j)] - end - BlockMatrixTensor(ret) + sum(blocks(x.data)) + elseif dims == 1 || dims == 2 + BlockMatrixTensor(sum(blocks(x.data); dims)) + else + # For now, treat all other cases as if summing the 4-Tensor + sum(Array(x); dims = dims) end end -#Helper function for initialising a BlockedMatrix to be populated while preserving sparsity. -similarzeros(x::Matrix, dims...) = zeros(eltype(x), dims...) -similarzeros(x::AbstractMatrix, dims...) = spzeros(eltype(x), dims...) - function reshape(x::BlockMatrixTensor, dims::Vararg{Union{Colon, Int}, 4}) - n, m, s, t = size(x) - #Block-wise reshape + #Reshape block-wise + #TODO: Implement non-blockwise if dims[3] isa Colon && dims[4] isa Colon - ret = BlockedMatrix(similarzeros(x[1, 1, :, :], s * dims[1], t * dims[2]), fill(s, dims[1]), fill(t, dims[2])) - for j = 1:m, i = 1:n - c = (j - 1) * n + (i - 1) - a, b = c % dims[1] + 1, div(c, dims[1]) + 1 - ret[Block(a,b)] = x.data[Block(i,j)] - end - BlockMatrixTensor(ret) + BlockMatrixTensor(reshape(blocks(x.data), dims[1], dims[2])) end end -#'Flatten': converts 4-Tensor to matrix. For BlockMatrixTensors calls parent(). +#'Flatten': converts BlockMatrixTensor to Matrix by removing block structure # Mimics the reshape achieved by this for general 4-Tensors -flatten(x::BlockMatrixTensor) = parent(x.data) +flatten(x::BlockMatrixTensor) = hcat((vcat((x[i, j, :, :] for i = 1:size(x, 1))...) for j = 1:size(x, 2))...) flatten(x::AbstractArray{T, 4}) where {T} = reshape(x, size(x, 1) * size(x, 3), size(x, 2) * size(x, 4)) diff --git a/src/DualArray.jl b/src/DualArray.jl index ccec2c3..32faf3e 100644 --- a/src/DualArray.jl +++ b/src/DualArray.jl @@ -259,7 +259,7 @@ _tomatrix(v::AbstractVector) = reshape(v, :, 1) function reshape(x::DualVector,dims::Vararg{Int, 2}) val = reshape(x.value, dims...) - blocked_jac = BlockedMatrix(x.jacobian, fill(1, length(x)), [size(x.jacobian, 2)]) + blocked_jac = BlockMatrix(x.jacobian, fill(1, length(x)), [size(x.jacobian, 2)]) jac = reshape(BlockMatrixTensor(blocked_jac), dims..., :, :) DualMatrix(val, jac) end @@ -269,7 +269,7 @@ _blockvec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) function vec(x::DualArray) val = vec(x.value) - jac = parent(reshape(x.jacobian, length(val), 1, :, :).data) + jac = flatten(reshape(x.jacobian, length(val), 1, :, :)) DualVector(val, jac) end diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 830ba8f..bedc76f 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -5,7 +5,7 @@ export dropzeros import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum, vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero, - reshape + reshape, setindex! using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays, SparseArrays import ChainRules: frule, ZeroTangent diff --git a/test/runtests.jl b/test/runtests.jl index 8671332..2374b14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -91,7 +91,7 @@ using Lux: relu z = x[5:6] @test (y*z).value == [2,2] @test (y*z).jacobian == [1 0 1 0 1 1; 0 1 0 1 1 1] - for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2,6))) + for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2, 6))) x = DualVector(t[1], I(6)) y = reshape(x[1:4], 2, 2) z = x[5:6] @@ -105,7 +105,7 @@ using Lux: relu y = BlockMatrixTensor(ones(2, 2, 2, 2)) @test x isa BlockMatrixTensor @test y isa BlockMatrixTensor - @test x == y + @test x.data == y.data end @testset "broadcasting" begin y = BlockMatrixTensor(ones(2, 2, 2, 2)) @@ -115,10 +115,27 @@ using Lux: relu [2, 2] ) @test ([1 2;3 4] .* y).data == z - @test reshape([3], 1, 1) .* y == 3 * y - @test ([1 2] .* y).data == BlockArray([fill(1,4,2) fill(2,4,2)], [2, 2], [2, 2]) - @test ([1 2]' .* y).data == BlockArray([fill(1,2,4); fill(2,2,4)], [2, 2], [2, 2]) + @test (reshape([3], 1, 1) .* y).data == (3 * y).data + @test ([1 2] .* y).data == BlockArray([fill(1, 4, 2) fill(2, 4, 2)], [2, 2], [2, 2]) + @test ([1 2]' .* y).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2]) + @test (y .* [1, 2]).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2]) @test_throws DimensionMismatch [1 2;3 4;5 6] .* y end + @testset "sum" begin + r = rand(2, 2, 2, 2) + b = BlockMatrixTensor(r) + #TODO: Fix traversal + for d in (1:2, 1, 2) + @test sum(r; dims = 1:2)[1, 1, :, :] ≈ sum(b; dims = 1:2) + end + end + + @testset "reshape" begin + r= rand(3, 4, 2, 2) + b = BlockMatrixTensor(r) + for d in ((2, 6), (1, 12), (6, 2), (12, 1)) + @test BlockMatrixTensor(reshape(r, d..., 2, 2)).data == reshape(b, d..., :, :).data + end + end end end \ No newline at end of file From 02c65e63db2eadca4b6746a5772aac693e402ca2 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Mon, 30 Sep 2024 11:38:33 +0100 Subject: [PATCH 09/10] fix getindex to allow accessing 4-tensor by Array() --- src/BlockMatrixTensor.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 27aad6a..73623bd 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -22,7 +22,7 @@ BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} = BlockMatrixTensor( size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...) #Indexing entire blocks -getindex(x::BlockMatrixTensor, a::Int, b::Int, ::Colon, ::Colon) = blocks(x.data)[a, b] +getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = blocks(x.data)[a, b][c, d] getindex(x::BlockMatrixTensor, a::Int, b, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], 1, :)) getindex(x::BlockMatrixTensor, a, b::Int, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], :, 1)) getindex(x::BlockMatrixTensor, a, b, ::Colon, ::Colon) = BlockMatrixTensor(blocks(x.data)[a,b]) From ea54133c81f9e407fe14ea3483181f5a0a56c1a9 Mon Sep 17 00:00:00 2001 From: maximvassiliev Date: Mon, 30 Sep 2024 13:28:42 +0100 Subject: [PATCH 10/10] fix ambiguity --- src/BlockMatrixTensor.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl index 73623bd..3801337 100644 --- a/src/BlockMatrixTensor.jl +++ b/src/BlockMatrixTensor.jl @@ -23,6 +23,7 @@ size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...) #Indexing entire blocks getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = blocks(x.data)[a, b][c, d] +getindex(x::BlockMatrixTensor, a::Int, b::Int, ::Colon, ::Colon) = blocks(x.data)[a, b] getindex(x::BlockMatrixTensor, a::Int, b, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], 1, :)) getindex(x::BlockMatrixTensor, a, b::Int, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], :, 1)) getindex(x::BlockMatrixTensor, a, b, ::Colon, ::Colon) = BlockMatrixTensor(blocks(x.data)[a,b])