From 8469a9f60b20acf2f20f8b11287beec53d99d4a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Fri, 8 May 2026 14:03:30 +0200 Subject: [PATCH 1/9] Fix size inference for * --- src/sizes.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index 7fc3c90..7db11b0 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -309,8 +309,17 @@ function _infer_sizes( return !iszero(sizes.ndims[children_arr[i]]) end if !isnothing(first_matrix) - if sizes.ndims[children_arr[first(children_indices)]] == 0 - _add_size!(sizes, k, (1, 1)) + first_is_scalar = + sizes.ndims[children_arr[first(children_indices)]] == 0 + last_is_scalar = + sizes.ndims[children_arr[last(children_indices)]] == 0 + if first_is_scalar || last_is_scalar + # `scalar * matrix` (or `matrix * scalar`) is + # element-wise scaling, not matmul: result inherits + # the matrix's shape. + ix_mat = + children_arr[children_indices[first_matrix]] + _copy_size!(sizes, k, ix_mat) continue else _add_size!( From 06b437a32b89f2695980a38d64fd8c321cefda03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Fri, 8 May 2026 23:58:14 +0200 Subject: [PATCH 2/9] Add test --- test/JuMP.jl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/test/JuMP.jl b/test/JuMP.jl index 56219fc..590a015 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -345,6 +345,64 @@ function test_moi_function() return end +# Build the non-broadcasted `:*` size-inference cases the HEAD commit fixed. +# JuMP's surface syntax always lowers `c * W` to a broadcasted node, so to +# exercise the non-broadcasted code path we build the `MatrixExpr` directly +# (same pattern `_test_neural` uses for `wrap`). Before the fix, scalar-first +# returned `(1, 1)` and scalar-last produced an out-of-range `_size` read; the +# fix copies the matrix child's full shape in both orderings. +# +# The runtime forward/reverse pass for non-broadcasted scalar*matrix isn't +# wired up yet, so this test only asserts the inferred shape — that's exactly +# what the commit changed. +function test_size_inference_scalar_times_matrix() + mode = ArrayDiff.Mode() + ME = ArrayDiff.GenericMatrixExpr{VariableRef} + @testset "$(rows)x$(cols)" for (rows, cols) in [(2, 3), (3, 2), (2, 2)] + model = Model() + @variable( + model, + W[1:rows, 1:cols], + container = ArrayDiff.ArrayOfVariables, + ) + @testset "$(name)" for (name, expr) in [ + ("scalar * M", ME(:*, Any[2.5, W], (rows, cols), false)), + ("M * scalar", ME(:*, Any[W, 2.5], (rows, cols), false)), + ] + ad = ArrayDiff.model(mode) + MOI.Nonlinear.set_objective( + ad, + JuMP.moi_function(LinearAlgebra.norm(expr)), + ) + evaluator = MOI.Nonlinear.Evaluator( + ad, + mode, + JuMP.index.(JuMP.all_variables(model)), + ) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + # Tape: norm (k=1, scalar), * (k=2, matrix), then the scalar leaf + # and the matrix leaf in some order. The * node must inherit the + # (rows, cols) shape from the matrix child. + @test sizes.ndims[1] == 0 + @test sizes.ndims[2] == 2 + mul_off = sizes.size_offset[2] + @test sizes.size[mul_off+1] == rows + @test sizes.size[mul_off+2] == cols + # Storage for the * node should be `rows * cols`, not `1` (which + # is what the old `(1, 1)` stub produced). + @test sizes.storage_offset[3] - sizes.storage_offset[2] == + rows * cols + # Exactly one of the two children is the scalar leaf. + @test sort(sizes.ndims[3:4]) == [0, 2] + # Two ndims=2 nodes (the * and the matrix leaf) each contribute + # a (rows, cols) entry to the flat size vector. + @test sort(sizes.size) == sort([rows, cols, rows, cols]) + end + end + return +end + end # module TestJuMP.runtests() From 26e78712ea23b5b4a8d40a1451d65523022fbafb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Mon, 11 May 2026 08:55:25 +0200 Subject: [PATCH 3/9] Fix format --- src/sizes.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index 7db11b0..3ddf23f 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -317,8 +317,7 @@ function _infer_sizes( # `scalar * matrix` (or `matrix * scalar`) is # element-wise scaling, not matmul: result inherits # the matrix's shape. - ix_mat = - children_arr[children_indices[first_matrix]] + ix_mat = children_arr[children_indices[first_matrix]] _copy_size!(sizes, k, ix_mat) continue else From 5d77f7a37c7158c3424b9513458f29d70864e22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:26:47 +0200 Subject: [PATCH 4/9] Simplify --- src/JuMP/operators.jl | 3 +++ src/JuMP/variables.jl | 7 +++++- src/sizes.jl | 53 ++++++++++++++----------------------------- test/JuMP.jl | 41 ++++++++++++++++++++++++++++++++- 4 files changed, 66 insertions(+), 38 deletions(-) diff --git a/src/JuMP/operators.jl b/src/JuMP/operators.jl index 998d9d8..73c3d79 100644 --- a/src/JuMP/operators.jl +++ b/src/JuMP/operators.jl @@ -2,6 +2,9 @@ function _matmul(::Type{V}, A, B) where {V} return GenericMatrixExpr{V}(:*, Any[A, B], (size(A, 1), size(B, 2)), false) end +#function Base.:(*)(A::AbstractJuMPVector, B::) +# return _matmul(JuMP.variable_ref_type(A), A, B) +#end function Base.:(*)(A::AbstractJuMPMatrix, B::Matrix) return _matmul(JuMP.variable_ref_type(A), A, B) end diff --git a/src/JuMP/variables.jl b/src/JuMP/variables.jl index 1c5a59a..d2019b5 100644 --- a/src/JuMP/variables.jl +++ b/src/JuMP/variables.jl @@ -3,7 +3,7 @@ struct ArrayOfVariables{T,N} <: AbstractJuMPArray{JuMP.GenericVariableRef{T},N} model::JuMP.GenericModel{T} offset::Int64 - size::NTuple{N,Int64} + size::Dims{N} end const MatrixOfVariables{T} = ArrayOfVariables{T,2} @@ -15,6 +15,11 @@ function Base.getindex(A::ArrayOfVariables{T}, I...) where {T} return JuMP.GenericVariableRef{T}(A.model, MOI.VariableIndex(index)) end +function Base.reshape(array::ArrayOfVariables, size::Dims) + @assert prod(array.size) == prod(size) + return ArrayOfVariables(array.model, array.offset, size) +end + function JuMP.variable_ref_type(::Type{ArrayOfVariables{T,N}}) where {T,N} return JuMP.variable_ref_type(JuMP.GenericModel{T}) end diff --git a/src/sizes.jl b/src/sizes.jl index 3ddf23f..8d75477 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -304,42 +304,23 @@ function _infer_sizes( end _add_size!(sizes, k, tuple(shape...)) elseif op == :* - # TODO assert compatible sizes and all ndims should be 0 or 2 - first_matrix = findfirst(children_indices) do i - return !iszero(sizes.ndims[children_arr[i]]) - end - if !isnothing(first_matrix) - first_is_scalar = - sizes.ndims[children_arr[first(children_indices)]] == 0 - last_is_scalar = - sizes.ndims[children_arr[last(children_indices)]] == 0 - if first_is_scalar || last_is_scalar - # `scalar * matrix` (or `matrix * scalar`) is - # element-wise scaling, not matmul: result inherits - # the matrix's shape. - ix_mat = children_arr[children_indices[first_matrix]] - _copy_size!(sizes, k, ix_mat) - continue - else - _add_size!( - sizes, - k, - ( - _size( - sizes, - children_arr[first(children_indices)], - 1, - ), - _size( - sizes, - children_arr[last(children_indices)], - sizes.ndims[children_arr[last( - children_indices, - )],], - ), - ), - ) - continue + sizes.ndims[k] = 0 + sizes.size_offset[k] = length(sizes.size) + for child in children_indices + id = children_arr[child] + ndims = sizes.ndims[id] + if !iszero(ndims) + sz = _size(sizes, id) + if iszero(sizes.ndims[k]) + append!(sizes.size, sz) + sizes.ndims[k] = ndims + else + @assert sizes.ndims[k] > 1 + @assert sz[1] == sizes.size[end] + pop!(sizes.size) + append!(sizes.size, @view(sz[2:end])) + sizes.ndims[k] += ndims - 2 + end end end elseif op == :^ || op == :/ diff --git a/test/JuMP.jl b/test/JuMP.jl index 590a015..4c1b13b 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -8,7 +8,7 @@ import LinearAlgebra import MathOptInterface as MOI function runtests() - for name in names(@__MODULE__; all = true) + for name in names(@__MODULE__; all=true) if startswith("$(name)", "test_") @testset "$(name)" begin getfield(@__MODULE__, name)() @@ -403,6 +403,45 @@ function test_size_inference_scalar_times_matrix() return end +function test_size_vec_vect() + mode = ArrayDiff.Mode() + ME = ArrayDiff.GenericMatrixExpr{VariableRef} + @testset "$(rows)x$(cols)" for (rows, cols) in [(2, 3), (3, 2), (2, 2)] + model = Model() + @variable( + model, + a[1:rows], + container = ArrayDiff.ArrayOfVariables, + ) + b = ones(cols) + ad = ArrayDiff.model(mode) + # a * b' is redirected to broadcast(*, a, b') but we want to test product here + # this calls reshape(a, length(a), 1) + expr = a * Matrix(b') + MOI.Nonlinear.set_objective( + ad, + JuMP.moi_function(sum(expr)), + ) + evaluator = MOI.Nonlinear.Evaluator( + ad, + mode, + JuMP.index.(JuMP.all_variables(model)), + ) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + # Tape: norm (k=1, scalar), * (k=2, matrix), then the scalar leaf + # and the matrix leaf in some order. The * node must inherit the + # (rows, cols) shape from the matrix child. + @test sizes.ndims[1] == 0 + @test sizes.ndims[2] == 2 + mul_off = sizes.size_offset[2] + @test sizes.size[mul_off+1] == rows + @test sizes.size[mul_off+2] == cols + end + return +end + + end # module TestJuMP.runtests() From 5aa437aeed0af9e476a8387a9063fb622573c27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:26:58 +0200 Subject: [PATCH 5/9] Fix format --- test/JuMP.jl | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/test/JuMP.jl b/test/JuMP.jl index 4c1b13b..b00fc3c 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -8,7 +8,7 @@ import LinearAlgebra import MathOptInterface as MOI function runtests() - for name in names(@__MODULE__; all=true) + for name in names(@__MODULE__; all = true) if startswith("$(name)", "test_") @testset "$(name)" begin getfield(@__MODULE__, name)() @@ -408,20 +408,13 @@ function test_size_vec_vect() ME = ArrayDiff.GenericMatrixExpr{VariableRef} @testset "$(rows)x$(cols)" for (rows, cols) in [(2, 3), (3, 2), (2, 2)] model = Model() - @variable( - model, - a[1:rows], - container = ArrayDiff.ArrayOfVariables, - ) + @variable(model, a[1:rows], container = ArrayDiff.ArrayOfVariables,) b = ones(cols) ad = ArrayDiff.model(mode) # a * b' is redirected to broadcast(*, a, b') but we want to test product here # this calls reshape(a, length(a), 1) expr = a * Matrix(b') - MOI.Nonlinear.set_objective( - ad, - JuMP.moi_function(sum(expr)), - ) + MOI.Nonlinear.set_objective(ad, JuMP.moi_function(sum(expr))) evaluator = MOI.Nonlinear.Evaluator( ad, mode, @@ -441,7 +434,6 @@ function test_size_vec_vect() return end - end # module TestJuMP.runtests() From 52873773e98b96f35316d40b684075cc5fe219ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:27:49 +0200 Subject: [PATCH 6/9] Fix --- src/JuMP/operators.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/JuMP/operators.jl b/src/JuMP/operators.jl index 73c3d79..998d9d8 100644 --- a/src/JuMP/operators.jl +++ b/src/JuMP/operators.jl @@ -2,9 +2,6 @@ function _matmul(::Type{V}, A, B) where {V} return GenericMatrixExpr{V}(:*, Any[A, B], (size(A, 1), size(B, 2)), false) end -#function Base.:(*)(A::AbstractJuMPVector, B::) -# return _matmul(JuMP.variable_ref_type(A), A, B) -#end function Base.:(*)(A::AbstractJuMPMatrix, B::Matrix) return _matmul(JuMP.variable_ref_type(A), A, B) end From 77185ffc3f9348dce5d10fa497ca220a751ea301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:29:31 +0200 Subject: [PATCH 7/9] rm useless comment --- test/JuMP.jl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/JuMP.jl b/test/JuMP.jl index b00fc3c..9901621 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -348,13 +348,7 @@ end # Build the non-broadcasted `:*` size-inference cases the HEAD commit fixed. # JuMP's surface syntax always lowers `c * W` to a broadcasted node, so to # exercise the non-broadcasted code path we build the `MatrixExpr` directly -# (same pattern `_test_neural` uses for `wrap`). Before the fix, scalar-first -# returned `(1, 1)` and scalar-last produced an out-of-range `_size` read; the -# fix copies the matrix child's full shape in both orderings. -# -# The runtime forward/reverse pass for non-broadcasted scalar*matrix isn't -# wired up yet, so this test only asserts the inferred shape — that's exactly -# what the commit changed. +# (same pattern `_test_neural` uses for `wrap`). function test_size_inference_scalar_times_matrix() mode = ArrayDiff.Mode() ME = ArrayDiff.GenericMatrixExpr{VariableRef} From 0691c45ad237c88139cb3ddec97fef9551bc9424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:39:48 +0200 Subject: [PATCH 8/9] Fix --- src/sizes.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sizes.jl b/src/sizes.jl index 8d75477..07c5490 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -305,7 +305,6 @@ function _infer_sizes( _add_size!(sizes, k, tuple(shape...)) elseif op == :* sizes.ndims[k] = 0 - sizes.size_offset[k] = length(sizes.size) for child in children_indices id = children_arr[child] ndims = sizes.ndims[id] @@ -314,6 +313,7 @@ function _infer_sizes( if iszero(sizes.ndims[k]) append!(sizes.size, sz) sizes.ndims[k] = ndims + sizes.size_offset[k] = length(sizes.size) else @assert sizes.ndims[k] > 1 @assert sz[1] == sizes.size[end] From 1aa7e600db83a94d8f70f033bf45ab2829e37f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Thu, 14 May 2026 17:46:53 +0200 Subject: [PATCH 9/9] fix --- src/sizes.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sizes.jl b/src/sizes.jl index 07c5490..baf8f7e 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -311,9 +311,9 @@ function _infer_sizes( if !iszero(ndims) sz = _size(sizes, id) if iszero(sizes.ndims[k]) + sizes.size_offset[k] = length(sizes.size) append!(sizes.size, sz) sizes.ndims[k] = ndims - sizes.size_offset[k] = length(sizes.size) else @assert sizes.ndims[k] > 1 @assert sz[1] == sizes.size[end]