One type of question I am dealing with is that I will declare a vector of variables, let's call it x, and map it to another vector of variables y = Ax + b, where A and b are a constant matrix and vector. Let's assume A is sparse. I then organize y into a matrix and require the resulting matrix to be PSD (or Hermitian PSD). Typically the size of the matrix is around 400 by 400. It seems that when I use Mosek in JuMP, the memory blows up for this type of problem. Below is a minimal working example demonstrating the problem; calling this function results in Mosek error 1051, which I assume means out of memory:
using LinearAlgebra
using JuMP
using MosekTools
using Random
"""
    minimal_psd_test(; nvar = 4000, nmat = 400, entries_per_row = 8, seed = 1234)

Minimal reproducer for a Mosek out-of-memory failure (error 1051) in JuMP.

Builds `y = A*x + b` for a random sparse `A` of size `ntri × nvar` (with
`entries_per_row` sampled entries per row) and a constant vector `b`, packs
`y` into a symmetric `nmat × nmat` matrix `Y`, constrains `Y` to be PSD, and
minimizes `c'x` with Mosek.

Requires `SparseArrays` to be loaded at the top of the file (for `sparse` and
`findnz`); `LinearAlgebra` alone does not provide them.

Returns the JuMP `Model` after `optimize!` has been called.
"""
function minimal_psd_test(;
    nvar::Int = 4000,
    nmat::Int = 400,
    entries_per_row::Int = 8,
    seed::Int = 1234,
)
    Random.seed!(seed)
    # Number of entries in the upper triangle, including the diagonal.
    ntri = nmat * (nmat + 1) ÷ 2

    # Assemble COO triplets for the random sparse map A.
    nnz_total = ntri * entries_per_row
    rows = Int[]
    cols = Int[]
    vals = Float64[]
    sizehint!(rows, nnz_total)
    sizehint!(cols, nnz_total)
    sizehint!(vals, nnz_total)
    for r in 1:ntri
        chosen = rand(1:nvar, entries_per_row)
        for c in chosen
            push!(rows, r)
            push!(cols, c)
            push!(vals, randn())
        end
    end
    A = sparse(rows, cols, vals, ntri, nvar)  # duplicate (row, col) pairs are summed
    b = 0.01 .* randn(ntri)
    c = randn(nvar)

    # Bucket the nonzeros of A by row so each y[r] can be assembled with
    # add_to_expression! (avoids repeated AffExpr reallocation).
    # Renamed from `I, J, V` so we do not shadow `LinearAlgebra.I`.
    ai, aj, av = findnz(A)
    row_terms = [Vector{Tuple{Int,Float64}}() for _ in 1:ntri]
    for t in eachindex(ai)
        push!(row_terms[ai[t]], (aj[t], av[t]))
    end

    # IMPORTANT: not direct_model. A cached `Model` routes the PSD constraint
    # through the MOI caching/bridge layers, which is where the memory blow-up
    # is suspected; `direct_model(Mosek.Optimizer())` is the usual
    # workaround — TODO confirm against the JuMP performance docs.
    model = Model(Mosek.Optimizer)
    set_attribute(model, "MSK_DPAR_INTPNT_CO_TOL_PFEAS", 1e-10)
    set_attribute(model, "MSK_DPAR_INTPNT_CO_TOL_DFEAS", 1e-10)
    set_attribute(model, "MSK_DPAR_INTPNT_CO_TOL_REL_GAP", 1e-10)
    set_attribute(model, "MSK_IPAR_LOG", 10)
    set_attribute(model, "MSK_IPAR_LOG_INTPNT", 10)
    set_attribute(model, "MSK_IPAR_LOG_STORAGE", 1)
    set_attribute(model, "MSK_IPAR_LOG_CUT_SECOND_OPT", 1)
    # Skipping string names reduces memory use on large models.
    set_string_names_on_creation(model, false)

    @variable(model, x[1:nvar])

    # y[r] = b[r] + Σ_j A[r, j] * x[j], built in place per row.
    y = Vector{AffExpr}(undef, ntri)
    for r in 1:ntri
        expr = AffExpr(b[r])
        for (j, v) in row_terms[r]
            add_to_expression!(expr, v, x[j])
        end
        y[r] = expr
    end

    # Unpack y (column-major upper triangle) into a symmetric matrix Y.
    Y = Matrix{AffExpr}(undef, nmat, nmat)
    idx = 1
    for j in 1:nmat
        for i in 1:j
            Y[i, j] = y[idx]
            Y[j, i] = y[idx]
            idx += 1
        end
    end

    @constraint(model, Symmetric(Y) in PSDCone())
    @objective(model, Min, sum(c[i] * x[i] for i in 1:nvar))

    println("Finished building model.")
    println("About to optimize...")
    optimize!(model)
    println("termination_status = ", termination_status(model))
    println("primal_status = ", primal_status(model))
    println("objective_value = ", objective_value(model))
    return model
end
Here is the complete log from calling this function:
Finished building model.
About to optimize...
Problem
Name :
Objective sense : minimize
Type : CONIC (conic optimization problem)
Constraints : 0
Affine conic cons. : 1 (80200 rows)
Disjunctive cons. : 0
Cones : 0
Scalar variables : 4000
Matrix variables : 0
Integer variables : 0
Optimizer started.
Presolve started.
Linear dependency checker started.
Linear dependency checker terminated.
Eliminator started.
Freed constraints in eliminator : 0
Eliminator terminated.
Eliminator - tries : 1 time : 0.00
Lin. dep. - tries : 1 time : 0.02
Lin. dep. - primal attempts : 1 successes : 1
Lin. dep. - dual attempts : 0 successes : 0
Lin. dep. - primal deps. : 0 dual deps. : 0
Presolve terminated. Time: 0.09
Optimizer terminated. Time: 71.92
Mosek.MosekError(1051, "")
Stacktrace:
[1] optimize
@ C:\Users\10519\.julia\packages\Mosek\Gzljt\src\msk_functions.jl:2399 [inlined]
[2] optimize!(m::MosekTools.Optimizer)
@ MosekTools C:\Users\10519\.julia\packages\MosekTools\hgngi\src\MosekTools.jl:356
[3] optimize!
@ C:\Users\10519\.julia\packages\MathOptInterface\7aTit\src\Bridges\bridge_optimizer.jl:367 [inlined]
[4] optimize!
@ C:\Users\10519\.julia\packages\MathOptInterface\7aTit\src\MathOptInterface.jl:122 [inlined]
[5] optimize!(m::MathOptInterface.Utilities.CachingOptimizer{MathOptInterface.Bridges.LazyBridgeOptimizer{MosekTools.Optimizer}, MathOptInterface.Utilities.UniversalFallback{MathOptInterface.Utilities.Model{Float64}}})
@ MathOptInterface.Utilities C:\Users\10519\.julia\packages\MathOptInterface\7aTit\src\Utilities\cachingoptimizer.jl:370
[6] optimize!(model::Model; ignore_optimize_hook::Bool, _differentiation_backend::MathOptInterface.Nonlinear.SparseReverseMode, kwargs::@Kwargs{})
@ JuMP C:\Users\10519\.julia\packages\JuMP\0tD10\src\optimizer_interface.jl:610
[7] optimize!
@ C:\Users\10519\.julia\packages\JuMP\0tD10\src\optimizer_interface.jl:560 [inlined]
[8] minimal_psd_test(; nvar::Int64, nmat::Int64, entries_per_row::Int64, seed::Int64)
@ Main .\In[9]:79
[9] top-level scope
@ In[10]:1
I also constructed a function that works in MATLAB (using YALMIP and Mosek); calling this function in MATLAB finishes in about 20 seconds without any memory issue. I am rather confused. The type of problem I am working on seems common enough that I wouldn't expect too much difference between different modelling languages.
function [sol, xval, objval, timing] = minimal_psd_test_matlab(varargin)
% MINIMAL_PSD_TEST_MATLAB  YALMIP/Mosek analogue of the Julia reproducer.
%   Builds y = A*x + b for a random sparse A (ntri-by-nvar, with
%   'entries_per_row' sampled entries per row), packs y into a symmetric
%   nmat-by-nmat matrix Y, constrains Y to be PSD, and minimizes c'*x.
%
%   Name/value parameters (all optional):
%     'nvar'            number of scalar decision variables (default 4000)
%     'nmat'            side length of the PSD matrix        (default 400)
%     'entries_per_row' sampled entries per row of A         (default 8)
%     'seed'            RNG seed for reproducibility         (default 1234)
%
%   Returns:
%     sol    - YALMIP solution struct from optimize()
%     xval   - optimal x ([] if the solve did not succeed)
%     objval - objective value (NaN if the solve did not succeed)
%     timing - struct with build_time, optimize_time, total_time (seconds)
total_tic = tic;
fprintf('Start function\n');
fprintf('Parsing inputs...\n');
% Validate and unpack the name/value arguments.
p = inputParser;
addParameter(p, 'nvar', 4000, @(x) isnumeric(x) && isscalar(x) && x > 0);
addParameter(p, 'nmat', 400, @(x) isnumeric(x) && isscalar(x) && x > 0);
addParameter(p, 'entries_per_row', 8, @(x) isnumeric(x) && isscalar(x) && x > 0);
addParameter(p, 'seed', 1234, @(x) isnumeric(x) && isscalar(x));
parse(p, varargin{:});
nvar = p.Results.nvar;
nmat = p.Results.nmat;
entries_per_row = p.Results.entries_per_row;
seed = p.Results.seed;
rng(seed);
fprintf('Finished parsing inputs\n');
build_tic = tic;
% -------------------------------------------------
% numeric data
% -------------------------------------------------
fprintf('Building random sparse data...\n');
% Number of upper-triangle entries (including the diagonal).
ntri = nmat * (nmat + 1) / 2;
% Preallocated COO triplets; filled sequentially so the RNG call order
% (randi then randn, per row) matches the original construction exactly.
rows = zeros(ntri * entries_per_row, 1);
cols = zeros(ntri * entries_per_row, 1);
vals = zeros(ntri * entries_per_row, 1);
t = 0;
for r = 1:ntri
if mod(r, max(1, floor(ntri/10))) == 0
fprintf('  sparse data row %d / %d\n', r, ntri);
end
chosen = randi(nvar, entries_per_row, 1);
for jj = 1:entries_per_row
t = t + 1;
rows(t) = r;
cols(t) = chosen(jj);
vals(t) = randn();
end
end
% sparse() sums duplicate (row, col) pairs, same as Julia's sparse().
A = sparse(rows, cols, vals, ntri, nvar);
b = 0.01 * randn(ntri, 1);
c = randn(nvar, 1);
fprintf('Finished building random sparse data\n');
% -------------------------------------------------
% variables
% -------------------------------------------------
fprintf('Creating YALMIP variable x...\n');
x = sdpvar(nvar, 1);
fprintf('Finished creating x\n');
fprintf('Building affine vector y = A*x + b ...\n');
% One vectorized symbolic operation; no per-entry scalar assignments.
y = A*x + b;
fprintf('Finished building y\n');
% -------------------------------------------------
% build symmetric Y WITHOUT scalar symbolic assignment
% -------------------------------------------------
fprintf('Building index map for upper triangle...\n');
% (I(k), J(k)) is the position of y(k) in the upper triangle, filled
% column by column — the same ordering as the Julia loop over (j, i<=j).
I = zeros(ntri,1);
J = zeros(ntri,1);
idx = 1;
for j = 1:nmat
if mod(j, max(1, floor(nmat/10))) == 0
fprintf('  index column %d / %d\n', j, nmat);
end
for i = 1:j
I(idx) = i;
J(idx) = j;
idx = idx + 1;
end
end
fprintf('Finished building upper-triangle indices\n');
fprintf('Assembling symbolic sparse upper triangle...\n');
Yupper = sparse(I, J, y, nmat, nmat);
fprintf('Symmetrizing Y...\n');
% Mirror the strict upper triangle; the diagonal is already in Yupper.
Y = Yupper + triu(Yupper,1)';
fprintf('Finished building Y\n');
% -------------------------------------------------
% model
% -------------------------------------------------
fprintf('Building constraints and objective...\n');
% For a symmetric matrix expression, YALMIP interprets Y >= 0 as a PSD
% (semidefinite) constraint, not elementwise nonnegativity.
Constraints = [Y >= 0];
Objective = c' * x;
fprintf('Finished constraints and objective\n');
fprintf('Setting MOSEK options...\n');
ops = sdpsettings( ...
'solver', 'mosek', ...
'verbose', 1, ...
'debug', 1, ...
'savesolveroutput', 1, ...
'savesolverinput', 1);
% Mosek tolerances/logging chosen to match the Julia version.
ops.mosek.MSK_DPAR_INTPNT_CO_TOL_PFEAS = 1e-10;
ops.mosek.MSK_DPAR_INTPNT_CO_TOL_DFEAS = 1e-10;
ops.mosek.MSK_DPAR_INTPNT_CO_TOL_REL_GAP = 1e-10;
ops.mosek.MSK_IPAR_LOG = 10;
ops.mosek.MSK_IPAR_LOG_INTPNT = 10;
ops.mosek.MSK_IPAR_LOG_STORAGE = 1;
ops.mosek.MSK_IPAR_LOG_CUT_SECOND_OPT = 1;
fprintf('Finished setting MOSEK options\n');
build_time = toc(build_tic);
fprintf('Finished building model. Build time = %.6f s\n', build_time);
fprintf('About to optimize...\n');
optimize_tic = tic;
sol = optimize(Constraints, Objective, ops);
optimize_time = toc(optimize_tic);
fprintf('Finished optimize\n');
fprintf('problem = %d\n', sol.problem);
fprintf('info = %s\n', sol.info);
fprintf('Optimize time = %.6f s\n', optimize_time);
% sol.problem == 0 is YALMIP's "successfully solved" code.
if sol.problem == 0
fprintf('Extracting solution...\n');
xval = value(x);
objval = value(Objective);
fprintf('objective_value = %.16g\n', objval);
else
xval = [];
objval = NaN;
fprintf('objective_value = NaN\n');
end
total_time = toc(total_tic);
fprintf('Total time = %.6f s\n', total_time);
timing.build_time = build_time;
timing.optimize_time = optimize_time;
timing.total_time = total_time;
end
One type of question I am dealing with is that I will declare a vector of variables, let's call it x, and map it to another vector of variables y = Ax + b, where A and b are a constant matrix and vector. Let's assume A is sparse. I then organize y into a matrix and require the resulting matrix to be PSD (or Hermitian PSD). Typically the size of the matrix is around 400 by 400. It seems that when I use Mosek in JuMP, the memory blows up for this type of problem. Below is a minimal working example demonstrating the problem; calling this function results in Mosek error 1051, which I assume means out of memory:
Here is the complete log from calling this function:
I also constructed a function that works in MATLAB (using YALMIP and Mosek); calling this function in MATLAB finishes in about 20 seconds without any memory issue. I am rather confused. The type of problem I am working on seems common enough that I wouldn't expect too much difference between different modelling languages.