From 2d2a3dee7abf9a102204250e7a3d42dbae2434f1 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Mon, 16 Dec 2024 11:20:39 +0100 Subject: [PATCH 01/44] quick bench --- benchmark/benchmark.jl | 2 ++ benchmark/prof.jl | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl index 612aebb2..1a854369 100644 --- a/benchmark/benchmark.jl +++ b/benchmark/benchmark.jl @@ -66,6 +66,8 @@ function bench(;grid_size_list = [250, 500, 1000, 2500], verbose = 1, nlp_solver # load problems for benchmark if names_list == :default names_list = ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] + elseif names_list == :quick + names_list = ["beam", "double_integrator_mintf", "fuller", "jackson", "robbins", "simple_integrator", "vanderpol"] elseif names_list == :all names_list = ["algal_bacterial", "beam", "bioreactor_1day", "bioreactor_Ndays", "bolza_freetf", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "insurance", "jackson", "robbins", "simple_integrator", "swimmer", "vanderpol"] elseif names_list == :hard diff --git a/benchmark/prof.jl b/benchmark/prof.jl index 95c35c33..825c9392 100644 --- a/benchmark/prof.jl +++ b/benchmark/prof.jl @@ -18,10 +18,7 @@ function local_mayer(obj, x0, xf, v) return end -function init(;grid_size, disc_method) - prob = goddard_all() - #prob = goddard() - #prob = simple_integrator() +function init(prob ;grid_size, disc_method) ocp = prob[:ocp] docp = CTDirect.DOCP(ocp, grid_size=grid_size, time_grid=CTDirect.__time_grid(), disc_method=disc_method) xu = CTDirect.DOCP_initial_guess(docp) @@ -29,14 +26,14 @@ function init(;grid_size, disc_method) end -function test_unit(;test_get=false, test_obj=true, test_cons=true, test_trans=true, test_solve=true, warntype=false, jet=false, 
profile=false, grid_size=100, disc_method=:trapeze) +function test_unit(prob ;test_get=false, test_obj=true, test_cons=true, test_trans=true, test_solve=true, warntype=false, jet=false, profile=false, grid_size=100, disc_method=:trapeze) if profile Profile.Allocs.clear() end # define problem and variables - prob, docp, xu = init(grid_size=grid_size, disc_method=disc_method) + prob, docp, xu = init(prob; grid_size=grid_size, disc_method=disc_method) disc = docp.discretization #= OK, same as calling the functions with docp NLP_objective = (xu) -> CTDirect.DOCP_objective(xu, docp) From a07940d6868b81e3df5eb331a54c289337706cdd Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Mon, 16 Dec 2024 11:31:26 +0100 Subject: [PATCH 02/44] moved pics --- test/problems/{ => pics}/beam.png | Bin test/problems/{ => pics}/bioreactor_1day.png | Bin test/problems/{ => pics}/bioreactor_Ndays.png | Bin test/problems/{ => pics}/fuller.png | Bin test/problems/{ => pics}/insurance.png | Bin test/problems/{ => pics}/jackson.png | Bin test/problems/{ => pics}/p_box.png | Bin test/problems/{ => pics}/p_cons.png | Bin test/problems/{ => pics}/robbins.png | Bin test/problems/{ => pics}/swimmer_displacement.png | Bin test/problems/{ => pics}/vanderpol.png | Bin 11 files changed, 0 insertions(+), 0 deletions(-) rename test/problems/{ => pics}/beam.png (100%) rename test/problems/{ => pics}/bioreactor_1day.png (100%) rename test/problems/{ => pics}/bioreactor_Ndays.png (100%) rename test/problems/{ => pics}/fuller.png (100%) rename test/problems/{ => pics}/insurance.png (100%) rename test/problems/{ => pics}/jackson.png (100%) rename test/problems/{ => pics}/p_box.png (100%) rename test/problems/{ => pics}/p_cons.png (100%) rename test/problems/{ => pics}/robbins.png (100%) rename test/problems/{ => pics}/swimmer_displacement.png (100%) rename test/problems/{ => pics}/vanderpol.png (100%) diff --git a/test/problems/beam.png b/test/problems/pics/beam.png similarity index 100% rename from 
test/problems/beam.png rename to test/problems/pics/beam.png diff --git a/test/problems/bioreactor_1day.png b/test/problems/pics/bioreactor_1day.png similarity index 100% rename from test/problems/bioreactor_1day.png rename to test/problems/pics/bioreactor_1day.png diff --git a/test/problems/bioreactor_Ndays.png b/test/problems/pics/bioreactor_Ndays.png similarity index 100% rename from test/problems/bioreactor_Ndays.png rename to test/problems/pics/bioreactor_Ndays.png diff --git a/test/problems/fuller.png b/test/problems/pics/fuller.png similarity index 100% rename from test/problems/fuller.png rename to test/problems/pics/fuller.png diff --git a/test/problems/insurance.png b/test/problems/pics/insurance.png similarity index 100% rename from test/problems/insurance.png rename to test/problems/pics/insurance.png diff --git a/test/problems/jackson.png b/test/problems/pics/jackson.png similarity index 100% rename from test/problems/jackson.png rename to test/problems/pics/jackson.png diff --git a/test/problems/p_box.png b/test/problems/pics/p_box.png similarity index 100% rename from test/problems/p_box.png rename to test/problems/pics/p_box.png diff --git a/test/problems/p_cons.png b/test/problems/pics/p_cons.png similarity index 100% rename from test/problems/p_cons.png rename to test/problems/pics/p_cons.png diff --git a/test/problems/robbins.png b/test/problems/pics/robbins.png similarity index 100% rename from test/problems/robbins.png rename to test/problems/pics/robbins.png diff --git a/test/problems/swimmer_displacement.png b/test/problems/pics/swimmer_displacement.png similarity index 100% rename from test/problems/swimmer_displacement.png rename to test/problems/pics/swimmer_displacement.png diff --git a/test/problems/vanderpol.png b/test/problems/pics/vanderpol.png similarity index 100% rename from test/problems/vanderpol.png rename to test/problems/pics/vanderpol.png From 2f4493b130143846d68b6c844ed13b8630a10478 Mon Sep 17 00:00:00 2001 From: Pierre 
Martinon Date: Wed, 18 Dec 2024 15:16:57 +0100 Subject: [PATCH 03/44] try to give jacobian sparse pattern --- Project.toml | 2 ++ benchmark/prof.jl | 2 +- src/CTDirect.jl | 1 + src/disc/midpoint.jl | 4 +--- src/disc/trapeze.jl | 14 ++++++++------ src/docp.jl | 37 ++++++++++++++++++++++++++++++++++++- src/solve.jl | 12 ++++++++++-- 7 files changed, 59 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index bbfffc36..71463b93 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" @@ -27,4 +28,5 @@ HSL = "0.4" MadNLP = "0.8" NLPModels = "0.21" NLPModelsIpopt = "0.10" +SparseArrays = "1.11.0" julia = "1.10" diff --git a/benchmark/prof.jl b/benchmark/prof.jl index 825c9392..ff4c9c7d 100644 --- a/benchmark/prof.jl +++ b/benchmark/prof.jl @@ -26,7 +26,7 @@ function init(prob ;grid_size, disc_method) end -function test_unit(prob ;test_get=false, test_obj=true, test_cons=true, test_trans=true, test_solve=true, warntype=false, jet=false, profile=false, grid_size=100, disc_method=:trapeze) +function test_unit(prob ;test_get=false, test_obj=true, test_cons=true, test_trans=true, test_solve=true, warntype=false, jet=false, profile=false, grid_size=250, disc_method=:trapeze) if profile Profile.Allocs.clear() diff --git a/src/CTDirect.jl b/src/CTDirect.jl index e7b70d9b..e3446c23 100644 --- a/src/CTDirect.jl +++ b/src/CTDirect.jl @@ -5,6 +5,7 @@ using DocStringExtensions using ADNLPModels # docp model with AD using LinearAlgebra # norm and misc using HSL +using SparseArrays import CTBase: OptimalControlSolution, CTBase # extended diff --git a/src/disc/midpoint.jl b/src/disc/midpoint.jl index 1dc8faee..c3c2595d 100644 --- 
a/src/disc/midpoint.jl +++ b/src/disc/midpoint.jl @@ -19,13 +19,11 @@ struct Midpoint <: Discretization # aux variables step_variables_block = dim_NLP_x * 2 + dim_NLP_u state_stage_eqs_block = dim_NLP_x * 2 + step_pathcons_block = dim_u_cons + dim_x_cons + dim_xu_cons # NLP variables size ([state, control]_1..N, final state, variable) dim_NLP_variables = dim_NLP_steps * step_variables_block + dim_NLP_x + dim_NLP_v - # Path constraints (control, state, mixed) - step_pathcons_block = dim_u_cons + dim_x_cons + dim_xu_cons - # NLP constraints size ([dynamics, path]_1..N, final path, boundary, variable) dim_NLP_constraints = dim_NLP_steps * (state_stage_eqs_block + step_pathcons_block) + step_pathcons_block + dim_boundary_cons + dim_v_cons diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 7e39c106..23d3602b 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -7,23 +7,25 @@ Internal layout for NLP variables: struct Trapeze <: Discretization info::String - _step_pathcons_block::Int + _step_variables_block::Int _state_stage_eqs_block::Int + _step_pathcons_block::Int # constructor function Trapeze(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons) - # NLP variables size ([state, control]_1..N+1, variable) - dim_NLP_variables = (dim_NLP_steps + 1) * (dim_NLP_x + dim_NLP_u) + dim_NLP_v - - # Path constraints (control, state, mixed) + # aux variables + step_variables_block = dim_NLP_x + dim_NLP_u state_stage_eqs_block = dim_NLP_x step_pathcons_block = dim_u_cons + dim_x_cons + dim_xu_cons + # NLP variables size ([state, control]_1..N+1, variable) + dim_NLP_variables = (dim_NLP_steps + 1) * step_variables_block + dim_NLP_v + # NLP constraints size ([dynamics, stage, path]_1..N, final path, boundary, variable) dim_NLP_constraints = dim_NLP_steps * (state_stage_eqs_block + step_pathcons_block) + step_pathcons_block + dim_boundary_cons + dim_v_cons - disc = new("Implicit Trapeze aka 
Crank-Nicolson, 2nd order, A-stable", step_pathcons_block, state_stage_eqs_block) + disc = new("Implicit Trapeze aka Crank-Nicolson, 2nd order, A-stable", step_variables_block, state_stage_eqs_block, step_pathcons_block) return disc, dim_NLP_variables, dim_NLP_constraints end diff --git a/src/docp.jl b/src/docp.jl index 8c96ef43..f74d283e 100644 --- a/src/docp.jl +++ b/src/docp.jl @@ -376,7 +376,7 @@ function DOCP_constraints!(c, xu, docp::DOCP) setStepConstraints!(docp, c, xu, v, time_grid, i, work) end - # point constraints (NB. view on c block could be used with offset here) + # point constraints setPointConstraints!(docp, c, xu, v) # NB. the function *needs* to return c for AD... @@ -504,6 +504,41 @@ function setPointBounds!(docp::DOCP, index::Int, lb, ub) return index end +function DOCP_Jac_pattern(docp::DOCP) + + J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) + + # main loop over steps + for i = 1:docp.dim_NLP_steps + c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block + var_offset = (i-1)*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block + docp.dim_NLP_x + # dependence wrt step variable block x_i, u_i, x_i+1 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 + # dependence wrt v + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + end + + # final path constraints (xf, uf, v) + c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._step_pathcons_block + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 + 
J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + + # point constraints (x0, xf, v) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block + c_block = docp.dim_boundary_cons + docp.dim_v_cons + J[c_offset+1:c_offset+c_block, 1:docp.dim_NLP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_NLP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + + return SparseMatrixCSC{Bool, Int}(J) +end + + """ $(TYPEDSIGNATURES) diff --git a/src/solve.jl b/src/solve.jl index 798e0271..81bb1360 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -45,16 +45,24 @@ function direct_transcription( ), ) + # objective and constraints functions + f = x -> DOCP_objective(x, docp) + c! = (c, x) -> DOCP_constraints!(c, x, docp) + + # sparsity pattern + J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jac_pattern(docp)) + # call NLP problem constructor nlp = ADNLPModel!( - x -> DOCP_objective(x, docp), + f, x0, docp.var_l, docp.var_u, - (c, x) -> DOCP_constraints!(c, x, docp), + c!, docp.con_l, docp.con_u, backend = :optimized, + jacobian_backend = J_backend ) return docp, nlp From 75293caeced0c1897067279a5468ba0c3d175e46 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 18 Dec 2024 15:35:45 +0100 Subject: [PATCH 04/44] probable bug in jac pattern --- src/docp.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/docp.jl b/src/docp.jl index f74d283e..e93576d3 100644 --- a/src/docp.jl +++ b/src/docp.jl @@ -508,6 +508,8 @@ function DOCP_Jac_pattern(docp::DOCP) J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) + #+++ split state eq and path cond since pathcond only depends on x_ocp and u ! 
+ # main loop over steps for i = 1:docp.dim_NLP_steps c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) @@ -531,9 +533,13 @@ function DOCP_Jac_pattern(docp::DOCP) # point constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - J[c_offset+1:c_offset+c_block, 1:docp.dim_NLP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_NLP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + # null initial condition for lagrangian cost state + if docp.is_lagrange + J[docp.dim_NLP_constraints, docp.dim_NLP_x] = 1.0 + end return SparseMatrixCSC{Bool, Int}(J) end From de6dbc6e26af36464a9d96acd38b28192dd9bd57 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 18 Dec 2024 15:55:00 +0100 Subject: [PATCH 05/44] solve ok for simple integrator but given pattern is a bit worse than computed one due to lagrange cost state --- src/docp.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/docp.jl b/src/docp.jl index e93576d3..79c4043e 100644 --- a/src/docp.jl +++ b/src/docp.jl @@ -509,14 +509,16 @@ function DOCP_Jac_pattern(docp::DOCP) J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) #+++ split state eq and path cond since pathcond only depends on x_ocp and u ! + #+++ also split state eq between ocp x and lagrange cost ? + #+++ NB we still have the computations in setWorkArray, maybe try on midpoint ? 
# main loop over steps for i = 1:docp.dim_NLP_steps c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block var_offset = (i-1)*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block + docp.dim_NLP_x - # dependence wrt step variable block x_i, u_i, x_i+1 + var_block = docp.discretization._step_variables_block * 2 + # dependence state eq wrt step variable block x_i, u_i, x_i+1, u_i+1 (trapeze) J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 # dependence wrt v J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 From 7ad48c8b6410cee1d793e259c74ce25def86b429 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 18 Dec 2024 16:29:31 +0100 Subject: [PATCH 06/44] bench ok --- src/disc/trapeze.jl | 43 +++++++++++++++++++++++++++++++++++++++++++ src/docp.jl | 41 ++--------------------------------------- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 23d3602b..4e366cdd 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -171,3 +171,46 @@ function setStepConstraints!(docp::DOCP{Trapeze}, c, xu, v, time_grid, i, work) setPathConstraints!(docp, c, ti, xi, ui, v, offset) end + + +function DOCP_Jac_pattern(docp::DOCP{Trapeze}) + + J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) + + #+++ split state eq and path cond since pathcond only depends on x_ocp and u ! + #+++ also split state eq between ocp x and lagrange cost ? + #+++ NB we still have the computations in setWorkArray, maybe try on midpoint ? 
+ + # main loop over steps + for i = 1:docp.dim_NLP_steps + c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block + var_offset = (i-1)*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block * 2 + # dependence state eq wrt step variable block x_i, u_i, x_i+1, u_i+1 (trapeze) + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 + # dependence wrt v + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + end + + # final path constraints (xf, uf, v) + c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._step_pathcons_block + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + + # point constraints (x0, xf, v) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block + c_block = docp.dim_boundary_cons + docp.dim_v_cons + J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + # null initial condition for lagrangian cost state + if docp.is_lagrange + J[docp.dim_NLP_constraints, docp.dim_NLP_x] = 1.0 + end + + return SparseMatrixCSC{Bool, Int}(J) +end \ No newline at end of file diff --git a/src/docp.jl b/src/docp.jl index 79c4043e..0691177b 100644 --- a/src/docp.jl +++ 
b/src/docp.jl @@ -504,46 +504,9 @@ function setPointBounds!(docp::DOCP, index::Int, lb, ub) return index end -function DOCP_Jac_pattern(docp::DOCP) - - J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) - - #+++ split state eq and path cond since pathcond only depends on x_ocp and u ! - #+++ also split state eq between ocp x and lagrange cost ? - #+++ NB we still have the computations in setWorkArray, maybe try on midpoint ? - - # main loop over steps - for i = 1:docp.dim_NLP_steps - c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) - c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block - var_offset = (i-1)*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block * 2 - # dependence state eq wrt step variable block x_i, u_i, x_i+1, u_i+1 (trapeze) - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 - # dependence wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 - end - - # final path constraints (xf, uf, v) - c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) - c_block = docp.discretization._step_pathcons_block - var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 - - # point constraints (x0, xf, v) - c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block - c_block = docp.dim_boundary_cons + docp.dim_v_cons - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, 
var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 - # null initial condition for lagrangian cost state - if docp.is_lagrange - J[docp.dim_NLP_constraints, docp.dim_NLP_x] = 1.0 - end - return SparseMatrixCSC{Bool, Int}(J) +function DOCP_Jac_pattern(docp::DOCP) + error("DOCP_Jac_pattern not implemented for discretization ", typeof(docp.discretization)) end From d7e25a1abc6b775190a0b18eed6f8725058945c5 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 19 Dec 2024 17:00:24 +0100 Subject: [PATCH 07/44] improved jacobian pattern a bit (lagrange state) --- src/disc/trapeze.jl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 4e366cdd..ad1a6bbf 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -187,9 +187,18 @@ function DOCP_Jac_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 - # dependence state eq wrt step variable block x_i, u_i, x_i+1, u_i+1 (trapeze) - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 - # dependence wrt v + # state eq wrt x_i, u_i, x_i+1, u_i+1 + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= 1.0 + # lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + if docp.is_lagrange + J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= 1.0 + end + # path constraint wrt x_i, u_i + J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, 
var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 + J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= 1.0 + # whole block wrt v J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 end @@ -198,7 +207,8 @@ function DOCP_Jac_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+var_block] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= 1.0 J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 # point constraints (x0, xf, v) From bd177d108dbc72fb361b47eec72c4b54d47c33c6 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 19 Dec 2024 18:25:46 +0100 Subject: [PATCH 08/44] try hessian --- src/disc/trapeze.jl | 15 ++++++++++----- src/solve.jl | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index ad1a6bbf..bd4ce6eb 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -173,14 +173,10 @@ function setStepConstraints!(docp::DOCP{Trapeze}, c, xu, v, time_grid, i, work) end -function DOCP_Jac_pattern(docp::DOCP{Trapeze}) +function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) - #+++ split state eq and path cond since pathcond only depends on x_ocp and u ! - #+++ also split state eq between ocp x and lagrange cost ? - #+++ NB we still have the computations in setWorkArray, maybe try on midpoint ? 
- # main loop over steps for i = 1:docp.dim_NLP_steps c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) @@ -223,4 +219,13 @@ function DOCP_Jac_pattern(docp::DOCP{Trapeze}) end return SparseMatrixCSC{Bool, Int}(J) +end + + +function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) + + # symmetry ? + H = zeros(docp.dim_NLP_variables, docp.dim_NLP_variables) + return SparseMatrixCSC{Bool, Int}(H) + end \ No newline at end of file diff --git a/src/solve.jl b/src/solve.jl index 81bb1360..b915f9a4 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -50,7 +50,7 @@ function direct_transcription( c! = (c, x) -> DOCP_constraints!(c, x, docp) # sparsity pattern - J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jac_pattern(docp)) + J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) # call NLP problem constructor nlp = ADNLPModel!( @@ -61,7 +61,7 @@ function direct_transcription( c!, docp.con_l, docp.con_u, - backend = :optimized, + backend = :optimized, # much slower without jacobian_backend = J_backend ) From 94a5a23413bd531ad4a2480d8818e06da408cdeb Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 20 Dec 2024 13:51:38 +0100 Subject: [PATCH 09/44] disable hessian for tests --- src/solve.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index b915f9a4..f85294d7 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -61,8 +61,9 @@ function direct_transcription( c!, docp.con_l, docp.con_u, - backend = :optimized, # much slower without - jacobian_backend = J_backend + #backend = :optimized, # much slower without + jacobian_backend = J_backend, + hessian_backend = ADNLPModels.EmptyADbackend # to focus on jacobian tests ) return docp, nlp From 0b6d54c375dcee70ae355561aea43b94dcb7f083 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 20 Dec 2024 
14:08:20 +0100 Subject: [PATCH 10/44] use bool matrix for sparsity pattern generation --- src/disc/trapeze.jl | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index bd4ce6eb..5c998c3d 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -175,7 +175,9 @@ end function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) - J = zeros(docp.dim_NLP_constraints, docp.dim_NLP_variables) + #+++ work directly with sparse matrix ? + J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + #J = spzeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) # main loop over steps for i = 1:docp.dim_NLP_steps @@ -184,18 +186,18 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 # state eq wrt x_i, u_i, x_i+1, u_i+1 - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= 1.0 + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true # lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange - J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= 1.0 + J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= true end # path constraint wrt x_i, u_i - J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] 
.= 1.0 - J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= 1.0 + J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true # whole block wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true end # final path constraints (xf, uf, v) @@ -203,22 +205,22 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= 1.0 - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # point constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= 1.0 - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= 1.0 + J[c_offset+1:c_offset+c_block, 
1:docp.dim_OCP_x] .= true + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # null initial condition for lagrangian cost state if docp.is_lagrange - J[docp.dim_NLP_constraints, docp.dim_NLP_x] = 1.0 + J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true end - return SparseMatrixCSC{Bool, Int}(J) + return sparse(J) end From 700d3c2cbf4d5038031c1fe546fa146591f98d2c Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 20 Dec 2024 15:21:00 +0100 Subject: [PATCH 11/44] try to build sparse jac from indexes and values sets instead of dense matrix ? --- src/disc/trapeze.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 5c998c3d..99a1d69a 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -175,9 +175,13 @@ end function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) - #+++ work directly with sparse matrix ? J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) - #J = spzeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + #+++ build Is, Js, Vs sets then call sparse constructor ? 
+ #nnzj = + #Is = Vector{Int}(undef, nnzj) + #Js = Vector{Int}(undef, nnzj) + #Vs = ones(Bool, nnzj) + # use offset to fill Is, Js, Vs # main loop over steps for i = 1:docp.dim_NLP_steps From ce225954ddc283e13c2e459a0a5ff83368ed48aa Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 30 Jan 2025 18:15:30 +0100 Subject: [PATCH 12/44] hessian pattern (check cross terms) --- src/disc/trapeze.jl | 64 +++++++++++++++++++++++++++++++++++++-------- src/solve.jl | 3 ++- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 5d51a5aa..12e86b00 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -185,28 +185,28 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) #Vs = ones(Bool, nnzj) # use offset to fill Is, Js, Vs - # main loop over steps + # 1. main loop over steps for i = 1:docp.dim_NLP_steps c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 - # state eq wrt x_i, u_i, x_i+1, u_i+1 + # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true - # lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= true end - # path constraint wrt x_i, u_i + # 1.3 path constraint wrt x_i, u_i J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] 
.= true J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # whole block wrt v + # 1.4 whole block wrt v J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true end - # final path constraints (xf, uf, v) + # 2. final path constraints (xf, uf, v) c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block @@ -215,13 +215,13 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true - # point constraints (x0, xf, v) + # 3. boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true - # null initial condition for lagrangian cost state + # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true end @@ -232,8 +232,50 @@ end function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) - # symmetry ? - H = zeros(docp.dim_NLP_variables, docp.dim_NLP_variables) - return SparseMatrixCSC{Bool, Int}(H) + # NB. need to provide full pattern for coloring, not just upper/lower part + H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + + # 0. 
objective + # 0.1 mayer cost (x0, xf, v) -> same as boundary conditions (x0, xf, v) ! + # 0.2 lagrange case (lf) + lf_index = docp.dim_NLP_steps * (docp.dim_NLP_x + docp.dim_NLP_u) + docp.dim_NLP_x + H[lf_index, lf_index] = true + + # 1. main loop over steps + for i = 1:docp.dim_NLP_steps + var_offset = (i-1)*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block * 2 + # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 + H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true + H[var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true + # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + if docp.is_lagrange + H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true + end + # 1.3 path constraint wrt x_i, u_i + H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + # 1.4 whole block wrt v + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + end + + # 2. 
final path constraints (xf, uf, v) + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block + H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + + # 3. boundary constraints (x0, xf, v) + H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true + H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # 3.1 null initial condition for lagrangian cost state l0 + if docp.is_lagrange + H[docp.dim_NLP_x, docp.dim_NLP_x] = true + end + + return sparse(H) end \ No newline at end of file diff --git a/src/solve.jl b/src/solve.jl index e8336021..a62bd95c 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -47,6 +47,7 @@ function direct_transcription( # sparsity pattern J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) + H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) # call NLP problem constructor if adnlp_backend == :manual @@ -54,7 +55,7 @@ function direct_transcription( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, jacobian_backend = J_backend, - hessian_backend = ADNLPModels.EmptyADbackend + hessian_backend = H_backend ) elseif adnlp_backend == :no_hessian nlp = ADNLPModel!( From 318df04cb64edfc199d9481dc91afde60854cf71 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 30 Jan 2025 19:07:53 +0100 Subject: [PATCH 13/44] goddard_all ok 
with manual sparse pattern --- Project.toml | 2 +- src/disc/trapeze.jl | 46 +++++++++++++++++++++++---------------------- src/solve.jl | 12 +++++++----- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/Project.toml b/Project.toml index 36dca198..864ef6d1 100644 --- a/Project.toml +++ b/Project.toml @@ -28,5 +28,5 @@ HSL = "0.5" MadNLP = "0.8" NLPModels = "0.21" NLPModelsIpopt = "0.10" -SparseArrays = "1.11.0" +SparseArrays = "1.10.0" julia = "1.10" diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 12e86b00..4748b4ca 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -191,7 +191,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 - # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 + # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true @@ -236,41 +236,43 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) # 0. objective - # 0.1 mayer cost (x0, xf, v) -> same as boundary conditions (x0, xf, v) ! + # 0.1 mayer cost (x0, xf, v) + # -> see 3. term for boundary conditions ! # 0.2 lagrange case (lf) - lf_index = docp.dim_NLP_steps * (docp.dim_NLP_x + docp.dim_NLP_u) + docp.dim_NLP_x - H[lf_index, lf_index] = true + if docp.is_lagrange + lf_index = docp.dim_NLP_steps * (docp.dim_NLP_x + docp.dim_NLP_u) + docp.dim_NLP_x + H[lf_index, lf_index] = true + end # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 - # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 - H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true - H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true - H[var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true + # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 - if docp.is_lagrange - H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true - end + # -> combine as a single block for all step variables + H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true # 1.3 path constraint wrt x_i, u_i - H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true - H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # 1.4 whole block wrt v + # -> included in previous term ! + # 1.4 whole block wrt v (including cross derivatives) H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true + H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true end # 2. 
final path constraints (xf, uf, v) - var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block - H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true - H[var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # -> included in last iteration from loop ! # 3. boundary constraints (x0, xf, v) - H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true - H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # -> (xf, v) part included in last iteration from loop ! + if docp.is_mayer || docp.dim_boundary_cons > 0 + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # x0 / x0 + H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf + H[1:docp.dim_OCP_x, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # x0 / v + H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 + end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange H[docp.dim_NLP_x, docp.dim_NLP_x] = true diff --git a/src/solve.jl b/src/solve.jl index a62bd95c..95ce401b 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -26,7 +26,8 @@ function direct_transcription( time_grid = __time_grid(), disc_method = __disc_method(), constant_control = false, - adnlp_backend = __adnlp_backend() + adnlp_backend = __adnlp_backend(), + show_time = false ) # build DOCP 
@@ -49,26 +50,27 @@ function direct_transcription( J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) - # call NLP problem constructor + # call NLP problem constructor (+++ use show_time=true for info ?) if adnlp_backend == :manual nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, jacobian_backend = J_backend, - hessian_backend = H_backend + hessian_backend = H_backend, + show_time = show_time ) elseif adnlp_backend == :no_hessian nlp = ADNLPModel!( x -> DOCP_objective(x, docp), x0, docp.var_l, docp.var_u, (c, x) -> DOCP_constraints!(c, x, docp), docp.con_l, docp.con_u, - backend = __adnlp_backend() + backend = __adnlp_backend(), show_time = show_time ) set_adbackend!(nlp, hessian_backend = ADNLPModels.EmptyADbackend, hvprod_backend = ADNLPModels.EmptyADbackend) # directionalsecondderivative) else nlp = ADNLPModel!( x -> DOCP_objective(x, docp), x0, docp.var_l, docp.var_u, (c, x) -> DOCP_constraints!(c, x, docp), docp.con_l, docp.con_u, - backend = adnlp_backend + backend = adnlp_backend, show_time = show_time ) end From f6c17ff815a514f391017aa79e065f859884fa6a Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 11:45:11 +0100 Subject: [PATCH 14/44] bench for manual sparse patterns --- src/solve.jl | 5 +++ test/benchmark.jl | 9 ++--- test/docs/AD_backend.md | 80 ++++++++++++++++++++++++++++++++--------- 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index 95ce401b..09b46be7 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -51,10 +51,15 @@ function direct_transcription( H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) # call NLP problem constructor (+++ use show_time=true for info ?) 
+ # +++ check other backends (...prod) and set them as in :optimized + # +++ try to disable unused backends such as hvprod ?? bench ! if adnlp_backend == :manual nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, + gradient_backend = ADNLPModels.ReverseDiffADGradient, + hvprod_backend = ADNLPModels.ReverseDiffADHvprod, + jtprod_backend = ADNLPModels.ReverseDiffADJtprod, jacobian_backend = J_backend, hessian_backend = H_backend, show_time = show_time diff --git a/test/benchmark.jl b/test/benchmark.jl index e59c7b4c..24716450 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -24,7 +24,7 @@ end -function bench_list(problem_list; verbose=2, nlp_solver, linear_solver, kwargs...) +function bench_list(problem_list; verbose=1, nlp_solver, linear_solver, kwargs...) ####################################################### # solve examples with timer and objective check @@ -36,13 +36,13 @@ function bench_list(problem_list; verbose=2, nlp_solver, linear_solver, kwargs.. if !isnothing(problem[:obj]) && !isapprox(sol.objective, problem[:obj], rtol = 5e-2) error("Objective mismatch for ",problem[:name],": ",sol.objective," instead of ",problem[:obj]) else - verbose > 1 && @printf("%-30s: %4d iter ", problem[:name], sol.iterations) + verbose > 0 && @printf("%-30s: %4d iter ", problem[:name], sol.iterations) end # time t = @belapsed direct_solve($problem[:ocp], $nlp_solver; init=$problem[:init], display=false, $kwargs...) append!(t_list, t) - verbose > 1 && @printf("%7.2f s\n", t) + verbose > 0 && @printf("%7.2f s\n", t) end return sum(t_list) @@ -66,6 +66,7 @@ function bench(;grid_size_list = [250, 500, 1000, 2500, 5000], verbose = 1, nlp_ verbose > 1 && @printf("Blas config: %s\n", LinearAlgebra.BLAS.lbt_get_config()) # load problems for benchmark + # Note that problems may vary significantly in convergence times... 
if names_list == :default names_list = ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] elseif names_list == :quick @@ -89,7 +90,7 @@ function bench(;grid_size_list = [250, 500, 1000, 2500, 5000], verbose = 1, nlp_ append!(t_list, t) @printf("Grid size %d: time (s) = %6.1f\n", grid_size, t) end - + return t_list end diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 77a27644..e09bedfa 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -1,14 +1,12 @@ -Benchmark CTDirect for different AD backends (keyword adnlp_backend = ...) -- :default : ForwardDiff +# Benchmark CTDirect for different AD backends +The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend = ...` - :optimized (default for CTDirect) : ForwardDiff for Jacobian / ReverseDiff for Hessian +- :default : ForwardDiff (much slower) +- :manual : sparse pattern for Jacobian / Hessian given to ADNLPModels - :enzyme : Enzyme - :zygote : Zygote -Using CTDirect benchmark function bench() -Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] - -Takeaways: -- optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. +## Errors: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with ```ERROR: Constant memory is stored (or returned) to a differentiable variable. As a result, Enzyme cannot provably ensure correctness and throws this error. @@ -20,15 +18,65 @@ To work around this issue, either: Error apparently occurs when calling the boundary conditions. - zygote gives incorrect (huge) nonzero counts then also fails with an error message. 
+## Tests: +Using CTDirect benchmark function bench() +Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] + +Takeaways: +- optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. +- manual sparse pattern seems to give better performance for larger problems, likely because of the hugely increasing cost of computing the Hessian sparsity in terms of allocations and time (cf also comparison with Jump that uses a different, less sparse but faster Hessian). + +| Trapeze | default | optimized | manual | +|---------|---------|-----------|---------| +| 250 | 43.3 | 1.0 | 1.6 | +| 500 | 176.2 | 2.6 | 3.8 | +| 1000 | 926.0 | 7.3 | 7.7 | +| 2500 | | 29.2 | 30.5 | +| 5000 | | 108.5 | 84.1 | +| 6000 | | | 94.2 | +| 7000 | | | 130.8 | +| 8000 | | | 166.0 | +| 9000 | | | 201.7 | +| 10000 | | 1252.4 | 154.0 | + + +Sparsity details: goddard_all Trapeze (1000) +| transcription | :optimized | :manual | +|---------------|------------|---------| +| NLP vars | 4005 | 4005 | +| NLP cons | 6007 | 6007 | +| Hess nnz | 11011 | 30024 | +| H sparsity | 99.86% | 99.63% | +| Jac nnz | 28011 | 42043 | +| J sparsity | 99.88% | 99.83% | +| allocs | 1.16GB | 106MB | +| time | 750ms | 85ms | +|---------------|------------|---------| +| solve | :optimized | :manual | +| iterations | 42 | 28 | +| allocs | 2.0GB | 1.2GB | +| time | 2.5s | 2.5s | -| Trapeze | default | optimized | -|---------|---------|-----------| -| 250 | 43.3 | 1.0 | -| 500 | 176.2 | 2.4 | -| 1000 | 926.0 | 7.1 | -| 2500 | | 31.8 | -| 5000 | | | +Sparsity details: algal_bacterial Trapeze (1000) +| transcription | :optimized | :manual | Jump +|---------------|------------|---------| +| NLP vars | | | +| NLP cons | | | +| Hess nnz | | | +| H sparsity | | | +| Jac nnz | | | +| J sparsity | | | +| allocs | | | +| time | | | 
+|---------------|------------|---------| +| solve | :optimized | :manual | +| iterations | | | +| allocs | | | +| time | | | -Midpoint +## Todo: +- check all specific backends (jprod etc) and set them as in :optimized +- add pattern structure for midpoint and IRK schemes +- redo tests on algal_bacterial problem, including Jump +- reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? -GaussLegendre2 From fc9b156d81c31d83a50f7dc76e5304acf9adf943 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 14:23:05 +0100 Subject: [PATCH 15/44] more tests --- src/solve.jl | 5 +-- test/benchmark.jl | 12 +++--- test/docs/AD_backend.md | 89 +++++++++++++++++------------------------ 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index 09b46be7..a275ae4b 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -50,15 +50,14 @@ function direct_transcription( J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) - # call NLP problem constructor (+++ use show_time=true for info ?) - # +++ check other backends (...prod) and set them as in :optimized + # call NLP problem constructor # +++ try to disable unused backends such as hvprod ?? bench ! 
if adnlp_backend == :manual nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, gradient_backend = ADNLPModels.ReverseDiffADGradient, - hvprod_backend = ADNLPModels.ReverseDiffADHvprod, + hprod_backend = ADNLPModels.ReverseDiffADHvprod, jtprod_backend = ADNLPModels.ReverseDiffADJtprod, jacobian_backend = J_backend, hessian_backend = H_backend, diff --git a/test/benchmark.jl b/test/benchmark.jl index 24716450..11f6bf55 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -36,13 +36,13 @@ function bench_list(problem_list; verbose=1, nlp_solver, linear_solver, kwargs.. if !isnothing(problem[:obj]) && !isapprox(sol.objective, problem[:obj], rtol = 5e-2) error("Objective mismatch for ",problem[:name],": ",sol.objective," instead of ",problem[:obj]) else - verbose > 0 && @printf("%-30s: %4d iter ", problem[:name], sol.iterations) + verbose > 1 && @printf("%-30s: %4d iter ", problem[:name], sol.iterations) end # time t = @belapsed direct_solve($problem[:ocp], $nlp_solver; init=$problem[:init], display=false, $kwargs...) append!(t_list, t) - verbose > 0 && @printf("%7.2f s\n", t) + verbose > 1 && @printf("%7.2f s\n", t) end return sum(t_list) @@ -68,9 +68,9 @@ function bench(;grid_size_list = [250, 500, 1000, 2500, 5000], verbose = 1, nlp_ # load problems for benchmark # Note that problems may vary significantly in convergence times... 
if names_list == :default - names_list = ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] + names_list = ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "simple_integrator", "vanderpol"] elseif names_list == :quick - names_list = ["beam", "double_integrator_mintf", "fuller", "jackson", "robbins", "simple_integrator", "vanderpol"] + names_list = ["beam", "double_integrator_mintf", "fuller", "jackson", "simple_integrator", "vanderpol"] elseif names_list == :all names_list = ["algal_bacterial", "beam", "bioreactor_1day", "bioreactor_Ndays", "bolza_freetf", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "insurance", "jackson", "robbins", "simple_integrator", "swimmer", "vanderpol"] elseif names_list == :hard @@ -88,9 +88,9 @@ function bench(;grid_size_list = [250, 500, 1000, 2500, 5000], verbose = 1, nlp_ for grid_size in grid_size_list t = bench_list(problem_list; grid_size=grid_size, verbose=verbose, nlp_solver=nlp_solver, linear_solver=linear_solver, kwargs...) 
append!(t_list, t) - @printf("Grid size %d: time (s) = %6.1f\n", grid_size, t) + @printf("Grid size %6d: time (s) = %6.1f\n", grid_size, t) end - return t_list + #return t_list end diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index e09bedfa..a58bbbd8 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -1,10 +1,10 @@ -# Benchmark CTDirect for different AD backends -The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend = ...` -- :optimized (default for CTDirect) : ForwardDiff for Jacobian / ReverseDiff for Hessian -- :default : ForwardDiff (much slower) -- :manual : sparse pattern for Jacobian / Hessian given to ADNLPModels -- :enzyme : Enzyme -- :zygote : Zygote +# Benchmark for different AD backends +The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend = ...`. Possible values are, including the predefined backups for ADNLPModels (*) : +- :optimized* (default for CTDirect) : Forward for Jacobian, Reverse for Gradient and Hessian +- :default* : Forward for everything (much slower) +- :manual : give to ADNLPModels the sparse pattern for Jacobian and Hessian (use same Forward / Reverse settings as the :optimized predefined backend) +- :enzyme* : Enzyme (not working) +- :zygote* : Zygote (not working) ## Errors: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with @@ -15,7 +15,7 @@ If Enzyme should be able to prove this use non-differentable, open an issue! To work around this issue, either: a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.``` - Error apparently occurs when calling the boundary conditions. 
+ Error apparently occurs when calling the boundary conditions.``` - zygote gives incorrect (huge) nonzero counts then also fails with an error message. ## Tests: @@ -23,60 +23,45 @@ Using CTDirect benchmark function bench() Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] Takeaways: -- optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. +- the optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. - manual sparse pattern seems to give better performance for larger problems, likely because of the hugely increasing cost of computing the Hessian sparsity in terms of allocations and time (cf also comparison with Jump that uses a different, less sparse but faster Hessian). | Trapeze | default | optimized | manual | |---------|---------|-----------|---------| -| 250 | 43.3 | 1.0 | 1.6 | -| 500 | 176.2 | 2.6 | 3.8 | -| 1000 | 926.0 | 7.3 | 7.7 | -| 2500 | | 29.2 | 30.5 | -| 5000 | | 108.5 | 84.1 | -| 6000 | | | 94.2 | -| 7000 | | | 130.8 | -| 8000 | | | 166.0 | -| 9000 | | | 201.7 | -| 10000 | | 1252.4 | 154.0 | +| 250 | 49.7 | 0.9 | 1.5 | +| 500 | | 2.4 | 3.4 | +| 1000 | | 6.2 | 6.4 | +| 2500 | | 24.7 | 20.6 | +| 5000 | | | 50.0 | +| 7500 | | | 61.2 | +| 10000 | | | | -Sparsity details: goddard_all Trapeze (1000) -| transcription | :optimized | :manual | -|---------------|------------|---------| -| NLP vars | 4005 | 4005 | -| NLP cons | 6007 | 6007 | -| Hess nnz | 11011 | 30024 | -| H sparsity | 99.86% | 99.63% | -| Jac nnz | 28011 | 42043 | -| J sparsity | 99.88% | 99.83% | -| allocs | 1.16GB | 106MB | -| time | 750ms | 85ms | -|---------------|------------|---------| -| solve | :optimized | :manual | -| iterations | 42 | 28 | -| allocs | 2.0GB | 1.2GB | -| time | 2.5s | 2.5s | +Sparsity details: goddard_all Trapeze (1000 and 10000 steps) -Sparsity 
details: algal_bacterial Trapeze (1000) -| transcription | :optimized | :manual | Jump -|---------------|------------|---------| -| NLP vars | | | -| NLP cons | | | -| Hess nnz | | | -| H sparsity | | | -| Jac nnz | | | -| J sparsity | | | -| allocs | | | -| time | | | -|---------------|------------|---------| -| solve | :optimized | :manual | -| iterations | | | -| allocs | | | -| time | | | +| transcription | optimized | manual | optimized | manual | +|---------------|-----------|---------|-----------|--------| +| NLP vars | 4005 | 4005 | 40005 | 40005 | +| NLP cons | 6007 | 6007 | 60007 | 60007 | +| Hess nnz | 11011 | 30024 | 110011 | 300024 | +| H sparsity | 99.86% | 99.63% | 99.99% | 99.96% | +| Jac nnz | 28011 | 42043 | 280011 | 420043 | +| J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | +| allocs | 1.16GB | 106MB | 71.56GB | 4.55GB | +| time | 750ms | 85ms | 64.7s** | 3.8s | +|---------------|-----------|---------|-----------|--------| +| solve | optimized | manual | optimized | manual | +| iterations | 42 | 28 | 51 | 29 | +| allocs | 2.0GB | 1.2GB | 87.5GB | 16.9GB | +| time | 2.5s | 2.5s | 151.0s*** | 42.4s | + +** hessian accounts for 59 out of total 65s +*** building the hessian is one third of the total solve time ! ## Todo: -- check all specific backends (jprod etc) and set them as in :optimized +- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - add pattern structure for midpoint and IRK schemes - redo tests on algal_bacterial problem, including Jump +- add some tests for different backends in test_misc - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? 
From a6b92e4e241b4926f14f5dad241a712bc4de2dca Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 14:31:55 +0100 Subject: [PATCH 16/44] removed robbins from benchmark; updated markdown --- test/docs/AD_backend.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index a58bbbd8..5c5defe0 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -1,26 +1,28 @@ # Benchmark for different AD backends -The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend = ...`. Possible values are, including the predefined backups for ADNLPModels (*) : -- :optimized* (default for CTDirect) : Forward for Jacobian, Reverse for Gradient and Hessian -- :default* : Forward for everything (much slower) -- :manual : give to ADNLPModels the sparse pattern for Jacobian and Hessian (use same Forward / Reverse settings as the :optimized predefined backend) -- :enzyme* : Enzyme (not working) -- :zygote* : Zygote (not working) +The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend=`. Possible values include the predefined(*) backends for ADNLPModels: +- `:optimized`* Default for CTDirect. Forward mode for Jacobian, reverse for Gradient and Hessian. +- `:default`* Forward mode for everything. Significantly slower. +- `:manual` Explicitely give to ADNLPModels the sparse pattern for Jacobian and Hessian. Uses the same forward / reverse settings as the `:optimized` predefined backend. +- `:enzyme`* Enzyme (not working). +- `:zygote`* Zygote (not working). -## Errors: +## Errors for Enzyme and Zygote: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with -```ERROR: Constant memory is stored (or returned) to a differentiable variable. +``` +ERROR: Constant memory is stored (or returned) to a differentiable variable. 
As a result, Enzyme cannot provably ensure correctness and throws this error. This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity). If Enzyme should be able to prove this use non-differentable, open an issue! To work around this issue, either: a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.``` - Error apparently occurs when calling the boundary conditions.``` + Error apparently occurs when calling the boundary conditions. + ``` - zygote gives incorrect (huge) nonzero counts then also fails with an error message. ## Tests: -Using CTDirect benchmark function bench() -Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "robbins", "simple_integrator", "vanderpol"] +Using CTDirect benchmark function `bench()` +Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "simple_integrator", "vanderpol"] Takeaways: - the optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. 
From 0bbbd8815f7418a24f00896cdfc71d947b0ac459 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 14:38:52 +0100 Subject: [PATCH 17/44] updated markdown --- test/docs/AD_backend.md | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 5c5defe0..0c47b92c 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -21,22 +21,27 @@ To work around this issue, either: - zygote gives incorrect (huge) nonzero counts then also fails with an error message. ## Tests: -Using CTDirect benchmark function `bench()` +``` +julia> include("test/benchmark.jl") +test_unit (generic function with 1 method) + +julia> bench(grid_size_list=[250,500,1000,2500,5000,7500,10000], adnlp_backend=:manual) Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", "double_integrator_freet0tf", "fuller", "goddard", "goddard_all", "jackson", "simple_integrator", "vanderpol"] +``` Takeaways: -- the optimized backend (with ReverseDiff for Hessian) is much better than full ForwardDiff. -- manual sparse pattern seems to give better performance for larger problems, likely because of the hugely increasing cost of computing the Hessian sparsity in terms of allocations and time (cf also comparison with Jump that uses a different, less sparse but faster Hessian). +- the `:optimized` backend (with reverse mode for Hessian) is much better than full forward mode. +- manual sparse pattern seems to give even better performance for larger problems. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. 
| Trapeze | default | optimized | manual | |---------|---------|-----------|---------| | 250 | 49.7 | 0.9 | 1.5 | -| 500 | | 2.4 | 3.4 | +| 500 | | 2.4 | 3.5 | | 1000 | | 6.2 | 6.4 | -| 2500 | | 24.7 | 20.6 | +| 2500 | | 24.7 | 23.9 | | 5000 | | | 50.0 | | 7500 | | | 61.2 | -| 10000 | | | | +| 10000 | | | | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) @@ -51,19 +56,21 @@ Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | | allocs | 1.16GB | 106MB | 71.56GB | 4.55GB | | time | 750ms | 85ms | 64.7s** | 3.8s | -|---------------|-----------|---------|-----------|--------| + +** hessian accounts for 59 out of total 65s + | solve | optimized | manual | optimized | manual | +|---------------|-----------|---------|-----------|--------| | iterations | 42 | 28 | 51 | 29 | | allocs | 2.0GB | 1.2GB | 87.5GB | 16.9GB | | time | 2.5s | 2.5s | 151.0s*** | 42.4s | -** hessian accounts for 59 out of total 65s *** building the hessian is one third of the total solve time ! ## Todo: -- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - add pattern structure for midpoint and IRK schemes - redo tests on algal_bacterial problem, including Jump - add some tests for different backends in test_misc +- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? 
From ff362aaadfe4c7f476c7dc45fa57159036c93efa Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 14:44:06 +0100 Subject: [PATCH 18/44] updated markdown --- test/docs/AD_backend.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 0c47b92c..aa9d5cce 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -21,7 +21,7 @@ To work around this issue, either: - zygote gives incorrect (huge) nonzero counts then also fails with an error message. ## Tests: -``` +```manpage julia> include("test/benchmark.jl") test_unit (generic function with 1 method) From 5f46cefce0c61e8cac2a36e0cf53c281c4b32d6c Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 15:58:24 +0100 Subject: [PATCH 19/44] markdown --- test/docs/AD_backend.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index aa9d5cce..7f6dd1b6 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -21,7 +21,7 @@ To work around this issue, either: - zygote gives incorrect (huge) nonzero counts then also fails with an error message. 
## Tests: -```manpage +``` julia> include("test/benchmark.jl") test_unit (generic function with 1 method) @@ -37,11 +37,11 @@ Takeaways: |---------|---------|-----------|---------| | 250 | 49.7 | 0.9 | 1.5 | | 500 | | 2.4 | 3.5 | -| 1000 | | 6.2 | 6.4 | -| 2500 | | 24.7 | 23.9 | -| 5000 | | | 50.0 | -| 7500 | | | 61.2 | -| 10000 | | | | +| 1000 | | 5.6 | 6.4 | +| 2500 | | 23.9 | 23.9 | +| 5000 | | 89.6 | 56.3 | +| 7500 | | 225.4 | 85.9 | +| 10000 | | | 102.4 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) @@ -58,6 +58,9 @@ Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | time | 750ms | 85ms | 64.7s** | 3.8s | ** hessian accounts for 59 out of total 65s +``` ++++ log info +``` | solve | optimized | manual | optimized | manual | |---------------|-----------|---------|-----------|--------| @@ -65,7 +68,7 @@ Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | allocs | 2.0GB | 1.2GB | 87.5GB | 16.9GB | | time | 2.5s | 2.5s | 151.0s*** | 42.4s | -*** building the hessian is one third of the total solve time ! +*** building the hessian is one third of the total solve time... ## Todo: - add pattern structure for midpoint and IRK schemes From c3c3581bf8e5180bc9e17a0a783512e1d3aa43e8 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 31 Jan 2025 16:18:46 +0100 Subject: [PATCH 20/44] removed no_hessian option --- src/solve.jl | 8 -------- test/docs/AD_backend.md | 14 ++++++++++---- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index a275ae4b..dbd39b42 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -51,7 +51,6 @@ function direct_transcription( H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) # call NLP problem constructor - # +++ try to disable unused backends such as hvprod ?? bench ! 
if adnlp_backend == :manual nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, @@ -63,13 +62,6 @@ function direct_transcription( hessian_backend = H_backend, show_time = show_time ) - elseif adnlp_backend == :no_hessian - nlp = ADNLPModel!( - x -> DOCP_objective(x, docp), x0, docp.var_l, docp.var_u, - (c, x) -> DOCP_constraints!(c, x, docp), docp.con_l, docp.con_u, - backend = __adnlp_backend(), show_time = show_time - ) - set_adbackend!(nlp, hessian_backend = ADNLPModels.EmptyADbackend, hvprod_backend = ADNLPModels.EmptyADbackend) # directionalsecondderivative) else nlp = ADNLPModel!( x -> DOCP_objective(x, docp), x0, docp.var_l, docp.var_u, diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 7f6dd1b6..b1c748e0 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -33,6 +33,7 @@ Takeaways: - the `:optimized` backend (with reverse mode for Hessian) is much better than full forward mode. - manual sparse pattern seems to give even better performance for larger problems. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. 
+Standard benchmark: | Trapeze | default | optimized | manual | |---------|---------|-----------|---------| | 250 | 49.7 | 0.9 | 1.5 | @@ -41,11 +42,9 @@ Takeaways: | 2500 | | 23.9 | 23.9 | | 5000 | | 89.6 | 56.3 | | 7500 | | 225.4 | 85.9 | -| 10000 | | | 102.4 | - +| 10000 | | 526.3 | 102.4 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) - | transcription | optimized | manual | optimized | manual | |---------------|-----------|---------|-----------|--------| | NLP vars | 4005 | 4005 | 40005 | 40005 | @@ -59,7 +58,14 @@ Sparsity details: goddard_all Trapeze (1000 and 10000 steps) ** hessian accounts for 59 out of total 65s ``` -+++ log info +julia> direct_transcription(goddard_all().ocp, grid_size=10000, show_time=true); +gradient backend ADNLPModels.ReverseDiffADGradient: 0.000137972 seconds; +hprod backend ADNLPModels.ReverseDiffADHvprod: 0.314931491 seconds; +jprod backend ADNLPModels.ForwardDiffADJprod: 2.2412e-5 seconds; +jtprod backend ADNLPModels.ReverseDiffADJtprod: 0.612174104 seconds; +jacobian backend ADNLPModels.SparseADJacobian: 0.425535048 seconds; +hessian backend ADNLPModels.SparseReverseADHessian: 58.450146911 seconds; +ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. ``` | solve | optimized | manual | optimized | manual | From 2da824193e4b9e860305d04745def6339c99e769 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Sat, 1 Feb 2025 12:17:00 +0100 Subject: [PATCH 21/44] todo: midpoint and irk --- src/disc/trapeze.jl | 4 +++- src/solve.jl | 26 +++++++++++++------------- test/docs/AD_backend.md | 1 - test/suite/test_nlp.jl | 6 ++++-- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 4748b4ca..498dd241 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -179,7 +179,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) #+++ build Is, Js, Vs sets then call sparse constructor ? 
- #nnzj = + #nnzj = ... #Is = Vector{Int}(undef, nnzj) #Js = Vector{Int}(undef, nnzj) #Vs = ones(Bool, nnzj) @@ -226,6 +226,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true end + # replace J with sparse matrix return sparse(J) end @@ -278,6 +279,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) H[docp.dim_NLP_x, docp.dim_NLP_x] = true end + # replace H with sparse matrix return sparse(H) end \ No newline at end of file diff --git a/src/solve.jl b/src/solve.jl index dbd39b42..30dacb6e 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -37,24 +37,24 @@ function direct_transcription( variables_bounds!(docp) constraints_bounds!(docp) - # set initial guess - x0 = DOCP_initial_guess(docp, - OptimalControlInit(init, state_dim = ocp.state_dimension, control_dim = ocp.control_dimension, variable_dim = ocp.variable_dimension) - ) + # build and set initial guess in DOCP + docp_init = OptimalControlInit(init, state_dim = ocp.state_dimension, control_dim = ocp.control_dimension, variable_dim = ocp.variable_dimension) + x0 = DOCP_initial_guess(docp, docp_init) - # objective and constraints functions + # redeclare objective and constraints functions f = x -> DOCP_objective(x, docp) c! 
= (c, x) -> DOCP_constraints!(c, x, docp) - # sparsity pattern - J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) - H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) - # call NLP problem constructor if adnlp_backend == :manual + + # build sparsity pattern + J_backend = ADNLPModels.SparseADJacobian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Jacobian_pattern(docp)) + H_backend = ADNLPModels.SparseReverseADHessian(docp.dim_NLP_variables, f, docp.dim_NLP_constraints, c!, DOCP_Hessian_pattern(docp)) + + # build NLP with given patterns nlp = ADNLPModel!( - f, x0, docp.var_l, docp.var_u, - c!, docp.con_l, docp.con_u, + f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, gradient_backend = ADNLPModels.ReverseDiffADGradient, hprod_backend = ADNLPModels.ReverseDiffADHvprod, jtprod_backend = ADNLPModels.ReverseDiffADJtprod, @@ -63,9 +63,9 @@ function direct_transcription( show_time = show_time ) else + # build NLP nlp = ADNLPModel!( - x -> DOCP_objective(x, docp), x0, docp.var_l, docp.var_u, - (c, x) -> DOCP_constraints!(c, x, docp), docp.con_l, docp.con_u, + f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, backend = adnlp_backend, show_time = show_time ) end diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index b1c748e0..29b8ad70 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -79,7 +79,6 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. ## Todo: - add pattern structure for midpoint and IRK schemes - redo tests on algal_bacterial problem, including Jump -- add some tests for different backends in test_misc - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? 
diff --git a/test/suite/test_nlp.jl b/test/suite/test_nlp.jl index a7a43936..996c336c 100644 --- a/test/suite/test_nlp.jl +++ b/test/suite/test_nlp.jl @@ -17,8 +17,10 @@ end @test sol.objective ≈ prob.obj rtol = 1e-2 sol = direct_solve(prob.ocp, display = false, adnlp_backend = :default) @test sol.objective ≈ prob.obj rtol = 1e-2 - #sol = direct_solve(prob.ocp, display = false, adnlp_backend = :enzyme) - #@test sol.objective ≈ prob.obj rtol = 1e-2 + sol = direct_solve(prob.ocp, display = false, adnlp_backend = :manual) + @test sol.objective ≈ prob.obj rtol = 1e-2 + # +++ midpoint / manual + # +++ gl2 / manual end # DOCP solving From ac88931b75f146be571d11d5afa52e71eb9a5050 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Mon, 3 Feb 2025 13:16:29 +0100 Subject: [PATCH 22/44] manual sparsity for IRK (constant control) --- src/disc/irk.jl | 161 ++++++++++++++++++++++++++++++++++++++++++++ src/disc/trapeze.jl | 44 +++++++++--- 2 files changed, 196 insertions(+), 9 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index 0bb9643c..f418a0e3 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -342,3 +342,164 @@ function setStepConstraints!(docp::DOCP{ <: GenericIRK}, c, xu, v, time_grid, i, setPathConstraints!(docp, c, ti, xi, ui, v, offset) end + + +""" +$(TYPEDSIGNATURES) + +Build sparsity pattern for Jacobian of constraints +""" +function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) + + J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + + s = docp.discretization.stage + + # 1. 
main loop over steps + for i = 1:docp.dim_NLP_steps + + # constraints block and offset: state equation, path constraints + c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block + c_offset = (i-1)*c_block + + # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + var_block = docp.discretization._step_variables_block + docp.dim_NLP_x + var_offset = (i-1)*docp.discretization._step_variables_block + + # state eq x_i+1 = x_i + h sum bj k_ij + # 1.1 state eq wrt x_i + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 1.2 state eq wrt k_i, x_i+1 (skip l_i, u_i) + J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+ 1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+s*docp.dim_NLP_x+docp.dim_OCP_x] .= true + # 1.3 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] + if docp.is_lagrange + # wrt l_i and l_i+1 + J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true + J[c_offset+docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true + # wrt k_ij[n+1] + for i=1:s + J[c_offset+docp.dim_NLP_x, var_offset+(s+1)*docp.dim_NLP_x] = true + end + end + + # 1.4 stage equations k_ij = f(t_ij, x_ij, u_ij, v) + # with + # x_ij depending on x_i and all k_ij + # u_ij depending on u_i for piecewise constant or (u_i, u_i+1) for piecewise linear + # ie whole block depends on x_i, u_i, k_i, and u_i+1 for piecewise linear control + # NB we could skip l_i here... 
+ J[c_offset+docp.dim_NLP_x+1:c_offset+(s+1)*docp.dim_NLP_x , var_offset+1:var_offset+docp.discretization._step_variables_block] .= true + if !docp.discretization._constant_control + J[c_offset+docp.dim_NLP_x+1:c_offset+(s+1)*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x:var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x+docp.dim_NLP_u] .= true + end + + # 1.5 path constraint wrt x_i, u_i + J[c_offset+(s+1)*docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + J[c_offset+(s+1)*docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + + # 1.6 whole block wrt v + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + end + + # 2. final path constraints (xf, uf, v) + c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._step_pathcons_block + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + var_block = docp.discretization._step_variables_block + # 2.1 wrt xf + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 2.2 wrt uf + J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + # 2.3 wrt v + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + + # 3. 
boundary constraints (x0, xf, v) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block + c_block = docp.dim_boundary_cons + docp.dim_v_cons + # 3.1 wrt x0 + J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true + # 3.2 wrt xf + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 3.3 wrt v + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # 3.4 null initial condition for lagrangian cost state l0 + if docp.is_lagrange + J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true + end + + # return sparse matrix + return sparse(J) +end + + +""" +$(TYPEDSIGNATURES) + +Build sparsity pattern for Hessian of Lagrangian +""" +function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) + + # NB. need to provide full pattern for coloring, not just upper/lower part + H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + + s = docp.discretization.stage + + # 0. objective + # 0.1 mayer cost (x0, xf, v) + # -> see 3. term for boundary conditions ! + # 0.2 lagrange case (lf) + if docp.is_lagrange + lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x + H[lf_index, lf_index] = true + end + + # +++1. main loop over steps + for i = 1:docp.dim_NLP_steps + + # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + var_block = docp.discretization._step_variables_block + docp.dim_NLP_x + var_offset = (i-1)*docp.discretization._step_variables_block + + # 1.1 state eq wrt x_i, k_i, x_i+1 (skip l_i, u_i) + # xi / xi, xi / (k_i x_i+1), (k_i x_i+1) / xi, x_i+1 / x_i+1 + # 1.2 lagrange part wrt l_i, k_i[n+1], l_i+1 + # ... pff + # stage eqs wrt x_i, u_i, k_i, and u_i+1 for piecewise linear control + # NB. we could skip l_i here... + # +++ assume constant_control and combine all in one single block ? 
+ if docp.discretization._constant_control + H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true + else + error("Manual Hessian sparsity pattern not supported for IRK scheme with piecewise linear control") + end + + # 1.3 path constraint wrt x_i, u_i + # -> included in previous term ! + # 1.4 whole block wrt v (including cross derivatives: v/v v/var var/v) + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true + H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + end + + # 2. final path constraints (xf, uf, v) + # -> included in last iteration from loop ! + + # 3. boundary constraints (x0, xf, v) + # -> (xf, v) part included in last iteration from loop ! + if docp.is_mayer || docp.dim_boundary_cons > 0 + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # x0 / x0 + H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf + H[1:docp.dim_OCP_x, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # x0 / v + H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 + end + # 3.1 null initial condition for lagrangian cost state l0 + if docp.is_lagrange + H[docp.dim_NLP_x, docp.dim_NLP_x] = true + end + + # return sparse matrix + return sparse(H) + +end \ No newline at end of file diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 498dd241..5859f35d 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -175,6 +175,11 @@ function setStepConstraints!(docp::DOCP{Trapeze}, c, xu, v, time_grid, i, work) end +""" +$(TYPEDSIGNATURES) + +Build sparsity 
pattern for Jacobian of constraints +""" function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) @@ -187,22 +192,32 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # 1. main loop over steps for i = 1:docp.dim_NLP_steps - c_offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + + # constraints block and offset: state equation, path constraints c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block - var_offset = (i-1)*docp.discretization._step_variables_block + c_offset = (i-1)*c_block + + # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 var_block = docp.discretization._step_variables_block * 2 - # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + var_offset = (i-1)*docp.discretization._step_variables_block + + # state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + # 1.1 state eq wrt x_i J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 1.2 state eq wrt u_i, x_i+1 (skip l_i) J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true + # 1.3 state eq wrt u_i+1 (skip l_i+1) J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true - # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + # 1.4 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= true end - # 1.3 path constraint wrt x_i, u_i + + # 1.5 path constraint wrt x_i, u_i J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # 1.4 whole block wrt v + + # 1.6 whole block wrt v 
J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true end @@ -211,17 +226,23 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block + # 2.1 wrt xf J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 2.1 wrt uf J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + # 2.1 wrt v J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # 3. boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons + # 3.1 wrt x0 J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true + # 3.2 wrt xf J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true + # 3.3 wrt v J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true - # 3.1 null initial condition for lagrangian cost state l0 + # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true end @@ -231,6 +252,11 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) end +""" +$(TYPEDSIGNATURES) + +Build sparsity pattern for Hessian of Lagrangian +""" function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # NB. need to provide full pattern for coloring, not just upper/lower part @@ -241,7 +267,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # -> see 3. term for boundary conditions ! 
# 0.2 lagrange case (lf) if docp.is_lagrange - lf_index = docp.dim_NLP_steps * (docp.dim_NLP_x + docp.dim_NLP_u) + docp.dim_NLP_x + lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x H[lf_index, lf_index] = true end @@ -255,7 +281,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true # 1.3 path constraint wrt x_i, u_i # -> included in previous term ! - # 1.4 whole block wrt v (including cross derivatives) + # 1.4 whole block wrt v (including cross derivatives: v/v v/var var/v) H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true From 44959b1925ffa2c5be8afae6aacaee596cc19087 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Tue, 4 Feb 2025 18:30:18 +0100 Subject: [PATCH 23/44] todo: debug irk --- src/disc/irk.jl | 73 ++++++++++++++++++++++---------------- src/disc/trapeze.jl | 49 ++++++++++++------------- src/solve.jl | 6 ++-- test/docs/AD_backend.md | 6 +++- test/docs/jump_ctdirect.md | 53 ++++++++++++++------------- test/problems/pattern.jl | 15 ++++++++ 6 files changed, 118 insertions(+), 84 deletions(-) create mode 100644 test/problems/pattern.jl diff --git a/src/disc/irk.jl b/src/disc/irk.jl index f418a0e3..f87fcac2 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -373,12 +373,10 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+ 1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+s*docp.dim_NLP_x+docp.dim_OCP_x] .= true # 1.3 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] if docp.is_lagrange - # wrt l_i and l_i+1 - J[c_offset+docp.dim_NLP_x, 
var_offset+docp.dim_NLP_x] = true - J[c_offset+docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true - # wrt k_ij[n+1] + J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i + J[c_offset+docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i+1 for i=1:s - J[c_offset+docp.dim_NLP_x, var_offset+(s+1)*docp.dim_NLP_x] = true + J[c_offset+docp.dim_NLP_x, var_offset+(s+1)*docp.dim_NLP_x] = true # k_ij[n+1] end end @@ -416,12 +414,9 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # 3. boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - # 3.1 wrt x0 - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true - # 3.2 wrt xf - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - # 3.3 wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true # x0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true @@ -446,53 +441,69 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) # 0. objective # 0.1 mayer cost (x0, xf, v) - # -> see 3. term for boundary conditions ! + # -> grouped with term 3. for boundary conditions # 0.2 lagrange case (lf) if docp.is_lagrange lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x H[lf_index, lf_index] = true end - # +++1. main loop over steps + # 1. 
main loop over steps + # 1.0 v / v term + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + for i = 1:docp.dim_NLP_steps # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) var_block = docp.discretization._step_variables_block + docp.dim_NLP_x var_offset = (i-1)*docp.discretization._step_variables_block - # 1.1 state eq wrt x_i, k_i, x_i+1 (skip l_i, u_i) - # xi / xi, xi / (k_i x_i+1), (k_i x_i+1) / xi, x_i+1 / x_i+1 - # 1.2 lagrange part wrt l_i, k_i[n+1], l_i+1 - # ... pff - # stage eqs wrt x_i, u_i, k_i, and u_i+1 for piecewise linear control - # NB. we could skip l_i here... - # +++ assume constant_control and combine all in one single block ? + # 1.1 state eq x_i+1 = x_i + h sum bj k_ij + # wrt x_i, k_i, x_i+1 (skip l_i, u_i) + # -> included in 1.3 except x_i+1 terms + H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x] .= true # x_i / x_i+1 + H[var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x , var_offset+1:var_offset+docp.dim_OCP_x] .= true # x_i+1 / x_i + H[var_offset+docp.dim_NLP_x+1:var_offset+(s+1)*docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x] .= true # k_i / x_i+1 + H[var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x , var_offset+docp.dim_NLP_x+1:var_offset+(s+1)*docp.dim_NLP_x] .= true # x_i+1 / k_i + + # 1.2 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] + # -> included in 1.3 except l_i+1 terms + # +++ could be done fully here and l_i skipped in 1.3 + H[var_offset+docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i / l_i+1 + H[ 
var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i+1 / l_i + for i=1:s + H[var_offset+docp.dim_NLP_u+i*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # k_i[n+1] / l_i+1 + H[var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # l_i+1 / k_i[n+1] + end + + # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) + # wrt x_i, u_i, k_i (and u_i+1 for piecewise linear control) + # NB. l_i terms for 1.2 are included but we have excess nnz eg l_i / x_i,u_i,k_i[1:n] if docp.discretization._constant_control - H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true + H[var_offset+1:var_offset+docp.discretization._step_variables_block, var_offset+1:var_offset+docp.discretization._step_variables_block] .= true else error("Manual Hessian sparsity pattern not supported for IRK scheme with piecewise linear control") end - # 1.3 path constraint wrt x_i, u_i - # -> included in previous term ! - # 1.4 whole block wrt v (including cross derivatives: v/v v/var var/v) - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true - H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # 1.4 path constraint wrt x_i, u_i + # -> included in 1.3 + + # 1.5 whole block wrt v (NB. term v / v added before the loop) + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true # v / var block + H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # var block / v end # 2. final path constraints (xf, uf, v) - # -> included in last iteration from loop ! 
+ # -> included in last iteration from loop # 3. boundary constraints (x0, xf, v) - # -> (xf, v) part included in last iteration from loop ! + # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last iterations from loop if docp.is_mayer || docp.dim_boundary_cons > 0 var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # x0 / x0 H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf - H[1:docp.dim_OCP_x, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # x0 / v H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+docp.dim_OCP_x] .= true # v / xf end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 5859f35d..f2d301e9 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -226,22 +226,16 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block - # 2.1 wrt xf - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - # 2.1 wrt uf - J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # 2.1 wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf + J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true # uf + J[c_offset+1:c_offset+c_block, 
docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v # 3. boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - # 3.1 wrt x0 - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true - # 3.2 wrt xf - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - # 3.3 wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true # x0 + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true @@ -264,7 +258,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # 0. objective # 0.1 mayer cost (x0, xf, v) - # -> see 3. term for boundary conditions ! + # -> grouped with term 3. for boundary conditions # 0.2 lagrange case (lf) if docp.is_lagrange lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x @@ -272,33 +266,40 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) end # 1. 
main loop over steps + # 1.0 v / v term + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + for i = 1:docp.dim_NLP_steps + + # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 var_offset = (i-1)*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block * 2 + var_block = docp.discretization._step_variables_block * 2 + # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + # -> included in 1.2 # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 - # -> combine as a single block for all step variables + # -> single block for all step variables H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true + # 1.3 path constraint wrt x_i, u_i - # -> included in previous term ! - # 1.4 whole block wrt v (including cross derivatives: v/v v/var var/v) - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true - H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + # -> included in 1.2 + + # 1.4 whole block wrt v (NB. term v / v added before the loop) + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true # v / var block + H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # var block / v end # 2. final path constraints (xf, uf, v) - # -> included in last iteration from loop ! + # -> included in last loop iteration # 3. boundary constraints (x0, xf, v) - # -> (xf, v) part included in last iteration from loop ! 
+ # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last loop iterations if docp.is_mayer || docp.dim_boundary_cons > 0 var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - H[1:docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # x0 / x0 H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf - H[1:docp.dim_OCP_x, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # x0 / v H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 + H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+docp.dim_OCP_x] .= true # v / xf end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange diff --git a/src/solve.jl b/src/solve.jl index 30dacb6e..5967bd6f 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -25,7 +25,7 @@ function direct_transcription( grid_size = __grid_size(), time_grid = __time_grid(), disc_method = __disc_method(), - constant_control = false, + constant_control = true, adnlp_backend = __adnlp_backend(), show_time = false ) @@ -59,7 +59,7 @@ function direct_transcription( hprod_backend = ADNLPModels.ReverseDiffADHvprod, jtprod_backend = ADNLPModels.ReverseDiffADJtprod, jacobian_backend = J_backend, - hessian_backend = H_backend, + hessian_backend = ADNLPModels.SparseReverseADHessian, #H_backend, show_time = show_time ) else @@ -103,7 +103,7 @@ function direct_solve( grid_size::Int = CTDirect.__grid_size(), time_grid = CTDirect.__time_grid(), disc_method = __disc_method(), - constant_control = false, + constant_control = true, adnlp_backend = __adnlp_backend(), kwargs..., ) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 29b8ad70..892cd071 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -77,8 +77,12 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. 
*** building the hessian is one third of the total solve time... ## Todo: -- add pattern structure for midpoint and IRK schemes +- add dummy ocp with 'full' derivatives to better check patterns vs full AD +- check Jacobian / Hessian separately for IRK (beam, fuller and vanderpol fail) +- improve Hessian for IRK (reduce excess nonzeros) - redo tests on algal_bacterial problem, including Jump +- add pattern structure for midpoint - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? +eg for dynamics and path constraints diff --git a/test/docs/jump_ctdirect.md b/test/docs/jump_ctdirect.md index 53603e4f..a59a7b2f 100644 --- a/test/docs/jump_ctdirect.md +++ b/test/docs/jump_ctdirect.md @@ -14,8 +14,11 @@ Maybe a less sparse but faster and less memory intensive method is used ? - in terms of control structures, GL2 solutions are clean, Jump Trapeze solutions shows a bit of noise, while CTDirect Trapeze solutions are very noisy. ## Todo +- check on ipopt last iteration that tol is also 1e-8 for Jump +- test CTDirect with manual sparsity patterns +- find more details on the Hessian in Jump +- investigate how jump finds a cleaner solution for trapeze discretization (print settings ?) - can we have linear memory wrt steps for Jump / Trapeze ? -- disable Hessian (in AD model then use ipopt limited memory option ?) and compare memory allocations and convergence. Find more details on the Hessian in Jump. 
## Results: Jump vs CTDirect See `test/jump_comparison.jl` @@ -51,36 +54,36 @@ CTDirect gauss_legendre_2 5000: 363.224 s (211848763 allocations: 313.02 GiB) ## Details: Trapeze (1000 and 5000 steps) -| | Jump | CT | New | Jump | CT | New | +| | Jump | CT | Manual | Jump | CT | Manual | |-----------------|--------|--------|--------|----------|----------|----------| -|nnz jacobian | 42006 | 42006 | | 210006 | 210006 | | -|nnz hessian | 74000 | 12012 | | 370000 | 60012 | | -|variables | 8008 | 8008 | | 40008 | 40008 | | -|lowerbound | 6006 | 6006 | | 30006 | 30006 | | -|lower/upper | 2002 | 2002 | | 10002 | 10002 | | -|equality | 6006 | 6006 | | 30006 | 30006 | | -|iterations | 334 | 365 | | 517 | 420 | | -|objective | 5.4522 | 5.4522 | | 5.4522 | 5.4522 | | -|structure | ok | noisy | | ok | noisy | | -|allocations | 352MB | 4.5GB | | 2.1GB | 49GB | | -|time | 17 | 20 | | 126 | 136 | | +|nnz jacobian | 42006 | 42006 | 96076 | 210006 | 210006 | 480072 | +|nnz hessian | 74000 | 12012 | 100072 | 370000 | 60012 | 500072 | +|variables | 8008 | 8008 | 8008 | 40008 | 40008 | 40008 | +|lowerbound | 6006 | 6006 | 6006 | 30006 | 30006 | 30006 | +|lower/upper | 2002 | 2002 | 2002 | 10002 | 10002 | 10002 | +|equality | 6006 | 6006 | 6006 | 30006 | 30006 | 30006 | +|iterations | 334 | 365 | 333 | 517 | 420 | 419 | +|objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | +|structure | ok | noisy | noisy | ok | noisy | noisy | +|allocations | 352MB | 4.5GB | 5.50GB | 2.1GB | 49GB | 37GB | +|time | 17 | 20 | 50 | 126 | 136 | 354 | ## Details: Gauss Legendre 2 (1000 and 5000 steps) -| | Jump | CT | New | Jump | CT | New | +| | Jump | CT | Manual | Jump | CT | Manual | |-----------------|--------|--------|--------|----------|----------|----------| -|nnz jacobian | 118006 | 124000 | | 590006 | 620000 | | -|nnz hessian | 322000 | 63000 | | 1610000 | 315000 | | -|variables | 20006 | 20008 | | 100006 | 100008 | | -|lowerbound | 6006 | 6006 | | 3006 | 30006 | | -|lower/upper | 
2000 | 2002 | | 10000 | 10002 | | -|equality | 18006 | 18006 | | 90006 | 90006 | | -|iterations | 117 | 96 | | 146 | 119 | | -|objective | 5.4522 | 5.4522 | | 5.4522 | 5.4522 | | -|structure | clean | clean | | clean | clean | | -|allocations | 726MB | 14.8GB | | 3.6GB | 312GB | | -|time | 15 | 33 | | 77 | 356* | | +|nnz jacobian | 118006 | 124000 | 384072 | 590006 | 620000 | | +|nnz hessian | 322000 | 63000 | 330057 | 1610000 | 315000 | | +|variables | 20006 | 20008 | 20008 | 100006 | 100008 | | +|lowerbound | 6006 | 6006 | 6006 | 3006 | 30006 | | +|lower/upper | 2000 | 2002 | 2002 | 10000 | 10002 | | +|equality | 18006 | 18006 | 18006 | 90006 | 90006 | | +|iterations | 117 | 96 | 91 | 146 | 119 | | +|objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | | +|structure | clean | clean | clean | clean | clean | | +|allocations | 726MB | 14.8GB | 5.15GB | 3.6GB | 312GB | | +|time | 15 | 33 | 49 | 77 | 356* | | * half the time is before optimization, swap effect due to huge allocations ? 
diff --git a/test/problems/pattern.jl b/test/problems/pattern.jl new file mode 100644 index 00000000..bf8ed990 --- /dev/null +++ b/test/problems/pattern.jl @@ -0,0 +1,15 @@ +# Duumy problem to visualize sparsity patterns + +function pattern() + @def ocp begin + t ∈ [0, 1], time + x ∈ R, state + u ∈ R, control + v ∈ R, variable + x(0) + x(1) + v == 0 + ẋ(t) == x(t)^2 + u(t)^2 + v^2 + ∫(u(t)^2 + x(t)^2 + v^2) → min + end + + return ((ocp = ocp, obj = nothing, name = "pattern", init = nothing)) +end \ No newline at end of file From 186f521210ffe1615c686c7884212675a5052bac Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Tue, 4 Feb 2025 19:14:18 +0100 Subject: [PATCH 24/44] fixed bug in jacobian pattern for irk --- src/disc/irk.jl | 34 ++++++++++++++++------------------ src/solve.jl | 2 +- test/docs/jump_ctdirect.md | 1 + 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index f87fcac2..829e5f97 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -251,8 +251,8 @@ $(TYPEDSIGNATURES) Set work array for all dynamics and lagrange cost evaluations """ function setWorkArray(docp::DOCP{ <: GenericIRK}, xu, time_grid, v) - # work array layout: [x_ij ; sum_bk ; u_ij] ? - work = similar(xu, docp.dim_OCP_x + docp.dim_NLP_x + docp.dim_NLP_u) + # work array layout: [x_ij ; sum_bk] + work = similar(xu, docp.dim_OCP_x + docp.dim_NLP_x) return work end @@ -264,12 +264,9 @@ Convention: 1 <= i <= dim_NLP_steps (+1) """ function setStepConstraints!(docp::DOCP{ <: GenericIRK}, c, xu, v, time_grid, i, work) - # work array layout: [x_ij ; sum_bk ; u_ij] ? + # work array layout: [x_ij ; sum_bk] work_xij = @view work[1:docp.dim_OCP_x] work_sumbk = @view work[docp.dim_OCP_x+1:docp.dim_OCP_x+docp.dim_NLP_x] - #work_sumbk .= zero(eltype(xu)) AD bug when affecting constant values... - @views @. work_sumbk[1:docp.dim_NLP_x] = xu[1:docp.dim_NLP_x] * 0. - #work_uij ? 
# offset for previous steps offset = (i-1)*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) @@ -297,7 +294,11 @@ function setStepConstraints!(docp::DOCP{ <: GenericIRK}, c, xu, v, time_grid, i, kij = get_stagevars_at_time_step(xu, docp, i, j) # update sum b_j k_i^j (w/ lagrange term) for state equation after loop - @views @. work_sumbk[1:docp.dim_NLP_x] = work_sumbk[1:docp.dim_NLP_x] + docp.discretization.butcher_b[j] * kij[1:docp.dim_NLP_x] + if j == 1 + @views @. work_sumbk[1:docp.dim_NLP_x] = docp.discretization.butcher_b[j] * kij[1:docp.dim_NLP_x] + else + @views @. work_sumbk[1:docp.dim_NLP_x] = work_sumbk[1:docp.dim_NLP_x] + docp.discretization.butcher_b[j] * kij[1:docp.dim_NLP_x] + end # state at stage: x_i^j = x_i + h_i sum a_jl k_i^l # +++ still some allocations here @@ -312,8 +313,7 @@ function setStepConstraints!(docp::DOCP{ <: GenericIRK}, c, xu, v, time_grid, i, xij = work_xij end - # control at stage: interpolation between u_i and u_i+1 - # +++ use work aray to reduce allocs ? 
+ # control at stage uij = get_OCP_control_at_time_stage(xu, docp, i, cj) # stage equations k_i^j = f(t_i^j, x_i^j, u_i, v) as c[] = k - f @@ -376,10 +376,11 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i J[c_offset+docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i+1 for i=1:s - J[c_offset+docp.dim_NLP_x, var_offset+(s+1)*docp.dim_NLP_x] = true # k_ij[n+1] + J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # k_ij[n+1] end end + # 1.4 stage equations k_ij = f(t_ij, x_ij, u_ij, v) # with # x_ij depending on x_i and all k_ij @@ -404,12 +405,9 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block - # 2.1 wrt xf - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - # 2.2 wrt uf - J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # 2.3 wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf + J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true # uf + J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v # 3. 
boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block @@ -472,8 +470,8 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) H[var_offset+docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i / l_i+1 H[ var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i+1 / l_i for i=1:s - H[var_offset+docp.dim_NLP_u+i*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # k_i[n+1] / l_i+1 - H[var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # l_i+1 / k_i[n+1] + H[var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # k_i[n+1] / l_i+1 + H[var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # l_i+1 / k_i[n+1] end # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) diff --git a/src/solve.jl b/src/solve.jl index 5967bd6f..7911b286 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -59,7 +59,7 @@ function direct_transcription( hprod_backend = ADNLPModels.ReverseDiffADHvprod, jtprod_backend = ADNLPModels.ReverseDiffADJtprod, jacobian_backend = J_backend, - hessian_backend = ADNLPModels.SparseReverseADHessian, #H_backend, + hessian_backend = H_backend, show_time = show_time ) else diff --git a/test/docs/jump_ctdirect.md b/test/docs/jump_ctdirect.md index a59a7b2f..ea1c14bb 100644 --- a/test/docs/jump_ctdirect.md +++ b/test/docs/jump_ctdirect.md @@ -70,6 +70,7 @@ CTDirect gauss_legendre_2 5000: 363.224 s (211848763 allocations: 313.02 GiB) ## Details: Gauss Legendre 2 (1000 and 5000 steps) +redo all 4 ct tests | | Jump | CT | Manual | Jump | CT | Manual | 
|-----------------|--------|--------|--------|----------|----------|----------| From 7aa113bda55cb7e70a948268df4581d44e2954cb Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Tue, 4 Feb 2025 19:23:16 +0100 Subject: [PATCH 25/44] bench ok for irk with manual sparsity --- test/docs/AD_backend.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 892cd071..1d0cde83 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -77,8 +77,6 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. *** building the hessian is one third of the total solve time... ## Todo: -- add dummy ocp with 'full' derivatives to better check patterns vs full AD -- check Jacobian / Hessian separately for IRK (beam, fuller and vanderpol fail) - improve Hessian for IRK (reduce excess nonzeros) - redo tests on algal_bacterial problem, including Jump - add pattern structure for midpoint From 2f89200e39658d5d382eb8cb48f144fb46ed9761 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Tue, 4 Feb 2025 20:55:39 +0100 Subject: [PATCH 26/44] todo: try to build sparse matrix directly (bool matrix too big for algal GL2 5000 for instance) --- src/disc/irk.jl | 1 + test/docs/jump_ctdirect.md | 29 +++++++++-------------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index 829e5f97..e9cb800a 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -352,6 +352,7 @@ Build sparsity pattern for Jacobian of constraints function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + +++ try a sparse version, add aux function add_nnz_block!(...) 
that updates the 3 vectors s = docp.discretization.stage diff --git a/test/docs/jump_ctdirect.md b/test/docs/jump_ctdirect.md index ea1c14bb..742f462a 100644 --- a/test/docs/jump_ctdirect.md +++ b/test/docs/jump_ctdirect.md @@ -1,7 +1,7 @@ # Jump / CTDirect comparison - algal bacterial problem Note that the problem is redefined for each method: jump, ctdirect and ctdirect new model. -Also, the Gauss Legendre 2 implementations for Jump and CTDirect here use a piecewise constant control (default for CTDirect would have been piecewise linear). +Also, the Gauss Legendre 2 implementations for Jump and CTDirect here use a piecewise constant control. ## Takeaways - CTDirect still allocates at least x10 more memory, worsening for higher problem sizes @@ -10,8 +10,7 @@ We note that Jump memory appears linear wrt steps for GL2, but a bit superlinear - Hessian seems to be handled differently by Jump, see the higher nonzero values. Maybe a less sparse but faster and less memory intensive method is used ? - convergence: objective and trajectory are similar, iterations differ, maybe due to the different hessian handling. Total computation times are similar for Trapeze and x2 to x5 slower for CTDirect for GL2, probably due to the memory effect. -- for GL2, Jump and CTDirect have slightly different nonzero counts for the Jacobian -- in terms of control structures, GL2 solutions are clean, Jump Trapeze solutions shows a bit of noise, while CTDirect Trapeze solutions are very noisy. +- in terms of control structures, GL2 solutions are clean, Jump Trapeze solutions shows a bit of noise, while CTDirect Trapeze solutions are very noisy. How Jump manages to find a cleaner solution with Trapeze is unclear. 
## Todo - check on ipopt last iteration that tol is also 1e-8 for Jump @@ -25,6 +24,7 @@ See `test/jump_comparison.jl` Ipopt details: `Ipopt version 3.14.17, running with linear solver MUMPS 5.7.3` Settings: tol=1e-8, mu_strategy=adaptive ++++redo ``` Jump trapeze 1000: 17.029 s (7920527 allocations: 351.87 MiB) Jump trapeze 2000: 56.928 s (23055273 allocations: 891.64 MiB) @@ -38,19 +38,11 @@ Jump gauss_legendre_2 5000: 76.593 s (56269715 allocations: 3.57 GiB) CTDirect trapeze 1000: 20.110 s (46501059 allocations: 4.54 GiB) CTDirect trapeze 2000: 41.097 s (89302125 allocations: 12.26 GiB) CTDirect trapeze 5000: 133.268 s (267989400 allocations: 49.33 GiB) -``` -GL2 piecewise constant control -``` CTDirect gauss_legendre_2 1000: 33.181 s (37843213 allocations: 14.79 GiB) CTDirect gauss_legendre_2 2000: 82.605 s (82766476 allocations: 43.19 GiB) CTDirect gauss_legendre_2 5000: 356.338 s (221161426 allocations: 312.28 GiB) ``` -GL2 piecewise linear control -``` -CTDirect gauss_legendre_2 1000: 37.220 s (39259673 allocations: 15.12 GiB) -CTDirect gauss_legendre_2 2000: 112.687 s (104950745 allocations: 45.37 GiB) -CTDirect gauss_legendre_2 5000: 363.224 s (211848763 allocations: 313.02 GiB) -``` + ## Details: Trapeze (1000 and 5000 steps) @@ -74,19 +66,16 @@ redo all 4 ct tests | | Jump | CT | Manual | Jump | CT | Manual | |-----------------|--------|--------|--------|----------|----------|----------| -|nnz jacobian | 118006 | 124000 | 384072 | 590006 | 620000 | | -|nnz hessian | 322000 | 63000 | 330057 | 1610000 | 315000 | | +|nnz jacobian | 118006 | 118006 | 384072 | 590006 | 590006 | | +|nnz hessian | 322000 | 63000 | 319036 | 1610000 | 315000 | | |variables | 20006 | 20008 | 20008 | 100006 | 100008 | | |lowerbound | 6006 | 6006 | 6006 | 3006 | 30006 | | |lower/upper | 2000 | 2002 | 2002 | 10000 | 10002 | | |equality | 18006 | 18006 | 18006 | 90006 | 90006 | | -|iterations | 117 | 96 | 91 | 146 | 119 | | +|iterations | 117 | 95 | 91 | 146 | 78 | | |objective | 
5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | | |structure | clean | clean | clean | clean | clean | | -|allocations | 726MB | 14.8GB | 5.15GB | 3.6GB | 312GB | | -|time | 15 | 33 | 49 | 77 | 356* | | +|allocations | 726MB | 14.6GB | 5.0GB | 3.6GB | 305GB | | +|time | 15 | 28 | 45 | 77 | 291* | | * half the time is before optimization, swap effect due to huge allocations ? - - - From 6d0ee8f1b4cf4d909722af809cf3bf431e7e831b Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 5 Feb 2025 12:21:30 +0100 Subject: [PATCH 27/44] prepare pattern in sparse format --- src/disc/irk.jl | 1 - src/disc/trapeze.jl | 97 +++++++++++++++++++++++++++++------------ test/docs/AD_backend.md | 14 +++--- 3 files changed, 75 insertions(+), 37 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index e9cb800a..829e5f97 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -352,7 +352,6 @@ Build sparsity pattern for Jacobian of constraints function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) - +++ try a sparse version, add aux function add_nnz_block!(...) that updates the 3 vectors s = docp.discretization.stage diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index f2d301e9..8d6b4369 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -183,12 +183,15 @@ Build sparsity pattern for Jacobian of constraints function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) - #+++ build Is, Js, Vs sets then call sparse constructor ? #nnzj = ... #Is = Vector{Int}(undef, nnzj) #Js = Vector{Int}(undef, nnzj) #Vs = ones(Bool, nnzj) - # use offset to fill Is, Js, Vs + # use offset to fill Is, Js (Vs is done) then sparse constructor ? + + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps @@ -200,48 +203,60 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 var_block = docp.discretization._step_variables_block * 2 var_offset = (i-1)*docp.discretization._step_variables_block + xi_start = var_offset + 1 + xi_end = var_offset + docp.dim_OCP_x + ui_start = var_offset + docp.dim_NLP_x + 1 + ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + xip1_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + docp.dim_OCP_x + uip1_start = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u + 1 + uip1_end = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u*2 # state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) # 1.1 state eq wrt x_i - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) # 1.2 state eq wrt u_i, x_i+1 (skip l_i) - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+docp.dim_OCP_x] .= true + add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, ui_start, xip1_end) # 1.3 state eq wrt u_i+1 (skip l_i+1) - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u+1:var_offset+docp.dim_NLP_x*2+docp.dim_NLP_u*2] .= true + add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) # 1.4 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange - J[c_offset+docp.dim_NLP_x, var_offset+1:var_offset+var_block] .= true + add_nonzero_block!(J, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) end - # 1.5 path constraint wrt x_i, u_i - J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - J[c_offset+docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true + # 1.5 path constraint wrt x_i, u_i 
(skip l_i) + add_nonzero_block!(J, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) + add_nonzero_block!(J, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) # 1.6 whole block wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) end # 2. final path constraints (xf, uf, v) c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf - J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true # uf - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + uf_start = var_offset + docp.dim_NLP_x + 1 + uf_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + add_nonzero_block!(J, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(J, c_offset+1,c_offset+c_block, uf_start, uf_end) + add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) # 3. 
boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true # x0 - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v + x0_start = 1 + x0_end = docp.dim_OCP_x + add_nonzero_block!(J, c_offset+1, c_offset+c_block, x0_start, x0_end) + add_nonzero_block!(J, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange - J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true + add_nonzero_block!(J, docp.dim_NLP_constraints, docp.dim_NLP_x) end - # replace J with sparse matrix + # build and return sparse matrix return sparse(J) end @@ -256,18 +271,22 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # NB. need to provide full pattern for coloring, not just upper/lower part H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables + # 0. objective # 0.1 mayer cost (x0, xf, v) # -> grouped with term 3. for boundary conditions # 0.2 lagrange case (lf) if docp.is_lagrange lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x - H[lf_index, lf_index] = true + add_nonzero_block!(H, lf_index, lf_index) end # 1. 
main loop over steps # 1.0 v / v term - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + add_nonzero_block!(H, v_start, v_end, v_start, v_end) for i = 1:docp.dim_NLP_steps @@ -279,14 +298,14 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # -> included in 1.2 # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 # -> single block for all step variables - H[var_offset+1:var_offset+var_block, var_offset+1:var_offset+var_block] .= true + add_nonzero_block!(H, var_offset+1, var_offset+var_block, var_offset+1, var_offset+var_block) # 1.3 path constraint wrt x_i, u_i # -> included in 1.2 # 1.4 whole block wrt v (NB. term v / v added before the loop) - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true # v / var block - H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # var block / v + add_nonzero_block!(H, v_start, v_end, var_offset+1, var_offset+var_block) + add_nonzero_block!(H, var_offset+1, var_offset+var_block, v_start, v_end) end # 2. 
final path constraints (xf, uf, v) @@ -296,17 +315,37 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last loop iterations if docp.is_mayer || docp.dim_boundary_cons > 0 var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf - H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+docp.dim_OCP_x] .= true # v / xf + x0_start = 1 + x0_end = docp.dim_OCP_x + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + add_nonzero_block!(H, x0_start, x0_end, xf_start, xf_end) + add_nonzero_block!(H, xf_start, xf_end, x0_start, x0_end) + add_nonzero_block!(H, v_start, v_end, x0_start, x0_end) + add_nonzero_block!(H, v_start, v_end, xf_start, xf_end) end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange - H[docp.dim_NLP_x, docp.dim_NLP_x] = true + add_nonzero_block!(H, docp.dim_NLP_x, docp.dim_NLP_x) end # replace H with sparse matrix return sparse(H) +end + + +""" +$(TYPEDSIGNATURES) + +Add block of nonzeros elements (Boolean matrix version) +More compact specific method for single element case +""" +function add_nonzero_block!(M, i_start, i_end, j_start, j_end) + M[i_start:i_end, j_start:j_end] .= true + return +end +function add_nonzero_block!(M, i, j) + M[i,j] = true + return end \ No newline at end of file diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 1d0cde83..0902730c 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -34,18 +34,18 @@ Takeaways: - manual sparse pattern seems to give even better performance for larger problems. 
This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. Standard benchmark: -| Trapeze | default | optimized | manual | +| Trapeze | default | optimized | manual | sparse |---------|---------|-----------|---------| -| 250 | 49.7 | 0.9 | 1.5 | -| 500 | | 2.4 | 3.5 | -| 1000 | | 5.6 | 6.4 | -| 2500 | | 23.9 | 23.9 | -| 5000 | | 89.6 | 56.3 | +| 250 | 49.7 | 0.9 | 1.5 | +| 500 | | 2.4 | 3.5 | +| 1000 | | 5.6 | 6.4 | +| 2500 | | 23.9 | 23.9 | +| 5000 | | 89.6 | 56.3 | | 7500 | | 225.4 | 85.9 | | 10000 | | 526.3 | 102.4 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) -| transcription | optimized | manual | optimized | manual | +| transcription | optimized | manual | optimized | manual | sparse |---------------|-----------|---------|-----------|--------| | NLP vars | 4005 | 4005 | 40005 | 40005 | | NLP cons | 6007 | 6007 | 60007 | 60007 | From 2f396c3539a37f9170c090ab216b2a158a717fde Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 5 Feb 2025 13:35:11 +0100 Subject: [PATCH 28/44] vector format for sparse matrix building (trapeze) --- src/disc/trapeze.jl | 95 +++++++++++++++++++++++++++-------------- test/docs/AD_backend.md | 59 ++++++++++++++----------- 2 files changed, 95 insertions(+), 59 deletions(-) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 8d6b4369..d59cb8a8 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -182,12 +182,17 @@ Build sparsity pattern for Jacobian of constraints """ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) - J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + #J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + + # vector format for sparse matrix + # +++ better to compute nnzj beforehand and allocate the 3 vectors (no push then) ? 
+ # pass to addnnz the current offset too #nnzj = ... #Is = Vector{Int}(undef, nnzj) #Js = Vector{Int}(undef, nnzj) #Vs = ones(Bool, nnzj) - # use offset to fill Is, Js (Vs is done) then sparse constructor ? + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) # index alias for v v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 @@ -213,22 +218,22 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) # 1.1 state eq wrt x_i - add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) # 1.2 state eq wrt u_i, x_i+1 (skip l_i) - add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, ui_start, xip1_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ui_start, xip1_end) # 1.3 state eq wrt u_i+1 (skip l_i+1) - add_nonzero_block!(J, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) # 1.4 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange - add_nonzero_block!(J, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) end # 1.5 path constraint wrt x_i, u_i (skip l_i) - add_nonzero_block!(J, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) - add_nonzero_block!(J, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) # 1.6 whole block wrt v - add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) end # 2. 
final path constraints (xf, uf, v) @@ -239,25 +244,28 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) xf_end = var_offset + docp.dim_OCP_x uf_start = var_offset + docp.dim_NLP_x + 1 uf_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u - add_nonzero_block!(J, c_offset+1, c_offset+c_block, xf_start, xf_end) - add_nonzero_block!(J, c_offset+1,c_offset+c_block, uf_start, uf_end) - add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1,c_offset+c_block, uf_start, uf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) # 3. boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons x0_start = 1 x0_end = docp.dim_OCP_x - add_nonzero_block!(J, c_offset+1, c_offset+c_block, x0_start, x0_end) - add_nonzero_block!(J, c_offset+1, c_offset+c_block, xf_start, xf_end) - add_nonzero_block!(J, c_offset+1, c_offset+c_block, v_start, v_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, x0_start, x0_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange - add_nonzero_block!(J, docp.dim_NLP_constraints, docp.dim_NLP_x) + add_nonzero_block!(Is, Js, docp.dim_NLP_constraints, docp.dim_NLP_x) end # build and return sparse matrix - return sparse(J) + #return sparse(J) + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs) end @@ -269,8 +277,10 @@ Build sparsity pattern for Hessian of Lagrangian function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # NB. 
need to provide full pattern for coloring, not just upper/lower part - H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) - + #H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) + # index alias for v v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 v_end = docp.dim_NLP_variables @@ -281,12 +291,12 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # 0.2 lagrange case (lf) if docp.is_lagrange lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x - add_nonzero_block!(H, lf_index, lf_index) + add_nonzero_block!(Is, Js, lf_index, lf_index) end # 1. main loop over steps # 1.0 v / v term - add_nonzero_block!(H, v_start, v_end, v_start, v_end) + add_nonzero_block!(Is, Js, v_start, v_end, v_start, v_end) for i = 1:docp.dim_NLP_steps @@ -298,14 +308,14 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # -> included in 1.2 # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 # -> single block for all step variables - add_nonzero_block!(H, var_offset+1, var_offset+var_block, var_offset+1, var_offset+var_block) + add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, var_offset+1, var_offset+var_block) # 1.3 path constraint wrt x_i, u_i # -> included in 1.2 # 1.4 whole block wrt v (NB. term v / v added before the loop) - add_nonzero_block!(H, v_start, v_end, var_offset+1, var_offset+var_block) - add_nonzero_block!(H, var_offset+1, var_offset+var_block, v_start, v_end) + add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block) + add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, v_start, v_end) end # 2. 
final path constraints (xf, uf, v) @@ -319,18 +329,21 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) x0_end = docp.dim_OCP_x xf_start = var_offset + 1 xf_end = var_offset + docp.dim_OCP_x - add_nonzero_block!(H, x0_start, x0_end, xf_start, xf_end) - add_nonzero_block!(H, xf_start, xf_end, x0_start, x0_end) - add_nonzero_block!(H, v_start, v_end, x0_start, x0_end) - add_nonzero_block!(H, v_start, v_end, xf_start, xf_end) + add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end) + add_nonzero_block!(Is, Js, xf_start, xf_end, x0_start, x0_end) + add_nonzero_block!(Is, Js, v_start, v_end, x0_start, x0_end) + add_nonzero_block!(Is, Js, v_start, v_end, xf_start, xf_end) end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange - add_nonzero_block!(H, docp.dim_NLP_x, docp.dim_NLP_x) + add_nonzero_block!(Is, Js, docp.dim_NLP_x, docp.dim_NLP_x) end - # replace H with sparse matrix - return sparse(H) + # build and return sparse matrix + #return sparse(H) + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs) end @@ -338,8 +351,9 @@ end """ $(TYPEDSIGNATURES) -Add block of nonzeros elements (Boolean matrix version) -More compact specific method for single element case +Add block of nonzeros elements to a sparsity pattern +Format: boolean matrix (M) or index vectors (Is, Js) +Includes a more compact method for single element case """ function add_nonzero_block!(M, i_start, i_end, j_start, j_end) M[i_start:i_end, j_start:j_end] .= true @@ -348,4 +362,19 @@ end function add_nonzero_block!(M, i, j) M[i,j] = true return +end +function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end) + for i=i_start:i_end + for j=j_start:j_end + # NB. does order matter here ? 
+ push!(Is, i) + push!(Js, j) + end + end + return +end +function add_nonzero_block!(Is, Js, i, j) + push!(Is, i) + push!(Js, j) + return end \ No newline at end of file diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 0902730c..ab2607db 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -34,29 +34,32 @@ Takeaways: - manual sparse pattern seems to give even better performance for larger problems. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. Standard benchmark: -| Trapeze | default | optimized | manual | sparse -|---------|---------|-----------|---------| -| 250 | 49.7 | 0.9 | 1.5 | -| 500 | | 2.4 | 3.5 | -| 1000 | | 5.6 | 6.4 | -| 2500 | | 23.9 | 23.9 | -| 5000 | | 89.6 | 56.3 | -| 7500 | | 225.4 | 85.9 | -| 10000 | | 526.3 | 102.4 | +| Trapeze | default | optimized | manual* | manual** | +|---------|---------|-----------|---------|----------| +| 250 | 49.7 | 0.9 | 1.5 | 1.4 | +| 500 | | 2.4 | 3.5 | 3.3 | +| 1000 | | 5.6 | 6.4 | 5.9 | +| 2500 | | 23.9 | 23.9 | 18.7 | +| 5000 | | 89.6 | 56.3 | 41.5 | +| 7500 | | 225.4 | 85.9 | 66.3 | +| 10000 | | 526.3 | 102.4 | 90.4 | + +* build sparse matrices from dense boolean matrices +** build sparse matrices from (i,j,v) vectors Sparsity details: goddard_all Trapeze (1000 and 10000 steps) -| transcription | optimized | manual | optimized | manual | sparse -|---------------|-----------|---------|-----------|--------| -| NLP vars | 4005 | 4005 | 40005 | 40005 | -| NLP cons | 6007 | 6007 | 60007 | 60007 | -| Hess nnz | 11011 | 30024 | 110011 | 300024 | -| H sparsity | 99.86% | 99.63% | 99.99% | 99.96% | -| Jac nnz | 28011 | 42043 | 280011 | 420043 | -| J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | -| allocs | 1.16GB | 106MB | 71.56GB | 4.55GB | -| time | 750ms | 85ms | 64.7s** | 3.8s | 
+| transcription | optimized | manual*/** | optimized | manual*/** | +|---------------|-----------|------------|-----------|--------| +| NLP vars | 4005 | 4005 | 40005 | 40005 | +| NLP cons | 6007 | 6007 | 60007 | 60007 | +| Hess nnz | 11011 | 30024 | 110011 | 300024 | +| H sparsity | 99.86% | 99.63% | 99.99% | 99.96% | +| Jac nnz | 28011 | 42043 | 280011 | 420043 | +| J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | +| allocs | 1.2GB | 106 / 92MB | 71.6GB | 4.55 / 0.88 GB | +| time | 750ms | 85 / 95ms | 64.7s*** | 3.8 / 2.5s | -** hessian accounts for 59 out of total 65s +*** hessian accounts for 59 out of total 65s ``` julia> direct_transcription(goddard_all().ocp, grid_size=10000, show_time=true); gradient backend ADNLPModels.ReverseDiffADGradient: 0.000137972 seconds; @@ -68,17 +71,21 @@ hessian backend ADNLPModels.SparseReverseADHessian: 58.450146911 seconds; ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. ``` -| solve | optimized | manual | optimized | manual | -|---------------|-----------|---------|-----------|--------| -| iterations | 42 | 28 | 51 | 29 | -| allocs | 2.0GB | 1.2GB | 87.5GB | 16.9GB | -| time | 2.5s | 2.5s | 151.0s*** | 42.4s | +| solve | optimized | manual*/** | optimized | manual*/** | +|---------------|-----------|-------------|-----------|-------------| +| iterations | 42 | 28 | 51 | 29 | +| allocs | 2.0GB | 1.2/1.2GB | 87.5GB | 16.9/13.2GB | +| time | 2.5s | 2.5/2.6s | 151.0s*** | 42.4/31.6s | *** building the hessian is one third of the total solve time... +## Remarks: +- it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). 
+ ## Todo: -- improve Hessian for IRK (reduce excess nonzeros) +- improve Hessian for IRK (reduce excess nonzeros with finer block granularity) - redo tests on algal_bacterial problem, including Jump +- check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! - add pattern structure for midpoint - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? From 3684b271bf3bd6eda9e99f8b645f411a1f4c424d Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 5 Feb 2025 14:23:08 +0100 Subject: [PATCH 29/44] index vectors for IRK --- src/disc/irk.jl | 171 +++++++++++++++++++++++++--------------- src/disc/trapeze.jl | 22 ++++-- test/docs/AD_backend.md | 3 +- 3 files changed, 125 insertions(+), 71 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index 829e5f97..52b4ca01 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -351,10 +351,21 @@ Build sparsity pattern for Jacobian of constraints """ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) - J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + if !docp.discretization._constant_control + error("Manual Jacobian sparsity pattern not supported for IRK scheme with piecewise linear control") + end + + #J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + # vector format for sparse matrix + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) s = docp.discretization.stage + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables + # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps @@ -365,63 +376,75 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) var_block = docp.discretization._step_variables_block + docp.dim_NLP_x var_offset = (i-1)*docp.discretization._step_variables_block + xi_start = var_offset + 1 + xi_end = var_offset + docp.dim_OCP_x + ui_start = var_offset + docp.dim_NLP_x + 1 + ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 + ki_end = var_offset + (s+1)*docp.dim_NLP_x + docp.dim_NLP_u + xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x + li = var_offset + docp.dim_NLP_x + lip1 = var_offset + var_block # state eq x_i+1 = x_i + h sum bj k_ij # 1.1 state eq wrt x_i - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) # 1.2 state eq wrt k_i, x_i+1 (skip l_i, u_i) - J[c_offset+1:c_offset+docp.dim_OCP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+ 1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u+s*docp.dim_NLP_x+docp.dim_OCP_x] .= true + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) # 1.3 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] if docp.is_lagrange - J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i - J[c_offset+docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i+1 + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, li) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, lip1) for i=1:s - J[c_offset+docp.dim_NLP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # k_ij[n+1] + kij_l = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + i*docp.dim_NLP_x + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, kij_l) end - end - + end # 1.4 stage equations k_ij = f(t_ij, x_ij, u_ij, v) - 
# with - # x_ij depending on x_i and all k_ij - # u_ij depending on u_i for piecewise constant or (u_i, u_i+1) for piecewise linear - # ie whole block depends on x_i, u_i, k_i, and u_i+1 for piecewise linear control - # NB we could skip l_i here... - J[c_offset+docp.dim_NLP_x+1:c_offset+(s+1)*docp.dim_NLP_x , var_offset+1:var_offset+docp.discretization._step_variables_block] .= true - if !docp.discretization._constant_control - J[c_offset+docp.dim_NLP_x+1:c_offset+(s+1)*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x:var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x+docp.dim_NLP_u] .= true - end + # with x_ij depending on x_i and all k_ij and u_ij == u_i + # ie whole block depends on x_i, u_i, k_i (skip l_i) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, var_offset+1, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, var_offset+1, ui_start, ki_end) + + # 1.5 path constraint wrt x_i, u_i (skip l_i) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) - # 1.5 path constraint wrt x_i, u_i - J[c_offset+(s+1)*docp.dim_NLP_x+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true - J[c_offset+(s+1)*docp.dim_NLP_x+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true - # 1.6 whole block wrt v - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) end # 2. 
final path constraints (xf, uf, v) c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - var_block = docp.discretization._step_variables_block - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf - J[c_offset+1:c_offset+c_block, var_offset+docp.dim_NLP_x+1:var_offset+docp.dim_NLP_x+docp.dim_NLP_u] .= true # uf - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + uf_start = var_offset + docp.dim_NLP_x + 1 + uf_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1,c_offset+c_block, uf_start, uf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) # 3. 
boundary constraints (x0, xf, v) c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block c_block = docp.dim_boundary_cons + docp.dim_v_cons - J[c_offset+1:c_offset+c_block, 1:docp.dim_OCP_x] .= true # x0 - J[c_offset+1:c_offset+c_block, var_offset+1:var_offset+docp.dim_OCP_x] .= true # xf - J[c_offset+1:c_offset+c_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # v + x0_start = 1 + x0_end = docp.dim_OCP_x + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, x0_start, x0_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) # 3.4 null initial condition for lagrangian cost state l0 if docp.is_lagrange - J[docp.dim_NLP_constraints, docp.dim_NLP_x] = true + add_nonzero_block!(Is, Js, docp.dim_NLP_constraints, docp.dim_NLP_x) end - # return sparse matrix - return sparse(J) + # build and return sparse matrix + #return sparse(J) + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs) end @@ -432,83 +455,107 @@ Build sparsity pattern for Hessian of Lagrangian """ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) + if !docp.discretization._constant_control + error("Manual Hessian sparsity pattern not supported for IRK scheme with piecewise linear control") + end + # NB. need to provide full pattern for coloring, not just upper/lower part - H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + #H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) s = docp.discretization.stage + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables + # 0. objective # 0.1 mayer cost (x0, xf, v) # -> grouped with term 3. 
for boundary conditions # 0.2 lagrange case (lf) if docp.is_lagrange lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x - H[lf_index, lf_index] = true + add_nonzero_block!(Is, Js, lf_index, lf_index) end # 1. main loop over steps # 1.0 v / v term - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true + add_nonzero_block!(Is, Js, v_start, v_end, v_start, v_end) for i = 1:docp.dim_NLP_steps # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) var_block = docp.discretization._step_variables_block + docp.dim_NLP_x var_offset = (i-1)*docp.discretization._step_variables_block + xi_start = var_offset + 1 + xi_end = var_offset + docp.dim_OCP_x + #ui_start = var_offset + docp.dim_NLP_x + 1 + #ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 + ki_end = var_offset + (s+1)*docp.dim_NLP_x + docp.dim_NLP_u + xip1_start = var_offset + docp.discretization._step_variables_block + 1 + xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x + li = var_offset + docp.dim_NLP_x + lip1 = var_offset + var_block # 1.1 state eq x_i+1 = x_i + h sum bj k_ij # wrt x_i, k_i, x_i+1 (skip l_i, u_i) # -> included in 1.3 except x_i+1 terms - H[var_offset+1:var_offset+docp.dim_OCP_x, var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x] .= true # x_i / x_i+1 - H[var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x , var_offset+1:var_offset+docp.dim_OCP_x] .= true # x_i+1 / x_i - H[var_offset+docp.dim_NLP_x+1:var_offset+(s+1)*docp.dim_NLP_x, var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x] .= true # k_i / x_i+1 - 
H[var_offset+docp.discretization._step_variables_block+1:var_offset+docp.discretization._step_variables_block+docp.dim_OCP_x , var_offset+docp.dim_NLP_x+1:var_offset+(s+1)*docp.dim_NLP_x] .= true # x_i+1 / k_i + add_nonzero_block!(Is, Js, xi_start, xi_end, xip1_start, xip1_end) + add_nonzero_block!(Is, Js, xip1_start, xip1_end, xi_start, xi_end) + add_nonzero_block!(Is, Js, ki_start, ki_end, xip1_start, xip1_end) + add_nonzero_block!(Is, Js, xip1_start, xip1_end, ki_start, ki_end) # 1.2 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] # -> included in 1.3 except l_i+1 terms # +++ could be done fully here and l_i skipped in 1.3 - H[var_offset+docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # l_i / l_i+1 - H[ var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_x] = true # l_i+1 / l_i + add_nonzero_block!(Is, Js, li, lip1) + add_nonzero_block!(Is, Js, lip1, li) for i=1:s - H[var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x , var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x] = true # k_i[n+1] / l_i+1 - H[var_offset+docp.discretization._step_variables_block+docp.dim_NLP_x, var_offset+docp.dim_NLP_x+docp.dim_NLP_u+i*docp.dim_NLP_x] = true # l_i+1 / k_i[n+1] + kij_l = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + i*docp.dim_NLP_x + add_nonzero_block!(Is, Js, kij_l, lip1) + add_nonzero_block!(Is, Js, lip1, kij_l) end # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) - # wrt x_i, u_i, k_i (and u_i+1 for piecewise linear control) - # NB. 
l_i terms for 1.2 are included but we have excess nnz eg l_i / x_i,u_i,k_i[1:n] - if docp.discretization._constant_control - H[var_offset+1:var_offset+docp.discretization._step_variables_block, var_offset+1:var_offset+docp.discretization._step_variables_block] .= true - else - error("Manual Hessian sparsity pattern not supported for IRK scheme with piecewise linear control") - end + # wrt x_i, u_i, k_i + # +++if we skip li we need to do 1.2 terms in full + add_nonzero_block!(Is, Js, var_offset+1, var_offset+docp.discretization._step_variables_block, var_offset+1, var_offset+docp.discretization._step_variables_block) # 1.4 path constraint wrt x_i, u_i # -> included in 1.3 # 1.5 whole block wrt v (NB. term v / v added before the loop) - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+var_block] .= true # v / var block - H[var_offset+1:var_offset+var_block, docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables] .= true # var block / v + add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block) + add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, v_start, v_end) end # 2. final path constraints (xf, uf, v) - # -> included in last iteration from loop + # -> included in last loop iteration # 3. 
boundary constraints (x0, xf, v) - # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last iterations from loop + # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last loop iterations if docp.is_mayer || docp.dim_boundary_cons > 0 var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - H[1:docp.dim_OCP_x, var_offset+1:var_offset+docp.dim_OCP_x] .= true # x0 / xf - H[var_offset+1:var_offset+docp.dim_OCP_x, 1:docp.dim_OCP_x] .= true # xf / x0 - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, 1:docp.dim_OCP_x] .= true # v / x0 - H[docp.dim_NLP_variables-docp.dim_NLP_v+1:docp.dim_NLP_variables, var_offset+1:var_offset+docp.dim_OCP_x] .= true # v / xf + x0_start = 1 + x0_end = docp.dim_OCP_x + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end) + add_nonzero_block!(Is, Js, xf_start, xf_end, x0_start, x0_end) + add_nonzero_block!(Is, Js, v_start, v_end, x0_start, x0_end) + add_nonzero_block!(Is, Js, v_start, v_end, xf_start, xf_end) end # 3.1 null initial condition for lagrangian cost state l0 if docp.is_lagrange - H[docp.dim_NLP_x, docp.dim_NLP_x] = true + add_nonzero_block!(Is, Js, docp.dim_NLP_x, docp.dim_NLP_x) end - # return sparse matrix - return sparse(H) + # build and return sparse matrix + #return sparse(H) + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs) -end \ No newline at end of file +end diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index d59cb8a8..7afb25ad 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -200,7 +200,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps - + # constraints block and offset: state equation, path constraints c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block c_offset = (i-1)*c_block @@ -301,8 +301,8 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) for i = 1:docp.dim_NLP_steps # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 - var_offset = (i-1)*docp.discretization._step_variables_block var_block = docp.discretization._step_variables_block * 2 + var_offset = (i-1)*docp.discretization._step_variables_block # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) # -> included in 1.2 @@ -354,27 +354,33 @@ $(TYPEDSIGNATURES) Add block of nonzeros elements to a sparsity pattern Format: boolean matrix (M) or index vectors (Is, Js) Includes a more compact method for single element case +Note: independent from discretization scheme """ -function add_nonzero_block!(M, i_start, i_end, j_start, j_end) +function add_nonzero_block!(M, i_start, i_end, j_start, j_end, sym=false) M[i_start:i_end, j_start:j_end] .= true + sym && M[j_start:j_end, i_start:i_end] .= true return end -function add_nonzero_block!(M, i, j) +function add_nonzero_block!(M, i, j, sym=false) M[i,j] = true + sym && M[j,i] = true return end -function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end) +function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end, sym=false) for i=i_start:i_end for j=j_start:j_end - # NB. does order matter here ? 
push!(Is, i) push!(Js, j) + sym && push!(Is, j) + sym && push!(Js, i) end end return end -function add_nonzero_block!(Is, Js, i, j) +function add_nonzero_block!(Is, Js, i, j, sym=false) push!(Is, i) push!(Js, j) + sym && push!(Is, j) + sym && push!(Js, i) return -end \ No newline at end of file +end diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index ab2607db..c2ec698d 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -83,10 +83,11 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. - it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). ## Todo: +- vector format for IRK - improve Hessian for IRK (reduce excess nonzeros with finer block granularity) - redo tests on algal_bacterial problem, including Jump - check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! -- add pattern structure for midpoint +- manual pattern structure for midpoint - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? 
eg for dynamics and path constraints From a65f75e5f5440f89bf0eb00b1c96982156ddb2d5 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 5 Feb 2025 17:36:13 +0100 Subject: [PATCH 30/44] todo: debug jacobian for irk --- src/disc/irk.jl | 135 +++++++++++++++++----------------------- src/disc/trapeze.jl | 52 ++++++---------- test/docs/AD_backend.md | 10 +++ 3 files changed, 86 insertions(+), 111 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index 52b4ca01..ea0bd35c 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -5,9 +5,10 @@ Internal layout for NLP variables: .., X_N-1, U_N-1, K_N-1^1..K_N-1^s, X_N, U_N, V] -with s the stage number and U given by either linear interpolation in [t_i, t_i+1] -or constant interpolation for 1-stage methods or if specfied (U_N might end up unused) -Path constraints are all evaluated at time steps +with s the stage number and U piecewise constant equal to U_i in [t_i, t_i+1] +or, for methods with s>1, piecewise linear if option constant_control set to false +NB. U_N may be removed at some point if we disable piecewise linear control +Path constraints are all evaluated at time steps, including final time. =# @@ -355,7 +356,8 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) error("Manual Jacobian sparsity pattern not supported for IRK scheme with piecewise linear control") end - #J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) + BUG, recheck indices... + # vector format for sparse matrix Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -386,12 +388,13 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) li = var_offset + docp.dim_NLP_x lip1 = var_offset + var_block - # state eq x_i+1 = x_i + h sum bj k_ij - # 1.1 state eq wrt x_i + # 1.1 state eq x_i+1 = x_i + h_i sum bj k_ij + # depends on x_i, k_ij, x_i+1, and v (h_i in variable times case !) 
add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) - # 1.2 state eq wrt k_i, x_i+1 (skip l_i, u_i) + # (skip l_i, u_i) should skip k_i[n+1] also... add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) - # 1.3 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, v_start, v_end) + # 1.2 lagrange part l_i+1 = l_i + h_i (sum bj k_ij)[n+1] if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, li) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, lip1) @@ -399,20 +402,20 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) kij_l = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + i*docp.dim_NLP_x add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, kij_l) end + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, v_start, v_end) end - # 1.4 stage equations k_ij = f(t_ij, x_ij, u_ij, v) + # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) # with x_ij depending on x_i and all k_ij and u_ij == u_i - # ie whole block depends on x_i, u_i, k_i (skip l_i) - add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, var_offset+1, xi_start, xi_end) - add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, var_offset+1, ui_start, ki_end) - - # 1.5 path constraint wrt x_i, u_i (skip l_i) - add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) - add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) - - # 1.6 whole block wrt v - add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) + # ie this part depends on x_i, u_i, k_i (skip l_i) and v + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, ui_start, ki_end) + add_nonzero_block!(Is, 
Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, v_start, v_end) + + # 1.4 path constraint g(t_i, x_i, u_i, v) (skip l_i) + add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) + add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, v_start, v_end) end # 2. final path constraints (xf, uf, v) @@ -441,7 +444,6 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) end # build and return sparse matrix - #return sparse(J) nnzj = length(Is) Vs = ones(Bool, nnzj) return sparse(Is, Js, Vs) @@ -460,7 +462,6 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) end # NB. need to provide full pattern for coloring, not just upper/lower part - #H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -474,10 +475,7 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) # 0.1 mayer cost (x0, xf, v) # -> grouped with term 3. for boundary conditions # 0.2 lagrange case (lf) - if docp.is_lagrange - lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x - add_nonzero_block!(Is, Js, lf_index, lf_index) - end + # -> 2nd order term is zero # 1. 
main loop over steps # 1.0 v / v term @@ -486,74 +484,55 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) for i = 1:docp.dim_NLP_steps # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) - var_block = docp.discretization._step_variables_block + docp.dim_NLP_x var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x - #ui_start = var_offset + docp.dim_NLP_x + 1 - #ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + ui_start = var_offset + docp.dim_NLP_x + 1 + ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 ki_end = var_offset + (s+1)*docp.dim_NLP_x + docp.dim_NLP_u - xip1_start = var_offset + docp.discretization._step_variables_block + 1 - xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x - li = var_offset + docp.dim_NLP_x - lip1 = var_offset + var_block - - # 1.1 state eq x_i+1 = x_i + h sum bj k_ij - # wrt x_i, k_i, x_i+1 (skip l_i, u_i) - # -> included in 1.3 except x_i+1 terms - add_nonzero_block!(Is, Js, xi_start, xi_end, xip1_start, xip1_end) - add_nonzero_block!(Is, Js, xip1_start, xip1_end, xi_start, xi_end) - add_nonzero_block!(Is, Js, ki_start, ki_end, xip1_start, xip1_end) - add_nonzero_block!(Is, Js, xip1_start, xip1_end, ki_start, ki_end) + # 1.1 state eq 0 = x_i+1 - (x_i + h sum bj k_ij) + # -> 2nd order terms are zero # 1.2 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] - # -> included in 1.3 except l_i+1 terms - # +++ could be done fully here and l_i skipped in 1.3 - add_nonzero_block!(Is, Js, li, lip1) - add_nonzero_block!(Is, Js, lip1, li) - for i=1:s - kij_l = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + i*docp.dim_NLP_x - add_nonzero_block!(Is, Js, kij_l, lip1) - add_nonzero_block!(Is, Js, lip1, kij_l) - end + # -> 2nd order terms are zero - # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) - # wrt x_i, u_i, k_i - # +++if we skip li we need to do 1.2 
terms in full - add_nonzero_block!(Is, Js, var_offset+1, var_offset+docp.discretization._step_variables_block, var_offset+1, var_offset+docp.discretization._step_variables_block) + # 1.3 stage equations 0 = k_ij - f(t_ij, x_ij(x_i, k_i), u_ij, v) + # wrt x_i, u_i, k_i (skip l_i) + add_nonzero_block!(Is, Js, xi_start, xi_end, xi_start, xi_end) + add_nonzero_block!(Is, Js, ui_start, ki_end, ui_start, ki_end) + add_nonzero_block!(Is, Js, xi_start, xi_end, ui_start, ki_end; sym=true) + add_nonzero_block!(Is, Js, xi_start, xi_end, v_start, v_end; sym=true) + add_nonzero_block!(Is, Js, ui_start, ki_end, v_start, v_end; sym=true) - # 1.4 path constraint wrt x_i, u_i + # 1.4 path constraint g(t_i, x_i, u_i, v) # -> included in 1.3 - - # 1.5 whole block wrt v (NB. term v / v added before the loop) - add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block) - add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, v_start, v_end) end - # 2. final path constraints (xf, uf, v) - # -> included in last loop iteration + # 2. final path constraints (xf, uf, v) (assume present) + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + # NB U_N may be removed at some point if we use only piecewise constant control + uf_start = var_offset + docp.dim_NLP_x + 1 + uf_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + add_nonzero_block!(Is, Js, xf_start, xf_end, xf_start, xf_end) + add_nonzero_block!(Is, Js, uf_start, uf_end, uf_start, uf_end) + add_nonzero_block!(Is, Js, xf_start, xf_end, uf_start, uf_end; sym=true) + add_nonzero_block!(Is, Js, xf_start, xf_end, v_start, v_end; sym=true) + add_nonzero_block!(Is, Js, uf_start, uf_end, v_start, v_end; sym=true) + + # 3. boundary constraints (x0, xf, v) or mayer cost g0(x0, xf, v) (assume present) + # -> x0 / x0, x0 / v terms included in first loop iteration + # -> xf / xf, xf / v terms included in 2. 
+ x0_start = 1 + x0_end = docp.dim_OCP_x + add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end; sym=true) - # 3. boundary constraints (x0, xf, v) - # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last loop iterations - if docp.is_mayer || docp.dim_boundary_cons > 0 - var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - x0_start = 1 - x0_end = docp.dim_OCP_x - xf_start = var_offset + 1 - xf_end = var_offset + docp.dim_OCP_x - add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end) - add_nonzero_block!(Is, Js, xf_start, xf_end, x0_start, x0_end) - add_nonzero_block!(Is, Js, v_start, v_end, x0_start, x0_end) - add_nonzero_block!(Is, Js, v_start, v_end, xf_start, xf_end) - end # 3.1 null initial condition for lagrangian cost state l0 - if docp.is_lagrange - add_nonzero_block!(Is, Js, docp.dim_NLP_x, docp.dim_NLP_x) - end - + # -> 2nd order term is zero + # build and return sparse matrix - #return sparse(H) nnzj = length(Is) Vs = ones(Bool, nnzj) return sparse(Is, Js, Vs) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 7afb25ad..1a9188fe 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -182,15 +182,9 @@ Build sparsity pattern for Jacobian of constraints """ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) - #J = zeros(Bool, docp.dim_NLP_constraints, docp.dim_NLP_variables) - # vector format for sparse matrix # +++ better to compute nnzj beforehand and allocate the 3 vectors (no push then) ? # pass to addnnz the current offset too - #nnzj = ... 
- #Is = Vector{Int}(undef, nnzj) - #Js = Vector{Int}(undef, nnzj) - #Vs = ones(Bool, nnzj) Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -216,14 +210,13 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) uip1_start = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u + 1 uip1_end = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u*2 - # state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) - # 1.1 state eq wrt x_i + # state eq 0 = xip1 - (xi + hi/2 (fi + fip1)) + # wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) - # 1.2 state eq wrt u_i, x_i+1 (skip l_i) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ui_start, xip1_end) - # 1.3 state eq wrt u_i+1 (skip l_i+1) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) - # 1.4 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + # 1.4 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) + # wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) end @@ -262,7 +255,6 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) end # build and return sparse matrix - #return sparse(J) nnzj = length(Is) Vs = ones(Bool, nnzj) return sparse(Is, Js, Vs) @@ -277,7 +269,6 @@ Build sparsity pattern for Hessian of Lagrangian function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # NB. need to provide full pattern for coloring, not just upper/lower part - #H = zeros(Bool, docp.dim_NLP_variables, docp.dim_NLP_variables) Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -289,10 +280,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # 0.1 mayer cost (x0, xf, v) # -> grouped with term 3. 
for boundary conditions # 0.2 lagrange case (lf) - if docp.is_lagrange - lf_index = docp.dim_NLP_steps * docp.discretization._step_variables_block + docp.dim_NLP_x - add_nonzero_block!(Is, Js, lf_index, lf_index) - end + # -> 2nd order term is zero # 1. main loop over steps # 1.0 v / v term @@ -304,9 +292,11 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) var_block = docp.discretization._step_variables_block * 2 var_offset = (i-1)*docp.discretization._step_variables_block - # 1.1 state eq wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + # 1.1 state eq 0 = xip1 - (xi + hi/2 (fi + fip1)) + # wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) # -> included in 1.2 - # 1.2 lagrange part wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + # 1.2 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) + # wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 # -> single block for all step variables add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, var_offset+1, var_offset+var_block) @@ -314,8 +304,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # -> included in 1.2 # 1.4 whole block wrt v (NB. term v / v added before the loop) - add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block) - add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, v_start, v_end) + add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block; sym=true) end # 2. 
final path constraints (xf, uf, v) @@ -329,18 +318,14 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) x0_end = docp.dim_OCP_x xf_start = var_offset + 1 xf_end = var_offset + docp.dim_OCP_x - add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end) - add_nonzero_block!(Is, Js, xf_start, xf_end, x0_start, x0_end) + add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end; sym=true) add_nonzero_block!(Is, Js, v_start, v_end, x0_start, x0_end) add_nonzero_block!(Is, Js, v_start, v_end, xf_start, xf_end) end # 3.1 null initial condition for lagrangian cost state l0 - if docp.is_lagrange - add_nonzero_block!(Is, Js, docp.dim_NLP_x, docp.dim_NLP_x) - end + # -> 2nd order term is zero # build and return sparse matrix - #return sparse(H) nnzj = length(Is) Vs = ones(Bool, nnzj) return sparse(Is, Js, Vs) @@ -354,19 +339,20 @@ $(TYPEDSIGNATURES) Add block of nonzeros elements to a sparsity pattern Format: boolean matrix (M) or index vectors (Is, Js) Includes a more compact method for single element case +Option to add the symmetric block also (eg for Hessian) Note: independent from discretization scheme """ -function add_nonzero_block!(M, i_start, i_end, j_start, j_end, sym=false) +function add_nonzero_block!(M, i_start, i_end, j_start, j_end; sym=false) M[i_start:i_end, j_start:j_end] .= true - sym && M[j_start:j_end, i_start:i_end] .= true + sym && (M[j_start:j_end, i_start:i_end] .= true) return end -function add_nonzero_block!(M, i, j, sym=false) +function add_nonzero_block!(M, i, j; sym=false) M[i,j] = true - sym && M[j,i] = true + sym && (M[j,i] = true) return end -function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end, sym=false) +function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end; sym=false) for i=i_start:i_end for j=j_start:j_end push!(Is, i) @@ -377,7 +363,7 @@ function add_nonzero_block!(Is, Js, i_start, i_end, j_start, j_end, sym=false) end return end -function add_nonzero_block!(Is, Js, i, j, sym=false) +function 
add_nonzero_block!(Is, Js, i, j; sym=false) push!(Is, i) push!(Js, j) sym && push!(Is, j) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index c2ec698d..97f423ed 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -79,6 +79,16 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. *** building the hessian is one third of the total solve time... +Standard benchmark: +| GL2 | optimized | manual | +|---------|----------|--------| +| 250 | | | +| 500 | | | +| 1000 | | | +| 2500 | | | +| 5000 | | | + + ## Remarks: - it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). From 357984f2408312683eb1b84a8e760b5c346b703d Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 10:18:21 +0100 Subject: [PATCH 31/44] bench ok for GL2 manual --- src/default.jl | 8 +++++++ src/disc/irk.jl | 48 ++++++++++++++++++--------------------- src/disc/trapeze.jl | 15 ++++++------ src/docp.jl | 6 ++--- src/solve.jl | 8 +++---- test/docs/AD_backend.md | 29 +++++++++++------------ test/docs/AD_backend.png | Bin 0 -> 39014 bytes test/suite/test_nlp.jl | 3 ++- 8 files changed, 60 insertions(+), 57 deletions(-) create mode 100644 test/docs/AD_backend.png diff --git a/src/default.jl b/src/default.jl index b2d317ae..39058308 100644 --- a/src/default.jl +++ b/src/default.jl @@ -24,6 +24,14 @@ The default value is `nothing`. """ __time_grid() = nothing +""" +$(TYPEDSIGNATURES) + +Used to set the default control type for IRK schemes +The default value is `:constant`. +""" +__control_type() = :constant + """ $(TYPEDSIGNATURES) Used to set the default backend for AD in ADNLPModels. 
diff --git a/src/disc/irk.jl b/src/disc/irk.jl index ea0bd35c..19a78a64 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -6,7 +6,7 @@ Internal layout for NLP variables: X_N-1, U_N-1, K_N-1^1..K_N-1^s, X_N, U_N, V] with s the stage number and U piecewise constant equal to U_i in [t_i, t_i+1] -or, for methods with s>1, piecewise linear if option constant_control set to false +or, for methods with s>1, piecewise linear if option control_type set to :linear NB. U_N may be removed at some point if we disable piecewise linear control Path constraints are all evaluated at time steps, including final time. =# @@ -58,9 +58,9 @@ struct Gauss_Legendre_2 <: GenericIRK _step_variables_block::Int _state_stage_eqs_block::Int _step_pathcons_block::Int - _constant_control::Bool + _control_type::Symbol - function Gauss_Legendre_2(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, constant_control) + function Gauss_Legendre_2(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, control_type) stage = 2 @@ -71,7 +71,7 @@ struct Gauss_Legendre_2 <: GenericIRK [0.5, 0.5], [(0.5 - sqrt(3) / 6), (0.5 + sqrt(3) / 6)], step_variables_block, state_stage_eqs_block, step_pathcons_block, - constant_control + control_type ) return disc, dim_NLP_variables, dim_NLP_constraints @@ -94,9 +94,9 @@ struct Gauss_Legendre_3 <: GenericIRK _step_variables_block::Int _state_stage_eqs_block::Int _step_pathcons_block::Int - _constant_control::Bool + _control_type::Symbol - function Gauss_Legendre_3(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, constant_control) + function Gauss_Legendre_3(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, control_type) stage = 3 @@ -108,7 +108,7 @@ struct Gauss_Legendre_3 <: GenericIRK (5/36 + sqrt(15) / 30) (2/9 
+ sqrt(15) / 15) (5.0/36.0)], [5.0/18.0, 4.0/9.0, 5.0/18.0], [0.5 - 0.1*sqrt(15), 0.5, 0.5 + 0.1*sqrt(15)], - step_variables_block, state_stage_eqs_block, step_pathcons_block, constant_control + step_variables_block, state_stage_eqs_block, step_pathcons_block, control_type ) return disc, dim_NLP_variables, dim_NLP_constraints @@ -184,7 +184,7 @@ function get_OCP_control_at_time_step(xu, docp::DOCP{ <: GenericIRK, <: ScalVect return @view xu[(offset + 1):(offset + docp.dim_NLP_u)] end function get_OCP_control_at_time_stage(xu, docp::DOCP{ <: GenericIRK, <: ScalVect, ScalVariable, <: ScalVect}, i, cj) - if (docp.discretization.stage == 1) || (docp.discretization._constant_control) + if (docp.discretization.stage == 1) || (docp.discretization._control_type == :constant) # constant interpolation on step return get_OCP_control_at_time_step(xu, docp, i) else @@ -195,7 +195,7 @@ function get_OCP_control_at_time_stage(xu, docp::DOCP{ <: GenericIRK, <: ScalVec end end function get_OCP_control_at_time_stage(xu, docp::DOCP{ <: GenericIRK, <: ScalVect, VectVariable, <: ScalVect}, i, cj) - if (docp.discretization.stage == 1) || (docp.discretization._constant_control) + if (docp.discretization.stage == 1) || (docp.discretization._control_type == :constant) # constant interpolation on step return get_OCP_control_at_time_step(xu, docp, i) else @@ -352,12 +352,10 @@ Build sparsity pattern for Jacobian of constraints """ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) - if !docp.discretization._constant_control + if docp.discretization._control_type != :constant error("Manual Jacobian sparsity pattern not supported for IRK scheme with piecewise linear control") end - BUG, recheck indices... - # vector format for sparse matrix Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -371,27 +369,25 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps - # constraints block and offset: state equation, path constraints c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block c_offset = (i-1)*c_block # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) - var_block = docp.discretization._step_variables_block + docp.dim_NLP_x var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x ui_start = var_offset + docp.dim_NLP_x + 1 ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 - ki_end = var_offset + (s+1)*docp.dim_NLP_x + docp.dim_NLP_u + ki_end = var_offset + docp.discretization._step_variables_block xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x li = var_offset + docp.dim_NLP_x - lip1 = var_offset + var_block + lip1 = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x - # 1.1 state eq x_i+1 = x_i + h_i sum bj k_ij - # depends on x_i, k_ij, x_i+1, and v (h_i in variable times case !) + # 1.1 state eq 0 = x_i+1 - (x_i + h_i sum bj k_ij) + # depends on x_i, k_ij, x_i+1, and v for h_i in variable times case ! add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) - # (skip l_i, u_i) should skip k_i[n+1] also... + # skip l_i, u_i (should skip k_i[n+1] also but annoying...) 
add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, v_start, v_end) # 1.2 lagrange part l_i+1 = l_i + h_i (sum bj k_ij)[n+1] @@ -403,13 +399,13 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, kij_l) end add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, v_start, v_end) - end + end - # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) + # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) [and lagrange cost] # with x_ij depending on x_i and all k_ij and u_ij == u_i # ie this part depends on x_i, u_i, k_i (skip l_i) and v add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, xi_start, xi_end) - add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, ui_start, ki_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, ui_start, ki_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, v_start, v_end) # 1.4 path constraint g(t_i, x_i, u_i, v) (skip l_i) @@ -419,7 +415,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) end # 2. 
final path constraints (xf, uf, v) - c_offset = docp.dim_NLP_steps*(docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) c_block = docp.discretization._step_pathcons_block var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block xf_start = var_offset + 1 @@ -446,7 +442,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # build and return sparse matrix nnzj = length(Is) Vs = ones(Bool, nnzj) - return sparse(Is, Js, Vs) + return sparse(Is, Js, Vs, docp.dim_NLP_constraints, docp.dim_NLP_variables) end @@ -457,7 +453,7 @@ Build sparsity pattern for Hessian of Lagrangian """ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) - if !docp.discretization._constant_control + if docp.discretization._control_type != :constant error("Manual Hessian sparsity pattern not supported for IRK scheme with piecewise linear control") end @@ -535,6 +531,6 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) # build and return sparse matrix nnzj = length(Is) Vs = ones(Bool, nnzj) - return sparse(Is, Js, Vs) + return sparse(Is, Js, Vs, docp.dim_NLP_variables, docp.dim_NLP_variables) end diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 1a9188fe..3c00f961 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -210,22 +210,23 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) uip1_start = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u + 1 uip1_end = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u*2 - # state eq 0 = xip1 - (xi + hi/2 (fi + fip1)) - # wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + # 1.1 state eq 0 = x_i+1 - (x_i + h_i/2 (fi + fip1)) + # depends on x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) + # and v for h_i and fi fip1 add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+1, 
c_offset+docp.dim_OCP_x, ui_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) - # 1.4 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) + # 1.2 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) # wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) end - # 1.5 path constraint wrt x_i, u_i (skip l_i) + # 1.3 path constraint wrt x_i, u_i (skip l_i) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) - # 1.6 whole block wrt v + # 1.6 whole block wrt v (+++ resplit for clarity) add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) end @@ -257,7 +258,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # build and return sparse matrix nnzj = length(Is) Vs = ones(Bool, nnzj) - return sparse(Is, Js, Vs) + return sparse(Is, Js, Vs, docp.dim_NLP_constraints, docp.dim_NLP_variables) end @@ -328,7 +329,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) # build and return sparse matrix nnzj = length(Is) Vs = ones(Bool, nnzj) - return sparse(Is, Js, Vs) + return sparse(Is, Js, Vs, docp.dim_NLP_variables, docp.dim_NLP_variables) end diff --git a/src/docp.jl b/src/docp.jl index d6a0660b..ffcaacec 100644 --- a/src/docp.jl +++ b/src/docp.jl @@ -87,7 +87,7 @@ struct DOCP{T <: Discretization, X <: ScalVect, U <: ScalVect, V <: ScalVect, G _type_v::V # constructor - function DOCP(ocp::OptimalControlModel; grid_size=__grid_size(), time_grid=__time_grid(), disc_method=__disc_method(), constant_control=false) + function DOCP(ocp::OptimalControlModel; grid_size=__grid_size(), time_grid=__time_grid(), disc_method=__disc_method(), control_type=__control_type()) # time grid if time_grid == nothing @@ -189,9 +189,9 @@ struct DOCP{T <: Discretization, X <: 
ScalVect, U <: ScalVect, V <: ScalVect, G elseif disc_method == :gauss_legendre_1 discretization, dim_NLP_variables, dim_NLP_constraints = CTDirect.Gauss_Legendre_1(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons) elseif disc_method == :gauss_legendre_2 - discretization, dim_NLP_variables, dim_NLP_constraints = CTDirect.Gauss_Legendre_2(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, constant_control) + discretization, dim_NLP_variables, dim_NLP_constraints = CTDirect.Gauss_Legendre_2(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, control_type) elseif disc_method == :gauss_legendre_3 - discretization, dim_NLP_variables, dim_NLP_constraints = CTDirect.Gauss_Legendre_3(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, constant_control) + discretization, dim_NLP_variables, dim_NLP_constraints = CTDirect.Gauss_Legendre_3(dim_NLP_steps, dim_NLP_x, dim_NLP_u, dim_NLP_v, dim_u_cons, dim_x_cons, dim_xu_cons, dim_boundary_cons, dim_v_cons, control_type) else error("Unknown discretization method: ", disc_method, "\nValid options are disc_method={:trapeze, :midpoint, :gauss_legendre_1, :gauss_legendre_2, :gauss_legendre_3}\n", typeof(disc_method)) end diff --git a/src/solve.jl b/src/solve.jl index 7911b286..6722fa83 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -25,13 +25,13 @@ function direct_transcription( grid_size = __grid_size(), time_grid = __time_grid(), disc_method = __disc_method(), - constant_control = true, + control_type = __control_type(), adnlp_backend = __adnlp_backend(), show_time = false ) # build DOCP - docp = DOCP(ocp; grid_size=grid_size, time_grid=time_grid, disc_method=disc_method, constant_control = constant_control) + docp = DOCP(ocp; grid_size=grid_size, time_grid=time_grid, 
disc_method=disc_method, control_type = control_type) # set bounds in DOCP variables_bounds!(docp) @@ -103,7 +103,7 @@ function direct_solve( grid_size::Int = CTDirect.__grid_size(), time_grid = CTDirect.__time_grid(), disc_method = __disc_method(), - constant_control = true, + control_type = __control_type(), adnlp_backend = __adnlp_backend(), kwargs..., ) @@ -117,7 +117,7 @@ function direct_solve( grid_size = grid_size, time_grid = time_grid, disc_method = disc_method, - constant_control = constant_control, + control_type = control_type, adnlp_backend = adnlp_backend ) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 97f423ed..3f017a05 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -33,7 +33,9 @@ Takeaways: - the `:optimized` backend (with reverse mode for Hessian) is much better than full forward mode. - manual sparse pattern seems to give even better performance for larger problems. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. -Standard benchmark: +![benchmark](AD_backend.png) + +Standard benchmark for Trapeze: | Trapeze | default | optimized | manual* | manual** | |---------|---------|-----------|---------|----------| | 250 | 49.7 | 0.9 | 1.5 | 1.4 | @@ -44,7 +46,7 @@ Standard benchmark: | 7500 | | 225.4 | 85.9 | 66.3 | | 10000 | | 526.3 | 102.4 | 90.4 | -* build sparse matrices from dense boolean matrices +* (older version) build sparse matrices from dense boolean matrices ** build sparse matrices from (i,j,v) vectors Sparsity details: goddard_all Trapeze (1000 and 10000 steps) @@ -79,26 +81,21 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. *** building the hessian is one third of the total solve time... 
-Standard benchmark: +Standard benchmark for Gauss Legendre 2: | GL2 | optimized | manual | -|---------|----------|--------| -| 250 | | | -| 500 | | | -| 1000 | | | -| 2500 | | | -| 5000 | | | - +|---------|-----------|--------| +| 250 | 3.9 | 5.0 | +| 500 | 10.5 | 12.9 | +| 1000 | 121.2 | 26.1 | +| 2500 | 136.6 | 77.2 | +| 5000 | 551.9 | 172.2 | ## Remarks: - it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). ## Todo: -- vector format for IRK -- improve Hessian for IRK (reduce excess nonzeros with finer block granularity) -- redo tests on algal_bacterial problem, including Jump -- check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! - manual pattern structure for midpoint -- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) +- check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? eg for dynamics and path constraints - +- try to disable some unused (?) parts such as hprod ? 
(according to show_time info the impact may be small) diff --git a/test/docs/AD_backend.png b/test/docs/AD_backend.png new file mode 100644 index 0000000000000000000000000000000000000000..422791d94e3ec13b56cd17a679a4f5ecc648903c GIT binary patch literal 39014 zcmb4rhd-5l-2YLMvXdPRBV}gKj6x_QWM>sJN@mF@6e`?N$OsXVjErO^DU$5HlWf_e z$nSl6p63tvo%?k!C)c^I@A!P)>w7|VwA85fvhF1i2vq84PM;?bb{G)|B>$1`z)!Xe zL!0m~GHXq>(}Zo}U&$32Q3S#vg8FGC{Tm5019oQgJ-vkaqg4MzFtdS0_?*6>y7rf?^6-WO&`*X`>E-_wmf9gJE z>1~?7k@ZiX+Np|Y+iImg;@ZP}&x*OzpoN>Afd9CszE3O2yrtunp<#~M)7aQM>-B}1qJ_sQje*r zsQ?Qxditm#|LuyLoVlI?JAS&7va%?ycmCUc4ixS(<|-;G78XO*)mFa-D_*{Q8Rk{t z)FVQ;@q78D5si+{nKL0VF@}DAe(|&UzSj3oe6}9?u{hQ+arf<8$=`n^uY8R?K}oig zAay{?#3b%5@#Tf<;N|S>+{dF&>9{k3L*Dz(>g%Yewj#%_v==XGOLHZ!4W4}__bxN@ zjGms`Wb2E2_wJD@rDbF&+?cnV?tFdX#0k@4Czcb|LBq%C=;&5fRysR7-@JL#t}HJv zU)|;VXH_Nl!TtNn%E}xHem=wR?=bLP*xVRT3UFj$W9#ef&C1LSSz+FF^VThkN}ryO zA5ZJ)6*PMGQjeVOt_-?+_eEM-cVAzX@1M@5re6^Xe}3tc68IY7d!P3hbH~JP{>Od(KyG;9+FB&dSva+(;+x5l8#m&tJ@Yd&ugNFM0%hO#= z)zwNE68k@-N^-ux#ZH{iy?Al(+qZAc&3X00BBG*06}NC3oB{%`U%U|aS$CoHXN}_8 zz*>rpW#);x;^^4duOf`8KXT-GdAXUX>8<{f>#VG-%D060_^{t1qoQuDOc}Vliv?Ea z<>#M}l)UEZn4h1I)%Gegv$?&!@XZ?s*9-;*1{w~zI?Qaxt1isocWS_H?~j~|pAayOPITF!Cknwy)ux{45DV`IsAIHrtR+uC&RY;JC5 zX7Uo6J30*X^iJ(^aB#pn5)~CS=8wL6x0Xg>^U$*0K_;fp<4Xbp0@4SxEG;c%Sx{>ov!J?ATKokF1Q0jAk_v5fNF>8D0SaXV==?BYb>i*q&@`@hPW*2dn(I zdy5>ga84cc#eOX*DY2;X+q-vf*r@;J`gK*+dnqX+kB{j!XE;g7a&d792!yGouKXJE z$F1vYG3cK^zc|w)x?Ch8EX*wK@c7|FO2TOsl@!k3^CLSw78@HIpMK4WVdC1fXQbCL zH-DX+oSc>QWyIxTh4(JP=;sKg4WF#;KWo2=ii+<3wY*2o{(ED|E`^G0*RILwX$r>Fw#m=o?5C0>ayYYdbJZ;V zUTI73=;%lpmANsW+&r5b7`HGxa{M%$Z95yPb!T8aC<7*LYgp#BEzzG!Mn zK}b(eCm|vE^5sirH!6Ew`V_gQhDNi^C+ua4zazTXUc16>czZwBzSdu&v=yX`Dsp#f zu)Vz-(^Y{ez~+QBnJU+`qLtLvovr zQ&mTkwTmw6)vJatUsP39&nxiF&d#=|b>*1TXVr;`ifXE>)AjF)`0VE8rFJFad2;f9 zPo8kM_uf8_1z`|8B#iH{Pg{5V3IXj!bT14QeawHcXZ!ZDO$BjZ( zIaJ5gw6C>Qgp2D(TZZ=9uOT&s?T7=?$_%$Q7GJ!6J$sje=|fG89(IAUvhCMck}%fG zLHF)GC)F(x6K}qQ}(w=fU^Yv?am*Mvr 
z+DU$>dLuQ#TfbL!-VWSNbG{Zs85S0Xg@h`Nu7h#QPeD`ijSUl>;Pv)C9ZyFmLqgY8wzHLVR%*(^ZAbI~Wl%AcPeK!q<&!5#< zR2Ui>8pa3Qe0-)lI*PB$(6v+^Fakx$3JMCIIMLG7)b#NqDM3z7E>nb;&hyux`NfNW zGm`xFl9Q8DP@FRKOgSAJ9gVS9APcQ@{bzC9HkM*|xf8Nn*J=4!DvH?Whi zb7vsq-?jMV#!z8FL27~#{{?4f0h88CN3d5+Oia*pDzA@{F>C{&X{NSdxoahzWT&TJ znj0v?&Jbm$M2lHmbahxUcy%Equ+)9BHJIk0wT%rvdun20ePL8Bk}EJEpjjx72!Khn{zde%FEN6T3TAdr+|D?3zf_Rn^pW9(LSQQl6W2 zV1lTFPcf9;SU5u9Hz^D;d1G0FlPPa+;iQRQp*PpA86@7fxirm|6-I(4dAI9kYpn!qG<(P_l#$JiJ))a1m3j;78Lr^P9uSaMEI&Xx)j zw=3$+tFwJ1!>E=46uYTdNeLG(UJRf>(N!aK7ueAhk;7rCqfETyBPY-Fa4c z*viUEY?##OKfixtoQQ?qz&<5tYkuTZ^<5y#fCz2_ef@isEUnGW|Ha2Yi0*&qwws8= zd5z4)Zml|{$Y?GP3m!T0{ehLFg#TZk>fV#s`pXa3fcWg|u|Vv_da+`6b+vYMY%P2~ zxU{&4HM%P%*YvGc9!njTDaj#GQThsJd;1bB_XoB6oE{8uVUN3iqGb)^DLnQi{ak!z z7yA6EVCr22NlD3SVbw6!jvu7~z--yCU%R=wW=3f%^}JH|n(NOmD6kP`Mm15Z@~G@Y zwR-;ixtO>(lYj|Vi4VFd!DZ>^xt73`1=90lS3hOcXd@( zaOZtxm`ir&7lrrFeDI5uxGUNDIflo2F9I`z8eo*OIyJPlueE3Drlh3c!w!ntJ`Cx@ zZojjgUK1^T^$|+Nbqp%NHHvH@mx#zBko1`|XVy12+|13{4c?@skya>1c%hS3m6U~q zge;9Wi9Y>`#hUzppPPq=i~w>JD0oTvR{fVR`^@aIR|_vM%M0=IpQe(GjErPG$R1UH zK-lv3@Jeq-#|mmZD8V~WF<`n+4_Ry+9r>)wKJsV}tW0;c8eF`bgrN(c1_P#X8YFAdX!hbJU=xK z8ueE>EG;`b+g{8>Lt}4J6VoP35Qj-2Wl=TbCMT0*z@cs@iI90`1_y?_l|qlqmABJM1UesgbcZx4@^+ax<#EuMU7 zXdpQxESz^eUJ}SS4Sfj4;Br{rzYUK0}qh)z#J4uV1g)T8%F+-~1HD z1`;<=LY4L3Pc3r~5An^l-G08lTfj^MH_UOJsTuK`rzc$MwyAgU|7N-BW7`~YJmu(K z;2nC0BSEXYg)+xa?R}_H4x|de1;no&#e;H|x`>^)XV0Dhbvi;&Zyx#mh=}o}4PYY~ z85s+?=Rd{PEc#!YN~);T0x9+OnoJF%JZj~o>biNaOzk27gAX0(=)$^G+fz0sD}h-~ zS#Me6K7Rc8;loOq!dX0uxh)pqyfHf{kACnxEG$fGu#^B4y^sNPBJkFTp?U_x#?_HdiXXHl!7K$=;sV+x^}F1;0&{z;@GR;ev&z{7=oJAK7I+ zAF_%cmy$A?C)+XGGBo7>A!v_9|2L)-hrACTK49}x2ws!Z=^U;O{54c%VQTtks^b-w zI%aYKKOtuw=7%Ii&+}xn@3HRE%#zuBw%lBSEJ>0`GA3RAs4)#~v^3vhdOFwH> z(3440$8__?jr~ka(-RZq;{Dh5dA~oxD{c)!H%h5u*cAFo6Qr7Q=hc}E#NGfLHhAJwyi0G{R1_?p(YXACo zL6egnSiW5y9Rh-as$SzmFRu+%Le0P&wvXC}DmL}-oQvbS(ecwoYU0?72_ydKs;cb| zcPS2@^pU#${q)pAjiJ)G^x?xPkWE~qr}(UGZFw|~njFT;0TVe&{(Wu8|60z&=CEID 
zXzQx7CK&myoClRc03^@%pGp*p5A=RWF8_QYz~#u}^48W?41GggofC?X8E58>1DC%r z>R-4phk4)ryCudt+nsOoyxqgR(#Lo{np;|W13UxVHuLUXbUB9$$Bi5#A3x^0AxlmG zsoc4n`mDP8hILR~v2&lCynI1H0mP5C*4E|^yC@iinI8h@prunh!D6>Cbq+i(6~Qjc zv%a#rveMYp#KhmZzAyX53zgKCpq$s|_Vt=LI80%v#>U3j4;*OT*B)X6;2y>%<2(Bz zsn)vjsc7)$(LLf7S8OJ(%%dETGC>3>&(5B4l&rNB7Q9$q0+`g?tfQu;woLl#)$&Y_ z8&(nsOy*qy<;WkvwIuIrYsVZVLk^tu`Co=V{w>jQ^NOYAAqUaTEdX}hKAIRA6>E^m zxE}+qbnaZ^`}gkwML#8-_qWW+$-xkzBn!xCDbi@6JylBF(;`z~eIj@8U=sM>WLt*h z#=IK{d|X@{bTmN30FR$wCsz4JTeep{tEIa*{zVBs?Jmkt)sfy`_=A@Vv#oxVRV$3eaMx$T6n0)ceK_s93j%Aaq*SUMwyqCI)l^ zV#4U?Xkua_wizS?j*~v!U~D|5)9>EB%QwnKs}4UPEg~h={q^hDx;lGIHQwfsai545 zFeRhm$j-`&OH5>EVDLbp*UQjKv0XS4hXN87*4Pjugo;}X=?R;CHyK%JSs4J}-ix2@ zzQ$Hx5-~S3`wi>_R){Ot1k*%uNrONN%_*Nd=RDJ$kL?U$BQzpnsMLcm;&UIsRL7JR z+9w_f+as@_z%<|bZhroC&!T4EG;lj*Dwj%WbL>m=@*CF|z7s#S#C7aGq@|RU z!%0or=gw7lOkaTP31z6tszB&sx#DY+ZYNK5^?D5ENxw~Ug3YIg@4Q#Cbgp^nx=(k` z0#4z!!o$PaB%Jb!iaI(wzcn;~|CYLreStcG5v3q*Bao-fr6z@7YW7l48LU$`se~Ut zE@4GM`${gxERSMa*j>7meBudE{5;iRj~n}@OYUqjIRxB*2J&-nvd}7|yn59F?L$xt zYwl6W5@iGeU!zjk4BDiG=Zuj9C)7%+r`j8v8wF3FJ{=Cnm2T&h#6!XKMH= zd7{?#Sd2xGYP2!tQin~GEHK|^!?n87y>ZK89h%5VpY*oLu+OS!JBnVV0cHO^dW4os zci${ih|~Kok%v{^#jq<=rDHwo1poe2pq+fr5P0K7bTU5fe#)Gqaw?tt%|%iI;JlW= zJGb%hVK-f4;~xLN8@mVmu*FkE+UrEZw9TXNt9Kb08S$eqL#4PGwKI`FKxBhV0ygJs zXq0#ABoE$ibbhCvYf*Jo&Z*+z!>`X(9=s_iV2jzdR!}~Sg;QYLSPFr7wAww6%{aYY z3M`BDZE|N@8+~~s6fpqRJsq*2#d?9K7B)6PVE0Iv&c<;zG@p?>RdMU&D09ET8k zqPX~zT5BpciEK0rUS_~MF~Z)vheF<78{#>7l%Dhs*=*Y0_RdZ`>G5O7_H0cxH8hO6 zNV&SY>c5hv4yK39^X!>cWLk7|H0Ij!4EVyXUAyi>IS$(g{TRBO_v*~Cb;a1GaWz%d zO8@PP*j1gK7u3{tvlm52Gj=GZLh}I=Mfo^Doq?&ql`NbRjDfR^jYG8t;w7)`+qW+0&_!&B90rZi%nJRvp%GIF#^lv+kPQNnxMf~BP;hW-0V{SzPK z?bD};TJ@_}5iFvr#>Vd;5EK_j%WO>iGS<+@x_&DP^#HsX&s!}@AmG`#)FE9;bR7Up z%?4jsxKFrE#(7p#Gb%b-0vbbJ9<4)0RAOS|_)=9$k}7mt%m_v(_H6pIXM@ctr{W!3 zBcD70p}*O3iiXBUFgS52jS@~qqd$8lxOU!t_UvNR&S=l8Qgqg^a?Dw;@(04l@woDT z|F0E<9~;Hoqd!bEdR~m24*CCh+hn(95a8yOD_87e_0>POdWhSWIVYY+3}-) z=TK@O_XpFrS8G3ic6(XZ8M0#tcJSb(y`C({e6HKIT#Xyz$u3by7`T?T%IA 
zZoO8L5OyV6o6q5|U%q_gekgG_tHpiQRCO8JM>D-e4Lv=^t-6JAu^)n(4Bjcw4De4X z3TH1(Y=-8cE`xr-_==<^jUs4kH=$~(tF<_+b4{}NUVl=n%viDH?{*@)(LFSmK2htS znEiigsqeWIAttsSE=>Q-qHXNnNuHP@C#c<;Z)r4sXPW#(woT?y)^hsI3#I}oN}q4= zhFP)iB0j?jGtJV0s^IJX;x_8L1FRCMh(9EG>~{Q`AGwk3$Z_IL(SHb~t&zF?V4*Uc z%%*5eoSU8eo`*6f)#%pR%j4s<86h{f?9Ry0Qcfpe6iir@Xi1eqjA9$cpcdzUYTJ_# zy83KrNJUpesr3$SCy3t4vYsNJ@-P)pC;etkzvWYGecw$oq&m=+PrO!hQw@Zf@># z5W0xc90-1nMb$+!Gao2$w{PG6@4x@BaWyp?fK|TK)e+urER6%IZll>kp5MKD_v3RU z$tX%QJq7jk^$>R*JF_1}MWH*RKG64%ey@E%GK}8={2y_B2O$uUgmEvwj7({HxewHk z34!>=)#c^0XU_sz{fZNJ1nVRW<7$CqRC_5Yp^H9~x_*SU^lMvNL0a0Se|iE8w#`i+ z#s`-j9O4oZ4zRL1I63)@ePIB}L~Uf+zdzxmF9fk`J#SPD4VehAdY(Lf?C$PPeek52 zv9bEDg6pHIR8(Q$bO6H8+p62!m!U&Wb-rdJw0Cr{EBMJlc5h8n4_Ii5mqZPp#)Q1y zsN%k&^t6fD{kQ$VK`!DFvR!YnFLPO)=|SgnxN;?oU6zpmFJh&JM&WCM^wL;E%mYRN z<;$B}Td35p)6&GQfB%SCO65Bl-w2Rrf91-{2M;u1$^{1QRF6bK4E)P?=+H1o7GW5$ z@#jwor|vx6R8^vp3%sv{~Q_q{)zCJd>!?3UyFJ83FO}PICih~nKMnDAuo{{(ey9uxZil+c%C}3LL-P6-z zUx-zgN}>taN55|$!4e~FPeGW+xT=?Fqm=_Bp!`=gF7;1FcqRYl0{#WNy!U>B=Uo3ya0%Qtf$*SK#&2ipU)ul60&JEv zvm2a09~m0D$9rpYV+l6(fB(@kJ`e}JgKUSj|4-w_QXJ^-Cn%bjc=r`MKVT9nE+{B- zpNz!JjRrn%n}o$e^Psl1s|zw&)0Z#s+n@yA4+v1!)qQmD9+|A{04}^+#L8*qrArT*T^?>Z1>3Xko zrEBcTw*Sf*d(U#5#gO9KKEJdT^HS?0`j74f(=(IyF#fS_Rea1(PS{CCsVH(C*)V zn%xAhK5&^5Ee{V5uD~mNoSBf!k>DAA+DO;pruDQ1D^5tT6taTK{i6Y#xips7P-J3Vxy?Ghy!NEJ07rc#03hlg8$zG930_0C@3gWzkL4m$;8z3ZNgRA z``QNYQhIXD{xtpTXczNt3X2A-vBngd82$qi4r{wqJLzO* zH$+K(etz&9P!1R=gk<~_YvULGhSdWTjD?acu$p#(OZe$onZ$1+E|61Tp8Q#v4vtpv zUatG}=_iUKJIrcG0PvC?)Oy2ceDh`zmJ}cXC5a+!!!;oE-gV* z-WBtZP4e5&P*Tmv53~otr1*H#3l~;v8#_BXh=%*Sg11-Nc-(33gA(=kn{h;_2_TX1 z?$#P&Lsvz}PN2xp$uO7+u0j0vIhkZ~-ps5YlFihVLyE`1zyJ**6^y#Y&$>UXiCzUH1tO0jDY$} zFwn?5&!xY3k$BSAWPa{l;E_Aa4=YPPxE`KUim*Ck#j}wZ~{cdxXRC$V%LS&CYK$0IZT)IMhWij)y7R%6bsr409=_n1@*;lyU|{EVy-!D z{7~S)^&>OFwrG`Px0F1B8hvF@5F|oU-n=<~@#3>;OLRE+)b}rrcDIPrQjOc>mTc$I zttEdJCLAJ(EW$#zT4As8@Ni6lfAdG734 zq*ne*Omx0>tqKlLAyGA=KdObsvO3vjhq8llXrwCLf?)+-v~WX?iHWJuLJ6}4nHbMZ 
zMmU*Ng{q5dCCPZ)Z?uJcF!=ucFid>d8pOcFy;HKN8SrRfGU{5~Ak@{@FDxw;_`hO+%*W~SV#<{pN1PwhO$7kP!}T|Otb-wyfpVP8Q3g+~Q% zKSt0T$QI_Tl-;+4fcnv!w??ZlyQ zj%dIx5Z43=%vZqjlG0K^qr72=qV`6*hdI^4eGm$oPFnEf$xR==(n&!qhlJbOA2xLc zBmD~Z$T;g83x2Z8C2lZ5K?zaORBw3&*q>+}nN060Dts^edqHxAzx~SmDE5 zT$fOAzEeDG$~pGiO}?=4;{@fs-|12AEImSu_*G7iC}JY8w6qkzWnSiKY-A)RCYDT1 z1BJ=t&80mAI5ZCRk=)P*=T}!b;8UP2l32ocgsKC=q_8=^lhyqeQeH6kkrR+5VDXGY z6NMB*P^3j9;%GCM#(uHBN5hd8ejAsO?@;)=o|=(yRO1f8W$U+xSm`91F;aYcDJalN zunc@r?vcURL5OLna2W}JvKJT_h%}zJw@jR8)(O~8Q1qc|09!Z@mLpf_ue$rl84HV~ zxXCpqpGr7|WtFJC^5iRtRMMbQy7FAXB=v=(LtDx8~}6L{#chY%M0 z_qkV;;h#y#$YH-Oy8HJ&MbdFo>>xY|TKD;zW$+3-7$_P_(7KNw;BR<1H96&1WLOm2 zg-#-OA|kR+zKIkSY+shcDx3|dA#g)*eRTA@ckebt3!ECyg1viP?<{q2AX!1WPHH{U z9Xh!Xg3~G)!5J2+fQJl zBe)ZL))r<4MDI+kIsNm;`1p|MgbH@Sz#tI|akR%Co>e}W2-Zo81v9ilenybQ|vVUCc>Q*7h@3(*>~t^Lg?Ch}v)UV?)x$FUaw0mH%A*lSgTBc-Sx z4T4DQZ#VAC8rJzt*785aXPPDDUwtYQ3ePCSB3GT$@{A_S?UcXn`sRaETI?k%A39o+ z1-fQ_QAToAO>I6{jMANHJdgDQ^mJZNk3`YbG`_U&)$7-2-_VG6v|%iz&UASZy^ zj(!S5r4cVR-b6G4-XGz+;$xG`8vxn}PIcuqRm+beF%0uQ0~I=R+POiUc~ThH0( z0qPSkP?KF2lg+*}+{@P8qEcCN%3@~KM<4ipT}x8naW@9oV0N_Dec1= zskJVe+hA%KkoREUO>EN>6ZY+9F5f?ff(Zb2?z|0AAG`v4@&q%KliJ$j^z3KRD5VToA|wvk7Kxb?>nXaH=?%;mS%s4r7PD}$zs zb%BI2j34B|V`@hFtNfD=g@6JfQ-IfV2Ni8?ZHah@yJ)@spp>5kbP(x0Ku@n{Vsd{F zv1Fa9G|=jo^OulRD7*_^bWoEf3l_(tG_NmMumtICnv)|_0jn= z=UZQBqE%8+QK3=nres$1_+fbd{Bys#0eli9k=GvI`q%`MyiTsZiSMv(DL8b`;{qFJ zD2GU4*iNp6ukSp91yxXcmY0_iy8&Z*jts72C%@Q9k))<1=vV(8K83K~05Ca{dC80D z?4WSQh`z%|!mj&jTM*yp+Qs5tmX)PpZ+Tg4LL#OCozCfv9Qez3+naJ1A#=DTT^*|2 z0wjCBTb=ea26uS4O8_*GlU&)}iU<+5;}0J`jIizV&uvDs2i^OGCOdPQbE#=)N(GE> zG&^4}iPlGUW`AlQf;1lt#sL1+D55hj1 zlmB!OE2^E{Gz3e=j%c zi|?gl*A3)E=kAlgCCmu}7o7Guc_lj~g~XuJFXF=oWx77bM_lSFZR--y>`9df1n<^A z^6^t4i;jDfF5MG6(ELV>O7nYa6M1EHVnHf-1(~<*I6G z7Ta>ECyzL};&8&_0)m<`gU? 
zfDN+UyOFSh>PL(rO{Bu5*}KB0t**`@<&vC4d|5a-WrIPDQt@zeLmokk@^$9Ih?Rga zt*qiP@C?thK_7|LQ^@oC{@VW$c7x}r%RFo_w3n*gI@7(mC^g5kJ)v=oN<2^wGJIopdJ3vh;vxc9AF zh_{`F01Jx^#TLyJFt!Z2Tu4g@NMM8ClzUcHRTUO?&eL-hcrWt|v|f0`y9sEyz^OTz znX!mAL~?8DI05JQZ>=!JGz{cf?JT;FMh#Yn`8&YQUN!#F^U$*vrs(*l7%L6`cf9vJ z&NEO6%g3%elGYd{5ZBTWU2)OyuqA3k^|4IFAJrsq&VszFIZ+4m^Ea9je&R?2HXUS3 z;O(`Qm4P>xDTu-=b|gDwQ7m4F8}vM9fi>YTPj=-dg@%ScctF8=^$$jF;`$hhKFHpS z+}x_0OSVEnLbq=H`TqSoLNUbJNEJ5F+&nxv8A`V9xsi)m2nr%4U`FSFI}u)yaPIvv zI=Y0TH6+mVdV$w)V4#YaIWtvcV`CGmJZnh%dy*mkGfS?5Dsh{9+_d{-ZXWl*udGhV zd?7q9J*9Kyufx^&hvL0q$CjERGRBOy+2j%VKjSiSz#$+7B7OarVzIglr%(R_gHo@Q z(IN5qVHY7gc*DWY+%pHtf~b89JDU|8JnBqB#5nT`f!_NH3z~Midi` z8JNoX4FykNHSi?w@#D~LcizU{(FrXsFHdL~TSe!{$q8>7N0`;u>S+XlK^d~41UtrAmEM=VBLIn zkrw+ZiszgR2r(4l%)2;AL~PQ~H3?uoC>cB6!w5jf(y`>-_hl_2d*HB{pTDnu?!o0J z?nJLloh*s8OK0$WYH|E^!_!i98btPvoXNA}lU~b?@SeT>~=sU5ccVQDlv{-$AXD4zvEDQ`P=XeiePe7v0%~gP; z4+Tv|N{Vp%)hoW>niCQds$seapto>dz(0@s!e7T{r4U1^_O_>G5_A3qmXMypc^+SY z^Zn0IFo@LbGi;u(U(Z?)Uk42iXvK+L0dclZfkF>F*j(3(qbNx8_ZD9HE;}--q~zRE z^Ke*Yr_3zjJM*ea*$^G+9tG3lGL~$6c1N8E1c_h?{d$#dWnIt%NpJhFuHi=#D)PTu zqS^aMPuMpnqonE8yxl<%&y^?q7$3(F;i8;NzozKDX ztgXMpa;c?zkDXoSG5vFHPB1*o|H)1v|1P~&=O-zbnm?8+A7ykDwYMIOjxV+flixg> zf$*MWBuiTk0xn^ry3Bh`6g z$gl0(pRjDVrG*8^mMzWA!XhGZ2zX^1}fac07>*?e|e9fbb8NN2@xDkjJhNhHB zr7N3%ee@H26iu1E^=0wDbxbe1g+#v~KGq&Z(YfD!C%D8%b3$C$txOxfeOsNMzwvid zy2DM&!h)#ZG(Jg^S@9R(M^kG5G2ag~nEW#jqPR2CWRL-XP_Rej<>bDPj%p`J9p>b8 z8-7oM(<=lbfP%pe(*}b8N#|Z~9M*nR)42coJS32ha5RCT5JW|p1~UY9S7k#nSVt}d z_6&}$X(!68jMVIAV0eNv8sguok%oe9`K`WQX+WSfT}H4^a^aJ1+0K8+GdX&9TR=$7 zDK2R|$vmEfV$YtI*4BMf3WDE#RME2D7CL<&Ji^Os|1pGa!UJ6u8yl`GQEmc)I;NLy zR_SvSpS{acKX6BtH3anALJ95bz zI10J9WE7((?|TKwNzi$4J^4|jvRgwY%C@LLNbje=JR|T_QDI@`TjaV<3zb(?n9i}# zv$A@__WEQK6v_28I$D+L8W{nGg?)GkJO!$QC*_d6RZ_BoI)~DRA;sCXI0b({gox0m za2hbG9&72Kj5{ykA*wJQn2-k!#dGvz8qN>_>s-Ec35lby1JaAA;K=wWUj`K-+GgEn z#xIx|E(*)9f9J|yj`*18Id@h>A^q{ggw_-1&wdVh<8h+w1lg}CwZ%RzIuZNjZG|`4 z_wQ?fGpuyh*6yMzogEwNP(FgKmw?D0QCGNAaEwcRAL-$4SslYQg}@ufjvoi}!wD0@ 
z?Z=Px-(DSnE&wP2G`aGtY8y@t=rK}4=NEK(^wrgog3P)*>J3vB?f4SR>Xej6&z|ML z&MKjNQ0riCZ*Rq!7;pG6La!=xpiKO_t#G#613u0*RXQQROBz0(%;`M-6E5|YPZrp|nrB|O0rer4r{?fMlFf(lA7@e-sr71X5B`ofHw~ zhr~NRqoD!U#?f^EgY8f{ob&T+;acRKrAx*)_KRgZDv8c%b)+`h9|U5~d@x9-5uK4S z4!nknhH%(RWF0PDawp1#Q5H}*Q7dqpEc!UnQH1A3PlKf(gQq4m+ZTRfxQ5!SaC7ko zoE8#A2rhbh^a6JIMgbndDEGD^Pa&B}xlc61bp{B7tP2gKh0_?a+SCT7e*cP3M?-@n zj+n!3^zih2Chf*^d}d^XcrqMYb`4@Y&N|wwAn)z~u3Mpp<6T}A=8X*rUEDJwYBfAp zhfZX>{SgRHYU0`Y?3wp9VE4crB{W6In<%JL>@6HPCsbEmopSf;VdvD@-2*<m^|k z6pRImI*a$U3C<_?Mc&;#aP`}gkE<>DuSWg!XXVTX?|)Z%o1YHB5_eUUbg5~42{caj zmZxEYEAOF|d;lN9b4xO9z zQ=3=scJ9%o*H8;%tLcr8EN#8@@{T6iw(MyYcNd=5?RWJkL1b>?`{W@GEhatG>T3wxS; zP0b8tUN+sh{)@b(H^OmpGudo0k-mW5%v9E&c(x5^SR=!q>mh+ds#I6!{zK+YXU)+K z|IcP7&P72=Ha_pf?s@Dzc)Z?&?xT&Gt!xrLhy8CfV-44|UoSm3%Zm~D-I_$dinB=0 zh5CqJfvu6}FH)~S0y0$HYnDq=G5c~}7n}S`fX!z6sZe$$3(S!d7xt0@Ypw9Yf@Z1o znT8K%iXzY#^CZ4MZGS}EgyZUy%$=V!x-Y*~IB6#1^6cN6`%E?F9`NM!osjuLoD(Fs zDqA*6`1F2jHUFm56#S@eTcJBBf%t3~Tv+fH87y7;U8$5mqLzIc`x zZJXgUJ6A5P)A%)07EY&!-60fNhz-@U?z`t(;Hp)d7(o+b`Zb0qOn4?a)Qwq{zp`Ow_eL=em88ot zEy#F8m}75X5WHeVRqfqZh807fY07%2T z2#_pMkccuOZ0w$%4IHTW{$OuSaYO0c@mT49!&gT2=(M(UZ)vy!aRfXPpnE_p zRk;2=X!rxpvnmCWrF7wdy3a`koPkI945+f*6++k)^*${BwiYG?5Y9h7{~z)BFLt$V zXfM@irpo#@4sVCN%KwU^qr`8uD6rG6z6&l1p|sr;tmFH4YY-&@RQqH`Mg|7)w>?n# z03&bLTBp24!9_OO-P>@Zr&TLC^d_{Fw4GOEuh)hq9e4Gl>V+r9IcaqJ^JvQU4)xuw zDhnGv)WJ35OWf=sMrjw?I`0+Cb-S`qCB%`btF`Tqy7}L!b@F-%L+ThyI?k*A!jLG6Xt|`NKdz|ZUuVvc6R>QA#Ke|?-f?oErp5-Dd<;J! 
z&t>ETsHW??e$Y98|84NY`<%xq|B$Vm4)h8ba(KlpHX$RN`&{1-}5cJ-7a(4CFn?xMmaqm+Luo5U?{xxkce8Kdh z9&va|{v$lFply?xB-W@>wQwteKl)FWzUzG~|0_xi9)`Az82KHJ-}E?Rn9Z8zUO!&V z%(J6u;=$6NvSE&8qoa&*+RwC(40;X{MTVjXj+!_+)7byapN_V7>HfR^{x{X)kJs(Q z3hc%c|1o;&SmBfE-m1593(e}R^SIb4?uG7(pb<@4yujQxd&_f$pPZPMwfPr+{Pe%^ z$Iu6VWCK)!%@ci5Rz?bLTWxFaY;igK_c*bLS)+(`iPSlICi)ZK{UpM?x^1Ri>DXHF ziy%%IpQ|=^$@M(O_tn-|c zB>JgF)2~JX02-|t^CrSyk;UIe@|$E&n%#0O10w}dG1GIsrSh;jO<>9!ivBme`I=SmLgCHKb2}``7C71z%^S{PtcPz^4-zLXn8rXpY;p zIJav)iQ7AT-ahoWf)Voc=Dc-m^4f8^~vZueiI(8$Pz1ZbF z|G)d=7fagY3oSVj-BL@$9w*g}y2e;$+?@ZBEzvsOqjIBjUDZ{rd4j^_&SlDsxb7-%n(Np*09M#`O6X$gNjbMPVe%k9= z{35-XT8M`Luu0EU@A8EyZX(p`z?w)Ji~5x?zp}|Tl0;*~6lth8&n3Da0Uw#DwXq+) zF5gSpcjD5G#7ooW#%hY5OVGdcn7HDK!dV3o@HoJJ->!d4P=H&RBf=2<0y>>-i)HLO z*At(!{2RhG_HT$IcVTq(K65Dk;MH>pm2eiJkx?=WE27K7yMwx?q2U0!3?0Ae+qbKK zXisDt+)%YJvKl#F^O)!;bjoOPFOD|Ujn**Z%av;W_&<3C9?)CtYzl3lxOf#(9%MeK zjbFxGadrvEJ^+qA!ABuc!SU$EiM#W3Hr9)|Uf*D`gK(qBIWr%plHB>$EsltZh-jQUhp(~eu^r=Qbc9C;0XqEteP|7iQS-vt4E=xC z6%F%`5~~BX&%U2`7i}j-S-eaexA&!h2Zl^jvWlINt0r!_E14B>H%7ZlEdH(Wa0}}V zhL!mSOEd9qhtt26z&`zOQVn60>uP~kufL5~Y-e6nnBW^Zx?yBD&z~e0b#_xWy844x z;OmS@x5@9{UGa?z%fE3uINJkL8Hdu4pmb(tyXTu5(ED7CMVpB#R)9D-4Eys?7fXWd z`jMmI!sL3b&6#IaJS;2wgN|WiZG9ozC?AdkM?_aKfbZKsvOgmqEbQ;NdYrf>E19q+ z8-q@AG?n!A#+?ZQv@jFwLNtniKr_y`6ki>1z_)Fo!Q)Gs5Hl>g`5*JDK%}O^C#IuK zV$c3Xdl_bYGUxuTBpAgYKK(P7lOYMyR)4f=i-_sF3k z_(w(2o>AuEoZ(J}TK_(@vqGTwe)omfofrEYuDzP=SnajC=6LH?uIFa2L(v82S&wVe zgZJ<6nA;n&^J!1ujN9PQ!O!WBxOrV}%MB@PzK!ZD)aZ35&?rKvMOUqM>ZE#fN$>=_M?{|%W+x|TkHs-I$!lKqCiCof zcK7H00IR6zN}DY-2#ptS(o-=_&=(J~vzsH>^yenz;MG=5ApzqzX}}iPO~@ErO_IEJ z5!T4jB%fA<*C6G$gl%kW0GmR(`3t`pR+IVeojMj81ygH%YPRkgb~3btXL~C~=ls3t zLPbj0Iwt;RmEbI}lSH4w_C}M}kS`pJO1L+;hRD;elkQpiS;*8SLT`ZChCzi`?}Vhq z?6^D*ZGz4#pLx~sskRp9VlM$N!8p7K9UbHYUqONYskE#Pz)97^K_`*Jfc@>W(qRbr zZeHr%gZKl?d$yG;ft?O2tEGDuILCwoISO5JpB?_gXCzen;dOQTyy1~c1wEp+U;kfM z?;THd-^Y(1GLAj6vI!yCI~m88P1(DIRI;*VB`YgCS=oCfgb>Qe&L$*#WoCX~=emEt 
z?;qdmaX;$5?~Bj*ocDT-_j9OdY0Uu;4NyYx(5IqlX3;1A{t?j%+W`}{UusX?tg~mp zh?Iqm0v`zLc!9D=G?*OjlYttJ%%ZmN*)6S`MRV{738x|7hs=)@g&AN8cRrxn5TJm3 zFZ=!@bMeMslRFokuFMFjgz|E|F=25Vo@rV~)`Z)sU5mM(b(NogoBj>7auU=!=A9_; z6#;|g+Z9n}Qnv*yO2F9gH6JfPmz)#@Z7aAk-Oqsmgl2g~MGP~tcy!Gn8c@aGz7?#Z z3mEtsaObInd|nT*+%Xt)FGcM4j0ZRJt`u%G-j&hV7Vk*=CDJ}lt6jb6Pck@u?k^W6 z)0d?gZM!$V!)kEF#MJ1)b9-pClgpD+`30eqnH}vSL0=99yndj+4=^@5evlL>imp6`7iF2#7 zv@E-3IL0_Bq1(6$MJg^a{kO$kfaIffiOO3pXj$H-rk+5ZJS@aX#b++{V#~ClC=KxG zx3@npE-fWwWcb4m!cCm0l742b;BB7u@+ZcrRmrQe2oL8jwH*?%@4e6I$Dh}&im0)^ zRm%k&BDClVhF2rQ!ayJrzX`<^-E|updj6NEbI@?^164kF;QsFF>Z%muGw6e{#;(8} z;C_WJVr^w?Je}ueed$8~8Vi0}C{c^j@ zl}7_>i%c+rk!Sej5i*^Ewx(Y~>TOej5JgaQsDNl%pT>*m0f$;(GL5m|~~K`fR56 z#%-u>b_$_SKD#4d2Lkw|nzgI*K<#Zl-D(6_us^8nm36`?0hIK1+iH<}jtHsF7e=rd zk~w~fp2@l+Qe~Sl`o&eBzGQfja$kehRh%B~v(H;D&$r>%Tj2OBI|Rj->IqZee+SQF zXy;Tbrl?hW>8G)%<6?e^uZXi7()rt7RfoQCTN0!4`&|QF6McdN=OwcKhuJlo=Ipkl}~4K|h^D`dGGjwBe_VUZFk#6%TZS z@rj9_P9iZ1dKYTS8XKN#j;;RplArQAp4rAs;k3VL>{C3ry4G@WvhH_&0@yxwHZ~f< zSIh6cw_sn0rftHX8uUzvdrv+!i*OaVeBKa9v&I}C9K5~dHxX~g7h)0w4@k-f>K1UP z;hG4*WdLae4J!Z|4*(Je1haC&7O!NICn#25f5x;SM`B!H&>wRp(2%bUK3p~GZb?XJ z=p_K?G_K(z+$z`p^4Zvg;^Lp7cZS#L8hT9VTl2{W;--Jj+_CVnS3_|_VAXy`{r0%J z=l4gJV3Oahwt1b@zhQRfpqUN#z8mh1-lu?WrIq1E6dn2gOihwh54^UY*)cdi@H^-C zk?(6ei%%ls8~PSxXt*<68^d+50Bw{8Jt;y&!osJpB-_U|%?d_J_FS7Lm&rw%sV|ij zk9s~e$AKHwIMWPFH}rY{tSx&Q5g(l$6{q@n%=uoiS)R){{{emt|J^Dsf-vb-oR)?@ zeUUKj^06t$j{q`6QDOCUHBW^qm^b^PtfmF+tllL2O7PcCS;Qm$oF!^7`dJY8NDvi) zTlVb44pEReYH#MUR3~Bf2))k^&*?CwNuDR=+3}a1xBjmlzg3qmhJ8W20meu#q3kIQy586YJxhHm5tFnlaQ&hk!qZ59W6Fou5wn|ENeXF=Yget{iVcSJz3<@!NI`zmLnhb4 zR9&~h!Ntc+Mo-naTlAzCu0<;1_SQ$lC>ttW6CLEy@|=IWZ)z}+BDS_x3HqrXUI_m2 zhGZv`@h{QwlHTvKEP9FPRc%>@5yMS(_EsFRm!5OBrSB&%B+#!<>eV$9-9Y{wL!0m; zAv`opK@ieEm|{_1VZ^5%?xw$Aw53Gd#nCH*q&$P&MzKT!vZo%M^E+(-7-)fy_Gg9^NNSzIRWE*WUhExFXS0-qw zwcNWm?*t1xJu5)So)WxqnwW03YC?b8hK8b6e;H*)x{d&b7bb~t!a!!kU0O>dt5+3! 
zC}JS$GjPN}71+p=`H){?U{BeqYlf=uL;*C|E;-&@LA5=!vzh%Hf z)`i3DYUVf4@UZm+BLFmc<7d-Cv>*HTv z?#%x8A$JPCtaiP73?q3z%UMx{B&Bdk28k8e8Hww`dE4H?_hGxt>1QIL6}i3T589K@ zoy~Er`QaOI>z5&Aj(=UTDy0b{uklKIx~0AovA#0*27QWtK5{_{P&cI^4MvQ^x+&4> zzg9+12EMwAp6AK9`^jv~p+DNjP<7vkxb_s|bO?)>BB)epNkJS$L!cQbNRGc___*v$ zl*hsH^)piR=%u9}71aIEG5fvp#WFM!SH8iRghm9P#(hrP^=vlfz{V2ij$QNL{o_8D z-%qZbv`Bv)uym{yWFWQEc8a3o-w53h(VNT<_kb)_Sea6D--;^bbI-&~h8uf=kQ4>) z>yM~twl+wjaB*>kDDih>C6>nUU60;J(RIL{)wenFEjp)q6o6rt@$J0g<~(+Y{LO=J zxq}eO1u1cNWNGHNtNgrj_ae}b7n5+15(_GE$|kxJHXERg{n6Penw0;~&dgQEuZ%Q< zNiaai5kFr^_p)9Z$lg+8Gk@04!HnmH;5@((+W6pm?+0JGTN(xoxR9Jx4R5qV%cvFm z;DP?uXW|&_JJ|ZcFRz(A-Fy?@;kvJ72?YTuic>Fa?i+IxUX8zaemo(0<|9aoX)G(~ z#Ac@d^~LnVzC!3*D{Y%IM<@Q$iXZ3uTE&9vm8ql}OtT183&8YLat3RjOu0i=(%ejf z+XWCQ|GhpjD0+do4IK`b^CXDWzVM^gmB*t77Ey2BJdG@n-K+f5{Va&}BqR(4AK@X4 z8`9h;ck4UH@KnV!&ELmu$+xsdIggMDYD78%BCJs^hrSfg#$VSH( zTU;MBqvm}Q)McsmA4KVrK~*vH>3L9}?e$=Qe?eCmYHMg&yx16rX93is!@3eBFOR-# z1<5Yr(7g_73mzzDK=BQk1&m$*aN2@`W)vBX9dUDyu~6X*mVnk*SLQGBl=puf2^K~= ze=eaGkfi4g3G6IHLf1bXn+yd>szYt}1Hs?{NI~^c$Cw#70%$wOgn$U5OZMjM72ep3jGq zkNc%h9YJ&vRRD-`QPIPBl0Qx#bnQV~G`g#u=Jemc!Aj0ojIJ@y9q6Bem;EAh&MZ5rG5r5b9prKPEc>D7nNKkJ9 z_H^YLT%feiJKb@po&dZB{6mH+b8np4T3pl{#(OAeWNmE?=x8z&J+U$~DUcbJd@wU5 zvE*XdR9l=5ocAy6ahK?6go@8***o0@V&0e|{zNHl)0|4?dj4Yas717~(S4HI;Tm-C?A9ytl+7aU60YhwY z*8@tr{iR{x2*t$E;$96`$Oh0PD2LOs0F`XiqczC5V!|=?(MW#u-Y5$nEL+YADBnhtS|EkA!65k^Cz{RW&s< zoC&@Pzz19bQVON}aJ1j?JTPvE8vci60w8Kv6x|q&4hGJ(&AiK*@sB<``EdG5p%U?1 zV&x`PYo74u8z4+Waia%0!+<`%4>;njTflYPr-rTwG}%Dt!5nUM9=}uv95aM^Hvlgb z>Z|p0I7SvAf_|ExTc~-=sXV6{4tRNA(?*`@VXVpaLWkM7(BedeCSgGY95kl9b%~=^ z{|o#{YEi7QXmn8&8KI?VW#Rs>^u@=@{ynbDnIbN_EBU5tL3e-e4Q{L#(hCr>Alz^s zM+GX?v3hUrZFb&mVux2ZF1^a`@f3$^IZOV!1j7eyoWK(}@o2}0MoZU=Ois;>*8YDi z073lR14+^WNy@Zc8D8zKyE`{jH>hpq%kVwOcf7CAjzN9WH1rSq8FBQgtnA~(%mIV3 z0^X_gR!mnLR@^FP4{L3*Kqc3+i)#_XqT+%f=pS#ym*khy{(6Lc zl#Ka#CEn1tPdhbBB+2+n+y{nb#i|hXu90Nkq8++Ns(hl8GJ)}&LENOmk}AQO0q1kM z8RnYmM}NLNI>xaS6Yt@3xU2ctL-(nZfBom6*NPP@3~MRGr8;ko2XK?V`Gy=@zyw-b 
zbvOSNAs3DA?g=6{_y;6gep8c!)Nss>CrKroGY$b76V&b8vIN8EEP$iaN zf8}e#+I3WT7=xSX2fvK7_~BcE#QLmyDkL`<$-`7gG$5$FH5ezfgyk0RI2eb785Q7B z^Pcd0GJ##nJbv5brRl8B*T2bwEREZGdx8arx~^9AH&82F{j|a+F-W;9+Nt+i=A{1) z7nG-f!NV7oq(|J+g`$+km<@NY&P5(3dJfpG)q9=&k*NCevUyvckRXHtnTib`ig6ax zO1k?74VPncQZ=^sgaH|T^-uUJ*HS>q(y6(lGZa`pTa|6jYt+BU;$2KjMDejH>C zO$@SixOg%;qSbm*JU(S)pvgbe*J}JIlP&GNe!VLcB|ZJl zSA3I+kuQ~Dq_oygOGJim=e81_6sg?I?fpraF=UC4dl*yWDV@By_}m6nHF_LX{UJf& zl1_mQ*Fzhd(3n&u_*daOKPU5tgM`tquI)#Ah*K|AnUmj)WGvr_sV7aab6m;c`Rk88 zlC7{1b?6v<4G+th5q{}=Z1S1FqnULx3l2j@gBAUV_XIlV1w28~^kL?LtcTKTA%mvr z!J&-c_vP&I`u^t~jKVa?8kP6Q2KcnyWuMOlne3x~m8s*lNfl3;r|vrO2D2Ki>A0|P zOBjVOzh}>taTcrYe4(B#`O~Ig1CX#e8mB=#$Aio>!}~$@Bb@J%Y8lk80_qvs4m0 zHSnxl?i`h7mc6Bq`1(D(r;XqbFi_ zOi343+aE?#EY?5zZb#O@|NiPG!5B)e=SmoN49E!lr6q6)3E<>*3a~6M0e9Q{KD1 z#3i(HYw2SZLBr4Tn!tWs>V|Z%%luZ7+<)y$pQLom39#tx%aAP z+VyYffAf8ij!7OEg0I!@KKk7F+h29FT`rNXP;Iumi$f?&7cAg;tae+_ATLnXLRrEfy>`$L!MYO~Lu`@Xzxj@f)qv-Guy-ggj> z@?k!vw^}?^$8#)gRvFY2d8S9`u>W6q(RN37AXSdl(LpO_lOhno^v#{N3<4C@2p06+Ntmc`KmkcCo#EIP63SKJx+<3&E%I{B~A9+ z(z$udC|AE?A}#!9JEP*}fy=fr7_%$KoSNQTK1v8i400>^hTI>Q$Gu5Qwv-u*i~UKb zd*_%~eiuG1nizONIXK6s-_HLz^R>B9KeP>=`Nh*xa7&_e9$%vMeCfjMD-X?I!}M(Y z&;0R{=7BN+?9~CGFc~qT)8mp2eFG!MgYYKHsRXhAH?RZ-HhQ_^`}zl+>blGbyd?XJx*3YkoxQw(pAQJAQlgo>MO`%1H#I zD0G>3OkG^4<0^NLB z#VQi`-Vw{D%d$GsOt~Csyr%o^O2K&Ly4m}};}4Vic6cki8a_gkCOAhAnfR2Wp#JRB ztg&JqyiF=CfY^cMhvTGWar{MlM68B-IH1PQJ*Qeq&%{}&ozjWo`_M`&)#f2}7 zaVvOo^i0QJbPQ6|&Q6SmLW2%TmR;lIFtIYdq7slG@fAWU2~ari#aBdhAo7|P7Z*tC z1(h4LefPbZ40$eA^E04mU?xRQ0%*>D=tGA8MLjlqThNO)ZbTcuyOJOSdAul1&Fb(D z&edR;!LqNBvrV#$w_p`xczot)QU+&%sVsTilXYk>1~PBWHql+XW@K)@<*+@F)edE2 znqX0W&Gi>^pFbERzsWwxTt>aMwd;E*s>laR^%*g~?~5SgIE}nItGRdIc0(xdVR*db z@QoOBsFjnGD|@EM8mp8;7N&q?0ZxuVj&W((Dj_I)O?-Tkwmt>2zEJ5@PDIXdHsoEj z%Sq<^BU8$WEu=|>f{|HuXx>S9!+z5^|0G*A{>fh*kM9k5ZByS39TO={N*G6K-4;Np zia2z5wd7 zZJFtojWmgv#}5vTU|^X*H$1E5M_->J z5=1tSPfmc;3i?e(S=!9OU;su=O+EHj=Z-*>qIV_RBftK!Wa1CG&NS%{QznP1jfpe< 
zcjAv%bgG^ky<6FnNecT{$rOrf6NV_r?^Fv0Ru=3Jz2Z?UIa5HC0-2wgnfcBgUmz($ z-KM6hY6P;?;4HOrGaI-OXJaiFqopQwU?&7cBS@|k#b!GgHtvxSMy?P_OJo?vFID0DtmymL|5^^vD?)R?;MNSVBIoDl zpewV#)wm7RA%KF|!FBF_E8JOt{uo>v=curq{7HKz33z-J9T8^oc=%L5X(s_VCfGhi z>DsQU{jlT2CEnmt_2PTe9(uBbH#4*-CiJb5qo#^afDKER8KLziM8H<^m6r-&Z@@`_ z_z0;rEz<~8+UgmORX9`1v}lbT^HWF!Jw*Sirjz?WvIzB=+=avBPtQ}QM2iusj3JMnPv}|DGalR}cR0H9IX?#fuT@l-rSD+S%Ayu)++V03 zjVZbvS&1CFGoKf!sRX?ewo_?J&AzNnM#jFo6-Q=$jRpRwH!$R3NxbTe&9eK5Ec!QB zBy^~rj*l~wNry4iw?sb`dfW4Awz%yHZ?MIU=b;m^KG>0qeCZqI^*I#Ir(bh+<%BE81hd^~fnW5|AgH)^% z7QKaa=v|uROdzRqqOx08b6bGK=Lwg*?e4Ll->2o(IYE^ z&ci=0zvUcbBT0K^2yVZzAe~bF=DMMUB)4kX{2{aBkYul}+$z-_c2DAD;VO52lfW?SrVnJ+x*l3elch3%#ZZs!*T3l^xR61KLJ`l8QP?uHHAk^&6K_}<))fA=I09_EM}lS<$mh7?@oypX->Z8%XMncK2Rcy7{+C$4VW>9JSs z2wmI3L#UYhN4Q+}rTxq?RO80KvtUI>kplP@`D)t$ql3vq@ZKYafw{OfUETZ#yPsE$ za9ypshzO6~tfRhB_F4UuD~43Nj8h$^=2-PI1S-pw4X@C(s;rH`L}%+u-i5FAgB5(b z8b!A*5)Xf2?agoENo60d{&uxG;%^FD@~3pKl1-%MNgr5YCi^*>f=i8g{8!wF5Keq5 zugAxK)zjzS8~ywt=D-sfSg2Evz3DHzSszV8DYuKuShVQs+)(lO{_uAUwzh`J*t&8z zUf%H7`LcQ#ri2{oSO2isR41Eha85Dr-+QuKCv`1cMhm6$L1$e%X=3!@+YZ^a(GT!6t!`|m9DFJhPL$Nb%YO?W4n->Im`6_z` z$Sdq$CH7fiyC%7wD)Wp{`!R95Q2Hx}7jjzs{6+e6XiGtjG~x;qr7_n=MZ6bfp+USc zJN)4_+MbYEtJeMD5kW`rS0=m;l`>`p!7|psm~RuT*OV7Xg7l1eJr;}OH|xEX>#lXP z8@G$A<|mAp3c=x%a<|kkYylEBJo$N&V>0ctTCY*Q_b8HYCC*(xY7xRsnlz5>BPVcl zNzvKMcq|`xlU>5%$q(cz=`ZJODHt{t3(r0de^3bR9Bl82damlsEAS9jktnUg8gARA z4!sBXpKO-x;Ru?|y4`jAq)rbB7oWfJQtm!kqIO$KG3FBxV^$>>;eAc;IO`lQyoIWC zNY_f!ZPh}kTV#ocIDc}N>z3C&^#L06BcK?smTdXrN^$Tkx+MQe1O6+&DcHvWO~XOigf6VlyTp}x+)<_YAZJY>!(@m z4^C^-d)v&Rw&aALT7{l2OorTq!U&b%=t{_$jz7h2Bs%r~nn&?ZcrjfnQw8oiX`hKO z-K(oAu2~FtY^81}gDuRi5?nhtMR-k-dQK%3?vrXOSLwTX%qBb;tXa^jRV=6J;d?*4 zw_iDP2)DUD5L3(Tj(so{+vL7=MWg6$@l4KHKLg3x#O%%_qEnW&q;Kmi=t`S+#AhFP zUD~%nL(}|21g*zz)5}%Q6v%D*wZy&QHJ2vr8E64IcS718+k(})An2N*XKH?AK%b&R9R#87F zA9a7u-K`Q;$Nd|VJDu12GwIRVxaob_7pirNm8MzC%|7=cTA8&~H8NlDD2W>vFXmPjI6 zPC4FTc33cs_-6ZCXHm%q>=n)F{!zd za-dm=%@Dj$em~(4<4Ch*^3-$?!Vr%Z0W}rJZRew?$ZHFV{QPHe=EyuAy`HNsSK`

iJ|3Soh#AZm-osB{<3xu{dgBE7;u`%GmGrog$M8qT0qa*k7&_7(WSESY zcz5Fya@e9@okd$Dhz~F`jRF)5iD21lJ8gu{TZk@^)Y83!^4K%)r;gmoiv+f2vdzuq z=he(o^~de*bcF`7O1BCP^l(b2#%PB!TZvEGZ@aT{^%!)MziyJZK^DqDh#o>vjV=1! zL)gi}w|@S6>pNDeT$l?uGKT zZxmzV>bm9V5UP(m_4djDJuI<}SKMQELhRUX_8<0!30mL2AFjenw0QJMj-84(dbsj_&YA$pgQt>lLBQj@U3%6Mcz4vsQuPx&f8^2~o z&}#ZKf^~J%>AI8^OY$GnvwOxu9knQ$k2Fk4YnH0&2xAu5i{sv>+H)1=tmaoD#l?~) z^d+7Ww4SwJrwX*^=&;Zzd?RRwp36OV3!#wfB!BTELNqoIJBFo~rS)5!kuR1uFDtfh zYq`X=Pz4T(pGk&I_93aU>i&jU7E$>_S}(J|)a?;QZYY1(; z&)3hEe0JFdv?%!e<2n?fYTDGuIn4XM|AqDOHk~w2=4gMQ-N-_JZ5qi#Z9QAP1buX> zG4^)NvXufM+a$|+*7L@Tmsf3)%hx>QFi+<+6wij|$uixv1@9Zmnr#U@E=6eY{YogwnTu=!DvTd^MAs(6y-8UY8+_FIXQJVdofwWa2L7Y{~s@ zGIYrleP^iXbjDzUkLM8lb^G7n>GAaueH_-E_pQJ+1N^^mLA6)4A;5&twDI> z@S);qRkmV524-Jaz8B7(gMa6^Reo>hq@Gz+{F+bh>lu6*Ed(MJ2A$+d|BRT_NlGde zbuRte`8P&MCRz1Qosw1UKl5icvORU+>;l^|c4L#JcLtWS9k%qnj}#*q^O~CR@7M6% zyx&V+-{8IH$3%a6*7YOJj-JWj6Pz5i z4hw9qCpY)kc^rheJ}FZ8yK((}AE7kZydf^olN6*H|3+|CwJB9_?Hf5muEDyxvhCUjAeKz1TxS7?6U4czGW|&VoX2ew#Vc^)R-7 zr33@lBjVYU=DqufUFUfM>vkUT9^lc6Ca+tBrIDK!O3B(7b0gfz(tm} zDed{L?`R1-_@Z{eg-C3To)}j(j>r6F1{EfBU*4x91q7#smNQypPP)YdZ}NYvK6WD$ zD_-bV%{rwdDVFq_&hw5PyKnbw`C<5@$DtL%a;$opvUh1lKWNI&dOwJz$WiJ1Lol;F zRO#bmrNn@Mx!Ly}*#DB<{qv^FK324^{9)*k)6?kY^W<9>MdqVCBNFO3@}yHEVL4BF zek!nNFy$+$-8(iG`HU~pq@4a(7(a~nL(%!N|I``*vW!>4mDmm^7*loBRVQ{VZt+&_QCS&k9Axe zIRg3ErvD6)Wm`6Uw264&itm`6iGj#GaNrHoF4PA+w-eUBvqk%bICbfFzjFP1#(6&rYiy&sb*-%FL@*;b z-VR|&2tRjk8tw_D=r){fHrGwzjC^lH`p=0eBWCA{VgH@=`T zN)Kj3J;f*gd=9L|BomTcb0=u4C+}0~I!_-3bEuEr6t;I=`K5@l_i4jR=7qXs<*2&R z&5GQ>s??j@y_B8d;rP|H*=}E+#`GjcAZW&T$mf-re1$L#=bb=S;R!&rZr$2k)k=HLeEQ?O zOJRH)wPw+97Dq6apq;igJtUR`Ftn!#UQM-L1-ju2PCaLG11SZp)mYD0L$H-&M zW?9bP5g}67qS=?S_!;kZ%9lP@)j=S3#Cva`a&-av47iWgXW&)|=2-WKTa8N(VX6Q) zfn*6}#=>j`$s7mC+!Q@>GfY!zIwemAaU+vN%SHyHtj*P($IkEj%N1}+KPXvT^!0UxsdTr4YDD;Nz5 zQy?YD-+C@i-#BAF+{zKpB_lIuTRnIg(!ZRamsj=JFx_+IgFamXchQsIQPpbv2jMgF zgICwg_^#PmYvvEnJ2{E>UhaBvglE&k(`I>S0tLzpE?w%UyPAyzF6$_0Xuerj0`|DP 
zoDWq1I`87*pYnp*+u8sI1IUOdQm>7dw1&qYV!;d}?n9lJE5Fb?%^Py01DEPqocJtQ z=lv9s0*X3BQdzNY=S2}kf* zY6O@%D62h|kroq60?9!|~T0D<_DQ)zrZ1#G|!!m}fl zuA{)KE_KyOY6L(fj-n=}ru5J@FUZbj1(~Tbv)j;l*M`m)5Iz7$g~ry8IJg1Z8f3iI zJackE)27zl)i zuu3p`nv7kHjPkbjz0ScAz=-E3Xpt$hk`2$-aD(G1Xze)Ef`AtM6Z>?;p%x^8of(D! z+zU=8C+h%^0a7jw78c<(!LPM9Njr~WN&q{97O??XGGlT;;wqIbg{TIN{dOaTv@oj}GAQfRZp^Zb8ntUbMZH2DGNDK%yUHPWd}>c?oga z0fr9}Bq7|X-3%G&{_#|K|@RA};H$jNx69~}&*b7>ee;kBq zqhKN5P;~~BXLQQLmnLuFvp}LA@amxHA`Q0iA3g-HzBY~qh72@ji9scbVy(d8pG_%mg_P}44NY`vQ2PNO_k-}lXkU>WkI-9M%@%Aau z8cj-0Ryo>(|9eF#JXt@9lm2{Q*o6$W>W5 zJ6DZ5?sZvfnRgj(0tUg$C<5T;Su>Cis7XZB{4E^6%d&Y~)c&K?1YQzTWEUgsO2GC-lZ4 zpmKM5d^|7%VkjM;MqJbt3BChBZ0V!{auD!IU_>Zwd=@*2va}2Ixdl35Qiw=MUiivl zje70;3Yc&-ud$2uW`$we0Ug@8H(+yDq|Ld~?+Jn~DWJ6C+xwdAy7cLWP-LZA?%&r8J?1y-=E~S3sobNTU~ho_+5Q;lrB8!3Uc%?bX!;9avG>{6 z0rLgOL-6TN3%D_C4Op#p0LOv$!HZh-&j29bN1BNR?OlTw082^&^gL|e;q(3_XFzwZ zuU*1&R?XE79sq4A5K;npx9=&uRCpx)}Vr=1u72pyaS=Z!JvaGK&SQi z04~PB-0wF0_(fu$Eqbv4R}TWvIml6vqCjT>w3R`xQ~Jw+@qoAeV3f49v|tLMc}~OP z-an9eGX3ai1iJ^02^F!1%O7`hb-@b|n4KVK9i9y`wof3sn)>_uuUwu~8JEIw0IU5~ zG1%~&rafy;x#R5)UkPM|(9srnc0Xq?vE;xt2X!ffarE@V+#?Wu;O~S48>(RP;ziV??7*eg z1Nd^}VGqEyV80Gf$`Gt`TKtyNUtAp4wWQ1VHz!_%&Rym&yGj3VxgKoD^Am1*KyZW<1^0kp z0XOP!0Lh2Z}(e2wMrGj(#APqgex`jy5 zN!>3$enlNvvICa~2g6!2eaQmt=XXX9v_TMCQgpdX2qJf3nW4>ANa1!&!fT)Ne10878sX3_TQAPsQs>YEg)0vaaTF&z z`w}ck3PqM+Vn!(%ur8pd=OIOcACCFKGZy%lz=d>gWMFYU+%O7|1q+-nz?MYA{2Qmj|jG=mf$v7H?j{ZXc){0%z$7M|TKQsoaM@ufwB&jR-^nWENmA zQ^8(aR{==oqte@8VhFKs<(+}4=^3a)g4|V-G#dDtKwPFu&?t<)P=kPcmyO&i5>E9o zU{eV&=&Z+PWT2&_K*G%r0Nt)Vpo_7Zb`K|r1riFli-Wa?7KJ@u0UDP%{2B;d!yLtb z-=B8rhh0o~jhupFf1|n|;H*wS?}8gV*FHY1S~#g^A(Ma=2G$CfM=h6dD-JSh^en<4 zw+j`A2k6EC2^AfKfatMGAh+FgG~2`1QleXk_9Fvhj>N=Qq_q5&vW|`$FyK%#{e{yI z`~<$7t8sahfZ{J?Ai)<%lu6YCRnX{y3=jsh5!OA>UIsS7i3{zVYF^&o*VhE^LcsU= zCFvSyjDP?mEL4cHyE8B5n$8cW;3~LRDxPWq@)(GtaN-E3s6)_38+L&~Bjjmd{vu}E zn+zEcT);Ym>>etMltmasJRzI#^7I6yMM!=Ij~-uVWd;5=AjO3qcE1H{iT-4Ppk{MH 
z*ry=KpQ8R}X$egjf_v2wa;2UrUsf1guwSsLGz5h+ev3v7u7qw7ylXO3uldwS4P;vnwd(sBuLYx?E}%<#r+lL;F+nx_a47dbtPTjpAo{6*oY*{s-Ax5+Wo6I?3cIo^uny4VdWZ{P+ygF54RA*f znh66}*U)6mh7vO{->_=O3C{+ZYW>p%@>_iW<={jGAv5U277|=G6#T_VM9u<#MdN@i z6yl|F(El=Yz=-{Sxoofx z!A~tef4KNLYcSyAlX#4VAiK^ZbcCG*iq0<2>>*e|Ic!?oE|mi54X8s-`SQsJ512e_ z5&?O~e2FK%V4nkp(?pcc4Ca7}C49;yiQ^87lEOp~{2>}h>|j;|u3qswWPZs~I; zXmghING6sy9U6}+rE5N_p1{P!oIWal4h~gVm|#@zz46aYGJSXzz!W0s-3&9{AR&zz zkU4p!tQ8AMwr(xWcdXFLW1)^k0{jV2D>=nP7fe%;fJ7k z(KL&2YN!++gPWE>CwGDaoXPu`UqLEXTG|iXI`-{BdQ!bELV@+91%MNHHq&tQg2eKt z_oq->g3;D$0c0?sQN84~0w*t2*=T9cbsJdRucUI%F3-Kr{J^~!)6fAxgA%!g42Y97NcOU|%YF2UA_04uP( z`D87@z&u6RUKjkwfZ7MhBQVN!J6O9HJOJ(_XxV8vcAi2t1QiB2DzKo?t%k(Jp(%qX z8UgG0G!Wx}^i*;Qxa*+g1P2Fja26pqfb$N@H^HVSXbRn&_x7FO0dzFDRN+&#FgGt7 zUR`>S zVTiz9@;;Ev;ox~X`?($(5unT%JOCeU?t1{Q4h|+O{42kc-oI}KaZxSlf4@O~pam>m znqG>}1Rz;T@1Fww=WS9FdiY@0U?{FD18E$Z9TI&_;TOB-#S_a9=#2Iv%R6(~VF zL-j}22->?Elq`3&mqMBRzR;{6cm)ptLpE9<@95eEP(g~`JHUV;vtZ5X`>w#mAX!dk zQf!536u_#an@(vCT!l9ns@*YBR-Z$(5KjQrW0)A2NdNX5G}Pa;H6GYW#I!M9dg{xXeK6V>SZqw zkC4ZbgX6R`&jnV?&w~RH!6w9!2PQYnuUQlbD!7K}y@1^gd(##8D$SP{-Uc~?b|5}( zV{Xo9whSrgqlE_Wbh-u(vX9%Dp-}=V)GZ)5fF|$w^&qpzt}A_dqoty944MCiyZ5Mf z?*73jK==B9T?S$if+;158QvfdLlwbJfj@Iwfj6n~#W%>>O~o$R$iKr#ud0a&3y*+_ z3|Rc31vtf?nu0DTLk17R4L3N!fJ0^Uk1huJ2&@QPP8=l&6`*8 zLv#z__#9Gv!jR#itsUb{;MXV6K5PlE06 zV(wlUd4qhOR>syW3kW6L`K!jK4BlN3)Jahw!VF7~gn$6#3j9FO0_`*4A&T6=(k8=< zu6{hm8^~-LWIYYkbEy9Ev$Gdq?%{Al=YmVl|3GtqojeRaOfr4nDY&8D^ zV3mz^I7DJ2FYD4&B`3o;9QBK&qcsQpi;=2YUK+%9OJ7xz7> zL1h(uEZN`NgUP-Ci(6V!QhqqsVV@fs0(sdA^cbPB#2O4hQGbw*f-<=C>sL@bI7*8} zX2I9-0^wpv$sfwf%F4-M%y7o4JP}6bzJjAzThu;;UA%yN7Aqj~=p+5Mk z>>VCzs;I~!2QPU6dlSPm=K-UIpy7%Bne|+o_#|A4AtOP*wc#V!9QETO1LJX!> zj3R3A!QlGB1l}L8VBTrwuhSgq{ROFE2j8vKY+VS}{r zi~k#12wgd+(HyFOp%is%`Ax&f*bHgviB5u=2-u)`C7wgeY9}X0$--+jx5z&q3XwLN z;!y{KKfiW8Kwa}1*fxOKjj0Seu))IHFNlXe%c8P!lr_4YB{nvC_t~>;2w{!yneRA^ z_=YpiSPP~U9d3fw@60xbl7a%-jQe2aQHC$5Dl3s90=mg^8NyKJnIW3Na-t_Y<=!aOwL 
zp@sBiaTmHAMZ7O{@85Udp1osXF$d?Hap#YLcbdzvW=ifqR#8y_dGyT?YSlQ`KLre( z$B;wOMse`+{t#(wp`uW%G~_vA(olX_Jpm!rpa-xX@BRUfm^(p@Hwmonf3kbz3su4HFK;}eVGjt4t0-->^C^$G+EdbgV(8mEEVhGwWAvfM!d+C5q>-oRmAroXE z*wN-U#Q87dZxuu!I4TWGAe!!POvD5CcRBEG4!X@vYf1|@OaD&`yyg;@yH8SM@P`aI Qc_R?2ikb>#D9gbA0}6u75dZ)H literal 0 HcmV?d00001 diff --git a/test/suite/test_nlp.jl b/test/suite/test_nlp.jl index 996c336c..ddead770 100644 --- a/test/suite/test_nlp.jl +++ b/test/suite/test_nlp.jl @@ -20,7 +20,8 @@ end sol = direct_solve(prob.ocp, display = false, adnlp_backend = :manual) @test sol.objective ≈ prob.obj rtol = 1e-2 # +++ midpoint / manual - # +++ gl2 / manual + sol = direct_solve(prob.ocp, display = false, disc_method=:gauss_legendre_2, adnlp_backend = :manual) + @test sol.objective ≈ prob.obj rtol = 1e-2 end # DOCP solving From 9b1e479c1deb76cbb382f44e23d5fd4e21489997 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 10:24:53 +0100 Subject: [PATCH 32/44] ok --- test/docs/AD_backend.md | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 3f017a05..12d4cbf3 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -6,20 +6,6 @@ The backend for ADNLPModels can be set in transcription / solve calls with the o - `:enzyme`* Enzyme (not working). - `:zygote`* Zygote (not working). -## Errors for Enzyme and Zygote: -- enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with -``` -ERROR: Constant memory is stored (or returned) to a differentiable variable. -As a result, Enzyme cannot provably ensure correctness and throws this error. -This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity). -If Enzyme should be able to prove this use non-differentable, open an issue! 
-To work around this issue, either: - a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or - b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.``` - Error apparently occurs when calling the boundary conditions. - ``` -- zygote gives incorrect (huge) nonzero counts then also fails with an error message. - ## Tests: ``` julia> include("test/benchmark.jl") @@ -30,8 +16,9 @@ Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", ``` Takeaways: -- the `:optimized` backend (with reverse mode for Hessian) is much better than full forward mode. -- manual sparse pattern seems to give even better performance for larger problems. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. This observation is consistent with the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. +- `:enzyme` and `:zygote` currently fail (see notes below) +- the `:optimized` backend (with reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. +- manual sparse pattern seems to give better performance for larger problems. See also the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. ![benchmark](AD_backend.png) @@ -99,3 +86,18 @@ Standard benchmark for Gauss Legendre 2: - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? eg for dynamics and path constraints - try to disable some unused (?) parts such as hprod ? 
(according to show_time info the impact may be small) +- investigate enzyme / zygote + +## Errors for Enzyme and Zygote: +- enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with +``` +ERROR: Constant memory is stored (or returned) to a differentiable variable. +As a result, Enzyme cannot provably ensure correctness and throws this error. +This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity). +If Enzyme should be able to prove this use non-differentable, open an issue! +To work around this issue, either: + a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or + b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.``` + Error apparently occurs when calling the boundary conditions. + ``` +- zygote gives incorrect (huge) nonzero counts then also fails with an error message. 
From 7f924a4c333d8bfc18d7d898868c99e70281b24b Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 14:02:05 +0100 Subject: [PATCH 33/44] bench midpoint ok --- src/disc/midpoint.jl | 177 ++++++++++++++++++++++++++++++++++++++++ test/docs/AD_backend.md | 25 ++++-- test/suite/test_nlp.jl | 3 +- 3 files changed, 195 insertions(+), 10 deletions(-) diff --git a/src/disc/midpoint.jl b/src/disc/midpoint.jl index c3c2595d..bc3928ba 100644 --- a/src/disc/midpoint.jl +++ b/src/disc/midpoint.jl @@ -186,3 +186,180 @@ function setStepConstraints!(docp::DOCP{Midpoint}, c, xu, v, time_grid, i, work) end +""" +$(TYPEDSIGNATURES) + +Build sparsity pattern for Jacobian of constraints +""" +function DOCP_Jacobian_pattern(docp::DOCP{Midpoint}) + + # vector format for sparse matrix + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) + + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables + + # 1. main loop over steps + for i = 1:docp.dim_NLP_steps + + c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block + c_offset = (i-1)*c_block + + # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + var_offset = (i-1)*docp.discretization._step_variables_block + xi_start = var_offset + 1 + xi_end = var_offset + docp.dim_OCP_x + ui_start = var_offset + docp.dim_NLP_x + 1 + ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 + #ki_end = var_offset + docp.discretization._step_variables_block + xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x + li = var_offset + docp.dim_NLP_x + lip1 = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x + + # 1.1 state eq 0 = x_i+1 - (x_i + h_i * k_i) + # depends on x_i, k_i, x_i+1, and v for h_i in variable times case ! 
+ add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) + # skip l_i, u_i (should skip k_i[n+1] also but annoying...) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, v_start, v_end) + # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i * k_i[n+1]) + if docp.is_lagrange + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, li) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, lip1) + ki_l = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + docp.dim_NLP_x + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, ki_l) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, v_start, v_end) + end + + # 1.3 stage equation 0 = k_i - f(t_s, x_s, u_i, v) + # with t_s = (t_i + t_i+1)/2 x_s = (x_i + x_i+1)/2 + # skip l_i + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, ui_start, xip1_end) + add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, v_start, v_end) + + # 1.4 path constraint g(t_i, x_i, u_i, v) (skip l_i) + add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) + add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) + add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, v_start, v_end) + end + + # 2. 
final path constraints (xf, uf, v) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + c_block = docp.discretization._step_pathcons_block + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + uf_start = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + 1 + uf_end = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + docp.dim_NLP_u + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1,c_offset+c_block, uf_start, uf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) + + # 3. boundary constraints (x0, xf, v) + c_offset = docp.dim_NLP_steps * (docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block) + docp.discretization._step_pathcons_block + c_block = docp.dim_boundary_cons + docp.dim_v_cons + x0_start = 1 + x0_end = docp.dim_OCP_x + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, x0_start, x0_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, xf_start, xf_end) + add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) + # 3.4 null initial condition for lagrangian cost state l0 + if docp.is_lagrange + add_nonzero_block!(Is, Js, docp.dim_NLP_constraints, docp.dim_NLP_x) + end + + # build and return sparse matrix + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs, docp.dim_NLP_constraints, docp.dim_NLP_variables) +end + + +""" +$(TYPEDSIGNATURES) + +Build sparsity pattern for Hessian of Lagrangian +""" +function DOCP_Hessian_pattern(docp::DOCP{Midpoint}) + + # NB. 
need to provide full pattern for coloring, not just upper/lower part + Is = Vector{Int}(undef, 0) + Js = Vector{Int}(undef, 0) + + # index alias for v + v_start = docp.dim_NLP_variables - docp.dim_NLP_v + 1 + v_end = docp.dim_NLP_variables + + # 0. objective + # 0.1 mayer cost (x0, xf, v) + # -> grouped with term 3. for boundary conditions + # 0.2 lagrange case (lf) + # -> 2nd order term is zero + + # 1. main loop over steps + # 1.0 v / v term + add_nonzero_block!(Is, Js, v_start, v_end, v_start, v_end) + + for i = 1:docp.dim_NLP_steps + + # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + var_offset = (i-1)*docp.discretization._step_variables_block + xi_start = var_offset + 1 + xi_end = var_offset + docp.dim_OCP_x + xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x + ui_start = var_offset + docp.dim_NLP_x + 1 + #ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + #ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 + #ki_end = var_offset + 2*docp.dim_NLP_x + docp.dim_NLP_u + + # 1.1 state eq 0 = x_i+1 - (x_i + h_i * k_i) + # -> 2nd order terms are zero + # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i * k_i[n+1]) + # -> 2nd order terms are zero + + # 1.3 stage equations 0 = k_i - f(t_s, x_s, u_i, v) + # with t_s = (t_i + t_i+1)/2 x_s = (x_i + x_i+1)/2 + # skip l_i + add_nonzero_block!(Is, Js, xi_start, xi_end, xi_start, xi_end) + add_nonzero_block!(Is, Js, ui_start, xip1_end, ui_start, xip1_end) + add_nonzero_block!(Is, Js, xi_start, xi_end, ui_start, xip1_end; sym=true) + add_nonzero_block!(Is, Js, xi_start, xi_end, v_start, v_end; sym=true) + add_nonzero_block!(Is, Js, ui_start, xip1_end, v_start, v_end; sym=true) + + # 1.4 path constraint g(t_i, x_i, u_i, v) + # -> included in 1.3 + end + + # 2. 
final path constraints (xf, uf, v) (assume present) + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x + # NB U_N may be removed at some point if we use only piecewise constant control + uf_start = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + 1 + uf_end = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + docp.dim_NLP_u + add_nonzero_block!(Is, Js, xf_start, xf_end, xf_start, xf_end) + add_nonzero_block!(Is, Js, uf_start, uf_end, uf_start, uf_end) + add_nonzero_block!(Is, Js, xf_start, xf_end, uf_start, uf_end; sym=true) + add_nonzero_block!(Is, Js, xf_start, xf_end, v_start, v_end; sym=true) + add_nonzero_block!(Is, Js, uf_start, uf_end, v_start, v_end; sym=true) + + # 3. boundary constraints (x0, xf, v) or mayer cost g0(x0, xf, v) (assume present) + # -> x0 / x0, x0 / v terms included in first loop iteration + # -> xf / xf, xf / v terms included in 2. + x0_start = 1 + x0_end = docp.dim_OCP_x + add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end; sym=true) + + # 3.1 null initial condition for lagrangian cost state l0 + # -> 2nd order term is zero + + # build and return sparse matrix + nnzj = length(Is) + Vs = ones(Bool, nnzj) + return sparse(Is, Js, Vs, docp.dim_NLP_variables, docp.dim_NLP_variables) + +end diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 12d4cbf3..6f266750 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -1,10 +1,9 @@ # Benchmark for different AD backends The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend=`. Possible values include the predefined(*) backends for ADNLPModels: -- `:optimized`* Default for CTDirect. Forward mode for Jacobian, reverse for Gradient and Hessian. +- `:optimized`* Default for CTDirect. Forward mode for Jacobian, reverse for Gradient and forward over reverse for Hessian. 
- `:default`* Forward mode for everything. Significantly slower. - `:manual` Explicitely give to ADNLPModels the sparse pattern for Jacobian and Hessian. Uses the same forward / reverse settings as the `:optimized` predefined backend. -- `:enzyme`* Enzyme (not working). -- `:zygote`* Zygote (not working). +- `:enzyme`* Enzyme (currently not working). ## Tests: ``` @@ -17,8 +16,8 @@ Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", Takeaways: - `:enzyme` and `:zygote` currently fail (see notes below) -- the `:optimized` backend (with reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity in terms of allocations and time. -- manual sparse pattern seems to give better performance for larger problems. See also the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. +- the `:optimized` backend (with forward over reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity with SparseConnectivityTracer.jl in terms of allocations and time. +- manual sparse pattern seems to give better performance for larger problems. See also the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. The sparsity pattern detection in JuMP relies on the expression tree of the objective and constraints built from its DSL. ![benchmark](AD_backend.png) @@ -68,6 +67,16 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. *** building the hessian is one third of the total solve time... 
+Standard benchmark for Midpoint: +| Midpoint| optimized | manual | +|---------|-----------|--------| +| 250 | 1.5 | 2.2 | +| 500 | 3.9 | 4.7 | +| 1000 | 11.1 | 11.2 | +| 2500 | 50.5 | 32.7 | +| 5000 | 160.3 | 87.0 | +| 7500 | 333.2 | 140.9 | + Standard benchmark for Gauss Legendre 2: | GL2 | optimized | manual | |---------|-----------|--------| @@ -81,14 +90,13 @@ Standard benchmark for Gauss Legendre 2: - it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). ## Todo: -- manual pattern structure for midpoint - check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? eg for dynamics and path constraints - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) -- investigate enzyme / zygote +- investigate enzyme -## Errors for Enzyme and Zygote: +## Errors for Enzyme: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with ``` ERROR: Constant memory is stored (or returned) to a differentiable variable. @@ -100,4 +108,3 @@ To work around this issue, either: b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.``` Error apparently occurs when calling the boundary conditions. ``` -- zygote gives incorrect (huge) nonzero counts then also fails with an error message. 
diff --git a/test/suite/test_nlp.jl b/test/suite/test_nlp.jl index ddead770..26ad253e 100644 --- a/test/suite/test_nlp.jl +++ b/test/suite/test_nlp.jl @@ -19,7 +19,8 @@ end @test sol.objective ≈ prob.obj rtol = 1e-2 sol = direct_solve(prob.ocp, display = false, adnlp_backend = :manual) @test sol.objective ≈ prob.obj rtol = 1e-2 - # +++ midpoint / manual + sol = direct_solve(prob.ocp, display = false, disc_method=:midpoint, adnlp_backend = :manual) + @test sol.objective ≈ prob.obj rtol = 1e-2 sol = direct_solve(prob.ocp, display = false, disc_method=:gauss_legendre_2, adnlp_backend = :manual) @test sol.objective ≈ prob.obj rtol = 1e-2 end From 562e679b5c5da8000c2f61fa595bb52b194df69d Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 14:26:38 +0100 Subject: [PATCH 34/44] cleaned jacobian functions --- src/disc/irk.jl | 20 ++++++++++++-------- src/disc/midpoint.jl | 13 ++++++++----- src/disc/trapeze.jl | 19 +++++++++---------- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index 19a78a64..c96eed32 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -369,10 +369,12 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps + # constraints block and offset: state equation, stage equations, path constraints c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block c_offset = (i-1)*c_block - # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i k_i x_i+1 (l_i+1) var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x @@ -384,13 +386,14 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) li = var_offset + docp.dim_NLP_x lip1 = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x - # 1.1 state eq 0 = x_i+1 - (x_i + h_i sum bj k_ij) + # 1.1 state eq 0 = x_i+1 - (x_i + h_i sum_j b_j k_ij) # depends on x_i, k_ij, x_i+1, and v for h_i in variable times case ! + # skip l_i, u_i; should skip k_i[n+1] also but annoying... add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) - # skip l_i, u_i (should skip k_i[n+1] also but annoying...) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, v_start, v_end) - # 1.2 lagrange part l_i+1 = l_i + h_i (sum bj k_ij)[n+1] + # 1.2 lagrange part l_i+1 = l_i + h_i (sum_j b_j k_ij)[n+1] + # depends on l_i, k_ij[n+1], l_i+1, and v for h_i in variable times case ! 
if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, li) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, lip1) @@ -401,14 +404,15 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, v_start, v_end) end - # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) [and lagrange cost] - # with x_ij depending on x_i and all k_ij and u_ij == u_i - # ie this part depends on x_i, u_i, k_i (skip l_i) and v + # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) (with lagrange part) + # with x_ij = x_i + sum_l a_il k_jl and assuming u_ij = u_i + # depends on x_i, u_i, k_ij, and v; skip l_i (could skip k_ij[n+1] too...) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, ui_start, ki_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, v_start, v_end) - # 1.4 path constraint g(t_i, x_i, u_i, v) (skip l_i) + # 1.4 path constraint g(t_i, x_i, u_i, v) + # depends on x_i, u_i, v; skip l_i add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) add_nonzero_block!(Is, Js, c_offset+(s+1)*docp.dim_NLP_x+1, c_offset+c_block, v_start, v_end) diff --git a/src/disc/midpoint.jl b/src/disc/midpoint.jl index bc3928ba..9f1f3ef2 100644 --- a/src/disc/midpoint.jl +++ b/src/disc/midpoint.jl @@ -204,28 +204,30 @@ function DOCP_Jacobian_pattern(docp::DOCP{Midpoint}) # 1. 
main loop over steps for i = 1:docp.dim_NLP_steps + # constraints block and offset: state equation, stage equation, path constraints c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block c_offset = (i-1)*c_block - # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i k_i x_i+1 (l_i+1) var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x ui_start = var_offset + docp.dim_NLP_x + 1 ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 - #ki_end = var_offset + docp.discretization._step_variables_block xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_OCP_x li = var_offset + docp.dim_NLP_x lip1 = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x # 1.1 state eq 0 = x_i+1 - (x_i + h_i * k_i) # depends on x_i, k_i, x_i+1, and v for h_i in variable times case ! + # skip l_i, u_i; should skip k_i[n+1] also but annoying... add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) - # skip l_i, u_i (should skip k_i[n+1] also but annoying...) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ki_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, v_start, v_end) # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i * k_i[n+1]) + # depends on l_i, k_i[n+1], l_i+1, and v for h_i in variable times case ! 
if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, li) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, lip1) @@ -236,12 +238,13 @@ function DOCP_Jacobian_pattern(docp::DOCP{Midpoint}) # 1.3 stage equation 0 = k_i - f(t_s, x_s, u_i, v) # with t_s = (t_i + t_i+1)/2 x_s = (x_i + x_i+1)/2 - # skip l_i + # depends on x_i, u_i, x_i+1, k_i, and v; skip l_i add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, ui_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+2*docp.dim_NLP_x, v_start, v_end) - # 1.4 path constraint g(t_i, x_i, u_i, v) (skip l_i) + # 1.4 path constraint g(t_i, x_i, u_i, v) + # depends on x_i, u_i, v; skip l_i add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) add_nonzero_block!(Is, Js, c_offset+2*docp.dim_NLP_x+1, c_offset+c_block, v_start, v_end) diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 3c00f961..7cbea6d9 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -183,8 +183,6 @@ Build sparsity pattern for Jacobian of constraints function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) # vector format for sparse matrix - # +++ better to compute nnzj beforehand and allocate the 3 vectors (no push then) ? 
- # pass to addnnz the current offset too Is = Vector{Int}(undef, 0) Js = Vector{Int}(undef, 0) @@ -199,7 +197,8 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) c_block = docp.discretization._state_stage_eqs_block + docp.discretization._step_pathcons_block c_offset = (i-1)*c_block - # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 var_block = docp.discretization._step_variables_block * 2 var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 @@ -210,23 +209,23 @@ function DOCP_Jacobian_pattern(docp::DOCP{Trapeze}) uip1_start = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u + 1 uip1_end = var_offset + docp.dim_NLP_x*2 + docp.dim_NLP_u*2 - # 1.1 state eq 0 = x_i+1 - (x_i + h_i/2 (fi + fip1)) - # depends on x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) - # and v for h_i and fi fip1 + # 1.1 state eq 0 = x_i+1 - (x_i + h_i/2 (f(t_i,x_i,u_i,v) + f(t_i+1,x_i+1,u_i+1,v))) + # depends on x_i, u_i, x_i+1, u_i+1; skip l_i, l_i+1; v cf 1.4 add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, ui_start, xip1_end) add_nonzero_block!(Is, Js, c_offset+1, c_offset+docp.dim_OCP_x, uip1_start, uip1_end) - # 1.2 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) - # wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 + # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i/2 (l(t_i,x_i,u_i,v) + l(t_i+1,x_i+1,u_i+1,v))) + # depends on x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 ie whole variable block; v cf 1.4 if docp.is_lagrange add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x, c_offset+docp.dim_NLP_x, var_offset+1, var_offset+var_block) end - # 1.3 path constraint wrt x_i, u_i (skip l_i) + # 1.3 path constraint g(t_i, x_i, u_i, v) + # depends on x_i, u_i; skip l_i; v cf 1.4 add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, xi_start, xi_end) 
add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+c_block, ui_start, ui_end) - # 1.6 whole block wrt v (+++ resplit for clarity) + # 1.4 whole constraint block depends on v add_nonzero_block!(Is, Js, c_offset+1, c_offset+c_block, v_start, v_end) end From ae477cccf0321d709e74d120390107d35a3a774a Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 15:17:10 +0100 Subject: [PATCH 35/44] code and markdown cleanup --- src/disc/irk.jl | 15 ++++++++------- src/disc/midpoint.jl | 27 +++++++++------------------ src/disc/trapeze.jl | 25 +++++++++++-------------- test/docs/AD_backend.md | 38 +++++++++++++++++++------------------- test/docs/AD_backend.png | Bin 39014 -> 36544 bytes 5 files changed, 47 insertions(+), 58 deletions(-) diff --git a/src/disc/irk.jl b/src/disc/irk.jl index c96eed32..981aa914 100644 --- a/src/disc/irk.jl +++ b/src/disc/irk.jl @@ -406,7 +406,7 @@ function DOCP_Jacobian_pattern(docp::DOCP{ <: GenericIRK}) # 1.3 stage equations k_ij = f(t_ij, x_ij, u_ij, v) (with lagrange part) # with x_ij = x_i + sum_l a_il k_jl and assuming u_ij = u_i - # depends on x_i, u_i, k_ij, and v; skip l_i (could skip k_ij[n+1] too...) + # depends on x_i, u_i, k_i, and v; skip l_i (could skip k_ij[n+1] too...) 
add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, xi_start, xi_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, ui_start, ki_end) add_nonzero_block!(Is, Js, c_offset+docp.dim_NLP_x+1, c_offset+(s+1)*docp.dim_NLP_x, v_start, v_end) @@ -483,7 +483,8 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) for i = 1:docp.dim_NLP_steps - # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i k_i x_i+1 (l_i+1) var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x @@ -492,13 +493,14 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 ki_end = var_offset + (s+1)*docp.dim_NLP_x + docp.dim_NLP_u - # 1.1 state eq 0 = x_i+1 - (x_i + h sum bj k_ij) + # 1.1 state eq 0 = x_i+1 - (x_i + h_i sum_j b_j k_ij) # -> 2nd order terms are zero - # 1.2 lagrange part l_i+1 = l_i + h (sum bj k_ij)[n+1] + # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i (sum_j b_j k_ij[n+1])) # -> 2nd order terms are zero - # 1.3 stage equations 0 = k_ij - f(t_ij, x_ij(x_i, k_i), u_ij, v) - # wrt x_i, u_i, k_i (skip l_i) + # 1.3 stage equations 0 = k_ij - f(t_ij, x_ij, u_ij, v) (with lagrange part) + # with x_ij = x_i + sum_l a_il k_jl and assuming u_ij = u_i + # depends on x_i, u_i, k_i, and v; skip l_i (could skip k_ij[n+1] too...) 
add_nonzero_block!(Is, Js, xi_start, xi_end, xi_start, xi_end) add_nonzero_block!(Is, Js, ui_start, ki_end, ui_start, ki_end) add_nonzero_block!(Is, Js, xi_start, xi_end, ui_start, ki_end; sym=true) @@ -513,7 +515,6 @@ function DOCP_Hessian_pattern(docp::DOCP{ <: GenericIRK}) var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block xf_start = var_offset + 1 xf_end = var_offset + docp.dim_OCP_x - # NB U_N may be removed at some point if we use only piecewise constant control uf_start = var_offset + docp.dim_NLP_x + 1 uf_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u add_nonzero_block!(Is, Js, xf_start, xf_end, xf_start, xf_end) diff --git a/src/disc/midpoint.jl b/src/disc/midpoint.jl index 9f1f3ef2..c2de23e1 100644 --- a/src/disc/midpoint.jl +++ b/src/disc/midpoint.jl @@ -309,15 +309,13 @@ function DOCP_Hessian_pattern(docp::DOCP{Midpoint}) for i = 1:docp.dim_NLP_steps - # variables block and offset: x_i (l_i) u_i k_i x_i+1 (l_i+1) + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i k_i x_i+1 (l_i+1) var_offset = (i-1)*docp.discretization._step_variables_block xi_start = var_offset + 1 xi_end = var_offset + docp.dim_OCP_x xip1_end = var_offset + docp.discretization._step_variables_block + docp.dim_NLP_x ui_start = var_offset + docp.dim_NLP_x + 1 - #ui_end = var_offset + docp.dim_NLP_x + docp.dim_NLP_u - #ki_start = var_offset + docp.dim_NLP_x + docp.dim_NLP_u + 1 - #ki_end = var_offset + 2*docp.dim_NLP_x + docp.dim_NLP_u # 1.1 state eq 0 = x_i+1 - (x_i + h_i * k_i) # -> 2nd order terms are zero @@ -326,7 +324,7 @@ function DOCP_Hessian_pattern(docp::DOCP{Midpoint}) # 1.3 stage equations 0 = k_i - f(t_s, x_s, u_i, v) # with t_s = (t_i + t_i+1)/2 x_s = (x_i + x_i+1)/2 - # skip l_i + # depends on x_i, u_i, k_i, x_i+1, and v; skip l_i add_nonzero_block!(Is, Js, xi_start, xi_end, xi_start, xi_end) add_nonzero_block!(Is, Js, ui_start, xip1_end, ui_start, xip1_end) add_nonzero_block!(Is, Js, xi_start, xi_end, ui_start, 
xip1_end; sym=true) @@ -337,24 +335,17 @@ function DOCP_Hessian_pattern(docp::DOCP{Midpoint}) # -> included in 1.3 end - # 2. final path constraints (xf, uf, v) (assume present) - var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block - xf_start = var_offset + 1 - xf_end = var_offset + docp.dim_OCP_x - # NB U_N may be removed at some point if we use only piecewise constant control - uf_start = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + 1 - uf_end = var_offset-docp.discretization._step_variables_block + docp.dim_NLP_x + docp.dim_NLP_u - add_nonzero_block!(Is, Js, xf_start, xf_end, xf_start, xf_end) - add_nonzero_block!(Is, Js, uf_start, uf_end, uf_start, uf_end) - add_nonzero_block!(Is, Js, xf_start, xf_end, uf_start, uf_end; sym=true) - add_nonzero_block!(Is, Js, xf_start, xf_end, v_start, v_end; sym=true) - add_nonzero_block!(Is, Js, uf_start, uf_end, v_start, v_end; sym=true) + # 2. final path constraints (xf, uf, v) + # -> included in last loop iteration (with x_i+1 as x_j and u_i as u_f) # 3. boundary constraints (x0, xf, v) or mayer cost g0(x0, xf, v) (assume present) # -> x0 / x0, x0 / v terms included in first loop iteration - # -> xf / xf, xf / v terms included in 2. 
+ # -> xf / xf, xf / v terms included in last loop iteration (with x_i+1 as x_f) x0_start = 1 x0_end = docp.dim_OCP_x + var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block + xf_start = var_offset + 1 + xf_end = var_offset + docp.dim_OCP_x add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end; sym=true) # 3.1 null initial condition for lagrangian cost state l0 diff --git a/src/disc/trapeze.jl b/src/disc/trapeze.jl index 7cbea6d9..84055748 100644 --- a/src/disc/trapeze.jl +++ b/src/disc/trapeze.jl @@ -288,30 +288,29 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) for i = 1:docp.dim_NLP_steps - # variables block and offset: x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 + # contiguous variables blocks will be used when possible + # x_i (l_i) u_i x_i+1 (l_i+1) u_i+1 var_block = docp.discretization._step_variables_block * 2 var_offset = (i-1)*docp.discretization._step_variables_block - # 1.1 state eq 0 = xip1 - (xi + hi/2 (fi + fip1)) - # wrt x_i, u_i, x_i+1, u_i+1 (skip l_i, l_i+1) - # -> included in 1.2 - # 1.2 lagrange part 0 = lip1 - (li + hi/2 (lcosti + lcostip1)) - # wrt x_i, l_i, u_i, x_i+1, l_i+1, u_i+1 - # -> single block for all step variables + # 1.1 state eq 0 = x_i+1 - (x_i + h_i/2 (f(t_i,x_i,u_i,v) + f(t_i+1,x_i+1,u_i+1,v))) + # depends on x_i, u_i, x_i+1, u_i+1, and v -> included in 1.2 + # 1.2 lagrange part 0 = l_i+1 - (l_i + h_i/2 (l(t_i,x_i,u_i,v) + l(t_i+1,x_i+1,u_i+1,v))) + # depends on x_i, l_i, u_i, x_i+1, l_i+1, u_i+1, and v + # -> use single block for all step variables add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, var_offset+1, var_offset+var_block) + add_nonzero_block!(Is, Js, var_offset+1, var_offset+var_block, v_start, v_end; sym=true) - # 1.3 path constraint wrt x_i, u_i + # 1.3 path constraint g(t_i, x_i, u_i, v) # -> included in 1.2 - - # 1.4 whole block wrt v (NB. term v / v added before the loop) - add_nonzero_block!(Is, Js, v_start, v_end, var_offset+1, var_offset+var_block; sym=true) end # 2. 
final path constraints (xf, uf, v) # -> included in last loop iteration # 3. boundary constraints (x0, xf, v) - # -> x0 / x0, x0 / v, xf / xf, xf / v terms included in first/last loop iterations + # -> (x0, v) terms included in first loop iteration + # -> (xf, v) terms included in last loop iteration if docp.is_mayer || docp.dim_boundary_cons > 0 var_offset = docp.dim_NLP_steps*docp.discretization._step_variables_block x0_start = 1 @@ -319,8 +318,6 @@ function DOCP_Hessian_pattern(docp::DOCP{Trapeze}) xf_start = var_offset + 1 xf_end = var_offset + docp.dim_OCP_x add_nonzero_block!(Is, Js, x0_start, x0_end, xf_start, xf_end; sym=true) - add_nonzero_block!(Is, Js, v_start, v_end, x0_start, x0_end) - add_nonzero_block!(Is, Js, v_start, v_end, xf_start, xf_end) end # 3.1 null initial condition for lagrangian cost state l0 # -> 2nd order term is zero diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 6f266750..439893cc 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -35,6 +35,25 @@ Standard benchmark for Trapeze: * (older version) build sparse matrices from dense boolean matrices ** build sparse matrices from (i,j,v) vectors +Standard benchmark for Midpoint: +| Midpoint| optimized | manual | +|---------|-----------|--------| +| 250 | 1.5 | 2.2 | +| 500 | 3.9 | 4.7 | +| 1000 | 11.1 | 11.2 | +| 2500 | 50.5 | 32.7 | +| 5000 | 160.3 | 87.0 | +| 7500 | 333.2 | 140.9 | + +Standard benchmark for Gauss Legendre 2: +| GL2 | optimized | manual | +|---------|-----------|--------| +| 250 | 3.9 | 5.0 | +| 500 | 10.5 | 12.9 | +| 1000 | 121.2 | 26.1 | +| 2500 | 136.6 | 77.2 | +| 5000 | 551.9 | 172.2 | + Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | transcription | optimized | manual*/** | optimized | manual*/** | |---------------|-----------|------------|-----------|--------| @@ -67,24 +86,6 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. *** building the hessian is one third of the total solve time... 
-Standard benchmark for Midpoint: -| Midpoint| optimized | manual | -|---------|-----------|--------| -| 250 | 1.5 | 2.2 | -| 500 | 3.9 | 4.7 | -| 1000 | 11.1 | 11.2 | -| 2500 | 50.5 | 32.7 | -| 5000 | 160.3 | 87.0 | -| 7500 | 333.2 | 140.9 | - -Standard benchmark for Gauss Legendre 2: -| GL2 | optimized | manual | -|---------|-----------|--------| -| 250 | 3.9 | 5.0 | -| 500 | 10.5 | 12.9 | -| 1000 | 121.2 | 26.1 | -| 2500 | 136.6 | 77.2 | -| 5000 | 551.9 | 172.2 | ## Remarks: - it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). @@ -94,7 +95,6 @@ Standard benchmark for Gauss Legendre 2: - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? eg for dynamics and path constraints - try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) -- investigate enzyme ## Errors for Enzyme: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with diff --git a/test/docs/AD_backend.png b/test/docs/AD_backend.png index 422791d94e3ec13b56cd17a679a4f5ecc648903c..a73e2d70c0b2d030b3c3b319f7c37bb748eb45a3 100644 GIT binary patch literal 36544 zcmbTeg|b*X{AHDk(3T41OY{q?oJ62kyg5;Lj|N;L_npxJEa>Weq*2e ze*b}YpZh%LT-bZBwdNf0i7^S&R9D2up~OKT5ctYUa@q(4sy+gN5{-olKe^Nk>xF+| zJX2AWLtG>O$!yGzLm=)Tl;vb}JyJKP-StVwM=^Kfh>DajW#mxqy}K`;Gxitf9SwOq z*XNMR(#AmRTkPT4YdCdxkKes}Yg`r{@R0N)7wUk@o#o5LrXFsUCqpg4OFrqo$JlM= z`^;FONwp_N7#Y2o!9=o3+Pv!o0YtJ`{n)tlvRL$CZ$gOZgQMlzWZ%Lsv(4z`u;`ik zzY;M76UDyL$0C-+Qohd-j71;JL`g0SpC`fp-^Uc3>EFM7i$zF?8L+Wos@rv4g%5T+(@`*f37O^f3dS8x=al;8dmHsb5J4f6dRi}Q5M41sO) zbMUY1x;o11yTr(M?hf4j|9f8y?}$m;-iBp~q0251!hJG?bv*a<;^X6A?|B`4`n;cJ zPO-uJq^F}Jbo=H)Snuk=nGZ`v?p28St5-Fye;?QBV6`Rj-{I_bcFGpRlwJ5DPyYS% z^z_@eZ|&i?25D}D#ZOHcnV6V3-VHY5DlzGuEaWh^Z%Nmww4x-+? 
z)fSBHutSS{3Ske0yVURAy*vN!_2R{g*4EY~$4~;AwH|cDSG(yt%fY-$)r!Qasj2z4 zP#ro^Vd3|;d9!1BX(WAWf`Zxzn_cAPr|3gF{^0=j!4N;jsMc=9`YEtKNPgK-UL!-B??ZMNhlB}$& z1Gy?g>_tUIvGJ&dg@w1cpK{#0N5N;V6h$jxb>rXp(ck6Y3740bU7?R2Jwn`JWPBjm z;B&VBJAr$$(pt#ud+_5@LzEU+rEis$vkji4w{DHXZM6nrqP0N0h)}DWn%+Uw)Yd91 zE6415dU_HQ6GuiyUSAwUH8fn4|Z(bm%9eevr91P@BUw{Q2u+ff5l61ZRw zHCt0tQx6XhJ;Z0u4ptWz7DQdv=H}<=+1XW^j-6L~8vQQo^u9a|H2J;M6?d{VIhwCQ zm*w+YPQ{G=;^>yRmz+C6FcWb)v0zr3m^%TTkK}b<)`Q{UVFd++)!qy` zeha+u)v>bYpA@5-T>r+Wr_W@|hr`{-S6!d~{Q}!MFgWOaylIdj=?mE$+IH`BWp}>q z-|-e2Vt+g~mc-T7m5?xs|7FimClLh&L|oyi?VW}1A-EC}5?za)2P>r}%}utGRj@Qw z6%~xEPeTdmXnCK>>FVm%yKPx?CzBBoY5x0L=Xqe^;J|~($lOg_(^Eu@M#OZs2^Sv^pUq(;U*ynnXY-@h+Y zMx2%q5zlSX^x}6s0gY&IaB!-mZ?f0))#bm?&`{@Eqj?{1xp!Z<-_zo=u(+&BD>3X@ zE-x-FZf$k19;iHgNO@gS@v=9PM(nY}?@+>v)}XT>jN!f6k<#l)PKNAlUkv<(46&2u z>Eo?p_v*2+>rOX_Ot^Ynhws1R%f%=utDUxa2Phn{cxOIWhwFoBsi~dEuwD1q+4c1F zX6sywb@J6=J#BVIzsy3o6z0Q?SzBAPva%kWUBG?PNqBv*@!y?$9TFlRF=|-vR+^vB zdNe;>?`}WS(6xG?qocFDyj)RH(dG8!$z)55v}DH4_O^OsH6fkk(9qD*p2zz7It~ub zkc`{*l!)twE=gkhvzXZUv^0DKM52#mM#o~M?WCZnXmanWx0jdQY*U(-ou_9lr05%b zs!s10wzl2NPF?w4himZun2Zc=Zf=bkYRQaQ2>Flfda)hiv>Jwn=TK3O8qc7%4~>p~ z*{R587fSf@x#xN}>!*caO1DLYrE;*CXdUSX1RojhzKuljJO|!=TM-(hetrbxWWP-`x=UcNs}6_2E0^Nb+xqz z1_l^*@DNxtCgwc1OARa8?%lJzqY?h?1+Ho$0s{l0t^i%X{;|cpEYUUskM~~izyOcK zoc~T<9!A9a^Qo+$zL^(JgSZGjK0b0z!<$r8#rgS|h`PGEA?Buc?{M}5+U9lj^w{s- z?fm`QeP<@^gOKdgr?iOjii%Ktsw(SIt|D>xjs0}p@W4QM69Q5n8ymZ_8a|-;1s4Ia z!^XxYun*0cHYz_eQ`5-EXm&3*Cuid#y47^o`qHfYS^K*`fBry|XY=Vzmms7SzkB+E zg_$`f{G0A=W@hI78n~C58hsU&;wBzW?Z@OdFx#q|Wr<|*v2cSrmE!wuCANA8cQZlM zsH;b&zLuAhGk@_yE2GY1kD*W^Un4_MP|$8Lue;?n+NbR7{r!DiD6@|q!HpFc7qhdm zDMV1he+&3scx==@2qnPB#)kGq?;Sho6lh~>3-z#o5I)!0qN1Ym`}_CrP*zZ-&CKRu z8~*uP+j(AHcnYTABeJrxMn*;+K70sI`|;yPXjf7P1Q(Ylvk6cg;7*P@TIX7W>}Q*d zb#>*w|NiyMsn&>;loTV#@!zp6$>v0*^$;<{or{ahM3wD7eqK5CM`WARxT2!}?cdrU z=I5uSr^oDauYN4O*E}@P*H27If!$~{u6MH$oRppu)raOvLrv}C>T1&9A)`?5vOVQ_ zvOVp4eppsjwX)~2x3`zTW$bmZ(*1Yz3v{gMMrgJrpi*>Gl$#30S-Pm|&oINx+SW#U) 
z+TZVav@r}N21Qy+%W-3<@aNB;kWvy*)T0{F*!4aS4GcIrJ2TSR%`}XUjJUy_5D*Z+ z{VzKRL&ON#O|4DU9=t}w^t(LL^Y;FCc<9m^H@&;-z-#2KVsdSgutO|+q@#_>Qt{c_ zR4PC6uU@z1<@u41;&Gczr1?d;6VmS<%ae-is^omT-$H2Po9{P|Pj`gdd~>(}q!q$DKz9Pwpk zWep8Jj~;!T6G{5;;ihXh4|E%TejPo%1S9v-`Ch5(rGvAXnHhwpq2aI9gF?M~_wFgH zs@jZxrg5!sfKCCa?&j?LZ+AW@DJf}AD5TpWAt8bN-aQCFN8wZeK+d&O0|VL+F69qo zWL`tQ1+^jq78agC=382>L*IjML`M(#{#~}!-v-doQW{kD@TjPSj~@kid9&2XadC0s zULk=xB_XzrxJJK!U-PMEFZXo!*{l^xn#U_1|k~>@Hvj9I{$7Yy?z5U9uYpc-#%% zBXn(hR9C|CH-Kk`dor`LWh2JvpFMx>|8J`rqsa8PjsZ3v2G%7Tq3pBOe#ROH6t~S+ zMT^hrbCTlXbabMwUJVzK31nSKMcn57F3l9m4d9{Ddi1kPK2cE;f0n(;sbDE7y>5Os zL*jjG%F%&6Of&kNafsEm$X)J0Mn_FjuYcqW45rt9NaVM%(i1Y@CX?CqY1fSYW+ape z40QD2F=!yo-Y0g3`&j3IKHgLCL%UMjo(T&?LBrs9Qf0H&mjy7CPSWS}pjT>LeQ0x} zH0Z_ex!GI2#Igdx!7YDBcNgRbn{dcw=llBl?l`YLfBqc!2mlj7q9ikdzayn7ywBgP z!CDuTlrRn6?TDhYfA);eecQZpYC2>Rr~ImO5hq_?W@ZZ}T7!zkdDV zGO8gWC1nKUyRaZD>Xi^5Pb=(T3fb`b^=oQ^mlNMCb2_@Aeoj`~Ybc;WFA#^~yYM3n zK}1Zvx7Pm&5<*N&Oiyo=@<{*jW49k~iDhJDpn%-Ga+fzRHO4J|{5lX7C<2MGIIU{( zjoiSk00aA`mJ(3sd{fn=`fh5w%8Gs0HLa_y)jw6^CyWPKodSbKGbFC@zw|> zC8f8KkpMQJPtXDyPFi}ZtzGW4{16(sghV6ogQtyNG0HU1Ni@>M=@r$Ll!P7T5S~7; zGJq}hpF9B~RjOZ>eOy2E#YC#r;9!B94QHmIm6s1oz-Evo`eR54ZpvaMrMJRuQqs5& zALM+6E&kl*%YDY{rmR-qoEo)F8&l~EBCo~kH-x4{$%-$hYuYV zF?>?!#-g=8DcDZc#E8$7zraQ4K7LFs-0OG69^Onp`$aH#Q!q(fae!P zK}1AEAt9@J>fJG4i}~*%+uPd+B69M)tgJG-X;H-6u&`RUt>nteN_byH1U}*>Ir-y9 zkI*qOz6;il@qr zv*bx=1q+eY?F_=!);{rS7s@AC*JrC4a1%P5IS>iTIP+7r&VVvEH#X{=mVZIJhY~=8 zkn}wtf@FgidH66CJSZ5HzV;OxJ40D$H*O3~yVT$7{3Td~gBma=*AfsA0QL8q)$l_F zg}j^`CPv1fo3b0m0zyKo4#KdAz!nV*42Bj0fyQ)obuH}`>wU>nOL05ec(gwSiE2Ob zEsSI4$Gk(q^j&BTOOzb*YioB;4V;~w_oV@c>^l1SNg{x%dP17R7eMTUgoF&y-3Kg~ z)>KMY{OOZUeXU?f$0Dae`4T*jm6a791|v6jRJXz`Ptdo&COB*8qI>7oTX6HhEEKv#=S6t=uZ>H^_%?~ zYim6{JcJR{B2K|ps6FOqN6!Y*l+)|CIZY- zQW*`bBw1=y9>?#uiLkQ#&mz5pfhfZT-@SdC={GxF>)i3{*F!}`OavZ2{?XBqGU;k( zXD3v%7+;_#fKt@VI_x~52?D58cK-;h-2dt#>3j%KL+8Ezy~U31KY!qnF$ifdfFJ+) zqw1s>-Ttg>d=q;2%a^OnD)H~BMPj|Sp`8Q;27;>4nT~{;Pe*%txC8~`&v&7Q$Hc@$ 
z{JA*#tH+lZ*KHBUo{HZ$I_iJCNr7;vH7c52hmd%rtJ`_+j(}anwr{_9Qewonn;AGwJ@z+PXhE|vWx|L!{b zdDKam8WS_L`Rd#ncs5{{K&jjzshh1^;4XK9CDQ5y8X9$Hv8#r{E+iDoQKnwh65Pp!HC$u8>eQ@AIENr@JV~ zcr)O5yxNmiSeCbIyDUXU_A&exH~Gz*piNW*a&`;SBo9COx6=%8BhhGNrs5^xQDJcK zjiRG9=tBhwPb;m+Jo}&!0TcJRIPC{+4g`pUbSzJes{FQuh*Zr%x52hwQ(ZYVu9F&{2od0-6NuoJPa)HG@ZlUJj4ltcB0{ zj}*zqJq>!3x3By}V&9zMPs{wEh{EK5Sx(Nil$64c*x)o)+lx5e_6+7AFZRE_T5@j$ z1!`{R$L6MuQZ+Mr- zG_%O>{f-f*CQyW1``~}@_BH2|fZri6e&Wm&-p~Ir~>cB zDG&fm!aOngK`~DfbMuz*BoJl83iQ61_R^lN(q%43ML-H0K0&^U3kzf7KLR^KL)hBd za&v#7fA9u`%>^jqP<&W}WZ)J?0g@=kv6ShT05sD%DABUb9&w?j(S;k;u-@F3=iX%KvJryF22WS6En>rOBwf2OzmXp)E?wae6r55zCs)@a zr1-1*jhq>(4y*@|q#=r#Cf|R+EtRjZ0Hr@z)Vpz^k)EE8gI`r>)Z}wkWjnc%npzF~ z4xq{Xi^J=sm6iC6j4vRAAb^KPm}odo)i~;+Us8B^c_9If*I_~anh;y@=QnC8nIpfL zu;>@MscO7$J#Eb9M-!YTr{w3aC@(LEa?H+N42_pc$WGY5o)viK$Mke{+KTZ_fB^uE zP5jP6yta}1q@$Chx&kN#02q{!P6ra&D0kQZsM+6aC*4;5Bte28q7xDnf7UWFF#%>@ z+t_&X=8e^>R~-6ZrPb6p85kGiH|0FkZWQQ(xB?-6b$3h(90#JJh0)wI&98HiNqk! z>xyHg6LZ5usAouSZEmVYP82f7021pjl42V1R&a3Jh){YJi9sogn z^#bJ4qcERT1>E>t^|tRMvl=IGES0$F`rPlLcF$E^!i~>uqR$`AlREz9it}iG4d`uen}NyKLC52s$qOfP1o?t`iUz_E`KG0l>SU?18N8cZhm` z31l~Kf<{*mnTE6?NU5ngn3<1$b2qQ8ugfbcf@Bu@<_)wmD@)4=Dj{H;TzBujnr}`i ztI#ib-Vq5E35*GAD=Pr#kZNj=9#w)zRr#_TShf1s^}oRGK%_!se*5ME;ROmaub`lt ztE=zzRU<&ZT6{|@tA~_(y!z0dZxV{vP&>=VEU=+4EEch%KG z_rBfYGV(ZFi!s%3i!H6In}EfK{tF_W@BUKP!D??@T$};NNP*TTBu|DPLFS*HpG$fj zc7Ff<3^FgUabam`2?RGPIy&xwt?@)vVON|w_k1wr3h19{W8pu-M>9tCyh!%tK#{Jl zxrz!xC%|gk9n1M+<-g4p>YlTsJx-yYo34eNriKfA6AhOIuZz!J(m>h@6}pkmL*a zzGP+XLFQ75f$<0_4*~!_l^{ae$OuU;wzjsqUP1su7=nn@Zc$NaHwO|aw1M16$SG+7 z0W#z3ZKp^ghNsPM&~fJ`?J1MPl~C_=CI7?m`*28jjI7xGfY4M=tQw{JdRQ_M*M)d7YM zBCC01^V}pl0;ICo%FhzA3)$r)EE;d>Mp$z(XBcmm1BMTQ+)d)o@9CWw8w0u|!p4S! 
zh>eTuDwSklV|!N$H@4p`TB|q0q{DKPD;J852Os`(LSiXkR4Q%l!6WyKgC=(hRU9Iz%CZ- z6$4-e#je?TmEi0vG*uxXJ$d!dG|sD%vKUD z{|l6n5uoPNB2C0`{IdP@jEsvL8-}wlLA?dtR@`Yx>G^Z`$p%E4PR7P~#VYtepr(+J zl7fh?wA}(vubRXM))Xh$j^HzWmQc%8$0g@Tl=8pEKC}40SpZ}~8Z9y4cV1Dos*j1$ z0>Oe#!fU8lj~h_~)&-E=sp)CxT<|>b9{@H}V`6yiriA&nF%Z-=G?;|6&_6Pglf5>E z?m7Lcfhd5MC;#|y{GZqhlqc18oE#iX?srj4;XVLId}p?K)v0zUJ|F-gBqYR@kP;iKZD4S(hCix9MNO@= zpn&L?N(MB9`Ea&4-H~*$9d;Gw0UT8Qw*?(y_3xP*A`KZOf{1 zsGWlBi%&?1fFrze-&$`;c=_p@A$)>4wmv+Z- zjE-xwpN#iE*^Z){uYV`cI8Q;3Zw_Wrf>}0ClB&~|7DRkO1$%dtBgn+3d(XFCEMW8b ze#M}mNSY{2R-df0wYGtk5=bWX1-k$9Jq138qAI_MiGyp1h0h?774fN0q9lz)PjHj( zdgQ*nK9+o7W1}ycwF?$;P^*6bn%GcJPfzx^or$92OCV7$`2~u0To;!@-M)N8B+CCz z+D5;8wZNYiv1*kaqdxVKOzDm<7O?(SQu4n288GSC^pyy4LswUUpw?HW9~I5k!S~c@ zzJUOEz?<~P;?ya*D^HEbkzvTyE+RU3a5$t-lc+Vrm0Ey}I%mCV?(0uhjdV@5Y5d^FP~f2W2W}#34i2k%8hCB{4BC2v>EB7pWy2 z!hS}<3L%V44;3N;!TKlAy2TOvJqCge+O1lOpw^R%%S-T{$f&4r!R*2S=Aro~O{{vp zJq2u8+FQ5uR`UhHSy*2;SjCr}z^CA20w=>WJl%D7HDhiaJ9}yqgI+g$E$vg4KGbQj z?>s<+skWPb7ak5@D_}FmvtJ0hI-tCAR9lcrK_!IzLQk=i@;$%L%pB7G49XE;3(ltv zH6VK+TIL;4(aY{t5ArK57dN#p&?Mt3L)tMwm@}< zw4$bf$e;inQV$P@*6^hR4-+akDS`1ijMyYBCQHOEGlBhDx~v>=q(3575Q zamJf@$gh>~x`|Lv7heZm^)kD+t84GzfH4ZH2i2`xLI5&=m>V~GeYANW8Tquv!OYxz z=|>o8Tx=|$i_YVFI(bG79xVcY@C z9SD&$5oZiUliy`>TABo?3ZTg#{fgP<3~{jDwwzb~lvP(lkNNkOm{D6t2W1Wrm~K=e zTp7qlfVdxIU9;aMU|G;R{#hG`M;)yXhFZzZFl^1>Z)~`PN-G5~tppacI|#e1zKTvv zC1zP>6`^xqc|fC9of24HvOk!lk()FiQ0496p|7uRGuw0tAgHOS$($KS2AVD)bxWYG zFscF1QmDX&$b=Z2Ye8X&8wZsagkfN6Ma9JsQIaqjVr)D;K7Ixb2c$}a29L5ilo#ga z1k}QJn3;#cv4>9oCdIC!qXSoc(gD0Snr~sVe*Yqd^0mz0oL!5mvt{(x57fM6wsWylg z5Var|6oiNo$#N#=v#KVcHhT4^eUbs-6fykXUcIU7i}g`h-ksmErN-ze(s1jot!Cix zLca#WRf%iniF#-rm1L8KU(_fpqQgE?GZZh5Y{l?T1!Wg0sUiUaSz!qH6 zL`O#ljYWXq)L5+=B)Eh9{f&PmrgIbXKz)G&g075@fLJFr!Y^`jwgoNNirK43=NRe* zw{aaPh2S{AbP6=3;tBs6psT++I}567io+cpdEB`!>YoeCl5k1V`uNPXq7jX&zrER@ zc-LHAT^)(%?d#kDUxQw(EaC|*+;ngYWL$`2N(3#4N%8S3b91J4c3j}ooj-eNVFB}3 zk%s$CX3&b^HY%&dgoS~A?s{A|>kor)(Mi+&6NaJg43`1JnVoH(Pm6f}9=@n^-l4{5 
zJL^Gc=g<9pCo4}J5Qf!_h48aKeFEv?$xQ0^N^7%C{kjKtm}JLnc6S3fpR^2p*uEPP z5TYpzdTQ6OIC=Oon8-brP=7ifX#zRrI={!p29lW#_(kB90D}jPF~so?$bC=v|L+C51mM@9aP9o4AyQbt zUUR;#->z$YuQ_Ihs_jic001K-IVHva{IC$n&rYR?27-{djEm@i(7^A6-39cw zHu+S+YhS$t*8YN{2vve9%boj@D|Xx^rEMNoVb4R{3fHm@CV~K!Tb&$V?k|UbrMya) z@=tH*Bb?jw0P8ry9xV;5rBAPgGkSZwyE)m|ikz<-8pHvMW~>*eXbf(d_|0S9xZnP| zY`to&C2w*zHz(NMva7e(K0E07plulxh({(b`$emPo8kP;MOMrY9{`2Ps6T4P1qd=t zT#w>PFxlT%;NHFna@Y0Is7c^z4JVji(b0&({6VBBlU;zt%j*()N((SU7D|`(`wSaV z5T)$XZ=FANb8~Z1JoU3435e3by^R+@R73|T92+Ev&k|dNh#Dh1nAsvSDS*-f^f*Rr zb#+yT(b^A&1&9-VqXz(6*q5G~YQRoP0z83q_sBkebYVTE*T)K32Tlq80l+b!s2TSy z2f$YEJO=&=>IlwuQhIum!@Mk5h9Ja%P%t{m=naq?Fr^gfJuWVF2b&^TIsjda8P=~} z>94L0Dr`(tvL^-AK#2pN=3>`TPEQ(PGj;b};~{bO18tJEzd?5^5A8WPf@PgAP91OE zN=TRRP8W4$6Be#9Y4+3I`3SBS3=BMVSOqo*cJ|rX8Gvm-@p0R)1bpD)p!NgczDm-C zSOUX%DAdo-58B(SC1*(OoYK-u{x9kv>X<^~`7TjOhMkqAIVwazkNso5WvOd!5shJd z;96>quHw5EXboUEfYGL!Ci>BaS4ik&celX)8vJO@t!|jVw6U}_c=U+*VCRV%Jw3=3 zcv+P3KS4VB`!}u-3%D3u7kknmMv7|F@WMboDL0C>`M zQ(F5PsEZI_e;6JrI)r-a3(^&E48Th?AZEc1fW@JvstRBo3|w=2dqM=@Q79~oh$)bV z08QHndT&qFf{ftc=veD{ke84!3VB}mnHQh|%mx8hq&n;dh6u0^1Ht{Y!NJX~3jELg zeH=nUlX7!x1k|m=jp5&YeGc$F5KoZ2xMWRWUUAl^2+9`t5z(Hpv@{)3abxdYzDm9Fy7A2M% z)$W6Oi1-RpKKxIB=%Ou@5QMnW>5n%NBqSsS=@|$IKq=6U5yG%fQ03#fjByYvD=RP{ z0R)1|^CJ+v*!Cy@t}wspez>Lu1^|>tQN;yeQPJ2S4^L0q@$%)3;bJgSK(!Gd7lUc78ZH=`Ky2b0ze!1q)6zb3aVynEjd7MsLA*Zgq+x* zK?{AabLBh=rKQhe;|T7GoA1jaZy?IAxhYFZTH~C*ohy0cWkm!v_Q;4Uoy=DwP6O=| zO=+-jj)2pDgp2-K-0LKh1q%uW0RRAl(_l05k-d|wObo;RaQo7 zDp3Lg0$@>sU(;C(As4p=#Ty`e(T*#&6*CSQ85tTHT5Rfl;nGHD7nj2i_jG`kPgFxN zz}OKa;3un;z`*YSvmna*a#e1X=K^XBJir3ymHo+@NBM+$dT@$R>tqU2vk$IyHXFKo zT6Jy}&cqc$gqWzl-to=JY3bW-cGuB-HDrehLt_+pKcU}rb&=L8h(hvWq{qUvj>B9F z$b`K}If4cP5=Nn{?e%nYe)smWM%5bG$;hA#=bpnxgK&t5{+}?78u8Vd)Mm{1Ww=9Y z0Z9G?cz8J&69vV^y(vOpekKQktqbb;5^rc5u?=^6T-Fcz`G4z6)X@C6nI(n zj=_TY9Bq&tWmQ+7Kqezv+S`{wf6iH=7WdG6@}wTXkU1T6eaQ?H$b7IlU=$lI09XxZ z&VCk@Cd$fT6%_|C=?hbK^0qD@svaFNyC!!mLP5hrq^6~D8`Vf#7~R0Y01aF7U$fWI 
zum1kMQ2w{knRi}|5}cnPwK#^>0TM3bz@PY`M8?n!EMU|Eh7m5ILxBNKD;gMp2BZv=VkL@ZAPT~iBY^Le#6&qo#V3Y_v8FH} z4Xz=~I;b7|6#7)eHr7BPXp;$#o1Lx0%0m=Jw(Iw>akM+R1CZvKPg7Ks2Gr@r6ji`# zT=(xEZjM)|C2xfyC;5K;8lgo$M}rc#w6(=4?53+3K{f-{qMMtb!7bUUT7~0VWMt3^ zu%8;hteuVE6CQmJk5iEPf#ggLnIT6Tpc?XVR4JeSd*C4)93n8hcO6l(e)ogF(i$t$ zdG73tKGG)oZ>*LCO0x-&?~tS4)=+Tq|2(+GA$ZfkVBDfO5Rq9@VzhY05Z+!y3_hb3 zfcYne6)@-#9_Kf&u2Qy%zB4KWz4#FWvXteg(MmiltUJ~W?~;ib75Zpo zZlj`RDn^O2_TFLAdiT!s!sGNWcbzj9VLI`<`+_OP|7Og5EqYSNJMUdcz5l(ey(kMz zd?@D&vy+XD>4_a+O*tuU?%RSTFM1OSP;j$&fZXCy395o}|L2bls6-%vvQ7e8eDeI4 zkCoL145?T^%YY#)&;2EkZAA&!U=g*eWx?%IjnFiPB7cPbFj&%;Qa~P)zRYa}PfoGz zc*|v#2x^<`!O5(DeSLitwa7l0Xn+ckeFq9@S3bB=DapxfH86Wo1jH1EwSdpIT2vSt z8(Y)yKpO|W3dV43A`fTnftmq|&4uG61!p<=Z5L+_7s5W*-)9)`cZhKmVQpdH4l1{z zz=R+VPeFgNswS8r1?s$@fI*e7OtC{srGWg0hI^oU!DWjI3jv;7o$f7yvI{^6P%PL} zaKK@z&Xqm1&7jJLBI$q@DkC*@YI5=fbT@Qd@@Vz@Fg&lWruH#576%`Ht1qh!LJx+L z(Gb9r$IG5$Qp}T3Q>34O9In(y-<5y|7ur&wsv$o@WeS@3s#x z4-iC!NVc=OI>C7B+!InP2&&ennU#$`X9Hj_0xPne z_@=C(@x>XV7>x6JW-i5PkMVJ6Nk4w{0jD@mlT>pLWMM@1(k20nPa$b(G;v*EK(4@k zp_qathTyRr$blA`oGc1q2l!J57iT21xLAekE2Y&RvoCw}Q5{d}Tr7=@sPB2hA%XmJ z%;9mPC!jwUy#xln)C!YTFxY+{W-Jk7z#;)XSxfPf8_bglT0O+d zidU!Zby|nn{W{n~g9;0I1%=0YdT7$x+Oei|Tj23-+Cy99ZU+|dHLYK zS64cabC&$~z~&^*ZPDidsBWySVbMB(*9E{04h!+0@h~tfB$)W2AfR<^II+id-M@c7 zr{@3;`~Ypb+Y(NMI)L@Eo2fPVN9>7OQGnOHO}b z^c#*`!9-$CUY?-mzByl1Ivn_Tk&d_n3=9;}5;TrT$O|oPt39yWl9kzGUPeF%6Ib5Bj*+u8QIzELB6E> zJ#sU6+~waO>smh=2?@C1Sc?DX$6TJcC|L~J(h_12zM+2rLmhRWuz>Y)2EiznX3-tQ z1(LoNpI=JH{@gLD#}V|oK|_-k@=PQ)`R%(v0qa*Kfmr=^|J({bzhPEc>*+t-9Vk%Z z=i|%HQ+=BZ8{bcpbx$o(+8s4`?Qnx^a|XXRBZw&#W03-mGG_VT_S{(X@I6!iNXa(LWGu{5#1=LmWL?t};a{#C4 z=6Az%2C$t=&{JVt_2}e8%6&UET8D8+?rB57-_h0|!2nhJ^Q`q?TtUt$W+!i-CWMEc zejdCqIEIH=xeO=`N%yMlj3LGZZN`wgAe1RIpjDWD*O%@P3UD5P!>9(ZQ_y$9NKGC6 zN}Q2W(rN(HI~KcBMFwYQ0sF;IoE~pIfrCYS)e7qBJ%9de0s=Po z1{mHc378DZZkodb7<>mz7Wulbnd0{C=n)jDwfP51>TkS6XXt}}JbIq50p}ooPfQ?> zn}LXxDY&w_T1_tO|5!__tGkksNoJsu!vLKXI!2-i-`vJ6twq_%UEFZ9&pz8%TyMReBJT!!m 
zh6}Oj!jzTOQSU=Ru$%-1k%uHfPl9o$L~s>%Le>~CLUu8yYfwy@l+-c^Tm{H8(aFKm zgAdJwd@U!j=nf=fm)6=7t8?g+UrT#IdBmRq6b$q^@^9?W5;!;jaPI|tl z!&`qnbedGUzd@S?KG82B0QF!mtH9C0gao?#uZW^L#9%ZN5xxBfCb$qhaAFWj4DcyX zWMSgO9}0`wKlwX{aoba`e?7b{7LN?HJL1&ezqj;8ZBrkfZ8!Tv2PkA-)8`n0=?>`f z;?U6rZ6{KN9RpiU5iJn284})5S7DSEP)wb}14btE<6|OX`gCi1#-U7Z6Vsn~P!x3^ ztw+kz&6h}Ofpy2qs`s2R;rH^vnQN6(-o@AkX~DYEwuEed&G<{8*P~zsgoou#<@n{_t)mGrMB9%jHbvWgPo(^Q(*_IXQ1szEMX(%h zIx$07nHidg*E5rQOd_#ytt0WRMZy8c;Xp~XIx9H6iQ&b6f8LWiL#M<_ey#~P@OHD{8t4SoMh!!N{{vjUG zixqQY_xO!`xb&ls$aN0=@6-nYO`xFdTI7K290fgz`^S&F`^+~mV4N(=g6}24dur^q zH{ee!x5&Qzk(Nr$#481J)|(E$k#AM`1gFyBSmWFuid4yKUwB9;Orde4 z)7XZ0VEY#S4zrI{hQk`rYlVgog?-6Yk&zF9|0@u>_NvA|1D=;0!X=MZJmA~?L~-Ph zpFr)xv)f|izkWfNc?PH6pILQN?(U(G&!8(V^!~<8P21a^i)u3=RiXxk>FkWh@4rQI zdfEH^Kkay0#E_38SZQi?EknKwwbws-LU3E= zTM~JZv17UC!DC6nVSoyuq-Z%04-uAzPi4BOU?cec$IME1%W7M;Jjl>zaQeP%aTRk! z{;SOY%>p!f55l74wJ(1!)B(4B289R}d3jSZITGoII13AF1oT|H_Spa50ezBN)CA(5 zvhG++Z^n#{2cD>8yp+fn>%1;5$q{=!2It9tv}Zl|`T}>FKDwrht!v}f6nFjo`f!U| zbrlGNxp}~Al$$sE(jGLJ!n$OYlL*?s87_t+^gn(TZ|X(*iyd$Eos7MGjx%uBJG8iG z`s=3tS1_K}BN?7Le8nTdML{vO#+C2_6d4w!W3SQOEP1^{>`y2}@f2ijcWMN}#02_# zuF853z1vr!?D#Jwcs{=K=dKWFW#1PMwl(VQi|ih{M_(&GCIdAhCuc2^;lDTb_7a{S zqqDOk-^e#(6znw|pAZy^3CHnhMEhvusbL#AjkJgX7MyIuN&2uLA(I&;73J_AgF7$! 
z8NOCyy1PTZ-=pRTGOU@dHyh(wn}Sd%Xcs$MSqk`s=Je2cjX+z+LwKqV-Omq*q=|>Z zTSH+%s)ojOg4c$ETp9J}BO84}S);5^tNY8c#MIS({&YBE`(_7Ox(8WW=^*S*bea0h zZ7&Bm4u>5#qu1y2@Wb$JVb`^>nC%-21jxhtOnT4!vZu|&Y~{B)yJE5p(6#s>@QGqI z=Xct?>DC*IPKtus2p{t2u1D$&rU=dLXt-l_1;Cf9Qcwu{(q85?>^E4`j z=YG9?2fwR3go1x=jCZqS+6T_GfY$_bE+=OPFF~FGoWXM7Pj-k@C%d}9xVM^ZlBg`J zt2>8r8gbg*nVF|>c%ic}hDqgNV6h&(2y*>Huze`joJUueYT_;r|MjQkKJ}4Q%^*lm z@2lpX)?!kAjyn(17hEL2xHu&>wInkW4w}Qc1p)-j3Ho21<$wCL2qK)1uP@B7;2;1o zgVG%bXFbdGVFmy^6TLR?CMA715!N1#EDNyX1xva;I4NWaq$gtsj+ifE>_*mi2s5PG zk_B8=qj@44?^9A!!w3xxG+3BGV`X(e-8DnpWn%-f`vN2yFe$-zZ8q;BDcA1;y#{nn zIGJ(*<^uRm=VsP-m^kcbTBhsgmgQmjo|)f#d5FEC(_QK(e3|uDPX^&}MXjEG!!jo2 z7ySwxbjfY@eF-P_0F(gqN8DjxkcHtDu!9kOU>(D317HDEq(&(rE`A#i1EHEKya#`t zL5-M3nZe&50APw*mI556+HfrB@Ce^y{h3Y_^3nBru8-4%9cPu&zQatq1_oV=dpXHA z%2S&*Z;~lZ9#@4Ca6e{aXGj0rw}<3vjJU$vV-phc#jsHK z2P1DOHI1$g^fEtU{%Q47R-7fM6;{+TTQ)HP)A!%^qrawR{Wn8z3Bh@J8JSMKT7Pp| z;Kqzn*~iDnAoamtoj?fyIO3a|{lR=A^+Q)8kyC&s&S4fC#$<(IHXZmZ2)NoB8at2)$T7-EfRpR1tL|{n7^eF?eW`}*Uc3PH zrZhA24sFyeT3S16>q(~Zm?iysavz_}uI;|e_{g@!lG?@<+3z7@I@^hs$^A6>Wi!o~ zbaXZDguCZLjgT0i$-xWccuh_HvS;#JTX0HG|2oF+ZuUO={&Y`GSJ_g! 
zm*k@uYu+^SqT<#zG(j6U%2xMDNqke_og7sL;mpieGqn59@b1S-JDM!iVY;~wIXCcK zJ;W^vM1TeS%@&vcB_NXd4N|3{GznZ)miN@k$gh9q{LGzTTqN({c3qH9#^@LN`!`1u zh{*J@Vz8j~wPB1k(a5C4t!O)YVl%tEm*Bm2 zoGI25HRoVDz?67D3<@4OT#4z0MPQX+Po|=r<)ifv;X2Un(z|`~N(89^R*%KlF!+CR3j~;>e3cW{CsZW3{AqQ=q4l zkdbn*^wG?}>`udFRA||q2aI$CJxcCRx&)8I9eqPsH6TYiTAEsYRjyC!g0N(iuzvjf zez1ysW1Dxd(ubR5uQTXMj8)tT zG2>UNW+w0GP$h@Mi)Gca-!qN47S#4_PROY$B4 zm(hQX$44#aPR=9)pe$zZLgY^p+JAq$;l8Q!_*1>c^p_G*$;2-*83?X1Km}1Jpn6w@ zhg?8BR#q63-X?mBk)a}Wg1fEvhl@OgP2UKw?Tw5RA1w1CfGe#2O__Ne^&T93S=3eYqB7K|;xFFwJvA_>ahk=Dz95CYvr z$$DwV$!hL6^S?U-X8l7yDZ+uG?YjlYKjT!Fn**K`0Z+o++U44yDEt|U%DC-2vOoWm zea*4QCI)EGeF-^&-`Yt^^7u;@6asJ(h)YbpUH`!Ccx$toa7NvooPM$#-9!9zk+9A= zb0_!&i@218bbdu`tQx3gK~k!hx7F3r*}v422!1L>c=zvKOn-lmJ=8=Gzy1Mu%x{Jj z806d-%*^m{mX2j-y;(z=bC$#84go3}s=7t2j5<*!mWAeJa`=-}*&0UFa`LVHpJ0>K zK7TEDfH`_<+STC_y^Z}1mkvudee@@H#X}Vj4JvUSm$dY|#?JNRi@}ul7jL6mi)?QV zx!_q?>}O?dI4+KV$xQ02@;2P~n1ucKamyk+H3U2>BT-gNNxzALF)80)Ea~KdhVV^s%(~7YSD^SUjpu7suxsIC`KXgP6+)K%OUK}Q$% zCO51G)=EOc5nMo#CW#M!n#4zVS|ob6HW3EHYx>f%lkjeX49BE<8Y*JNh5Tz+geih3 zzyWVE-EZG~R>yefumbr6BqXR-Ry^k5b4ELSt=+t~-f3z}t~-$mXdb`w?v6JhI+>BN)bvAUzjw)Wn|m$e{u4U4|3UqI;5Jd!DW^aGvkP7P5Evx;E3B5K#! 
zrQ<6(^cPI2(LPVXfD=wl^^sMYNj)&;4|>^yliu!1zr{+AlECF?d?N;dBRjxlWxdx% zs9zoVSM;ridBRIVDV~N@y{7B7OQX>N^`c^4M}~LrYCZ=kA<*e9ZwUw_N>Durbuz{z z4jM}xqepr0#(+Rfa4#|M9n0wQx42vtGVhbTJ(BV;(cjT-{kgdDT+4M>3>cxbwC5`W zLiN6G{$qb1e73ff`hR_$1yq)M7v&LAUm64iq(P-56cK4@K`9ZCE&=HdNokN)x;q5v z?gr`ZknR%cn#1?a%$hZ8&A9IJ-YdfMKJ|}t_Wo@uDv8xT#_9S#8#3YNbO=#oW5`+mmp&~HtKGbjIk;-Kc|`3&R4DbJr$8|Uch z)AU!59c)W3(*CRunOl3x_(l=UHn?GoE#rtMvk6jb#lECsM33dLQoCosxVtkuJF}cM zWG0DeyVDX%v$yx!hCMy$DR9cZe~@(xB*A^qWCXpfu_^PPv8Bs)*aRRGE62+mg5Qxfzu{R$#~*Q7x*Yt;U&Z)iD9(EvLa# zVtxJGW992F^?j9jA`21{1%?Dt@lB(@PEK_4a;QjeE&}o9=zMXXrfw^o+#MosC5l3q ztIAWVMDGqEv;?j@j*hGt2|UU8VMf-XZyqg9b3)e-PD$5PZ@MPTlHNh8Bsqg>thB^u z{3FOc0Ps|W)x?elKi1DrRq?g5an^D_ql;oiUdrXgKfEylob~=`${G-2K*<=o^;O7Zy^ovr&^^hXV^>kmTr=g9f+Y9Dai?&9{1U>Uf3RuQDAr zxJxrdokiX|6w0)@{!zqQ6C>X@Egz*<5LvBiZ+y7;iylOgiT^0j$U;Aw-Ds4Eh$w)B z1M4-c+d-4IrUGI%XssZ@vcB48v&|G}I9)wGCL5nY7XUmf=rH0SfUF<^S`D~KB(X)F zc*t&F`&Q9W+!dx3_kZIB_V@pkjE(w+R!I272Nd`>u#`fI)1!l_p+3pPIoGSP+jS3y z5e$^E`D3RCo&JP5k_oDWc+I=R*^>3);Dmu%H2nO9oS>2$j*R~ieBnpR0=Lwu@&ixk zLh)Fy^@CJyTmu0cXYyd$GgDAFINFYzSAW(?1xYpNr7tcnKw=E^5Ae|e{3Sd*n}tz- zoN~Co|8q~g>lN|aNW+K3e${lGkCQ2<@rw0FO63J8?tGcB_dD(w6jdWz?Q3ooxg%q+ zm<*~_Iz+k{>8J!$e9iz5bY`0RowN4o` zB?|ZArrip*X)yTsz6^TE&H%>&m(OdETEmkDJLOxjjeSN(_l6GtIR+jP(JPAuVbl*& zY2DulK9*rz2P9Gr8hrizHETk)D8}7xSGxuA;$$%{M9*M{cpiMJP)3J#^Ee9-?5;t! 
zX1hM93+fYS8Q&No;039#uaAj&_!Zifm!PHws|DN%P*#BfCOxf__Wi;rQ%n0|Nq`;S zqxU&Q_Zb!btjQXWBfX8@J`;MBUMUa-1}s*VvCq(D)HmynVec9n<9PP05c(egJ=|)8 z9t`N+pz!~i4xfvgyS_O2yR|+4#n*7g;axtVU)QWvmIFo*t08JZ{6i3<`D+DY?~5`TToC zVd%^Dg&?N73j6b@ZM<#W%I30oG8pfasA63izS~-^VANg3Y*KeHE}$<&`Y>T1qKDVw z*Eo|%Ng+_@tFWl8)&fFGB4m7_nJ3O+pp$QwSVI@R$QVqRq@~@O z?SX?_^{2}Y0*1vx&Fx)RCkS&82pLdjE7BOP@-cpSd9c+zbj`J`$4o=^N#bpxNwbeTA9eDh}`{s{Ju5PzQG?dyN>!@U;lF2L;9*vKIDv$_kdnom% zQ_kOw$GzXgo{Y2Fw^%-hLlHbt92$o~Muzm~AhycKSQV?Mw=G4){shO(jfgL9)6ka` z4lrn(o^EdG{<^`2?0H%Wvg1)h<1M36&B+&OnL39XXZTJdr6oq>=e(2kuYc1s3e~%; zjbU3@gt>d_z-$|1a-jEb7yI$!yv-2Q%1UUMOqJ{OQ=Xi2Og^A1qSfpBX_Vv6hh|v5 zG3gfZU~tIb^bs`y9YXv7pFnHsrLc&Fnpis_i9eHy`4vxwl!*0<+}znB7U>2RoEl0l z?z8_6K-=;uX&)469^VNHI@EZNcAs7#QdIolV%If_hxp}7)v*CBR4)W-V@D_~8D71j zwK(nE5Hw+?QZ3t@tAC%kzPu$)TaiA9Mfq z{IKRH*t!#g<4EIp(G?YMzTE(x^I6?hsqO?!A)&m@HH0vx8y;;_BWy}wr5#WIu`&N- zeMS07eGCu1(;%O2S+K#-pLM?|A_)QNZFrGuy1qY_QCzJ;WGku%rHZKx;~>7${voSz zH9ssJ*?F)t&(^eUy*-&cXgk!xlkNG23{CJab3C_|nwz!JI8KEbg*S#lZ~O`JmD~9} zmx?Qo;=%*3KIVUH8?@`srv!8#GpjbFiNhN24f;U3te`UEle;c1=GMX%&vs_PXvHSC z_jy`Y#dXTcZ?#vFlkl&BuW{O+R)3)Gxn9CD&$EOy9`}33FGXs_DLl8)y2)YmUXB7@xwDW|60XG&Xa`yh>QS^ z{Ule_@A2FIY&? 
zfAUGoEWg+09#TzI6Y}wWEb-0Iq9r4T?Mio%?#DvGJX#Sw66CRbF)6H-Znt{=DqEAQ z+Ge(r|~1e-ey}@&;E$*lsINmUiU6sW!E(cD9ql>12S%f-9WesKB9=$M(mUBaCW0 zoog2i5^tS$*3UAGLCMJ}`#m9MPFqf4_@6euX}v+zUEEh!FLH{0-GJCxoy7)7Mu#s# z5|e#Ld&_j5uU)ryU%Y*U*hZeyNvs1CO|X&{=QSlxYeLihB3fl-)XTuBlu4Cl?*h3W?r*{yZc>{hEb?@cP&lHbOIA7Pc!C z{CUi@mlWgP4H_;2>f{fYTYBUBhm8krkuURuPgCC7_Fpop&p2h+V+@ZZN~dEozEc!z z@{)i0G_^5k_3?b%HtcH)nLH_!VVquGEU7_Sdn1a6Q;Q4GG3@Tq(z=C?ZvO@3whtb# zvUz;#?5ducd`{*x;Gsv)*nD+)i0P)`JTmD08VS`4b<;GjI?Qr?vr&WM>ZHKCC|B*- zJ1LOVXXOZeUh86C=4QS{^M>x7X8srN4DXH4YceOexM+gWdoy;v{z491gM&$VfEoIz z+yvrYW|m_M>lVCd<>{nwI5p>URww(-hsZ;3hsI`9?VikcUmUN{GQS&sKlEKVI?nMQ zLM}tImz2(Se1}g^^dxN$mBd9cL2W=`Ot0CjeVoi8{$E92&>J}1dfO@t)$8krv|osc z5651jqv23oTRJD+x(!EgJ{cgLkc6R*2yX~ZYBV8=w+kja(h61st?#*_lsSvcj}w*S zIunk(c+oe1&W?1ob+z~E4`oT}DVG$zFd)5?Y)waJ|Ap6TydVD^fxw>Hcd!RCgGu~g3u=s@g-Kauj<1Li*^pp8}7n>SHtY0_Z zE9VXDo!boOuPFaIwesm8K@$w(`Qpx`vhjn32{WFnwkCno>7;AsrCHlbPaMI-Kq01E z?90Y!QVrLcZ*WIW!%qdUJsx1GX8lY%7xAMF_QQ$ibHvZ z*#?!-)v$Iq`8WG{p!81hH4ZYGpdZkH>c7%grTuo{wBPFz?;0MN-wMFy&&B4CUJs#t z{9dk5U)ovd&;H^s73xcoL;t|?mpgtFY^A#&lpYdZ!*ugh+4CMY4!$QYO-0^{+V~!N zAEG|SR$g8_9J3yCmgH%*a@~2a=L$TaxZv{Mb;EZ z!g6okz;INfucX*R<|BotS`{U&&!*tAv|90!W zzz#~ZrU)5XH-Gde@4kB7$<6)jN1fdO55yVc)2Dlh*ClyWGHG*pE--nBcKa3#mE!3H zzV~f!j?~cZ#DCN0^e1KSocq+Q?rWd--2L*$MGm4;Y4`5TSF>XL{j2#21u4;~$%Z}6 zEes#3knZk(mc4$^K9j+CJHr=q{YFixcDp^)h1Vg;2S08{FH&FZlc;Bg_f`A_hjhb1 z!QNiczVcF4hrpcM=AbWWGNn_bhkBr%`Tx=a+{1nNO~?nTRWJGrs9+~G%8r7ZMKDTU z;hL0@r#Ii=we^5ls&PU4soFP}OQ`9#hx<0|zVA$rsd|XU-^7X5WOsUL=XA|GCW3>c zZG=Ff)I_x+$~kF(UnNnX@`rTxgh*5zEkoRCjsX?@jOgLk#fw6BMO+M zq#JtkFWYscTvM-}SAL{f|44_J+gO+5ar7QYh2`{4cm&*~Gw+7$bAI6GR~fwRKuGg) zmQovgsngchE*=FHd_TjzZSo^NkfmPO^7+*T*jpq@6X-6Q*qYFD(0r7zQuk;E{oMC{ z9qqKPCJIHnNKE6hqnw>U^BTI75guL{-Y#~h-0Zrg@C-X`JGIKETHi-2v}1{2X?MT# z$~HHCG_w0wM{Tr)9RAHcEP8t;dxC2qO0h4QlhZukT;UQA>v)1G9Q%VO8Pe^kV)+40 z#^G=2BNfzC4pUcyb&T76(_b4XD5J5kd^VULW%9H8_B*G$I@HBOZGe+CDw4@w6>?93joV_?BBolj`PhBHRj2Im~KcrpJtT4*ytKs zY;mLkaw5&Q&T;-?LO}i`Qh0K 
z{YkwLGs@2obRs`ZY`l&>p5|k9VWAa>v;3Ejyi>yux@d~2C%nk+``=gV zQLQzuDGQigFkJbWGCpvX@;Hq+hQt#btwKW9t39KOY-M+lrF6!1ffcybB<1~ zpPQ&`M;=8VZxOHb2&t3zCYTIyxx=+cD9sK8czEd;|G^{E! zF4Hsgkx}&7p4uOh24zlHL^44jHo?U=ZY8^i-ifi5Q!MKB7^S2Pw63B!O@?wf5fKr# zhUWVP;N1b)e9pqby zeS`=UZT=!{!=IgbYMo-iXnZGMT{pt|XUUNm7+Y;F(r-s1<3N#2hAL0T(!X!@wv353 zQc`B7=vZEbL%6;H{nCEJBKu}kcxkxogskFF{;Z1&cOC0&>$q;n?<9HCoaydq;raIb zm4imYp&uc*|Ff)S64e*)`&Jj7{{>u-f68LL9HsOK=xPHfPecO%*d#814k(Q_-4anPjG=2@BBiwvazCITNKY53>N+rPJ6e$ zf;0kvgx@m9k6@=)Q@hM|r!}Cj9RwT|xIE+!o;K_+nwTL_WMt&`Hh*axx9!A_xQ^=9 zcbV{O-h2N6MQ!G6%8cTVvXaUd?t>$v`ErL7Lo|ND+2Lu*Zs5ZC`-4lL;ptP5r~;ZA zs0tL%KlxTxo)+nLL4mj2VCadyzWz%Dl+wVlV+~v)a3S=>a_o*53VoLrn1+VvL7yt& zKqCRZ<{aP2d2=TkyiKjh*(v zCUt1kW*@G!Ra$cStVNkttR}1L)VCg`DEe`1I^}!_qg-k;zOI`v=*V&D7<<|tQDQi_Mtd(Soi)MD6;6k;V7TXySpR=UfcS}@ncHu_+N z>KU$sgEo(?ya_=w>+#AA+t@+{jXd{TN%@ZzI3gmi?68=x-v!Ibs}RKEbuq7t&5JEb zSUmgwgFsDnFK>s1NysYjWy30z<8cUJGfVa@i%Le|*7a3+#Y5yqZweww0!bvbk*YjL z*Z7ZEQ?jb{K{R4v*B5dY%MusobAM2Cn%T5xG59&qIXH=#1((FWaKgSM+AsI%CtWI< z)GJX3t`G8BhCj(kF-S-(_^9t_-FCh5bGg(s_zed`nZl_sA#pgouOmeHb)#w{et?@} zdUEf_sx(`izVC7rS+Cpeg2J35?8%CXsxGWuX zw7yc9B?r_qKM*n{X=k;mQe4y zTF-sf&T^GK2@Ax1$@S9ofhqdkr$l-$->IloXIprBzVVw>#>anQsU7*lW6OALZCH#f z);y(z?PbL9L#z7f^>-AD$A>|tv)$VwKL0Ap>6YS@GcoH zLFPxZX0yXP4^0=A#2g->`s68%mZ&NoP@zMv{EYvDqFiQmU}R*^Bk9ffs)tp(d}vhP zkCi9=Oh0`}dK(!j3s;mP@r)!Qf~+o;_YM`9Nk}k;j@Z9`LYu-NB0^|rs8*ra8p=G= zB6>p2rX4atsVkz3bQyT=zyD_tc6j=I3n$CNV&TcVeOR^Oc%$;QIgx>5$BVRq1(V5R z;&C+Z5C(M%3vG_sY3Txz+u^p{j^}4n4<5)w%v6NT=#NrsIC~AGJ`jlT!XZ-wp_zUwH5OY^YO0iZ(Kh66M=B^V8=? 
zTTsf*TV9@TxRqY(NWf*+#xbY)_mgjRtf;-{$=`zIJMwpOeDV{<9aG%@m4y2X!OADW zfBs6J#*pdy{ZN+Gq3!i-t5M1J-LAYBls<31(QT`9H@}rYPP~?2A&{L^`gl{{+l+XQ z@A+B=r!Cm$rg~*AOeg%22-===t~$eX^GDk}*x1kV`EE^j)|CF;=p0@cPTzT}f0US= z*54S5e}x;F;LN~9t-||ncd7mqxh|*h{4cTk^}~u`<*69hIBY?^*VdM%7VK^^at98R zg_3r#`cZ)%&9mC?_n<-&n0Kv<>!Y2mgx+U8`8o~?yLQi7RSnC z1uPX#*h~`pHpG34!wa1kqY>JTR9ivK`GwCDA$9bheTs(hBE27};T4jVqoe(ok@ETV zHFLG{cOmDL0pA4W2_D9Z%Io<_-O5*O95008ue$nu=Z0P0x?Q7jMUlc~qpY`q)8D2^ z);X?yv1+5q)w>x#bdruLwwuIH#hosm=j3z^-%+QQ#k?0)uiG(Wbh|s=a4%rbH^4Vw zFo0P%Z$x^g{V;~@IQ43`mR4swR=9gy0MSFNIXmw?{rk}=R?~(dw1LTa{#0S18rh=( z081)C*i{@;(O&tVbx$cf?AWi&maZTD!?`anBX3T-DJ3l0(%RDBl9ERL@HRim*8g16 zryx5^getyp?TDw_{-k;HxpyfsFukg|@8M0A#%>v-+|e3iUfa;_CEc33XCbY?_+34w z4i@ulsQN4Q9Ilal%uMV|sJtQL|6Rfg$qB(fcxd+}i4cXe6q!MC&}v zyNM=LJru3ch#q(c^S7-Jt@V6Wmf+*ZN+f(|7o4#7hichSa~$V*y~dz4>&BPisAip& zif(b$sBuZ5ZlT@QoT9S7wkzBH!~JjMm*rhyT zyzD)_A+^+*=RO$O^(IE>OiV~wc`LATW*G2G2!M+pMw}myoWP4tn|bu*6Cqw?SS$T| zZh?35CL_PPRbV-X8;!f=Q09SIdspGtXx-woW@(cq{#spwmv@u~*8&(YV|Qk^vb0T@ zSjbl1q%N@|?jTQBxZM<2QhUbb8pybl@`E|8ARmBB-1hhALd=4R>{Qz!>FTF7l-J?{ z;*;+Sq&^-~?%r~okSmEgr_Ffltyifrp6~JqH&s+?GLwimW>Zj{q~_&D-T<-u!(IwK zo!eGcRZ-#(Y4Zi_uXtXXY%~ba;r!VciOl>;MYT4iI2a7wiz!Qef7h^Uv0A?8+tg}! 
zVH_Csu-bRnIQsXWQo`C^g&c{?FxM$R0)>bJi~q_uS!jKzfX2XZJ#lNRtvzZefFX?l zPUbf9UMnH2^?`%}H(!g3Rx!F~3R`+Ny%;^{uNBo!$z;NC7K(I ztef6(YGAmPuvKC--M+_a@O?>i*iY&GePrYUSN`svXZl#~9#lKTg>wrz{u;~jE0?^7 zW?~*k9W$bZn-ij+gbNh;Y+hK`7}q@Ed^fpVoD(ZrKKudlM2=DJ--kwt>m|Ba@L=mR zW_I@O3nn?=-+MCZgH#+zloDr6Ze4h(3?OcWHjwb-V$t!!`K@wX{?&|3|AzGt+0Y_!U#qKHxQr(jydg|j6< zcpg8lsIpWO8-hiY83h*U>qNdNwtI{1_6hHPwiNjFkhIMo%^zq=#5)72b;g0D^wUI*1U|Cx!+xgMVwP_|2346VNz zSi7U)F(4v`GzPdk!4%DX*tocBv$bU83LS{uPcxk7|BUyB{-C*d1^?=ERlOd<3TABb z2E)Y3NJ-95iJzt`rs^-%NoqNitxGlSpePq41fnv#9nGaTaXd<{VoXdD0{e8#rfDjA za<_Ls^P*5YF0ml;HQK!~RSEZhW=a>j@*nMr&DFu_QMiorkzK$RtX#~DKhnN8&EGFS zeiy3gXC#M&XkJbh`C=y-aA9pj&RduKOWl?2(Ct`A@+m|wG}BEUPpg6w&t{C=@#iD~F zHky(>)Dh8sz3~Qr$f%fTTm?FkV;*{X0=~SZU1#zIC@H~K82sdu=dtCk7Y`=q)rq~X z$?_U`YGf4)*@$!45_xF2VzeF2=En~E3wC#)Pk>AIH^2y*f$E`pU6R`&ESRtSS#Iwv3ma4*1~x(kF>|81s+X z06qcSyqUV{IShWJ{ry)iq>YUoo$kTdunmr8+h*kr3S(dpl){3E?T1Hh?wKCXQP*Ef zc{#bZCgiPp=pSeaUYcw~i~AD&mc;pc3f;j z%d26k9d#9!>8#dr^r?2K3S#>xjuo2_x$LTTJFiN8{`{wWj@7=P=EW5|sm^S{Iy2+u zGRDBxop<-{1&}IT>z12BDqWUDbE}i!r@vydMwqH+UW!n?B3q5egv+#cTXc}X%6_;OXJvp7N_QZJ|S_eu0vdkrq+)N?u5*yDvR0<53;ul||!X`)P1$Q3^Q$kL5F4Vl7_n)_wxG z1!k!A-YCeQKZC>ugOqNB_i&={o|!1wIY0L^>t?T=aljyRck^s}hCB%Mp!`IHa(AHx z^%+G3?;V~m_jl`q4>$L(&KG!TDiR$N$xb8HWBA%^1A^m&*HlcT8{ez1|8S>;I+5{7 zD`89IXxP!{?)K+DeJkSpr|Tkvix=%F<6X>sWdns4{dNUo<%uJ% z5w&&BX0@}%iqdYc!A}q?(OB|AwaPOD2j(4Y?31&mISNrxlkB|1*$Mvi`4X*^uBe%6 zA(hxzcT}oDaV&S%rx@rDeCJWxVT4emMgL^U{&*^Gca zvN{N8PTWu6R_Iu2tNZiv)7Ef^qbGvW(ob#DBQx&tV`B9o_+QN9 za3B5Q{c%jKCtR{d2`%q+k^3Z$Tc{d_7KT$g*dUArT4JRtaMG~Ih3j1w%q|??UWWx> z43E^_w>di6`p8+s_?{Ez84-pYEAMp21odTbD$L!f3 zTItkrY7tfM2&7)h85<|14h{Z9rAmwMu*SLii{9NGgCS^p_QPrwhrlaYXnxb*WU6KG z&WqAl;k75(smWG`f9Y)(@A2}TB(>j5aejpF$V1M+kfg#tPiH#onMh%2rKROY&I=Ax zN5=q?dnzg)RqAUZv#4;szTUI+OXP6Ou9@KXstpjoltWOFMQ+{IvSAORXs>n5H^xV^ z*d-SfG~Xe8&i2d@{{fSGImd7eyXM6PGcW-(8@@Ay*ajgfK<^NkUg^u5a!v&JlzilUi8LI? 
zJ=ftc*heHKwM?<_@Sow0Zs8<*w$ljm>%Qd2mU_)to_|3@y|xh zV}vSYzB&8Y4SnbNyy6LcG z8fkS&ONol!t*PCl`IUz?I@&+@lfO?#<#606*4R;{xtAW2f(O||h6&_es6=Jc&pJw5 zt0ZNNE`i<#5z1Rc;<@z{%z>1TP^hR516{SW)+X%n)co-k2$1_X)9xiHd^E~5D%Wd? zS=H@ZL|@R4iyvqk@9Gu(wA6J}v-Andu$^eNjyfG7XN`Y4JMAw{!rw-&_N7)-{LaEt zYj)CCeCPJ{0WYOz2cZ&16NsP2-BilWulZ_fz)@XhY+!-i(e|kJ4OhJR*@uhlii-+t z2d94VbU7yhCqXTDE&XQin-fmTyVx0685vHFNe4Qu{M z<)vjRZW61N;dXVsCt+dg!CZudF$6zw1eM=;y93wKW$8Sg8+O0>ELaq#I*S^2>Zz&O z7N8ynWe4(Q-k*6ncW>h{{kS=LdqHq*-R1k^^s=h59#TZq509jz$azH>By{ws7(qqC z($Wgc<#4tde?kr0CVZP#d`wPOw%bf&5(p}7Dm(!qJSawx@}hBbpPVM!nW3QkSsQqe zp7a$rEi!Gcw~8#9;5XslAK$(wJm7Pdy%g*~l#Y~c|J%CiTf7GSi^ImDljD}We66K? z@ms>~C|L@#!orF#cu4j5SP|{!8hk?9Rg9&el%hQb}UE% zB;a@dsEpju?DMe-|Ln}H(nwrVT*9KWcG{6+$c(SkF-$%}{)1WzYd}#-5nSEqOZ}iy zvdyGjSScU&Dk*>S4-It-XAF(K0)!z_-!Sf%T1<`H&uw1PkfvF*GB#w`_9$ za;XVo>G+wS!t~M}WJ2rPDC_fw{TvK#lB2azoe#mX%};$WS;>-elFuho2?$e5@S~%_ z4;V}VwL~Q)x~IY-cS=h47w-J~2MZu?w?ItMV9~w=R(7vYTqtX3zJB`yPtdpIN5`tQ zsA!wxZtd~B4+U62xj8w38DzUVtA^1|fM9jz>P_&@MG>m`yZjvHPMMaXC2GPU8f0~{+gN5Rc z5I6Cf*nB6N0AoY)6r6`hB0#3MnyY#@YPdeDbqqfZc(-qE|1~==^UY_lH#y3i%ch zGV5(L6O#`L3+r-7{rz7vo|Kd%{ldh0VfHZ(qWP z0~%_Hb}QOZ8N>o{%sJ6SG>+ZMz~qp1i|EF|A1TFGmP`b4n(SG<4o<-QM%E9^`v3PKb zMC3hRg@a5U+|C+spWXA#EeJjy{O;ln{dKh6swCP_$`3L!IJJ&*S1)gZdm!g>bX3nn zMnGVmiJ~!nIJ{FT&)Ft6$q!js`8ky*4(H=gN?<|(0Y}$tb!iEl6E8|Lp`Y&N)(D+7 zz#wxC8PmN7SsJAJW%=nmupAWKN-X^O!p0pR2h9r(YAs5KX=r5BdUfk@D5EB{y3kNj zwak1xJp;&jmG1`&BB(q|;)Qz=KtX+_+3*!YQULLl2=?vg&)@$aUk2+5;K5!2S{|I} z(A)2?nx`%Z1p7TI21c0h`wiohu z2%vcd|9N$?7z`O``{X|ye&fN+VubGrFp8yzcc|b}Rc177*AU2MygP-|G-%=a`};#+ zBxkN?0bC|4Bw7>rkTIp{`7It>!<>mTj5bnMS3B<#r zqgM<5EHE-o1j0GjUtc*w)LKr?Cd4MqRZc%PxCtTwZqoP^@RPy0pk)Sa*xjk}{Is;C z)L=gFt>A6Dg6XHAfEBQ?&?W`wSNP0Fh-n1AlEY$2E9e7&XgzJ-1zEa)EeCY|oVj2r z0Q?JX)53rPnhE$LGb1Bz2oM=SQvyc^069TVcDD3G8d(362lUp~fGNFU;xaIt!Tp0P zDsqFGvj-9_I93233bALlI)KCjVFo-!u;9y(PRz?yP1Jz+9JodB4RBKd?SNe!sucwU zruMcr7-XFx(86r@bLT!W*xZ42?g;U%0Otka4kRsB{1Q{BwcpnU+AoL-0I=ZZ=0;9V zzO}ImTqc0xn{4n4C-y?emlTqc!G2j~>UuKo1E~%cM^0Q7 
zH_&}y;R5~x0Q1`Fh>QP*zYfC+!1^W<*{Z?m1PJca^uR#L4Ej96sfQQ@+GSB|C z8KgqO2$Yp1rNMt*tE} zJj3V!9A~(r=inrY<4p|8NJ(jekRv!2NK`tyyYWpRo5se@Zg()f3qUeGu?dIVSO#}i2hF?x(l z;JN`E;t@U?u{S5c-N3{@F(q7sawEaeH&cXAr}e?dJUnq)Q4sqQ6&+n$c?}*km{2CB zZhGiIU4xBEKmf!dt3YT7w5xaL#s&u&SXj(5X(?3z%K*^H!^t*i``?_^);7d{S5{ZE zVL+m`zNx9p{({I+8KkS2o3kFYkz|4o=H}|{)}8Obld5f&4^}RCQuDWEWMq=^@)BZV zI$7Vr^9DQ)2smqE?FOJKtz^ujszrEICkr=%BTfPGaM$om5})gK0x5oC3h@0W zXPf!N-T)@NLGX5U=@3EGhBQR1K}4oj)D3qdj`PRNm*3wX-`IVKG9U^J$ZNn0#%P9{ z0;y)pfFw7|_FQCx@>4$GKfl*Dy@VuW;9V`Pu3D*le+N;g_<^BNHw5KT3g>eeeIRkO z2DC4b2W#v9ni7GHO{ius@9bpQAQo1E74#$y8^H{w2e}&w}p(ir|w; zOUUW+=0AY*9~m9xb3A&=%{?_Vga_30@0ppj=gI}#{X5CV%7aZTtEfl_&0|P+ z9*6)EZ=OayA50E4HE}cdiHQyL^xBviGSkyum{ga6O%$8yx*6W3jySrd&4IH3NH^hcpU#aFScQtnVH$@5A^YKaVZ1W5lnkGKtdl3;-=dN8~?lQ(JS+jxD)zQF*@vO}4o4kt3TNZ^)Uxm4gjhuXVnE^lv zuT@%Z?)08M7F>nWc~K;IFpOheo44T+zZpK9_ge{5dI|rl19{*GgV{bjT9C_UwsCx2 zN+<>6pf%|r8ho()%QtgZrw5b0Fsg68?N7mG4C+I`I3ywv9~r3#hn6#Y+Y4_X1Rhv` z^G-;pnWLWjQ%4Ad6S%zwa%N6m9zuir{|3He zQfFl5^zf zs3>2T>svgoS8}RDa$xb)N@PVEO9=PaM2+@L=7{El;Vbn>srU2h+p>Ub!}y z-UVRTiEU6(0xB0yKHQ+xyppkC8CfIn1fqt! zwEdx{CnqrI0t*Sqqf@P|=}AdnwW8n&hR_Xc{)>LV`oio0p2n?ScuP3g+2fz-cxPv? 
zLuT`MAdqQ*l}ZYLTptRT&tR^zeXL8Q2Xh%5TEl4{hUCNVkKfL!-#yNeMtC*BQV-IF zKoZ%%OolqYZ@;DSo*JS@z={n0TzKCg)Nx>F2-v(MgM;_CWj=l+dhlTMKWUnM-Z{@9 zL3$NdO|S<{_#DP)fSW8s1a~$hBq=3j0b*rzbswK8n3!w>xEADbJiNSz|NaS20c#X8 z7&tVJR;vg1A$YinI4lP8aZH7)u^v z#jWn!!)TQK^Cv`84UUeQj?wlvLcD}8*-$rR&$FBx$iaB`@ZnGR2+PYO=D6uMPv-3n zSQIc$ev}8P#6t!g1fYN*h>&_RAtb~ct_|EPpxXY~)-|WNSyCf>KIcK{=Ko%?-oOR^ gAB)-l&zG(!Yw3K~4An0g8J1vj6}9 literal 39014 zcmb4rhd-5l-2YLMvXdPRBV}gKj6x_QWM>sJN@mF@6e`?N$OsXVjErO^DU$5HlWf_e z$nSl6p63tvo%?k!C)c^I@A!P)>w7|VwA85fvhF1i2vq84PM;?bb{G)|B>$1`z)!Xe zL!0m~GHXq>(}Zo}U&$32Q3S#vg8FGC{Tm5019oQgJ-vkaqg4MzFtdS0_?*6>y7rf?^6-WO&`*X`>E-_wmf9gJE z>1~?7k@ZiX+Np|Y+iImg;@ZP}&x*OzpoN>Afd9CszE3O2yrtunp<#~M)7aQM>-B}1qJ_sQje*r zsQ?Qxditm#|LuyLoVlI?JAS&7va%?ycmCUc4ixS(<|-;G78XO*)mFa-D_*{Q8Rk{t z)FVQ;@q78D5si+{nKL0VF@}DAe(|&UzSj3oe6}9?u{hQ+arf<8$=`n^uY8R?K}oig zAay{?#3b%5@#Tf<;N|S>+{dF&>9{k3L*Dz(>g%Yewj#%_v==XGOLHZ!4W4}__bxN@ zjGms`Wb2E2_wJD@rDbF&+?cnV?tFdX#0k@4Czcb|LBq%C=;&5fRysR7-@JL#t}HJv zU)|;VXH_Nl!TtNn%E}xHem=wR?=bLP*xVRT3UFj$W9#ef&C1LSSz+FF^VThkN}ryO zA5ZJ)6*PMGQjeVOt_-?+_eEM-cVAzX@1M@5re6^Xe}3tc68IY7d!P3hbH~JP{>Od(KyG;9+FB&dSva+(;+x5l8#m&tJ@Yd&ugNFM0%hO#= z)zwNE68k@-N^-ux#ZH{iy?Al(+qZAc&3X00BBG*06}NC3oB{%`U%U|aS$CoHXN}_8 zz*>rpW#);x;^^4duOf`8KXT-GdAXUX>8<{f>#VG-%D060_^{t1qoQuDOc}Vliv?Ea z<>#M}l)UEZn4h1I)%Gegv$?&!@XZ?s*9-;*1{w~zI?Qaxt1isocWS_H?~j~|pAayOPITF!Cknwy)ux{45DV`IsAIHrtR+uC&RY;JC5 zX7Uo6J30*X^iJ(^aB#pn5)~CS=8wL6x0Xg>^U$*0K_;fp<4Xbp0@4SxEG;c%Sx{>ov!J?ATKokF1Q0jAk_v5fNF>8D0SaXV==?BYb>i*q&@`@hPW*2dn(I zdy5>ga84cc#eOX*DY2;X+q-vf*r@;J`gK*+dnqX+kB{j!XE;g7a&d792!yGouKXJE z$F1vYG3cK^zc|w)x?Ch8EX*wK@c7|FO2TOsl@!k3^CLSw78@HIpMK4WVdC1fXQbCL zH-DX+oSc>QWyIxTh4(JP=;sKg4WF#;KWo2=ii+<3wY*2o{(ED|E`^G0*RILwX$r>Fw#m=o?5C0>ayYYdbJZ;V zUTI73=;%lpmANsW+&r5b7`HGxa{M%$Z95yPb!T8aC<7*LYgp#BEzzG!Mn zK}b(eCm|vE^5sirH!6Ew`V_gQhDNi^C+ua4zazTXUc16>czZwBzSdu&v=yX`Dsp#f zu)Vz-(^Y{ez~+QBnJU+`qLtLvovr zQ&mTkwTmw6)vJatUsP39&nxiF&d#=|b>*1TXVr;`ifXE>)AjF)`0VE8rFJFad2;f9 
zPo8kM_uf8_1z`|8B#iH{Pg{5V3IXj!bT14QeawHcXZ!ZDO$BjZ( zIaJ5gw6C>Qgp2D(TZZ=9uOT&s?T7=?$_%$Q7GJ!6J$sje=|fG89(IAUvhCMck}%fG zLHF)GC)F(x6K}qQ}(w=fU^Yv?am*Mvr z+DU$>dLuQ#TfbL!-VWSNbG{Zs85S0Xg@h`Nu7h#QPeD`ijSUl>;Pv)C9ZyFmLqgY8wzHLVR%*(^ZAbI~Wl%AcPeK!q<&!5#< zR2Ui>8pa3Qe0-)lI*PB$(6v+^Fakx$3JMCIIMLG7)b#NqDM3z7E>nb;&hyux`NfNW zGm`xFl9Q8DP@FRKOgSAJ9gVS9APcQ@{bzC9HkM*|xf8Nn*J=4!DvH?Whi zb7vsq-?jMV#!z8FL27~#{{?4f0h88CN3d5+Oia*pDzA@{F>C{&X{NSdxoahzWT&TJ znj0v?&Jbm$M2lHmbahxUcy%Equ+)9BHJIk0wT%rvdun20ePL8Bk}EJEpjjx72!Khn{zde%FEN6T3TAdr+|D?3zf_Rn^pW9(LSQQl6W2 zV1lTFPcf9;SU5u9Hz^D;d1G0FlPPa+;iQRQp*PpA86@7fxirm|6-I(4dAI9kYpn!qG<(P_l#$JiJ))a1m3j;78Lr^P9uSaMEI&Xx)j zw=3$+tFwJ1!>E=46uYTdNeLG(UJRf>(N!aK7ueAhk;7rCqfETyBPY-Fa4c z*viUEY?##OKfixtoQQ?qz&<5tYkuTZ^<5y#fCz2_ef@isEUnGW|Ha2Yi0*&qwws8= zd5z4)Zml|{$Y?GP3m!T0{ehLFg#TZk>fV#s`pXa3fcWg|u|Vv_da+`6b+vYMY%P2~ zxU{&4HM%P%*YvGc9!njTDaj#GQThsJd;1bB_XoB6oE{8uVUN3iqGb)^DLnQi{ak!z z7yA6EVCr22NlD3SVbw6!jvu7~z--yCU%R=wW=3f%^}JH|n(NOmD6kP`Mm15Z@~G@Y zwR-;ixtO>(lYj|Vi4VFd!DZ>^xt73`1=90lS3hOcXd@( zaOZtxm`ir&7lrrFeDI5uxGUNDIflo2F9I`z8eo*OIyJPlueE3Drlh3c!w!ntJ`Cx@ zZojjgUK1^T^$|+Nbqp%NHHvH@mx#zBko1`|XVy12+|13{4c?@skya>1c%hS3m6U~q zge;9Wi9Y>`#hUzppPPq=i~w>JD0oTvR{fVR`^@aIR|_vM%M0=IpQe(GjErPG$R1UH zK-lv3@Jeq-#|mmZD8V~WF<`n+4_Ry+9r>)wKJsV}tW0;c8eF`bgrN(c1_P#X8YFAdX!hbJU=xK z8ueE>EG;`b+g{8>Lt}4J6VoP35Qj-2Wl=TbCMT0*z@cs@iI90`1_y?_l|qlqmABJM1UesgbcZx4@^+ax<#EuMU7 zXdpQxESz^eUJ}SS4Sfj4;Br{rzYUK0}qh)z#J4uV1g)T8%F+-~1HD z1`;<=LY4L3Pc3r~5An^l-G08lTfj^MH_UOJsTuK`rzc$MwyAgU|7N-BW7`~YJmu(K z;2nC0BSEXYg)+xa?R}_H4x|de1;no&#e;H|x`>^)XV0Dhbvi;&Zyx#mh=}o}4PYY~ z85s+?=Rd{PEc#!YN~);T0x9+OnoJF%JZj~o>biNaOzk27gAX0(=)$^G+fz0sD}h-~ zS#Me6K7Rc8;loOq!dX0uxh)pqyfHf{kACnxEG$fGu#^B4y^sNPBJkFTp?U_x#?_HdiXXHl!7K$=;sV+x^}F1;0&{z;@GR;ev&z{7=oJAK7I+ zAF_%cmy$A?C)+XGGBo7>A!v_9|2L)-hrACTK49}x2ws!Z=^U;O{54c%VQTtks^b-w zI%aYKKOtuw=7%Ii&+}xn@3HRE%#zuBw%lBSEJ>0`GA3RAs4)#~v^3vhdOFwH> z(3440$8__?jr~ka(-RZq;{Dh5dA~oxD{c)!H%h5u*cAFo6Qr7Q=hc}E#NGfLHhAJwyi0G{R1_?p(YXACo 
zL6egnSiW5y9Rh-as$SzmFRu+%Le0P&wvXC}DmL}-oQvbS(ecwoYU0?72_ydKs;cb| zcPS2@^pU#${q)pAjiJ)G^x?xPkWE~qr}(UGZFw|~njFT;0TVe&{(Wu8|60z&=CEID zXzQx7CK&myoClRc03^@%pGp*p5A=RWF8_QYz~#u}^48W?41GggofC?X8E58>1DC%r z>R-4phk4)ryCudt+nsOoyxqgR(#Lo{np;|W13UxVHuLUXbUB9$$Bi5#A3x^0AxlmG zsoc4n`mDP8hILR~v2&lCynI1H0mP5C*4E|^yC@iinI8h@prunh!D6>Cbq+i(6~Qjc zv%a#rveMYp#KhmZzAyX53zgKCpq$s|_Vt=LI80%v#>U3j4;*OT*B)X6;2y>%<2(Bz zsn)vjsc7)$(LLf7S8OJ(%%dETGC>3>&(5B4l&rNB7Q9$q0+`g?tfQu;woLl#)$&Y_ z8&(nsOy*qy<;WkvwIuIrYsVZVLk^tu`Co=V{w>jQ^NOYAAqUaTEdX}hKAIRA6>E^m zxE}+qbnaZ^`}gkwML#8-_qWW+$-xkzBn!xCDbi@6JylBF(;`z~eIj@8U=sM>WLt*h z#=IK{d|X@{bTmN30FR$wCsz4JTeep{tEIa*{zVBs?Jmkt)sfy`_=A@Vv#oxVRV$3eaMx$T6n0)ceK_s93j%Aaq*SUMwyqCI)l^ zV#4U?Xkua_wizS?j*~v!U~D|5)9>EB%QwnKs}4UPEg~h={q^hDx;lGIHQwfsai545 zFeRhm$j-`&OH5>EVDLbp*UQjKv0XS4hXN87*4Pjugo;}X=?R;CHyK%JSs4J}-ix2@ zzQ$Hx5-~S3`wi>_R){Ot1k*%uNrONN%_*Nd=RDJ$kL?U$BQzpnsMLcm;&UIsRL7JR z+9w_f+as@_z%<|bZhroC&!T4EG;lj*Dwj%WbL>m=@*CF|z7s#S#C7aGq@|RU z!%0or=gw7lOkaTP31z6tszB&sx#DY+ZYNK5^?D5ENxw~Ug3YIg@4Q#Cbgp^nx=(k` z0#4z!!o$PaB%Jb!iaI(wzcn;~|CYLreStcG5v3q*Bao-fr6z@7YW7l48LU$`se~Ut zE@4GM`${gxERSMa*j>7meBudE{5;iRj~n}@OYUqjIRxB*2J&-nvd}7|yn59F?L$xt zYwl6W5@iGeU!zjk4BDiG=Zuj9C)7%+r`j8v8wF3FJ{=Cnm2T&h#6!XKMH= zd7{?#Sd2xGYP2!tQin~GEHK|^!?n87y>ZK89h%5VpY*oLu+OS!JBnVV0cHO^dW4os zci${ih|~Kok%v{^#jq<=rDHwo1poe2pq+fr5P0K7bTU5fe#)Gqaw?tt%|%iI;JlW= zJGb%hVK-f4;~xLN8@mVmu*FkE+UrEZw9TXNt9Kb08S$eqL#4PGwKI`FKxBhV0ygJs zXq0#ABoE$ibbhCvYf*Jo&Z*+z!>`X(9=s_iV2jzdR!}~Sg;QYLSPFr7wAww6%{aYY z3M`BDZE|N@8+~~s6fpqRJsq*2#d?9K7B)6PVE0Iv&c<;zG@p?>RdMU&D09ET8k zqPX~zT5BpciEK0rUS_~MF~Z)vheF<78{#>7l%Dhs*=*Y0_RdZ`>G5O7_H0cxH8hO6 zNV&SY>c5hv4yK39^X!>cWLk7|H0Ij!4EVyXUAyi>IS$(g{TRBO_v*~Cb;a1GaWz%d zO8@PP*j1gK7u3{tvlm52Gj=GZLh}I=Mfo^Doq?&ql`NbRjDfR^jYG8t;w7)`+qW+0&_!&B90rZi%nJRvp%GIF#^lv+kPQNnxMf~BP;hW-0V{SzPK z?bD};TJ@_}5iFvr#>Vd;5EK_j%WO>iGS<+@x_&DP^#HsX&s!}@AmG`#)FE9;bR7Up z%?4jsxKFrE#(7p#Gb%b-0vbbJ9<4)0RAOS|_)=9$k}7mt%m_v(_H6pIXM@ctr{W!3 
zBcD70p}*O3iiXBUFgS52jS@~qqd$8lxOU!t_UvNR&S=l8Qgqg^a?Dw;@(04l@woDT z|F0E<9~;Hoqd!bEdR~m24*CCh+hn(95a8yOD_87e_0>POdWhSWIVYY+3}-) z=TK@O_XpFrS8G3ic6(XZ8M0#tcJSb(y`C({e6HKIT#Xyz$u3by7`T?T%IA zZoO8L5OyV6o6q5|U%q_gekgG_tHpiQRCO8JM>D-e4Lv=^t-6JAu^)n(4Bjcw4De4X z3TH1(Y=-8cE`xr-_==<^jUs4kH=$~(tF<_+b4{}NUVl=n%viDH?{*@)(LFSmK2htS znEiigsqeWIAttsSE=>Q-qHXNnNuHP@C#c<;Z)r4sXPW#(woT?y)^hsI3#I}oN}q4= zhFP)iB0j?jGtJV0s^IJX;x_8L1FRCMh(9EG>~{Q`AGwk3$Z_IL(SHb~t&zF?V4*Uc z%%*5eoSU8eo`*6f)#%pR%j4s<86h{f?9Ry0Qcfpe6iir@Xi1eqjA9$cpcdzUYTJ_# zy83KrNJUpesr3$SCy3t4vYsNJ@-P)pC;etkzvWYGecw$oq&m=+PrO!hQw@Zf@># z5W0xc90-1nMb$+!Gao2$w{PG6@4x@BaWyp?fK|TK)e+urER6%IZll>kp5MKD_v3RU z$tX%QJq7jk^$>R*JF_1}MWH*RKG64%ey@E%GK}8={2y_B2O$uUgmEvwj7({HxewHk z34!>=)#c^0XU_sz{fZNJ1nVRW<7$CqRC_5Yp^H9~x_*SU^lMvNL0a0Se|iE8w#`i+ z#s`-j9O4oZ4zRL1I63)@ePIB}L~Uf+zdzxmF9fk`J#SPD4VehAdY(Lf?C$PPeek52 zv9bEDg6pHIR8(Q$bO6H8+p62!m!U&Wb-rdJw0Cr{EBMJlc5h8n4_Ii5mqZPp#)Q1y zsN%k&^t6fD{kQ$VK`!DFvR!YnFLPO)=|SgnxN;?oU6zpmFJh&JM&WCM^wL;E%mYRN z<;$B}Td35p)6&GQfB%SCO65Bl-w2Rrf91-{2M;u1$^{1QRF6bK4E)P?=+H1o7GW5$ z@#jwor|vx6R8^vp3%sv{~Q_q{)zCJd>!?3UyFJ83FO}PICih~nKMnDAuo{{(ey9uxZil+c%C}3LL-P6-z zUx-zgN}>taN55|$!4e~FPeGW+xT=?Fqm=_Bp!`=gF7;1FcqRYl0{#WNy!U>B=Uo3ya0%Qtf$*SK#&2ipU)ul60&JEv zvm2a09~m0D$9rpYV+l6(fB(@kJ`e}JgKUSj|4-w_QXJ^-Cn%bjc=r`MKVT9nE+{B- zpNz!JjRrn%n}o$e^Psl1s|zw&)0Z#s+n@yA4+v1!)qQmD9+|A{04}^+#L8*qrArT*T^?>Z1>3Xko zrEBcTw*Sf*d(U#5#gO9KKEJdT^HS?0`j74f(=(IyF#fS_Rea1(PS{CCsVH(C*)V zn%xAhK5&^5Ee{V5uD~mNoSBf!k>DAA+DO;pruDQ1D^5tT6taTK{i6Y#xips7P-J3Vxy?Ghy!NEJ07rc#03hlg8$zG930_0C@3gWzkL4m$;8z3ZNgRA z``QNYQhIXD{xtpTXczNt3X2A-vBngd82$qi4r{wqJLzO* zH$+K(etz&9P!1R=gk<~_YvULGhSdWTjD?acu$p#(OZe$onZ$1+E|61Tp8Q#v4vtpv zUatG}=_iUKJIrcG0PvC?)Oy2ceDh`zmJ}cXC5a+!!!;oE-gV* z-WBtZP4e5&P*Tmv53~otr1*H#3l~;v8#_BXh=%*Sg11-Nc-(33gA(=kn{h;_2_TX1 z?$#P&Lsvz}PN2xp$uO7+u0j0vIhkZ~-ps5YlFihVLyE`1zyJ**6^y#Y&$>UXiCzUH1tO0jDY$} zFwn?5&!xY3k$BSAWPa{l;E_Aa4=YPPxE`KUim*Ck#j}wZ~{cdxXRC$V%LS&CYK$0IZT)IMhWij)y7R%6bsr409=_n1@*;lyU|{EVy-!D 
z{7~S)^&>OFwrG`Px0F1B8hvF@5F|oU-n=<~@#3>;OLRE+)b}rrcDIPrQjOc>mTc$I zttEdJCLAJ(EW$#zT4As8@Ni6lfAdG734 zq*ne*Omx0>tqKlLAyGA=KdObsvO3vjhq8llXrwCLf?)+-v~WX?iHWJuLJ6}4nHbMZ zMmU*Ng{q5dCCPZ)Z?uJcF!=ucFid>d8pOcFy;HKN8SrRfGU{5~Ak@{@FDxw;_`hO+%*W~SV#<{pN1PwhO$7kP!}T|Otb-wyfpVP8Q3g+~Q% zKSt0T$QI_Tl-;+4fcnv!w??ZlyQ zj%dIx5Z43=%vZqjlG0K^qr72=qV`6*hdI^4eGm$oPFnEf$xR==(n&!qhlJbOA2xLc zBmD~Z$T;g83x2Z8C2lZ5K?zaORBw3&*q>+}nN060Dts^edqHxAzx~SmDE5 zT$fOAzEeDG$~pGiO}?=4;{@fs-|12AEImSu_*G7iC}JY8w6qkzWnSiKY-A)RCYDT1 z1BJ=t&80mAI5ZCRk=)P*=T}!b;8UP2l32ocgsKC=q_8=^lhyqeQeH6kkrR+5VDXGY z6NMB*P^3j9;%GCM#(uHBN5hd8ejAsO?@;)=o|=(yRO1f8W$U+xSm`91F;aYcDJalN zunc@r?vcURL5OLna2W}JvKJT_h%}zJw@jR8)(O~8Q1qc|09!Z@mLpf_ue$rl84HV~ zxXCpqpGr7|WtFJC^5iRtRMMbQy7FAXB=v=(LtDx8~}6L{#chY%M0 z_qkV;;h#y#$YH-Oy8HJ&MbdFo>>xY|TKD;zW$+3-7$_P_(7KNw;BR<1H96&1WLOm2 zg-#-OA|kR+zKIkSY+shcDx3|dA#g)*eRTA@ckebt3!ECyg1viP?<{q2AX!1WPHH{U z9Xh!Xg3~G)!5J2+fQJl zBe)ZL))r<4MDI+kIsNm;`1p|MgbH@Sz#tI|akR%Co>e}W2-Zo81v9ilenybQ|vVUCc>Q*7h@3(*>~t^Lg?Ch}v)UV?)x$FUaw0mH%A*lSgTBc-Sx z4T4DQZ#VAC8rJzt*785aXPPDDUwtYQ3ePCSB3GT$@{A_S?UcXn`sRaETI?k%A39o+ z1-fQ_QAToAO>I6{jMANHJdgDQ^mJZNk3`YbG`_U&)$7-2-_VG6v|%iz&UASZy^ zj(!S5r4cVR-b6G4-XGz+;$xG`8vxn}PIcuqRm+beF%0uQ0~I=R+POiUc~ThH0( z0qPSkP?KF2lg+*}+{@P8qEcCN%3@~KM<4ipT}x8naW@9oV0N_Dec1= zskJVe+hA%KkoREUO>EN>6ZY+9F5f?ff(Zb2?z|0AAG`v4@&q%KliJ$j^z3KRD5VToA|wvk7Kxb?>nXaH=?%;mS%s4r7PD}$zs zb%BI2j34B|V`@hFtNfD=g@6JfQ-IfV2Ni8?ZHah@yJ)@spp>5kbP(x0Ku@n{Vsd{F zv1Fa9G|=jo^OulRD7*_^bWoEf3l_(tG_NmMumtICnv)|_0jn= z=UZQBqE%8+QK3=nres$1_+fbd{Bys#0eli9k=GvI`q%`MyiTsZiSMv(DL8b`;{qFJ zD2GU4*iNp6ukSp91yxXcmY0_iy8&Z*jts72C%@Q9k))<1=vV(8K83K~05Ca{dC80D z?4WSQh`z%|!mj&jTM*yp+Qs5tmX)PpZ+Tg4LL#OCozCfv9Qez3+naJ1A#=DTT^*|2 z0wjCBTb=ea26uS4O8_*GlU&)}iU<+5;}0J`jIizV&uvDs2i^OGCOdPQbE#=)N(GE> zG&^4}iPlGUW`AlQf;1lt#sL1+D55hj1 zlmB!OE2^E{Gz3e=j%c zi|?gl*A3)E=kAlgCCmu}7o7Guc_lj~g~XuJFXF=oWx77bM_lSFZR--y>`9df1n<^A z^6^t4i;jDfF5MG6(ELV>O7nYa6M1EHVnHf-1(~<*I6G z7Ta>ECyzL};&8&_0)m<`gU? 
zfDN+UyOFSh>PL(rO{Bu5*}KB0t**`@<&vC4d|5a-WrIPDQt@zeLmokk@^$9Ih?Rga zt*qiP@C?thK_7|LQ^@oC{@VW$c7x}r%RFo_w3n*gI@7(mC^g5kJ)v=oN<2^wGJIopdJ3vh;vxc9AF zh_{`F01Jx^#TLyJFt!Z2Tu4g@NMM8ClzUcHRTUO?&eL-hcrWt|v|f0`y9sEyz^OTz znX!mAL~?8DI05JQZ>=!JGz{cf?JT;FMh#Yn`8&YQUN!#F^U$*vrs(*l7%L6`cf9vJ z&NEO6%g3%elGYd{5ZBTWU2)OyuqA3k^|4IFAJrsq&VszFIZ+4m^Ea9je&R?2HXUS3 z;O(`Qm4P>xDTu-=b|gDwQ7m4F8}vM9fi>YTPj=-dg@%ScctF8=^$$jF;`$hhKFHpS z+}x_0OSVEnLbq=H`TqSoLNUbJNEJ5F+&nxv8A`V9xsi)m2nr%4U`FSFI}u)yaPIvv zI=Y0TH6+mVdV$w)V4#YaIWtvcV`CGmJZnh%dy*mkGfS?5Dsh{9+_d{-ZXWl*udGhV zd?7q9J*9Kyufx^&hvL0q$CjERGRBOy+2j%VKjSiSz#$+7B7OarVzIglr%(R_gHo@Q z(IN5qVHY7gc*DWY+%pHtf~b89JDU|8JnBqB#5nT`f!_NH3z~Midi` z8JNoX4FykNHSi?w@#D~LcizU{(FrXsFHdL~TSe!{$q8>7N0`;u>S+XlK^d~41UtrAmEM=VBLIn zkrw+ZiszgR2r(4l%)2;AL~PQ~H3?uoC>cB6!w5jf(y`>-_hl_2d*HB{pTDnu?!o0J z?nJLloh*s8OK0$WYH|E^!_!i98btPvoXNA}lU~b?@SeT>~=sU5ccVQDlv{-$AXD4zvEDQ`P=XeiePe7v0%~gP; z4+Tv|N{Vp%)hoW>niCQds$seapto>dz(0@s!e7T{r4U1^_O_>G5_A3qmXMypc^+SY z^Zn0IFo@LbGi;u(U(Z?)Uk42iXvK+L0dclZfkF>F*j(3(qbNx8_ZD9HE;}--q~zRE z^Ke*Yr_3zjJM*ea*$^G+9tG3lGL~$6c1N8E1c_h?{d$#dWnIt%NpJhFuHi=#D)PTu zqS^aMPuMpnqonE8yxl<%&y^?q7$3(F;i8;NzozKDX ztgXMpa;c?zkDXoSG5vFHPB1*o|H)1v|1P~&=O-zbnm?8+A7ykDwYMIOjxV+flixg> zf$*MWBuiTk0xn^ry3Bh`6g z$gl0(pRjDVrG*8^mMzWA!XhGZ2zX^1}fac07>*?e|e9fbb8NN2@xDkjJhNhHB zr7N3%ee@H26iu1E^=0wDbxbe1g+#v~KGq&Z(YfD!C%D8%b3$C$txOxfeOsNMzwvid zy2DM&!h)#ZG(Jg^S@9R(M^kG5G2ag~nEW#jqPR2CWRL-XP_Rej<>bDPj%p`J9p>b8 z8-7oM(<=lbfP%pe(*}b8N#|Z~9M*nR)42coJS32ha5RCT5JW|p1~UY9S7k#nSVt}d z_6&}$X(!68jMVIAV0eNv8sguok%oe9`K`WQX+WSfT}H4^a^aJ1+0K8+GdX&9TR=$7 zDK2R|$vmEfV$YtI*4BMf3WDE#RME2D7CL<&Ji^Os|1pGa!UJ6u8yl`GQEmc)I;NLy zR_SvSpS{acKX6BtH3anALJ95bz zI10J9WE7((?|TKwNzi$4J^4|jvRgwY%C@LLNbje=JR|T_QDI@`TjaV<3zb(?n9i}# zv$A@__WEQK6v_28I$D+L8W{nGg?)GkJO!$QC*_d6RZ_BoI)~DRA;sCXI0b({gox0m za2hbG9&72Kj5{ykA*wJQn2-k!#dGvz8qN>_>s-Ec35lby1JaAA;K=wWUj`K-+GgEn z#xIx|E(*)9f9J|yj`*18Id@h>A^q{ggw_-1&wdVh<8h+w1lg}CwZ%RzIuZNjZG|`4 z_wQ?fGpuyh*6yMzogEwNP(FgKmw?D0QCGNAaEwcRAL-$4SslYQg}@ufjvoi}!wD0@ 
z?Z=Px-(DSnE&wP2G`aGtY8y@t=rK}4=NEK(^wrgog3P)*>J3vB?f4SR>Xej6&z|ML z&MKjNQ0riCZ*Rq!7;pG6La!=xpiKO_t#G#613u0*RXQQROBz0(%;`M-6E5|YPZrp|nrB|O0rer4r{?fMlFf(lA7@e-sr71X5B`ofHw~ zhr~NRqoD!U#?f^EgY8f{ob&T+;acRKrAx*)_KRgZDv8c%b)+`h9|U5~d@x9-5uK4S z4!nknhH%(RWF0PDawp1#Q5H}*Q7dqpEc!UnQH1A3PlKf(gQq4m+ZTRfxQ5!SaC7ko zoE8#A2rhbh^a6JIMgbndDEGD^Pa&B}xlc61bp{B7tP2gKh0_?a+SCT7e*cP3M?-@n zj+n!3^zih2Chf*^d}d^XcrqMYb`4@Y&N|wwAn)z~u3Mpp<6T}A=8X*rUEDJwYBfAp zhfZX>{SgRHYU0`Y?3wp9VE4crB{W6In<%JL>@6HPCsbEmopSf;VdvD@-2*<m^|k z6pRImI*a$U3C<_?Mc&;#aP`}gkE<>DuSWg!XXVTX?|)Z%o1YHB5_eUUbg5~42{caj zmZxEYEAOF|d;lN9b4xO9z zQ=3=scJ9%o*H8;%tLcr8EN#8@@{T6iw(MyYcNd=5?RWJkL1b>?`{W@GEhatG>T3wxS; zP0b8tUN+sh{)@b(H^OmpGudo0k-mW5%v9E&c(x5^SR=!q>mh+ds#I6!{zK+YXU)+K z|IcP7&P72=Ha_pf?s@Dzc)Z?&?xT&Gt!xrLhy8CfV-44|UoSm3%Zm~D-I_$dinB=0 zh5CqJfvu6}FH)~S0y0$HYnDq=G5c~}7n}S`fX!z6sZe$$3(S!d7xt0@Ypw9Yf@Z1o znT8K%iXzY#^CZ4MZGS}EgyZUy%$=V!x-Y*~IB6#1^6cN6`%E?F9`NM!osjuLoD(Fs zDqA*6`1F2jHUFm56#S@eTcJBBf%t3~Tv+fH87y7;U8$5mqLzIc`x zZJXgUJ6A5P)A%)07EY&!-60fNhz-@U?z`t(;Hp)d7(o+b`Zb0qOn4?a)Qwq{zp`Ow_eL=em88ot zEy#F8m}75X5WHeVRqfqZh807fY07%2T z2#_pMkccuOZ0w$%4IHTW{$OuSaYO0c@mT49!&gT2=(M(UZ)vy!aRfXPpnE_p zRk;2=X!rxpvnmCWrF7wdy3a`koPkI945+f*6++k)^*${BwiYG?5Y9h7{~z)BFLt$V zXfM@irpo#@4sVCN%KwU^qr`8uD6rG6z6&l1p|sr;tmFH4YY-&@RQqH`Mg|7)w>?n# z03&bLTBp24!9_OO-P>@Zr&TLC^d_{Fw4GOEuh)hq9e4Gl>V+r9IcaqJ^JvQU4)xuw zDhnGv)WJ35OWf=sMrjw?I`0+Cb-S`qCB%`btF`Tqy7}L!b@F-%L+ThyI?k*A!jLG6Xt|`NKdz|ZUuVvc6R>QA#Ke|?-f?oErp5-Dd<;J! 
z&t>ETsHW??e$Y98|84NY`<%xq|B$Vm4)h8ba(KlpHX$RN`&{1-}5cJ-7a(4CFn?xMmaqm+Luo5U?{xxkce8Kdh z9&va|{v$lFply?xB-W@>wQwteKl)FWzUzG~|0_xi9)`Az82KHJ-}E?Rn9Z8zUO!&V z%(J6u;=$6NvSE&8qoa&*+RwC(40;X{MTVjXj+!_+)7byapN_V7>HfR^{x{X)kJs(Q z3hc%c|1o;&SmBfE-m1593(e}R^SIb4?uG7(pb<@4yujQxd&_f$pPZPMwfPr+{Pe%^ z$Iu6VWCK)!%@ci5Rz?bLTWxFaY;igK_c*bLS)+(`iPSlICi)ZK{UpM?x^1Ri>DXHF ziy%%IpQ|=^$@M(O_tn-|c zB>JgF)2~JX02-|t^CrSyk;UIe@|$E&n%#0O10w}dG1GIsrSh;jO<>9!ivBme`I=SmLgCHKb2}``7C71z%^S{PtcPz^4-zLXn8rXpY;p zIJav)iQ7AT-ahoWf)Voc=Dc-m^4f8^~vZueiI(8$Pz1ZbF z|G)d=7fagY3oSVj-BL@$9w*g}y2e;$+?@ZBEzvsOqjIBjUDZ{rd4j^_&SlDsxb7-%n(Np*09M#`O6X$gNjbMPVe%k9= z{35-XT8M`Luu0EU@A8EyZX(p`z?w)Ji~5x?zp}|Tl0;*~6lth8&n3Da0Uw#DwXq+) zF5gSpcjD5G#7ooW#%hY5OVGdcn7HDK!dV3o@HoJJ->!d4P=H&RBf=2<0y>>-i)HLO z*At(!{2RhG_HT$IcVTq(K65Dk;MH>pm2eiJkx?=WE27K7yMwx?q2U0!3?0Ae+qbKK zXisDt+)%YJvKl#F^O)!;bjoOPFOD|Ujn**Z%av;W_&<3C9?)CtYzl3lxOf#(9%MeK zjbFxGadrvEJ^+qA!ABuc!SU$EiM#W3Hr9)|Uf*D`gK(qBIWr%plHB>$EsltZh-jQUhp(~eu^r=Qbc9C;0XqEteP|7iQS-vt4E=xC z6%F%`5~~BX&%U2`7i}j-S-eaexA&!h2Zl^jvWlINt0r!_E14B>H%7ZlEdH(Wa0}}V zhL!mSOEd9qhtt26z&`zOQVn60>uP~kufL5~Y-e6nnBW^Zx?yBD&z~e0b#_xWy844x z;OmS@x5@9{UGa?z%fE3uINJkL8Hdu4pmb(tyXTu5(ED7CMVpB#R)9D-4Eys?7fXWd z`jMmI!sL3b&6#IaJS;2wgN|WiZG9ozC?AdkM?_aKfbZKsvOgmqEbQ;NdYrf>E19q+ z8-q@AG?n!A#+?ZQv@jFwLNtniKr_y`6ki>1z_)Fo!Q)Gs5Hl>g`5*JDK%}O^C#IuK zV$c3Xdl_bYGUxuTBpAgYKK(P7lOYMyR)4f=i-_sF3k z_(w(2o>AuEoZ(J}TK_(@vqGTwe)omfofrEYuDzP=SnajC=6LH?uIFa2L(v82S&wVe zgZJ<6nA;n&^J!1ujN9PQ!O!WBxOrV}%MB@PzK!ZD)aZ35&?rKvMOUqM>ZE#fN$>=_M?{|%W+x|TkHs-I$!lKqCiCof zcK7H00IR6zN}DY-2#ptS(o-=_&=(J~vzsH>^yenz;MG=5ApzqzX}}iPO~@ErO_IEJ z5!T4jB%fA<*C6G$gl%kW0GmR(`3t`pR+IVeojMj81ygH%YPRkgb~3btXL~C~=ls3t zLPbj0Iwt;RmEbI}lSH4w_C}M}kS`pJO1L+;hRD;elkQpiS;*8SLT`ZChCzi`?}Vhq z?6^D*ZGz4#pLx~sskRp9VlM$N!8p7K9UbHYUqONYskE#Pz)97^K_`*Jfc@>W(qRbr zZeHr%gZKl?d$yG;ft?O2tEGDuILCwoISO5JpB?_gXCzen;dOQTyy1~c1wEp+U;kfM z?;THd-^Y(1GLAj6vI!yCI~m88P1(DIRI;*VB`YgCS=oCfgb>Qe&L$*#WoCX~=emEt 
z?;qdmaX;$5?~Bj*ocDT-_j9OdY0Uu;4NyYx(5IqlX3;1A{t?j%+W`}{UusX?tg~mp zh?Iqm0v`zLc!9D=G?*OjlYttJ%%ZmN*)6S`MRV{738x|7hs=)@g&AN8cRrxn5TJm3 zFZ=!@bMeMslRFokuFMFjgz|E|F=25Vo@rV~)`Z)sU5mM(b(NogoBj>7auU=!=A9_; z6#;|g+Z9n}Qnv*yO2F9gH6JfPmz)#@Z7aAk-Oqsmgl2g~MGP~tcy!Gn8c@aGz7?#Z z3mEtsaObInd|nT*+%Xt)FGcM4j0ZRJt`u%G-j&hV7Vk*=CDJ}lt6jb6Pck@u?k^W6 z)0d?gZM!$V!)kEF#MJ1)b9-pClgpD+`30eqnH}vSL0=99yndj+4=^@5evlL>imp6`7iF2#7 zv@E-3IL0_Bq1(6$MJg^a{kO$kfaIffiOO3pXj$H-rk+5ZJS@aX#b++{V#~ClC=KxG zx3@npE-fWwWcb4m!cCm0l742b;BB7u@+ZcrRmrQe2oL8jwH*?%@4e6I$Dh}&im0)^ zRm%k&BDClVhF2rQ!ayJrzX`<^-E|updj6NEbI@?^164kF;QsFF>Z%muGw6e{#;(8} z;C_WJVr^w?Je}ueed$8~8Vi0}C{c^j@ zl}7_>i%c+rk!Sej5i*^Ewx(Y~>TOej5JgaQsDNl%pT>*m0f$;(GL5m|~~K`fR56 z#%-u>b_$_SKD#4d2Lkw|nzgI*K<#Zl-D(6_us^8nm36`?0hIK1+iH<}jtHsF7e=rd zk~w~fp2@l+Qe~Sl`o&eBzGQfja$kehRh%B~v(H;D&$r>%Tj2OBI|Rj->IqZee+SQF zXy;Tbrl?hW>8G)%<6?e^uZXi7()rt7RfoQCTN0!4`&|QF6McdN=OwcKhuJlo=Ipkl}~4K|h^D`dGGjwBe_VUZFk#6%TZS z@rj9_P9iZ1dKYTS8XKN#j;;RplArQAp4rAs;k3VL>{C3ry4G@WvhH_&0@yxwHZ~f< zSIh6cw_sn0rftHX8uUzvdrv+!i*OaVeBKa9v&I}C9K5~dHxX~g7h)0w4@k-f>K1UP z;hG4*WdLae4J!Z|4*(Je1haC&7O!NICn#25f5x;SM`B!H&>wRp(2%bUK3p~GZb?XJ z=p_K?G_K(z+$z`p^4Zvg;^Lp7cZS#L8hT9VTl2{W;--Jj+_CVnS3_|_VAXy`{r0%J z=l4gJV3Oahwt1b@zhQRfpqUN#z8mh1-lu?WrIq1E6dn2gOihwh54^UY*)cdi@H^-C zk?(6ei%%ls8~PSxXt*<68^d+50Bw{8Jt;y&!osJpB-_U|%?d_J_FS7Lm&rw%sV|ij zk9s~e$AKHwIMWPFH}rY{tSx&Q5g(l$6{q@n%=uoiS)R){{{emt|J^Dsf-vb-oR)?@ zeUUKj^06t$j{q`6QDOCUHBW^qm^b^PtfmF+tllL2O7PcCS;Qm$oF!^7`dJY8NDvi) zTlVb44pEReYH#MUR3~Bf2))k^&*?CwNuDR=+3}a1xBjmlzg3qmhJ8W20meu#q3kIQy586YJxhHm5tFnlaQ&hk!qZ59W6Fou5wn|ENeXF=Yget{iVcSJz3<@!NI`zmLnhb4 zR9&~h!Ntc+Mo-naTlAzCu0<;1_SQ$lC>ttW6CLEy@|=IWZ)z}+BDS_x3HqrXUI_m2 zhGZv`@h{QwlHTvKEP9FPRc%>@5yMS(_EsFRm!5OBrSB&%B+#!<>eV$9-9Y{wL!0m; zAv`opK@ieEm|{_1VZ^5%?xw$Aw53Gd#nCH*q&$P&MzKT!vZo%M^E+(-7-)fy_Gg9^NNSzIRWE*WUhExFXS0-qw zwcNWm?*t1xJu5)So)WxqnwW03YC?b8hK8b6e;H*)x{d&b7bb~t!a!!kU0O>dt5+3! 
zC}JS$GjPN}71+p=`H){?U{BeqYlf=uL;*C|E;-&@LA5=!vzh%Hf z)`i3DYUVf4@UZm+BLFmc<7d-Cv>*HTv z?#%x8A$JPCtaiP73?q3z%UMx{B&Bdk28k8e8Hww`dE4H?_hGxt>1QIL6}i3T589K@ zoy~Er`QaOI>z5&Aj(=UTDy0b{uklKIx~0AovA#0*27QWtK5{_{P&cI^4MvQ^x+&4> zzg9+12EMwAp6AK9`^jv~p+DNjP<7vkxb_s|bO?)>BB)epNkJS$L!cQbNRGc___*v$ zl*hsH^)piR=%u9}71aIEG5fvp#WFM!SH8iRghm9P#(hrP^=vlfz{V2ij$QNL{o_8D z-%qZbv`Bv)uym{yWFWQEc8a3o-w53h(VNT<_kb)_Sea6D--;^bbI-&~h8uf=kQ4>) z>yM~twl+wjaB*>kDDih>C6>nUU60;J(RIL{)wenFEjp)q6o6rt@$J0g<~(+Y{LO=J zxq}eO1u1cNWNGHNtNgrj_ae}b7n5+15(_GE$|kxJHXERg{n6Penw0;~&dgQEuZ%Q< zNiaai5kFr^_p)9Z$lg+8Gk@04!HnmH;5@((+W6pm?+0JGTN(xoxR9Jx4R5qV%cvFm z;DP?uXW|&_JJ|ZcFRz(A-Fy?@;kvJ72?YTuic>Fa?i+IxUX8zaemo(0<|9aoX)G(~ z#Ac@d^~LnVzC!3*D{Y%IM<@Q$iXZ3uTE&9vm8ql}OtT183&8YLat3RjOu0i=(%ejf z+XWCQ|GhpjD0+do4IK`b^CXDWzVM^gmB*t77Ey2BJdG@n-K+f5{Va&}BqR(4AK@X4 z8`9h;ck4UH@KnV!&ELmu$+xsdIggMDYD78%BCJs^hrSfg#$VSH( zTU;MBqvm}Q)McsmA4KVrK~*vH>3L9}?e$=Qe?eCmYHMg&yx16rX93is!@3eBFOR-# z1<5Yr(7g_73mzzDK=BQk1&m$*aN2@`W)vBX9dUDyu~6X*mVnk*SLQGBl=puf2^K~= ze=eaGkfi4g3G6IHLf1bXn+yd>szYt}1Hs?{NI~^c$Cw#70%$wOgn$U5OZMjM72ep3jGq zkNc%h9YJ&vRRD-`QPIPBl0Qx#bnQV~G`g#u=Jemc!Aj0ojIJ@y9q6Bem;EAh&MZ5rG5r5b9prKPEc>D7nNKkJ9 z_H^YLT%feiJKb@po&dZB{6mH+b8np4T3pl{#(OAeWNmE?=x8z&J+U$~DUcbJd@wU5 zvE*XdR9l=5ocAy6ahK?6go@8***o0@V&0e|{zNHl)0|4?dj4Yas717~(S4HI;Tm-C?A9ytl+7aU60YhwY z*8@tr{iR{x2*t$E;$96`$Oh0PD2LOs0F`XiqczC5V!|=?(MW#u-Y5$nEL+YADBnhtS|EkA!65k^Cz{RW&s< zoC&@Pzz19bQVON}aJ1j?JTPvE8vci60w8Kv6x|q&4hGJ(&AiK*@sB<``EdG5p%U?1 zV&x`PYo74u8z4+Waia%0!+<`%4>;njTflYPr-rTwG}%Dt!5nUM9=}uv95aM^Hvlgb z>Z|p0I7SvAf_|ExTc~-=sXV6{4tRNA(?*`@VXVpaLWkM7(BedeCSgGY95kl9b%~=^ z{|o#{YEi7QXmn8&8KI?VW#Rs>^u@=@{ynbDnIbN_EBU5tL3e-e4Q{L#(hCr>Alz^s zM+GX?v3hUrZFb&mVux2ZF1^a`@f3$^IZOV!1j7eyoWK(}@o2}0MoZU=Ois;>*8YDi z073lR14+^WNy@Zc8D8zKyE`{jH>hpq%kVwOcf7CAjzN9WH1rSq8FBQgtnA~(%mIV3 z0^X_gR!mnLR@^FP4{L3*Kqc3+i)#_XqT+%f=pS#ym*khy{(6Lc zl#Ka#CEn1tPdhbBB+2+n+y{nb#i|hXu90Nkq8++Ns(hl8GJ)}&LENOmk}AQO0q1kM z8RnYmM}NLNI>xaS6Yt@3xU2ctL-(nZfBom6*NPP@3~MRGr8;ko2XK?V`Gy=@zyw-b 
zbvOSNAs3DA?g=6{_y;6gep8c!)Nss>CrKroGY$b76V&b8vIN8EEP$iaN zf8}e#+I3WT7=xSX2fvK7_~BcE#QLmyDkL`<$-`7gG$5$FH5ezfgyk0RI2eb785Q7B z^Pcd0GJ##nJbv5brRl8B*T2bwEREZGdx8arx~^9AH&82F{j|a+F-W;9+Nt+i=A{1) z7nG-f!NV7oq(|J+g`$+km<@NY&P5(3dJfpG)q9=&k*NCevUyvckRXHtnTib`ig6ax zO1k?74VPncQZ=^sgaH|T^-uUJ*HS>q(y6(lGZa`pTa|6jYt+BU;$2KjMDejH>C zO$@SixOg%;qSbm*JU(S)pvgbe*J}JIlP&GNe!VLcB|ZJl zSA3I+kuQ~Dq_oygOGJim=e81_6sg?I?fpraF=UC4dl*yWDV@By_}m6nHF_LX{UJf& zl1_mQ*Fzhd(3n&u_*daOKPU5tgM`tquI)#Ah*K|AnUmj)WGvr_sV7aab6m;c`Rk88 zlC7{1b?6v<4G+th5q{}=Z1S1FqnULx3l2j@gBAUV_XIlV1w28~^kL?LtcTKTA%mvr z!J&-c_vP&I`u^t~jKVa?8kP6Q2KcnyWuMOlne3x~m8s*lNfl3;r|vrO2D2Ki>A0|P zOBjVOzh}>taTcrYe4(B#`O~Ig1CX#e8mB=#$Aio>!}~$@Bb@J%Y8lk80_qvs4m0 zHSnxl?i`h7mc6Bq`1(D(r;XqbFi_ zOi343+aE?#EY?5zZb#O@|NiPG!5B)e=SmoN49E!lr6q6)3E<>*3a~6M0e9Q{KD1 z#3i(HYw2SZLBr4Tn!tWs>V|Z%%luZ7+<)y$pQLom39#tx%aAP z+VyYffAf8ij!7OEg0I!@KKk7F+h29FT`rNXP;Iumi$f?&7cAg;tae+_ATLnXLRrEfy>`$L!MYO~Lu`@Xzxj@f)qv-Guy-ggj> z@?k!vw^}?^$8#)gRvFY2d8S9`u>W6q(RN37AXSdl(LpO_lOhno^v#{N3<4C@2p06+Ntmc`KmkcCo#EIP63SKJx+<3&E%I{B~A9+ z(z$udC|AE?A}#!9JEP*}fy=fr7_%$KoSNQTK1v8i400>^hTI>Q$Gu5Qwv-u*i~UKb zd*_%~eiuG1nizONIXK6s-_HLz^R>B9KeP>=`Nh*xa7&_e9$%vMeCfjMD-X?I!}M(Y z&;0R{=7BN+?9~CGFc~qT)8mp2eFG!MgYYKHsRXhAH?RZ-HhQ_^`}zl+>blGbyd?XJx*3YkoxQw(pAQJAQlgo>MO`%1H#I zD0G>3OkG^4<0^NLB z#VQi`-Vw{D%d$GsOt~Csyr%o^O2K&Ly4m}};}4Vic6cki8a_gkCOAhAnfR2Wp#JRB ztg&JqyiF=CfY^cMhvTGWar{MlM68B-IH1PQJ*Qeq&%{}&ozjWo`_M`&)#f2}7 zaVvOo^i0QJbPQ6|&Q6SmLW2%TmR;lIFtIYdq7slG@fAWU2~ari#aBdhAo7|P7Z*tC z1(h4LefPbZ40$eA^E04mU?xRQ0%*>D=tGA8MLjlqThNO)ZbTcuyOJOSdAul1&Fb(D z&edR;!LqNBvrV#$w_p`xczot)QU+&%sVsTilXYk>1~PBWHql+XW@K)@<*+@F)edE2 znqX0W&Gi>^pFbERzsWwxTt>aMwd;E*s>laR^%*g~?~5SgIE}nItGRdIc0(xdVR*db z@QoOBsFjnGD|@EM8mp8;7N&q?0ZxuVj&W((Dj_I)O?-Tkwmt>2zEJ5@PDIXdHsoEj z%Sq<^BU8$WEu=|>f{|HuXx>S9!+z5^|0G*A{>fh*kM9k5ZByS39TO={N*G6K-4;Np zia2z5wd7 zZJFtojWmgv#}5vTU|^X*H$1E5M_->J z5=1tSPfmc;3i?e(S=!9OU;su=O+EHj=Z-*>qIV_RBftK!Wa1CG&NS%{QznP1jfpe< 
zcjAv%bgG^ky<6FnNecT{$rOrf6NV_r?^Fv0Ru=3Jz2Z?UIa5HC0-2wgnfcBgUmz($ z-KM6hY6P;?;4HOrGaI-OXJaiFqopQwU?&7cBS@|k#b!GgHtvxSMy?P_OJo?vFID0DtmymL|5^^vD?)R?;MNSVBIoDl zpewV#)wm7RA%KF|!FBF_E8JOt{uo>v=curq{7HKz33z-J9T8^oc=%L5X(s_VCfGhi z>DsQU{jlT2CEnmt_2PTe9(uBbH#4*-CiJb5qo#^afDKER8KLziM8H<^m6r-&Z@@`_ z_z0;rEz<~8+UgmORX9`1v}lbT^HWF!Jw*Sirjz?WvIzB=+=avBPtQ}QM2iusj3JMnPv}|DGalR}cR0H9IX?#fuT@l-rSD+S%Ayu)++V03 zjVZbvS&1CFGoKf!sRX?ewo_?J&AzNnM#jFo6-Q=$jRpRwH!$R3NxbTe&9eK5Ec!QB zBy^~rj*l~wNry4iw?sb`dfW4Awz%yHZ?MIU=b;m^KG>0qeCZqI^*I#Ir(bh+<%BE81hd^~fnW5|AgH)^% z7QKaa=v|uROdzRqqOx08b6bGK=Lwg*?e4Ll->2o(IYE^ z&ci=0zvUcbBT0K^2yVZzAe~bF=DMMUB)4kX{2{aBkYul}+$z-_c2DAD;VO52lfW?SrVnJ+x*l3elch3%#ZZs!*T3l^xR61KLJ`l8QP?uHHAk^&6K_}<))fA=I09_EM}lS<$mh7?@oypX->Z8%XMncK2Rcy7{+C$4VW>9JSs z2wmI3L#UYhN4Q+}rTxq?RO80KvtUI>kplP@`D)t$ql3vq@ZKYafw{OfUETZ#yPsE$ za9ypshzO6~tfRhB_F4UuD~43Nj8h$^=2-PI1S-pw4X@C(s;rH`L}%+u-i5FAgB5(b z8b!A*5)Xf2?agoENo60d{&uxG;%^FD@~3pKl1-%MNgr5YCi^*>f=i8g{8!wF5Keq5 zugAxK)zjzS8~ywt=D-sfSg2Evz3DHzSszV8DYuKuShVQs+)(lO{_uAUwzh`J*t&8z zUf%H7`LcQ#ri2{oSO2isR41Eha85Dr-+QuKCv`1cMhm6$L1$e%X=3!@+YZ^a(GT!6t!`|m9DFJhPL$Nb%YO?W4n->Im`6_z` z$Sdq$CH7fiyC%7wD)Wp{`!R95Q2Hx}7jjzs{6+e6XiGtjG~x;qr7_n=MZ6bfp+USc zJN)4_+MbYEtJeMD5kW`rS0=m;l`>`p!7|psm~RuT*OV7Xg7l1eJr;}OH|xEX>#lXP z8@G$A<|mAp3c=x%a<|kkYylEBJo$N&V>0ctTCY*Q_b8HYCC*(xY7xRsnlz5>BPVcl zNzvKMcq|`xlU>5%$q(cz=`ZJODHt{t3(r0de^3bR9Bl82damlsEAS9jktnUg8gARA z4!sBXpKO-x;Ru?|y4`jAq)rbB7oWfJQtm!kqIO$KG3FBxV^$>>;eAc;IO`lQyoIWC zNY_f!ZPh}kTV#ocIDc}N>z3C&^#L06BcK?smTdXrN^$Tkx+MQe1O6+&DcHvWO~XOigf6VlyTp}x+)<_YAZJY>!(@m z4^C^-d)v&Rw&aALT7{l2OorTq!U&b%=t{_$jz7h2Bs%r~nn&?ZcrjfnQw8oiX`hKO z-K(oAu2~FtY^81}gDuRi5?nhtMR-k-dQK%3?vrXOSLwTX%qBb;tXa^jRV=6J;d?*4 zw_iDP2)DUD5L3(Tj(so{+vL7=MWg6$@l4KHKLg3x#O%%_qEnW&q;Kmi=t`S+#AhFP zUD~%nL(}|21g*zz)5}%Q6v%D*wZy&QHJ2vr8E64IcS718+k(})An2N*XKH?AK%b&R9R#87F zA9a7u-K`Q;$Nd|VJDu12GwIRVxaob_7pirNm8MzC%|7=cTA8&~H8NlDD2W>vFXmPjI6 zPC4FTc33cs_-6ZCXHm%q>=n)F{!zd za-dm=%@Dj$em~(4<4Ch*^3-$?!Vr%Z0W}rJZRew?$ZHFV{QPHe=EyuAy`HNsSK`

iJ|3Soh#AZm-osB{<3xu{dgBE7;u`%GmGrog$M8qT0qa*k7&_7(WSESY zcz5Fya@e9@okd$Dhz~F`jRF)5iD21lJ8gu{TZk@^)Y83!^4K%)r;gmoiv+f2vdzuq z=he(o^~de*bcF`7O1BCP^l(b2#%PB!TZvEGZ@aT{^%!)MziyJZK^DqDh#o>vjV=1! zL)gi}w|@S6>pNDeT$l?uGKT zZxmzV>bm9V5UP(m_4djDJuI<}SKMQELhRUX_8<0!30mL2AFjenw0QJMj-84(dbsj_&YA$pgQt>lLBQj@U3%6Mcz4vsQuPx&f8^2~o z&}#ZKf^~J%>AI8^OY$GnvwOxu9knQ$k2Fk4YnH0&2xAu5i{sv>+H)1=tmaoD#l?~) z^d+7Ww4SwJrwX*^=&;Zzd?RRwp36OV3!#wfB!BTELNqoIJBFo~rS)5!kuR1uFDtfh zYq`X=Pz4T(pGk&I_93aU>i&jU7E$>_S}(J|)a?;QZYY1(; z&)3hEe0JFdv?%!e<2n?fYTDGuIn4XM|AqDOHk~w2=4gMQ-N-_JZ5qi#Z9QAP1buX> zG4^)NvXufM+a$|+*7L@Tmsf3)%hx>QFi+<+6wij|$uixv1@9Zmnr#U@E=6eY{YogwnTu=!DvTd^MAs(6y-8UY8+_FIXQJVdofwWa2L7Y{~s@ zGIYrleP^iXbjDzUkLM8lb^G7n>GAaueH_-E_pQJ+1N^^mLA6)4A;5&twDI> z@S);qRkmV524-Jaz8B7(gMa6^Reo>hq@Gz+{F+bh>lu6*Ed(MJ2A$+d|BRT_NlGde zbuRte`8P&MCRz1Qosw1UKl5icvORU+>;l^|c4L#JcLtWS9k%qnj}#*q^O~CR@7M6% zyx&V+-{8IH$3%a6*7YOJj-JWj6Pz5i z4hw9qCpY)kc^rheJ}FZ8yK((}AE7kZydf^olN6*H|3+|CwJB9_?Hf5muEDyxvhCUjAeKz1TxS7?6U4czGW|&VoX2ew#Vc^)R-7 zr33@lBjVYU=DqufUFUfM>vkUT9^lc6Ca+tBrIDK!O3B(7b0gfz(tm} zDed{L?`R1-_@Z{eg-C3To)}j(j>r6F1{EfBU*4x91q7#smNQypPP)YdZ}NYvK6WD$ zD_-bV%{rwdDVFq_&hw5PyKnbw`C<5@$DtL%a;$opvUh1lKWNI&dOwJz$WiJ1Lol;F zRO#bmrNn@Mx!Ly}*#DB<{qv^FK324^{9)*k)6?kY^W<9>MdqVCBNFO3@}yHEVL4BF zek!nNFy$+$-8(iG`HU~pq@4a(7(a~nL(%!N|I``*vW!>4mDmm^7*loBRVQ{VZt+&_QCS&k9Axe zIRg3ErvD6)Wm`6Uw264&itm`6iGj#GaNrHoF4PA+w-eUBvqk%bICbfFzjFP1#(6&rYiy&sb*-%FL@*;b z-VR|&2tRjk8tw_D=r){fHrGwzjC^lH`p=0eBWCA{VgH@=`T zN)Kj3J;f*gd=9L|BomTcb0=u4C+}0~I!_-3bEuEr6t;I=`K5@l_i4jR=7qXs<*2&R z&5GQ>s??j@y_B8d;rP|H*=}E+#`GjcAZW&T$mf-re1$L#=bb=S;R!&rZr$2k)k=HLeEQ?O zOJRH)wPw+97Dq6apq;igJtUR`Ftn!#UQM-L1-ju2PCaLG11SZp)mYD0L$H-&M zW?9bP5g}67qS=?S_!;kZ%9lP@)j=S3#Cva`a&-av47iWgXW&)|=2-WKTa8N(VX6Q) zfn*6}#=>j`$s7mC+!Q@>GfY!zIwemAaU+vN%SHyHtj*P($IkEj%N1}+KPXvT^!0UxsdTr4YDD;Nz5 zQy?YD-+C@i-#BAF+{zKpB_lIuTRnIg(!ZRamsj=JFx_+IgFamXchQsIQPpbv2jMgF zgICwg_^#PmYvvEnJ2{E>UhaBvglE&k(`I>S0tLzpE?w%UyPAyzF6$_0Xuerj0`|DP 
zoDWq1I`87*pYnp*+u8sI1IUOdQm>7dw1&qYV!;d}?n9lJE5Fb?%^Py01DEPqocJtQ z=lv9s0*X3BQdzNY=S2}kf* zY6O@%D62h|kroq60?9!|~T0D<_DQ)zrZ1#G|!!m}fl zuA{)KE_KyOY6L(fj-n=}ru5J@FUZbj1(~Tbv)j;l*M`m)5Iz7$g~ry8IJg1Z8f3iI zJackE)27zl)i zuu3p`nv7kHjPkbjz0ScAz=-E3Xpt$hk`2$-aD(G1Xze)Ef`AtM6Z>?;p%x^8of(D! z+zU=8C+h%^0a7jw78c<(!LPM9Njr~WN&q{97O??XGGlT;;wqIbg{TIN{dOaTv@oj}GAQfRZp^Zb8ntUbMZH2DGNDK%yUHPWd}>c?oga z0fr9}Bq7|X-3%G&{_#|K|@RA};H$jNx69~}&*b7>ee;kBq zqhKN5P;~~BXLQQLmnLuFvp}LA@amxHA`Q0iA3g-HzBY~qh72@ji9scbVy(d8pG_%mg_P}44NY`vQ2PNO_k-}lXkU>WkI-9M%@%Aau z8cj-0Ryo>(|9eF#JXt@9lm2{Q*o6$W>W5 zJ6DZ5?sZvfnRgj(0tUg$C<5T;Su>Cis7XZB{4E^6%d&Y~)c&K?1YQzTWEUgsO2GC-lZ4 zpmKM5d^|7%VkjM;MqJbt3BChBZ0V!{auD!IU_>Zwd=@*2va}2Ixdl35Qiw=MUiivl zje70;3Yc&-ud$2uW`$we0Ug@8H(+yDq|Ld~?+Jn~DWJ6C+xwdAy7cLWP-LZA?%&r8J?1y-=E~S3sobNTU~ho_+5Q;lrB8!3Uc%?bX!;9avG>{6 z0rLgOL-6TN3%D_C4Op#p0LOv$!HZh-&j29bN1BNR?OlTw082^&^gL|e;q(3_XFzwZ zuU*1&R?XE79sq4A5K;npx9=&uRCpx)}Vr=1u72pyaS=Z!JvaGK&SQi z04~PB-0wF0_(fu$Eqbv4R}TWvIml6vqCjT>w3R`xQ~Jw+@qoAeV3f49v|tLMc}~OP z-an9eGX3ai1iJ^02^F!1%O7`hb-@b|n4KVK9i9y`wof3sn)>_uuUwu~8JEIw0IU5~ zG1%~&rafy;x#R5)UkPM|(9srnc0Xq?vE;xt2X!ffarE@V+#?Wu;O~S48>(RP;ziV??7*eg z1Nd^}VGqEyV80Gf$`Gt`TKtyNUtAp4wWQ1VHz!_%&Rym&yGj3VxgKoD^Am1*KyZW<1^0kp z0XOP!0Lh2Z}(e2wMrGj(#APqgex`jy5 zN!>3$enlNvvICa~2g6!2eaQmt=XXX9v_TMCQgpdX2qJf3nW4>ANa1!&!fT)Ne10878sX3_TQAPsQs>YEg)0vaaTF&z z`w}ck3PqM+Vn!(%ur8pd=OIOcACCFKGZy%lz=d>gWMFYU+%O7|1q+-nz?MYA{2Qmj|jG=mf$v7H?j{ZXc){0%z$7M|TKQsoaM@ufwB&jR-^nWENmA zQ^8(aR{==oqte@8VhFKs<(+}4=^3a)g4|V-G#dDtKwPFu&?t<)P=kPcmyO&i5>E9o zU{eV&=&Z+PWT2&_K*G%r0Nt)Vpo_7Zb`K|r1riFli-Wa?7KJ@u0UDP%{2B;d!yLtb z-=B8rhh0o~jhupFf1|n|;H*wS?}8gV*FHY1S~#g^A(Ma=2G$CfM=h6dD-JSh^en<4 zw+j`A2k6EC2^AfKfatMGAh+FgG~2`1QleXk_9Fvhj>N=Qq_q5&vW|`$FyK%#{e{yI z`~<$7t8sahfZ{J?Ai)<%lu6YCRnX{y3=jsh5!OA>UIsS7i3{zVYF^&o*VhE^LcsU= zCFvSyjDP?mEL4cHyE8B5n$8cW;3~LRDxPWq@)(GtaN-E3s6)_38+L&~Bjjmd{vu}E zn+zEcT);Ym>>etMltmasJRzI#^7I6yMM!=Ij~-uVWd;5=AjO3qcE1H{iT-4Ppk{MH 
z*ry=KpQ8R}X$egjf_v2wa;2UrUsf1guwSsLGz5h+ev3v7u7qw7ylXO3uldwS4P;vnwd(sBuLYx?E}%<#r+lL;F+nx_a47dbtPTjpAo{6*oY*{s-Ax5+Wo6I?3cIo^uny4VdWZ{P+ygF54RA*f znh66}*U)6mh7vO{->_=O3C{+ZYW>p%@>_iW<={jGAv5U277|=G6#T_VM9u<#MdN@i z6yl|F(El=Yz=-{Sxoofx z!A~tef4KNLYcSyAlX#4VAiK^ZbcCG*iq0<2>>*e|Ic!?oE|mi54X8s-`SQsJ512e_ z5&?O~e2FK%V4nkp(?pcc4Ca7}C49;yiQ^87lEOp~{2>}h>|j;|u3qswWPZs~I; zXmghING6sy9U6}+rE5N_p1{P!oIWal4h~gVm|#@zz46aYGJSXzz!W0s-3&9{AR&zz zkU4p!tQ8AMwr(xWcdXFLW1)^k0{jV2D>=nP7fe%;fJ7k z(KL&2YN!++gPWE>CwGDaoXPu`UqLEXTG|iXI`-{BdQ!bELV@+91%MNHHq&tQg2eKt z_oq->g3;D$0c0?sQN84~0w*t2*=T9cbsJdRucUI%F3-Kr{J^~!)6fAxgA%!g42Y97NcOU|%YF2UA_04uP( z`D87@z&u6RUKjkwfZ7MhBQVN!J6O9HJOJ(_XxV8vcAi2t1QiB2DzKo?t%k(Jp(%qX z8UgG0G!Wx}^i*;Qxa*+g1P2Fja26pqfb$N@H^HVSXbRn&_x7FO0dzFDRN+&#FgGt7 zUR`>S zVTiz9@;;Ev;ox~X`?($(5unT%JOCeU?t1{Q4h|+O{42kc-oI}KaZxSlf4@O~pam>m znqG>}1Rz;T@1Fww=WS9FdiY@0U?{FD18E$Z9TI&_;TOB-#S_a9=#2Iv%R6(~VF zL-j}22->?Elq`3&mqMBRzR;{6cm)ptLpE9<@95eEP(g~`JHUV;vtZ5X`>w#mAX!dk zQf!536u_#an@(vCT!l9ns@*YBR-Z$(5KjQrW0)A2NdNX5G}Pa;H6GYW#I!M9dg{xXeK6V>SZqw zkC4ZbgX6R`&jnV?&w~RH!6w9!2PQYnuUQlbD!7K}y@1^gd(##8D$SP{-Uc~?b|5}( zV{Xo9whSrgqlE_Wbh-u(vX9%Dp-}=V)GZ)5fF|$w^&qpzt}A_dqoty944MCiyZ5Mf z?*73jK==B9T?S$if+;158QvfdLlwbJfj@Iwfj6n~#W%>>O~o$R$iKr#ud0a&3y*+_ z3|Rc31vtf?nu0DTLk17R4L3N!fJ0^Uk1huJ2&@QPP8=l&6`*8 zLv#z__#9Gv!jR#itsUb{;MXV6K5PlE06 zV(wlUd4qhOR>syW3kW6L`K!jK4BlN3)Jahw!VF7~gn$6#3j9FO0_`*4A&T6=(k8=< zu6{hm8^~-LWIYYkbEy9Ev$Gdq?%{Al=YmVl|3GtqojeRaOfr4nDY&8D^ zV3mz^I7DJ2FYD4&B`3o;9QBK&qcsQpi;=2YUK+%9OJ7xz7> zL1h(uEZN`NgUP-Ci(6V!QhqqsVV@fs0(sdA^cbPB#2O4hQGbw*f-<=C>sL@bI7*8} zX2I9-0^wpv$sfwf%F4-M%y7o4JP}6bzJjAzThu;;UA%yN7Aqj~=p+5Mk z>>VCzs;I~!2QPU6dlSPm=K-UIpy7%Bne|+o_#|A4AtOP*wc#V!9QETO1LJX!> zj3R3A!QlGB1l}L8VBTrwuhSgq{ROFE2j8vKY+VS}{r zi~k#12wgd+(HyFOp%is%`Ax&f*bHgviB5u=2-u)`C7wgeY9}X0$--+jx5z&q3XwLN z;!y{KKfiW8Kwa}1*fxOKjj0Seu))IHFNlXe%c8P!lr_4YB{nvC_t~>;2w{!yneRA^ z_=YpiSPP~U9d3fw@60xbl7a%-jQe2aQHC$5Dl3s90=mg^8NyKJnIW3Na-t_Y<=!aOwL 
zp@sBiaTmHAMZ7O{@85Udp1osXF$d?Hap#YLcbdzvW=ifqR#8y_dGyT?YSlQ`KLre( z$B;wOMse`+{t#(wp`uW%G~_vA(olX_Jpm!rpa-xX@BRUfm^(p@Hwmonf3kbz3su4HFK;}eVGjt4t0-->^C^$G+EdbgV(8mEEVhGwWAvfM!d+C5q>-oRmAroXE z*wN-U#Q87dZxuu!I4TWGAe!!POvD5CcRBEG4!X@vYf1|@OaD&`yyg;@yH8SM@P`aI Qc_R?2ikb>#D9gbA0}6u75dZ)H From f6b95fe685680ff099bc2863d969940794506d7b Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 15:24:58 +0100 Subject: [PATCH 36/44] md --- test/docs/AD_backend.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 439893cc..81eb1db1 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -15,7 +15,6 @@ Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", ``` Takeaways: -- `:enzyme` and `:zygote` currently fail (see notes below) - the `:optimized` backend (with forward over reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity with SparseConnectivityTracer.jl in terms of allocations and time. - manual sparse pattern seems to give better performance for larger problems. See also the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. The sparsity pattern detection in JuMP relies on the expression tree of the objective and constraints built from its DSL. @@ -94,7 +93,7 @@ ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. - check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! - reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? eg for dynamics and path constraints -- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small) +- try to disable some unused (?) parts such as hprod ? 
(according to show_time info the impact may be small). cf new option `excluded_backend=[:jprod_backend, :jtprod_backend, :hprod_backend, :ghjvprod_backend]` ## Errors for Enzyme: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with From 2485299cf5e65187355ffbd4328c5b65eb374074 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 6 Feb 2025 19:33:57 +0100 Subject: [PATCH 37/44] jump --- test/ab_jump.jl | 1 + test/docs/jump_ctdirect.md | 65 ++++++++++++++++++++------------------ test/jump_comparison.jl | 19 ++++++----- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/test/ab_jump.jl b/test/ab_jump.jl index ebec29ea..421aa1d4 100644 --- a/test/ab_jump.jl +++ b/test/ab_jump.jl @@ -27,6 +27,7 @@ function algal_bacterial_jump(;grid_size=1000, disc_method=:trapeze, print_level set_optimizer_attribute(sys, "max_iter", 1500) set_optimizer_attribute(sys, "mu_strategy", "adaptive") set_optimizer_attribute(sys, "sb", "yes") + set_optimizer_attribute(sys, "print_user_options", "yes") # Discretization parameters N = grid_size diff --git a/test/docs/jump_ctdirect.md b/test/docs/jump_ctdirect.md index 742f462a..75d6ac11 100644 --- a/test/docs/jump_ctdirect.md +++ b/test/docs/jump_ctdirect.md @@ -1,7 +1,7 @@ # Jump / CTDirect comparison - algal bacterial problem -Note that the problem is redefined for each method: jump, ctdirect and ctdirect new model. -Also, the Gauss Legendre 2 implementations for Jump and CTDirect here use a piecewise constant control. +Note that the problem is redefined for each method, jump and ctdirect. +Also, the Gauss Legendre 2 implementations use a piecewise constant control. ## Takeaways - CTDirect still allocates at least x10 more memory, worsening for higher problem sizes @@ -15,32 +15,37 @@ Maybe a less sparse but faster and less memory intensive method is used ? 
## Todo - check on ipopt last iteration that tol is also 1e-8 for Jump - test CTDirect with manual sparsity patterns -- find more details on the Hessian in Jump - investigate how jump finds a cleaner solution for trapeze discretization (print settings ?) -- can we have linear memory wrt steps for Jump / Trapeze ? ## Results: Jump vs CTDirect See `test/jump_comparison.jl` -Ipopt details: `Ipopt version 3.14.17, running with linear solver MUMPS 5.7.3` +Ipopt details: `This is Ipopt version 3.14.14, running with linear solver MUMPS 5.6.2.` Settings: tol=1e-8, mu_strategy=adaptive +``` +Jump trapeze 1000: 15.889 s (7920529 allocations: 351.87 MiB) +Jump trapeze 2000: 59.236 s (23055275 allocations: 891.64 MiB) +Jump trapeze 5000: 128.857 s (55226845 allocations: 2.10 GiB) +Jump gauss_legendre_2 1000: 15.583 s (10998729 allocations: 726.48 MiB) +Jump gauss_legendre_2 2000: 26.836 s (21371405 allocations: 1.40 GiB) +Jump gauss_legendre_2 5000: 74.715 s (56343588 allocations: 3.58 GiB) +``` -+++redo ``` -Jump trapeze 1000: 17.029 s (7920527 allocations: 351.87 MiB) -Jump trapeze 2000: 56.928 s (23055273 allocations: 891.64 MiB) -Jump trapeze 5000: 125.798 s (55226843 allocations: 2.10 GiB) -Jump gauss_legendre_2 1000: 15.398 s (10988856 allocations: 726.32 MiB) -Jump gauss_legendre_2 2000: 27.401 s (21345532 allocations: 1.40 GiB) -Jump gauss_legendre_2 5000: 76.593 s (56269715 allocations: 3.57 GiB) +CTDirect (optimized) trapeze 1000: 19.976 s (46501061 allocations: 4.54 GiB) +CTDirect (optimized) trapeze 2000: 39.350 s (89302127 allocations: 12.26 GiB) +CTDirect (optimized) trapeze 5000: 127.653 s (267989402 allocations: 49.33 GiB) +CTDirect (optimized) gauss_legendre_2 1000: 30.309 s (36508333 allocations: 14.56 GiB) +CTDirect (optimized) gauss_legendre_2 2000: 90.069 s (85715676 allocations: 42.83 GiB) +CTDirect (optimized) gauss_legendre_2 5000: 293.751 s (159734254 allocations: 304.56 GiB) ``` ``` -CTDirect trapeze 1000: 20.110 s (46501059 allocations: 4.54 GiB) 
-CTDirect trapeze 2000: 41.097 s (89302125 allocations: 12.26 GiB) -CTDirect trapeze 5000: 133.268 s (267989400 allocations: 49.33 GiB) -CTDirect gauss_legendre_2 1000: 33.181 s (37843213 allocations: 14.79 GiB) -CTDirect gauss_legendre_2 2000: 82.605 s (82766476 allocations: 43.19 GiB) -CTDirect gauss_legendre_2 5000: 356.338 s (221161426 allocations: 312.28 GiB) +CTDirect (manual) trapeze 1000: 49.673 s (66608733 allocations: 5.40 GiB) +CTDirect (manual) trapeze 2000: 112.605 s (140277493 allocations: 11.40 GiB) +CTDirect (manual) trapeze 5000: 336.893 s (418023859 allocations: 34.07 GiB) +CTDirect (manual) gauss_legendre_2 1000: 40.780 s (56179398 allocations: 4.20 GiB) +CTDirect (manual) gauss_legendre_2 2000: 94.828 s (127623066 allocations: 9.56 GiB) +CTDirect (manual) gauss_legendre_2 5000: 197.400 s (263619301 allocations: 19.84 GiB) ``` @@ -62,20 +67,20 @@ CTDirect gauss_legendre_2 5000: 356.338 s (221161426 allocations: 312.28 GiB) ## Details: Gauss Legendre 2 (1000 and 5000 steps) -redo all 4 ct tests ++++redo ct | | Jump | CT | Manual | Jump | CT | Manual | |-----------------|--------|--------|--------|----------|----------|----------| -|nnz jacobian | 118006 | 118006 | 384072 | 590006 | 590006 | | -|nnz hessian | 322000 | 63000 | 319036 | 1610000 | 315000 | | -|variables | 20006 | 20008 | 20008 | 100006 | 100008 | | -|lowerbound | 6006 | 6006 | 6006 | 3006 | 30006 | | -|lower/upper | 2000 | 2002 | 2002 | 10000 | 10002 | | -|equality | 18006 | 18006 | 18006 | 90006 | 90006 | | -|iterations | 117 | 95 | 91 | 146 | 78 | | -|objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | | -|structure | clean | clean | clean | clean | clean | | -|allocations | 726MB | 14.6GB | 5.0GB | 3.6GB | 305GB | | -|time | 15 | 28 | 45 | 77 | 291* | | +|nnz jacobian | 118006 | 118006 | 384072 | 590006 | 590006 | 1920072 | +|nnz hessian | 322000 | 63000 | 210072 | 1610000 | 315000 | 1050072 | +|variables | 20006 | 20008 | 20008 | 100006 | 100008 | 100008 | +|lowerbound | 6006 
| 6006 | 6006 | 3006 | 30006 | 30006 | +|lower/upper | 2000 | 2002 | 2002 | 10000 | 10002 | 10002 | +|equality | 18006 | 18006 | 18006 | 90006 | 90006 | 90006 | +|iterations | 117 | 95 | 93 | 146 | 78 | 86 | +|objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | +|structure | clean | clean | clean | clean | clean | clean | +|allocations | 726MB | 14.6GB | 4.2GB | 3.6GB | 305GB | 19.8GB | +|time | 15 | 28 | 40 | 77 | 291* | 188 | * half the time is before optimization, swap effect due to huge allocations ? diff --git a/test/jump_comparison.jl b/test/jump_comparison.jl index fa1c792f..a587bea4 100644 --- a/test/jump_comparison.jl +++ b/test/jump_comparison.jl @@ -8,10 +8,11 @@ using Printf jump = true -ctdirect = false +ctdirect = true +adnlp_backend_list = [:manual, :optimized] #disc_method_list = [:gauss_legendre_2] -disc_method_list = [:trapeze] -#disc_method_list = [:trapeze, :gauss_legendre_2] +#disc_method_list = [:trapeze] +disc_method_list = [:trapeze, :gauss_legendre_2] grid_size_list = [1000, 2000, 5000] # Jump @@ -28,10 +29,12 @@ end # CTDirect include("problems/algal_bacterial.jl") if ctdirect -for disc_method in disc_method_list - for grid_size in grid_size_list - @printf("CTDirect %s %d:", disc_method, grid_size) - @btime direct_solve(algal_bacterial().ocp, grid_size=$grid_size, disc_method=$disc_method, print_level=0, constant_control=true) + for backend in adnlp_backend_list + for disc_method in disc_method_list + for grid_size in grid_size_list + @printf("CTDirect (%s) %s %d:", backend, disc_method, grid_size) + @btime direct_solve(algal_bacterial().ocp, grid_size=$grid_size, disc_method=$disc_method, print_level=0, adnlp_backend=$backend) + end + end end -end end \ No newline at end of file From 6a2b07ead3da8eb486d3312c86df33016f4666a8 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Fri, 7 Feb 2025 17:26:50 +0100 Subject: [PATCH 38/44] disable unused AD backends --- ext/CTSolveExtIpopt.jl | 1 + src/solve.jl | 18 +++++++++---- 
test/docs/AD_backend.md | 57 ++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/ext/CTSolveExtIpopt.jl b/ext/CTSolveExtIpopt.jl index 5e61d4a6..e13aad6a 100644 --- a/ext/CTSolveExtIpopt.jl +++ b/ext/CTSolveExtIpopt.jl @@ -59,6 +59,7 @@ function CTDirect.solve_docp( tol = tol, max_iter = max_iter, sb = "yes", + #check_derivatives_for_naninf = "yes", linear_solver = linear_solver; kwargs..., ) diff --git a/src/solve.jl b/src/solve.jl index 6722fa83..736ed726 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -56,17 +56,25 @@ function direct_transcription( nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, gradient_backend = ADNLPModels.ReverseDiffADGradient, - hprod_backend = ADNLPModels.ReverseDiffADHvprod, - jtprod_backend = ADNLPModels.ReverseDiffADJtprod, jacobian_backend = J_backend, hessian_backend = H_backend, - show_time = show_time + hprod_backend = ADNLPModels.EmptyADbackend, + jtprod_backend = ADNLPModels.EmptyADbackend, + jprod_backend = ADNLPModels.EmptyADbackend, + ghjvprod_backend = ADNLPModels.EmptyADbackend, + show_time = show_time, + #excluded_backend = [:jprod_backend, :jtprod_backend, :hprod_backend, :ghjvprod_backend] ) else # build NLP nlp = ADNLPModel!( f, x0, docp.var_l, docp.var_u, c!, docp.con_l, docp.con_u, - backend = adnlp_backend, show_time = show_time + backend = adnlp_backend, + hprod_backend = ADNLPModels.EmptyADbackend, + jtprod_backend = ADNLPModels.EmptyADbackend, + jprod_backend = ADNLPModels.EmptyADbackend, + ghjvprod_backend = ADNLPModels.EmptyADbackend, + show_time = show_time, ) end @@ -118,7 +126,7 @@ function direct_solve( time_grid = time_grid, disc_method = disc_method, control_type = control_type, - adnlp_backend = adnlp_backend + adnlp_backend = adnlp_backend, ) # solve DOCP diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 81eb1db1..816959ce 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -21,15 +21,15 
@@ Takeaways: ![benchmark](AD_backend.png) Standard benchmark for Trapeze: -| Trapeze | default | optimized | manual* | manual** | -|---------|---------|-----------|---------|----------| -| 250 | 49.7 | 0.9 | 1.5 | 1.4 | -| 500 | | 2.4 | 3.5 | 3.3 | -| 1000 | | 5.6 | 6.4 | 5.9 | -| 2500 | | 23.9 | 23.9 | 18.7 | -| 5000 | | 89.6 | 56.3 | 41.5 | -| 7500 | | 225.4 | 85.9 | 66.3 | -| 10000 | | 526.3 | 102.4 | 90.4 | +| Trapeze | default | optimized | manual | +|---------|---------|-----------|----------| +| 250 | 28.4 | 0.9 | 1.4 | +| 500 | 155.7 | 2.4 | 3.3 | +| 1000 | 978.9 | 5.6 | 5.9 | +| 2500 | | 23.9 | 18.7 | +| 5000 | | 89.6 | 41.5 | +| 7500 | | 225.4 | 66.3 | +| 10000 | | 526.3 | 90.4 | * (older version) build sparse matrices from dense boolean matrices ** build sparse matrices from (i,j,v) vectors @@ -54,16 +54,16 @@ Standard benchmark for Gauss Legendre 2: | 5000 | 551.9 | 172.2 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) -| transcription | optimized | manual*/** | optimized | manual*/** | -|---------------|-----------|------------|-----------|--------| -| NLP vars | 4005 | 4005 | 40005 | 40005 | -| NLP cons | 6007 | 6007 | 60007 | 60007 | -| Hess nnz | 11011 | 30024 | 110011 | 300024 | -| H sparsity | 99.86% | 99.63% | 99.99% | 99.96% | -| Jac nnz | 28011 | 42043 | 280011 | 420043 | -| J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | -| allocs | 1.2GB | 106 / 92MB | 71.6GB | 4.55 / 0.88 GB | -| time | 750ms | 85 / 95ms | 64.7s*** | 3.8 / 2.5s | +| transcription | optimized | manual | optimized | manual | +|---------------|-----------|------------|-----------|---------| +| NLP vars | 4005 | 4005 | 40005 | 40005 | +| NLP cons | 6007 | 6007 | 60007 | 60007 | +| Hess nnz | 11011 | 30024 | 110011 | 300024 | +| H sparsity | 99.86% | 99.63% | 99.99% | 99.96% | +| Jac nnz | 28011 | 42043 | 280011 | 420043 | +| J sparsity | 99.88% | 99.83% | 99.99% | 99.98% | +| allocs | 1.2GB | 92MB | 71.6GB | 0.88 GB | +| time | 750ms | 95ms | 64.7s*** | 2.5s | 
*** hessian accounts for 59 out of total 65s ``` @@ -77,23 +77,22 @@ hessian backend ADNLPModels.SparseReverseADHessian: 58.450146911 seconds; ghjvprod backend ADNLPModels.ForwardDiffADGHjvprod: 4.339e-6 seconds. ``` -| solve | optimized | manual*/** | optimized | manual*/** | -|---------------|-----------|-------------|-----------|-------------| -| iterations | 42 | 28 | 51 | 29 | -| allocs | 2.0GB | 1.2/1.2GB | 87.5GB | 16.9/13.2GB | -| time | 2.5s | 2.5/2.6s | 151.0s*** | 42.4/31.6s | +| solve | optimized | manual | optimized | manual | +|---------------|-----------|---------|-----------|---------| +| iterations | 42 | 28 | 51 | 29 | +| allocs | 2.0GB | 1.2GB | 87.5GB | 13.2GB | +| time | 2.5s | 2.6s | 151.0s*** | 31.6s | *** building the hessian is one third of the total solve time... ## Remarks: -- it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix. For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). +- it is better to build the sparse matrices from the index vectors format rather than a dense boolean matrix (10-20% faster). For larger problems it may not be possible to even allocate the boolean matrix (eg. algal bacterial with GL2 at 5000 steps). +- disabling the unused backends for the various matrix products (jprod_backend, jtprod_backend, hprod_backend, ghjvprod_backend) gives a slight increase in performance. ## Todo: -- check the relevance of computing the nnz beforehand and allocate the full index vectors directly instead of using push! -- reuse ADNLPModels functions to get block sparsity patterns then rebuild full patterns ? -eg for dynamics and path constraints -- try to disable some unused (?) parts such as hprod ? (according to show_time info the impact may be small). 
cf new option `excluded_backend=[:jprod_backend, :jtprod_backend, :hprod_backend, :ghjvprod_backend]` +- use automatic differentiation to get the sparsity patterns for first / second derivatives of the OCP functions, and build the Jacobian / Hessian patterns from these instead of assuming full nonzero blocks. +- some gain may be achieved by preallocating the index vectors ## Errors for Enzyme: - enzyme gives correct nonzero counts for Jacobian and Hessian, but fails with From ae6358067a15a641a093fd069134f23f0228e88a Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Sat, 8 Feb 2025 11:39:28 +0100 Subject: [PATCH 39/44] update tests --- test/docs/AD_backend.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 816959ce..93f50505 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -23,13 +23,13 @@ Takeaways: Standard benchmark for Trapeze: | Trapeze | default | optimized | manual | |---------|---------|-----------|----------| -| 250 | 28.4 | 0.9 | 1.4 | -| 500 | 155.7 | 2.4 | 3.3 | -| 1000 | 978.9 | 5.6 | 5.9 | -| 2500 | | 23.9 | 18.7 | -| 5000 | | 89.6 | 41.5 | -| 7500 | | 225.4 | 66.3 | -| 10000 | | 526.3 | 90.4 | +| 250 | 51.9 | 0.8 | 1.4 | +| 500 | 219.0 | 2.0 | 3.1 | +| 1000 | 858.8 | 5.2 | 5.4 | +| 2500 | 6932.1 | 20.9 | 17.1 | +| 5000 | | 73.0 | 34.1 | +| 7500 | | 200.6 | 53.1 | +| 10000 | | 415.7 | 70.4 | * (older version) build sparse matrices from dense boolean matrices ** build sparse matrices from (i,j,v) vectors @@ -37,21 +37,21 @@ Standard benchmark for Trapeze: Standard benchmark for Midpoint: | Midpoint| optimized | manual | |---------|-----------|--------| -| 250 | 1.5 | 2.2 | -| 500 | 3.9 | 4.7 | -| 1000 | 11.1 | 11.2 | -| 2500 | 50.5 | 32.7 | -| 5000 | 160.3 | 87.0 | -| 7500 | 333.2 | 140.9 | +| 250 | 1.4 | 2.1 | +| 500 | 3.4 | 4.2 | +| 1000 | 10.0 | 10.2 | +| 2500 | 45.0 | 30.0 | +| 5000 | 150.1 | 79.0 | +| 7500 | 
322.6 | 130.7 | Standard benchmark for Gauss Legendre 2: -| GL2 | optimized | manual | +| GL2 | +optimized | manual | |---------|-----------|--------| -| 250 | 3.9 | 5.0 | -| 500 | 10.5 | 12.9 | -| 1000 | 121.2 | 26.1 | -| 2500 | 136.6 | 77.2 | -| 5000 | 551.9 | 172.2 | +| 250 | 3.1 | 4.3 | +| 500 | 8.8 | 11.5 | +| 1000 | 113.2 | 22.3 | +| 2500 | 119.5 | 68.3 | +| 5000 | 544.2 | 156.8 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | transcription | optimized | manual | optimized | manual | From e4a5297c08b1b8828b33c93ecdb96ce2c16fe230 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Tue, 11 Feb 2025 16:55:26 +0100 Subject: [PATCH 40/44] tests update --- test/docs/jump_ctdirect.md | 58 ++++++++++++++------------------------ test/jump_comparison.jl | 1 - 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/test/docs/jump_ctdirect.md b/test/docs/jump_ctdirect.md index 75d6ac11..e59058bb 100644 --- a/test/docs/jump_ctdirect.md +++ b/test/docs/jump_ctdirect.md @@ -4,17 +4,12 @@ Note that the problem is redefined for each method, jump and ctdirect. Also, the Gauss Legendre 2 implementations use a piecewise constant control. ## Takeaways -- CTDirect still allocates at least x10 more memory, worsening for higher problem sizes -For the biggest allocations, a significant time is passed during the AD phase, before Ipopt. The runs marked with * spend half the time before optimization, likely some swap issue. -We note that Jump memory appears linear wrt steps for GL2, but a bit superlinear for Trapeze. CTDirect memory always increases superlinearly wrt steps. -- Hessian seems to be handled differently by Jump, see the higher nonzero values. -Maybe a less sparse but faster and less memory intensive method is used ? -- convergence: objective and trajectory are similar, iterations differ, maybe due to the different hessian handling. 
Total computation times are similar for Trapeze and x2 to x5 slower for CTDirect for GL2, probably due to the memory effect. +- Hessian is handled differently by Jump, with a less sparse but faster method. +- CTDirect with manual sparsity patterns still allocates 5 to 10 times more memory than Jump, but scales better than the automatic sparsity detection. Manual mode becomes faster than automatic mode for GL2 above 2000 steps, while being slower for Trapeze even at 5000 steps. +- convergence: objective and trajectory are similar, iterations differ, maybe due to the different hessian handling. Total computation times for Trapeze are similar for Jump and CTDirect (auto). For GL2, Jump is faster than both CTDirect versions. - in terms of control structures, GL2 solutions are clean, Jump Trapeze solutions shows a bit of noise, while CTDirect Trapeze solutions are very noisy. How Jump manages to find a cleaner solution with Trapeze is unclear. ## Todo -- check on ipopt last iteration that tol is also 1e-8 for Jump -- test CTDirect with manual sparsity patterns - investigate how jump finds a cleaner solution for trapeze discretization (print settings ?) 
## Results: Jump vs CTDirect @@ -22,32 +17,27 @@ See `test/jump_comparison.jl` Ipopt details: `This is Ipopt version 3.14.14, running with linear solver MUMPS 5.6.2.` Settings: tol=1e-8, mu_strategy=adaptive ``` -Jump trapeze 1000: 15.889 s (7920529 allocations: 351.87 MiB) -Jump trapeze 2000: 59.236 s (23055275 allocations: 891.64 MiB) -Jump trapeze 5000: 128.857 s (55226845 allocations: 2.10 GiB) -Jump gauss_legendre_2 1000: 15.583 s (10998729 allocations: 726.48 MiB) -Jump gauss_legendre_2 2000: 26.836 s (21371405 allocations: 1.40 GiB) -Jump gauss_legendre_2 5000: 74.715 s (56343588 allocations: 3.58 GiB) -``` +Jump trapeze 1000: 15.633 s (7920529 allocations: 351.87 MiB) +Jump trapeze 2000: 57.472 s (23055275 allocations: 891.64 MiB) +Jump trapeze 5000: 124.108 s (55226845 allocations: 2.10 GiB) +Jump gauss_legendre_2 1000: 15.250 s (10998729 allocations: 726.48 MiB) +Jump gauss_legendre_2 2000: 26.686 s (21371405 allocations: 1.40 GiB) +Jump gauss_legendre_2 5000: 76.455 s (56343588 allocations: 3.58 GiB) -``` -CTDirect (optimized) trapeze 1000: 19.976 s (46501061 allocations: 4.54 GiB) -CTDirect (optimized) trapeze 2000: 39.350 s (89302127 allocations: 12.26 GiB) -CTDirect (optimized) trapeze 5000: 127.653 s (267989402 allocations: 49.33 GiB) -CTDirect (optimized) gauss_legendre_2 1000: 30.309 s (36508333 allocations: 14.56 GiB) -CTDirect (optimized) gauss_legendre_2 2000: 90.069 s (85715676 allocations: 42.83 GiB) -CTDirect (optimized) gauss_legendre_2 5000: 293.751 s (159734254 allocations: 304.56 GiB) -``` +CTDirect (optimized) trapeze 1000: 17.941 s (45041839 allocations: 4.47 GiB) +CTDirect (optimized) trapeze 2000: 35.811 s (86384903 allocations: 12.12 GiB) +CTDirect (optimized) trapeze 5000: 124.451 s (260698176 allocations: 48.99 GiB) +CTDirect (optimized) gauss_legendre_2 1000: 25.087 s (32172053 allocations: 14.36 GiB) +CTDirect (optimized) gauss_legendre_2 2000: 76.272 s (77043394 allocations: 42.43 GiB) +CTDirect (optimized) gauss_legendre_2 
5000: 281.000 s (138053972 allocations: 303.56 GiB) +CTDirect (manual) trapeze 1000: 47.442 s (65149511 allocations: 5.33 GiB) +CTDirect (manual) trapeze 2000: 105.765 s (137360269 allocations: 11.26 GiB) +CTDirect (manual) trapeze 5000: 324.819 s (410732633 allocations: 33.73 GiB) +CTDirect (manual) gauss_legendre_2 1000: 38.147 s (51843118 allocations: 4.00 GiB) +CTDirect (manual) gauss_legendre_2 2000: 87.863 s (118950784 allocations: 9.16 GiB) +CTDirect (manual) gauss_legendre_2 5000: 187.292 s (241939016 allocations: 18.85 GiB) ``` -CTDirect (manual) trapeze 1000: 49.673 s (66608733 allocations: 5.40 GiB) -CTDirect (manual) trapeze 2000: 112.605 s (140277493 allocations: 11.40 GiB) -CTDirect (manual) trapeze 5000: 336.893 s (418023859 allocations: 34.07 GiB) -CTDirect (manual) gauss_legendre_2 1000: 40.780 s (56179398 allocations: 4.20 GiB) -CTDirect (manual) gauss_legendre_2 2000: 94.828 s (127623066 allocations: 9.56 GiB) -CTDirect (manual) gauss_legendre_2 5000: 197.400 s (263619301 allocations: 19.84 GiB) -``` - ## Details: Trapeze (1000 and 5000 steps) @@ -62,12 +52,8 @@ CTDirect (manual) gauss_legendre_2 5000: 197.400 s (263619301 allocations: 19.8 |iterations | 334 | 365 | 333 | 517 | 420 | 419 | |objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | |structure | ok | noisy | noisy | ok | noisy | noisy | -|allocations | 352MB | 4.5GB | 5.50GB | 2.1GB | 49GB | 37GB | -|time | 17 | 20 | 50 | 126 | 136 | 354 | - ## Details: Gauss Legendre 2 (1000 and 5000 steps) -+++redo ct | | Jump | CT | Manual | Jump | CT | Manual | |-----------------|--------|--------|--------|----------|----------|----------| @@ -80,7 +66,5 @@ CTDirect (manual) gauss_legendre_2 5000: 197.400 s (263619301 allocations: 19.8 |iterations | 117 | 95 | 93 | 146 | 78 | 86 | |objective | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | 5.4522 | |structure | clean | clean | clean | clean | clean | clean | -|allocations | 726MB | 14.6GB | 4.2GB | 3.6GB | 305GB | 19.8GB | -|time | 15 | 
28 | 40 | 77 | 291* | 188 | * half the time is before optimization, swap effect due to huge allocations ? diff --git a/test/jump_comparison.jl b/test/jump_comparison.jl index a587bea4..8d9d4d8f 100644 --- a/test/jump_comparison.jl +++ b/test/jump_comparison.jl @@ -6,7 +6,6 @@ using MKL using BenchmarkTools using Printf - jump = true ctdirect = true adnlp_backend_list = [:manual, :optimized] From a67c5e728ea5895101267bd9975e34e058dec99d Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 12 Feb 2025 17:48:46 +0100 Subject: [PATCH 41/44] ready for merge --- test/docs/AD_backend.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/docs/AD_backend.md b/test/docs/AD_backend.md index 93f50505..78c1f743 100644 --- a/test/docs/AD_backend.md +++ b/test/docs/AD_backend.md @@ -1,7 +1,7 @@ # Benchmark for different AD backends The backend for ADNLPModels can be set in transcription / solve calls with the option `adnlp_backend=`. Possible values include the predefined(*) backends for ADNLPModels: - `:optimized`* Default for CTDirect. Forward mode for Jacobian, reverse for Gradient and forward over reverse for Hessian. -- `:default`* Forward mode for everything. Significantly slower. +- `:default`* Forward mode only. Significantly slower, but more rugged. - `:manual` Explicitely give to ADNLPModels the sparse pattern for Jacobian and Hessian. Uses the same forward / reverse settings as the `:optimized` predefined backend. - `:enzyme`* Enzyme (currently not working). @@ -15,7 +15,7 @@ Problem list: ["beam", "double_integrator_mintf", "double_integrator_minenergy", ``` Takeaways: -- the `:optimized` backend (with forward over reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity with SparseConnectivityTracer.jl in terms of allocations and time. 
+- the `:optimized` backend (with forward over reverse mode for Hessian) is much faster than full forward mode, but does not scale greatly. This is likely due to the increasing cost of computing the Hessian sparsity with SparseConnectivityTracer.jl in terms of allocations and time. Note that the `:default` backend that uses forward mode only may still be useful when having AD errors. - manual sparse pattern seems to give better performance for larger problems. See also the comparison with Jump that seems to use a different, less sparse but faster method for the Hessian. The sparsity pattern detection in JuMP relies on the expression tree of the objective and constraints built from its DSL. ![benchmark](AD_backend.png) @@ -45,13 +45,13 @@ Standard benchmark for Midpoint: | 7500 | 322.6 | 130.7 | Standard benchmark for Gauss Legendre 2: -| GL2 | +optimized | manual | +| GL2 | optimized | manual | |---------|-----------|--------| -| 250 | 3.1 | 4.3 | -| 500 | 8.8 | 11.5 | -| 1000 | 113.2 | 22.3 | -| 2500 | 119.5 | 68.3 | -| 5000 | 544.2 | 156.8 | +| 250 | 3.5 | 4.3 | +| 500 | 9.6 | 11.5 | +| 1000 | 125.5 | 22.3 | +| 2500 | 135.0 | 68.3 | +| 5000 | 527.7 | 156.8 | Sparsity details: goddard_all Trapeze (1000 and 10000 steps) | transcription | optimized | manual | optimized | manual | From 01a92e1a4cd5d9dd9afed2f1d076e70f7d1b7560 Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Wed, 12 Feb 2025 18:17:06 +0100 Subject: [PATCH 42/44] index --- docs/src/index.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 81ad3d61..a53ef00b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -82,15 +82,11 @@ LB \le C(X) \le UB \right. ``` -We use packages from [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) to solve the (NLP) problem. +Solving the (NLP) problem is done using packages from [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers), with Ipopt as the default solver. 
-As input of this package we use an [`OptimalControlModel`](@ref) structure from CTBase. +On the input side of this package, we use an [`OptimalControlModel`](@ref) structure from CTBase to define the (OCP). -!!! note "Current limitations" - - The current implemented is limited to - - trapezoidal rule for the ODE discretization - - `Ipopt` for the optimization software +The direct transcription to build the (NLP) can use discretization schemes such as trapeze (default), midpoint, or Gauss-Legendre collocations. !!! note "Related packages" From d97f1e3a4108c7496179b889cf5111f1fcb97f8d Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 13 Feb 2025 11:40:49 +0100 Subject: [PATCH 43/44] api update --- src/solve.jl | 127 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 47 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index 736ed726..a9d34f35 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -13,20 +13,95 @@ function available_methods() return algorithms end + +""" +$(TYPEDSIGNATURES) + +Solve an OCP with a direct method + +# Arguments +* ocp: optimal control problem as defined in `CTBase` +* [description]: can specify for instance the NLP model and / or solver + +# Keyword arguments (optional) +* `grid_size`: number of time steps for the discretized problem ([250]) +* `disc_method`: discretization method ([`:trapeze`], `:midpoint`, `:gauss_legendre_2`) +* `time_grid`: explicit time grid (can be non uniform) +* `init`: info for the starting guess (values or existing solution) +* `adnlp_backend`: backend for automatic differentiation in ADNLPModels ([`:optimized`], `:manual`, `:default`) +* `control_type`: ([`:constant`], `:linear`) control piecewise parametrization for IRK methods + +All further keywords are passed to the inner call of `solve_docp` +""" +function direct_solve( + ocp::OptimalControlModel, + description::Symbol...; + grid_size::Int = CTDirect.__grid_size(), + disc_method = __disc_method(), + time_grid = 
CTDirect.__time_grid(), + init = CTBase.__ocp_init(), + adnlp_backend = __adnlp_backend(), + control_type = __control_type(), + kwargs..., +) + method = getFullDescription(description, available_methods()) + + # build discretized OCP, including initial guess + docp, nlp = direct_transcription( + ocp, + description; + init = init, + grid_size = grid_size, + time_grid = time_grid, + disc_method = disc_method, + control_type = control_type, + adnlp_backend = adnlp_backend, + ) + + # solve DOCP + if :ipopt ∈ method + solver_backend = CTDirect.IpoptBackend() + elseif :madnlp ∈ method + solver_backend = CTDirect.MadNLPBackend() + else + error("no known solver in method", method) + end + docp_solution = CTDirect.solve_docp(solver_backend, docp, nlp; kwargs...) + + # build and return OCP solution + return OptimalControlSolution(docp, docp_solution) +end + + """ $(TYPEDSIGNATURES) Discretize an optimal control problem into a nonlinear optimization problem (ie direct transcription) + + +# Arguments +* ocp: optimal control problem as defined in `CTBase` +* [description]: can specifiy for instance the NLP model and / or solver + +# Keyword arguments (optional) +* `grid_size`: number of time steps for the discretized problem ([250]) +* `disc_method`: discretization method ([`:trapeze`], `:midpoint`, `:gauss_legendre_2`) +* `time_grid`: explicit time grid (can be non uniform) +* `init`: info for the starting guess (values or existing solution) +* `adnlp_backend`: backend for automatic differentiation in ADNLPModels ([`:optimized`], `:manual`, `:default`) +* `control_type`: ([`:constant`], `:linear`) control piecewise parametrization for IRK methods +* `show_time`: (`true`, [`false`]) show timing details from ADNLPModels + """ function direct_transcription( ocp::OptimalControlModel, - description...; - init = CTBase.__ocp_init(), + description::Symbol...; grid_size = __grid_size(), - time_grid = __time_grid(), disc_method = __disc_method(), - control_type = __control_type(), + 
time_grid = __time_grid(), + init = CTBase.__ocp_init(), adnlp_backend = __adnlp_backend(), + control_type = __control_type(), show_time = false ) @@ -81,6 +156,7 @@ function direct_transcription( return docp, nlp end + """ $(TYPEDSIGNATURES) @@ -99,49 +175,6 @@ function set_initial_guess(docp::DOCP, nlp, init) ) end -""" -$(TYPEDSIGNATURES) - -Solve an OCP with a direct method -""" -function direct_solve( - ocp::OptimalControlModel, - description::Symbol...; - init = CTBase.__ocp_init(), - grid_size::Int = CTDirect.__grid_size(), - time_grid = CTDirect.__time_grid(), - disc_method = __disc_method(), - control_type = __control_type(), - adnlp_backend = __adnlp_backend(), - kwargs..., -) - method = getFullDescription(description, available_methods()) - - # build discretized OCP, including initial guess - docp, nlp = direct_transcription( - ocp, - description; - init = init, - grid_size = grid_size, - time_grid = time_grid, - disc_method = disc_method, - control_type = control_type, - adnlp_backend = adnlp_backend, - ) - - # solve DOCP - if :ipopt ∈ method - solver_backend = CTDirect.IpoptBackend() - elseif :madnlp ∈ method - solver_backend = CTDirect.MadNLPBackend() - else - error("no known solver in method", method) - end - docp_solution = CTDirect.solve_docp(solver_backend, docp, nlp; kwargs...) 
- - # build and return OCP solution - return OptimalControlSolution(docp, docp_solution) -end # placeholders (see CTSolveExt*** extensions) abstract type AbstractSolverBackend end From afb212c27f4df18b633ff089df2fcbb0797a3cbc Mon Sep 17 00:00:00 2001 From: Pierre Martinon Date: Thu, 13 Feb 2025 12:06:03 +0100 Subject: [PATCH 44/44] removed type qualif for OCP description in direct_transcription --- src/solve.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/solve.jl b/src/solve.jl index a9d34f35..bc022d9c 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -78,7 +78,6 @@ $(TYPEDSIGNATURES) Discretize an optimal control problem into a nonlinear optimization problem (ie direct transcription) - # Arguments * ocp: optimal control problem as defined in `CTBase` * [description]: can specifiy for instance the NLP model and / or solver @@ -95,7 +94,7 @@ Discretize an optimal control problem into a nonlinear optimization problem (ie """ function direct_transcription( ocp::OptimalControlModel, - description::Symbol...; + description...; grid_size = __grid_size(), disc_method = __disc_method(), time_grid = __time_grid(),