Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
name = "ParallelParticleSwarms"
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
version = "1.0.0"
authors = ["Utkarsh <rajpututkarsh530@gmail.com> and contributors"]
version = "1.0.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Runic = "62bfec6d-59d7-401d-8490-b29ee721c001"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
Expand All @@ -26,6 +29,7 @@ Enzyme = "0.13"
ForwardDiff = "0.10, 1.3"
JET = "0.9, 0.10, 0.11"
KernelAbstractions = "0.9"
LineSearch = "0.1"
Optimization = "4.1, 5.2"
PrecompileTools = "1"
QuasiMonteCarlo = "0.3"
Expand All @@ -39,7 +43,6 @@ julia = "1.10"

[extras]
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

Expand Down
185 changes: 141 additions & 44 deletions src/hybrid.jl
Original file line number Diff line number Diff line change
@@ -1,69 +1,166 @@
@kernel function simplebfgs_run!(nlprob, x0s, result, opt, maxiters, abstol, reltol)
using LinearAlgebra: dot
using KernelAbstractions
using SciMLBase
using Optimization
using LineSearch

"Check that `θ` lies within a slack-expanded box around `[lb, ub]`."
@inline _in_safe_box(θ, ::Nothing, ::Nothing) = all(isfinite, θ)
@inline function _in_safe_box(θ::AbstractArray{T}, lb, ub) where {T}
all(isfinite, θ) || return false
w = ub .- lb
return all(θ .>= lb .- T(2) .* w) && all(θ .<= ub .+ T(2) .* w)
end

"Sentinel gradient with huge magnitude used to reject out-of-box trial points."
@inline _huge_grad(θ::AbstractArray{T}) where {T} = map(_ -> T(1.0e15), θ)

"L-BFGS two-loop recursion (Nocedal & Wright Algorithm 7.4) over cyclic memory of length `M`."
@inline function _lbfgs_direction(g, S, Y, Rho, ::Val{M}, k) where {M}
T = eltype(g)
q = g
a = ntuple(_ -> zero(T), Val(M))
for j in 0:(M - 1)
idx = k - j
if idx >= 1
ii = mod1(idx, M)
aii = Rho[ii] * dot(S[ii], q)
a = Base.setindex(a, aii, ii)
q = q - aii * Y[ii]
end
end
γ = if k >= 1
kk = mod1(k, M)
yy = sum(abs2, Y[kk])
sy = dot(S[kk], Y[kk])
ifelse(yy > T(1.0e-30) && sy > zero(T), sy / yy, one(T))
else
one(T)
end
r = γ * q
for j in (M - 1):-1:0
idx = k - j
if idx >= 1
ii = mod1(idx, M)
β = Rho[ii] * dot(Y[ii], r)
r = r + (a[ii] - β) * S[ii]
end
end
return -r
end

"Per-particle hand-written L-BFGS with `LineSearch.StrongWolfeLineSearch`. Avoids `SciMLBase.init`, whose `promote_u0` path uses dynamic dispatch (invalid GPU IR)."
@kernel function lbfgs_run!(
grad_f, p, x0s, result, ls_cache, ::Val{M}, maxiters
) where {M}
i = @index(Global, Linear)
nlcache = remake(nlprob; u0 = x0s[i])
sol = solve(nlcache, opt; maxiters, abstol, reltol)
@inbounds result[i] = sol.u
x = x0s[i]
T = eltype(x)
g = grad_f(x, p)

z = zero(typeof(x))
S = ntuple(_ -> z, Val(M))
Y = ntuple(_ -> z, Val(M))
Rho = ntuple(_ -> zero(T), Val(M))
k = 0
active = all(isfinite, g)

for _ in 1:maxiters
if active
dir = _lbfgs_direction(g, S, Y, Rho, Val(M), k)
if dot(g, dir) >= zero(T)
dir = -g
k = 0
end
ls_sol = SciMLBase.solve!(ls_cache, x, dir)
α = T(ls_sol.step_size)
ok = ls_sol.retcode == ReturnCode.Success
if ok && isfinite(α) && α > zero(T)
xn = x + α * dir
gn = grad_f(xn, p)
if all(isfinite, gn)
s = xn - x
y = gn - g
sy = dot(s, y)
if sy > T(1.0e-10) && isfinite(one(T) / sy)
k += 1
ii = mod1(k, M)
S = Base.setindex(S, s, ii)
Y = Base.setindex(Y, y, ii)
Rho = Base.setindex(Rho, one(T) / sy, ii)
else
k = 0
end
x = xn
g = gn
else
active = false
end
else
active = false
end
end
end
@inbounds result[i] = x
end

"Hybrid PSO solve: global PSO exploration followed by per-particle local L-BFGS refinement."
function SciMLBase.solve!(
cache::HybridPSOCache, opt::HybridPSO{Backend, LocalOpt}, args...;
abstol = nothing,
reltol = nothing,
maxiters = 100, local_maxiters = 10, kwargs...
) where {
Backend, LocalOpt <: Union{LBFGS, BFGS},
}
pso_cache = cache.pso_cache

sol_pso = solve!(pso_cache)
x0s = sol_pso.original
abstol = nothing, reltol = nothing, maxiters = 100, local_maxiters = 10,
linesearch = LineSearch.StrongWolfeLineSearch(), kwargs...
) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}

backend = opt.backend
sol_pso = SciMLBase.solve!(cache.pso_cache; maxiters = maxiters)

orig_lb = cache.prob.lb
orig_ub = cache.prob.ub
prob = remake(cache.prob, lb = nothing, ub = nothing)

x0s = sol_pso.original
raw_grad = instantiate_gradient(prob.f.f, prob.f.adtype)
grad_f = (θ, p) ->
_in_safe_box(θ, orig_lb, orig_ub) ? raw_grad(θ, p) : _huge_grad(θ)

result = cache.start_points
copyto!(result, x0s)

∇f = instantiate_gradient(prob.f.f, prob.f.adtype)

kernel = simplebfgs_run!(backend)
nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p)
T = eltype(prob.u0)
D = length(prob.u0)
M_val = opt.local_opt isa LBFGS ? Val(min(opt.local_opt.threshold, D)) : Val(D)

nlalg = opt.local_opt isa LBFGS ?
SimpleLimitedMemoryBroyden(;
threshold = opt.local_opt.threshold,
linesearch = Val(true)
) : SimpleBroyden(; linesearch = Val(true))
# Construct the line-search cache directly: `LineSearch.init` would route
# through `SciMLBase.init`'s `promote_u0`, which dynamic-dispatches on GPU.
ls_cache = LineSearch.StaticStrongWolfeLineSearchCache(
grad_f, grad_f, prob.p,
T(linesearch.c1), T(linesearch.c2),
T(linesearch.α_init), T(linesearch.α_max),
linesearch.maxiters, linesearch.zoom_maxiters
)

t0 = time()
kernel = lbfgs_run!(opt.backend)
kernel(
nlprob,
x0s,
result,
nlalg,
local_maxiters,
abstol,
reltol;
grad_f, prob.p, x0s, result, ls_cache, M_val, local_maxiters;
ndrange = length(x0s)
)
KernelAbstractions.synchronize(opt.backend)

sol_bfgs = (x -> prob.f(x, prob.p)).(result)
sol_bfgs = (x -> isnan(x) ? convert(eltype(prob.u0), Inf) : x).(sol_bfgs)

minobj, ind = findmin(sol_bfgs)
sol_u,
sol_obj = minobj > sol_pso.objective ? (sol_pso.u, sol_pso.objective) :
(view(result, ind), minobj)
t1 = time()

# @show sol_pso.stats.time
Tobj = eltype(prob.u0)
result_fx = map(eachindex(result)) do i
x = result[i]
_in_safe_box(x, orig_lb, orig_ub) || return convert(Tobj, Inf)
v = prob.f(x, prob.p)
isnan(v) ? convert(Tobj, Inf) : v
end

solve_time = (t1 - t0) + sol_pso.stats.time
minobj, ind = findmin(result_fx)
best_obj = sol_pso.objective isa Real ? sol_pso.objective : sol_pso.objective[]
best_u, best_obj = minobj > best_obj ? (sol_pso.u, best_obj) : (result[ind], minobj)

solve_time = (time() - t0) + sol_pso.stats.time
return SciMLBase.build_solution(
SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
sol_u, sol_obj,
SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt, best_u, best_obj;
stats = Optimization.OptimizationStats(; time = solve_time)
)
end
Loading