From 1a4012e5d98f3352ba8e272b5a68aa3cb659ceda Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 15 May 2026 15:38:04 -0700 Subject: [PATCH 1/2] truncate tail sum < threshold --- src/Utilities/likelihood_informed.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Utilities/likelihood_informed.jl b/src/Utilities/likelihood_informed.jl index 75156c701..bfe345281 100644 --- a/src/Utilities/likelihood_informed.jl +++ b/src/Utilities/likelihood_informed.jl @@ -248,15 +248,16 @@ function initialize_processor!( # then get the eigen-decomposition decomp = eigen(diagnostic_mat, sortby = (-)) - sv_cumsum = cumsum(log.(decomp.values .+ 1) .^ 2) / sum(log.(decomp.values .+ 1) .^ 2) # frac Forstner distance-based cutoff + # truncate when tail_sum(log(1+λ_i) < ϵ + sv_tails = cumsum(reverse(log.(1 .+ decomp.values))) trunc_val = nothing retain_info = get_retain_info(li) if retain_info >= 1.0 trunc_val = apply_to == "in" ? input_dim : output_dim else - trunc_val = findfirst(x -> (x ≥ retain_info), sv_cumsum) - trunc_val = isnothing(trunc_val) ? (apply_to == "in" ? input_dim : output_dim) : trunc_val - @info " truncating at $trunc_val/$(length(sv_cumsum)) retaining $(100.0*sv_cumsum[trunc_val])% of the information" + trunc_val = Int(findfirst(k -> (k == length(sv_tails) || sv_tails[k] < 1-retain_info), eachindex(sv_tails))) + + @info " truncating at $trunc_val/$(length(sv_tails)) retaining $(100.0*(1-sv_tails[trunc_val]))% of the information" end encoder_mat = decomp.vectors[:, 1:trunc_val]' else # using diagnostic_f's and diagnostic_egrads From a324d45acc7c4f3900178dbf2855834a8132a2eb Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 22 May 2026 17:20:22 -0700 Subject: [PATCH 2/2] change kwarg --- examples/DimensionReduction/emulate_sample.jl | 18 +++--- src/Utilities/likelihood_informed.jl | 55 +++++++++++++------ test/Utilities/runtests.jl | 12 ++-- 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/examples/DimensionReduction/emulate_sample.jl b/examples/DimensionReduction/emulate_sample.jl index 10efcb5ea..e3a7686b2 100644 --- a/examples/DimensionReduction/emulate_sample.jl +++ b/examples/DimensionReduction/emulate_sample.jl @@ -38,7 +38,7 @@ function main() encoder_schedule_ref = [(decorrelate_structure_mat(), "in_and_out")] # chosen some truncations with similar dimension - rvs_rinfos = [(0.995, 0.99995), (0.99, 0.9995), (0.98, 0.995), (0.95, 0.97), (0.9, 0.9)] + rvs_specmas = [(0.995, 0.995), (0.99, 0.98), (0.98, 0.95), (0.95, 0.75), (0.9, 0.6)] # as we have more input samples than out stored in ekp. we provide the extended set of outputs to be used in the likelihood informed processor final_samples_out = ekp_samples[αs[end]][2] @@ -55,18 +55,18 @@ function main() bad_model(mod_type, mod_types) end - all_errs = zeros(length(rvs_rinfos), length(names)) - reduced_dims = zeros(Int, length(rvs_rinfos), length(names)) # store reduced dims for final table + all_errs = zeros(length(rvs_specmas), length(names)) + reduced_dims = zeros(Int, length(rvs_specmas), length(names)) # store reduced dims for final table - for (idx, rv_rinfo) in enumerate(rvs_rinfos) - rv, rinfo = rv_rinfo + for (idx, rv_specma) in enumerate(rvs_specmas) + rv, specma = rv_specma encoder_schedule_decorrelate_samp = [(decorrelate_sample_cov(retain_var = rv), "in_and_out")] encoder_schedule_decorrelate = [(decorrelate_structure_mat(retain_var = rv), "in_and_out")] encoder_schedules_li = [ [ (decorrelate_structure_mat(), "in_and_out"), - (likelihood_informed(retain_info = rinfo, iters = 1:i), "in"), - (likelihood_informed(retain_info = rinfo, iters = 1:1), "out"), + (likelihood_informed(retain_spectral_mass = specma, iters = 1:i), "in"), + (likelihood_informed(retain_spectral_mass = specma, iters = 1:1), "out"), ] for i in mask ] @@ -121,7 +121,7 @@ function main() @info "No truncation" else ed = get_encoded_dim(get_encoder_schedule(em), "in") - @info "Truncation criteria (var>$(rv), or information > $(rinfo))" + @info "Truncation criteria (var>$(rv), or information > $(specma))" @info "input dim reduced to: $(ed)" end @@ -155,7 +155,7 @@ function main() pairs = [(reduced_dims[i, j], all_errs[i, j]) for i in axes(all_errs, 1), j in axes(all_errs, 2)] df = DataFrame(pairs, Symbol.(names[1:end])) # columns = names - df.truncation = rvs_rinfos # add row labels + df.truncation = rvs_specmas # add row labels return df, all_errs, reduced_dims[:, 1:end] end diff --git a/src/Utilities/likelihood_informed.jl b/src/Utilities/likelihood_informed.jl index bfe345281..a827cae5a 100644 --- a/src/Utilities/likelihood_informed.jl +++ b/src/Utilities/likelihood_informed.jl @@ -2,7 +2,7 @@ using Manifolds, Manopt -export LikelihoodInformed, likelihood_informed, get_retain_info, get_iters, get_grad_type +export LikelihoodInformed, likelihood_informed, get_retain_spectral_mass, get_iters, get_grad_type """ $(TYPEDEF) @@ -24,7 +24,7 @@ mutable struct LikelihoodInformed{ encoder_mat::VV1 decoder_mat::VV2 data_mean::VV3 - retain_info::FT + retain_spectral_mass::FT apply_to::Union{Nothing, AbstractString} iters::VV4 grad_type::Symbol @@ -34,11 +34,11 @@ end $(TYPEDSIGNATURES) Constructs the `LikelihoodInformed` struct. Keywords: -- `retain_info`: the method will attempt to limit the KL divergence of the true posterior from the reduced posterior to a value proportional to (1 - `retain_info`). Choose `retain_info` close to 1 to get a good approximation in a large subspace, and reduce it to get a worse approximation in a smaller subspace. +- `retain_spectral_mass`: The truncation is chosen such that the remaining log-spectrum mass satisfies: `Σ_{i<=k} log(1 + λ_i)/ Σ_i log(1 + λ_i) ≥ retain_spectral_mass` smaller values produce more aggressive dimensionality reduction. - `iters`[`= [1]`]: the likelihood-informed data processor requires samples from the distribution `∝ π_prior(x) π_likelihood(y | x)^α` with `α ∈ [0, 1]`. Here, `iter` indicates the structure vector iterations to use, as sampled from these distributions. For how to pass in these samples, see the `use_data_as_samples` parameter. - `grad_type`[`= :linreg`]: how the gradient of the forward model at the samples will be approximated. Choose from `:linreg` (global linear regression) and `:localsl` (localized statistical linearization; see [Wacker, 2025]). """ -function likelihood_informed(; retain_info = 1, iters = 1, grad_type = :linreg) +function likelihood_informed(; retain_spectral_mass = 1, iters = 1, grad_type = :linreg) grad_types = [:linreg, :localsl] if grad_type ∉ grad_types throw(ArgumentError("Unknown grad_type=$grad_type, please select from $(grad_types)")) @@ -53,21 +53,21 @@ function likelihood_informed(; retain_info = 1, iters = 1, grad_type = :linreg) ) end - LikelihoodInformed([], [], [], retain_info, nothing, iters, grad_type) + LikelihoodInformed([], [], [], retain_spectral_mass, nothing, iters, grad_type) end get_encoder_mat(li::LikelihoodInformed) = li.encoder_mat get_decoder_mat(li::LikelihoodInformed) = li.decoder_mat get_data_mean(li::LikelihoodInformed) = li.data_mean -get_retain_info(li::LikelihoodInformed) = li.retain_info +get_retain_spectral_mass(li::LikelihoodInformed) = li.retain_spectral_mass get_iters(li::LikelihoodInformed) = li.iters get_grad_type(li::LikelihoodInformed) = li.grad_type function Base.show(io::IO, li::LikelihoodInformed) out = "LikelihoodInformed" out *= ": iters=$(get_iters(li)), grad_type=$(get_grad_type(li))" - if get_retain_info(li) < 1.0 - out *= ", retain_info=$(get_retain_info(li))" + if get_retain_spectral_mass(li) < 1.0 + out *= ", retain_spectral_mass=$(get_retain_spectral_mass(li))" end print(io, out) end @@ -247,17 +247,37 @@ function initialize_processor!( end # then get the eigen-decomposition - decomp = eigen(diagnostic_mat, sortby = (-)) - # truncate when tail_sum(log(1+λ_i) < ϵ - sv_tails = cumsum(reverse(log.(1 .+ decomp.values))) + decomp = eigen(diagnostic_mat, sortby = x -> -x) + # truncate when tail_sum(log(1+λ_i)) < ϵ + summand = log.(1 .+ decomp.values) + + sv_tails = reverse(cumsum(reverse(summand))) # sv_tails[k] = sum_{i>=k} summand[i] trunc_val = nothing - retain_info = get_retain_info(li) - if retain_info >= 1.0 + retain_spectral_mass = get_retain_spectral_mass(li) + if retain_spectral_mass >= 1.0 trunc_val = apply_to == "in" ? input_dim : output_dim else - trunc_val = Int(findfirst(k -> (k == length(sv_tails) || sv_tails[k] < 1-retain_info), eachindex(sv_tails))) + # interpret the user object + I_total = sv_tails[1] + ϵ = 1 - retain_spectral_mass + trunc_val = Int(findfirst(k -> (k == length(sv_tails) || sv_tails[k+1]/ I_total < ϵ ), eachindex(sv_tails))) + + I_tail = sv_tails[trunc_val+1] + I_frac = 1 - I_tail / I_total + + λk = decomp.values[trunc_val] + λk1 = trunc_val < length(decomp.values) ? decomp.values[trunc_val+1] : 0.0 - @info " truncating at $trunc_val/$(length(sv_tails)) retaining $(100.0*(1-sv_tails[trunc_val]))% of the information" + gap = λk / (λk1 + eps()) + tail_ratio = I_tail / log(1+λk) + @info """ + spectral truncation summary: + Truncated at eigenvalue: $(trunc_val) / $(length(decomp.values)), + Information retained: $(100*I_frac)% + Information lost (tail mass): $(I_tail) + tail-to-trunc. ratio (tail/$(trunc_val)): $(tail_ratio) + Spectral gap at truncation: $(gap) + """ end encoder_mat = decomp.vectors[:, 1:trunc_val]' else # using diagnostic_f's and diagnostic_egrads @@ -290,7 +310,7 @@ function initialize_processor!( k = 1 Vs = nothing - retain_info = get_retain_info(li) + retain_spectral_mass = get_retain_spectral_mass(li) while true M = Grassmann(output_dim, k) @@ -304,7 +324,8 @@ function initialize_processor!( ref = diagnostic_f(M, zeros(output_dim, 0)) val = diagnostic_f(M, Vs) - if val / ref ≤ 1 - retain_info + # TODO: look at greedy metric here... i.e. is the information added here worth it? Possibly with bisection + if val / ref ≤ 1 - retain_spectral_mass @info " truncating at $k/$output_dim retaining $(100.0*(1-val/ref))% of the KL divergence reduction" break # TODO: Start bisecting? else diff --git a/test/Utilities/runtests.jl b/test/Utilities/runtests.jl index 8703e6cd3..6511b4f26 100644 --- a/test/Utilities/runtests.jl +++ b/test/Utilities/runtests.jl @@ -268,11 +268,11 @@ end # likelihood informed ll = likelihood_informed() - @test get_retain_info(ll) == 1.0 + @test get_retain_spectral_mass(ll) == 1.0 @test get_iters(ll) == [1] @test get_grad_type(ll) == :linreg - ll2 = likelihood_informed(retain_info = 0.99, iters = 3:5, grad_type = :localsl) - @test get_retain_info(ll2) == 0.99 + ll2 = likelihood_informed(retain_spectral_mass = 0.99, iters = 3:5, grad_type = :localsl) + @test get_retain_spectral_mass(ll2) == 0.99 @test get_iters(ll2) == 3:5 @test get_grad_type(ll2) == :localsl ll3 = LikelihoodInformed([2], [3], [4], 0.99, nothing, [3:5], :localsl) @@ -335,8 +335,8 @@ end (canonical_correlation(), "in_and_out"), (decorrelate_structure_mat(retain_var = 0.95), "in_and_out"), (canonical_correlation(retain_var = 0.95), "in_and_out"), - (likelihood_informed(retain_info = 0.99), "in_and_out"), - (likelihood_informed(retain_info = 0.99, iters = 1:2, grad_type = :localsl), "in_and_out"), + (likelihood_informed(retain_spectral_mass = 0.99), "in_and_out"), + (likelihood_informed(retain_spectral_mass = 0.99, iters = 1:2, grad_type = :localsl), "in_and_out"), ] lossless = [fill(true, 6); fill(false, length(schedules) - 6)] # are these lossless approximations? @@ -352,7 +352,7 @@ end # quick test without struct. vec, test_kwargs_no_svs = merge(prior_kwargs, obs_kwargs) - sch_test = create_encoder_schedule((likelihood_informed(retain_info = 0.85), "in_and_out")) + sch_test = create_encoder_schedule((likelihood_informed(retain_spectral_mass = 0.85), "in_and_out")) initialize_and_encode_with_schedule!(sch_test, io_pairs; test_kwargs_no_svs...) # functional test pipeline