diff --git a/Project.toml b/Project.toml index 97c673e548..c43fca427c 100644 --- a/Project.toml +++ b/Project.toml @@ -47,7 +47,7 @@ DataAPI = "1" Dictionaries = "0.3, 0.4" DynamicQuantities = "1" FileIO = "1.1" -GLM = "1.9.1" +GLM = "1.9.2" GeoInterface = "1" GeometryBasics = "0.4.1, 0.5" GridLayoutBase = "0.6, 0.7, 0.8, 0.9, 0.10, 0.11" diff --git a/docs/src/reference/analyses.md b/docs/src/reference/analyses.md index 0a0b30975c..068d8a8077 100644 --- a/docs/src/reference/analyses.md +++ b/docs/src/reference/analyses.md @@ -152,6 +152,34 @@ specs = data(df) * mapping(:x, :y, color=:a => nonnumeric) * ( draw(specs) ``` +Below are a few examples showing the [`linear`](@ref) interface for performing weighted fits: + +```@example analyses +using Random +Random.seed!(111) +colors = Makie.wong_colors() +df = let + x = 1:5 + y_err = randn(length(x)) + y = x .+ y_err + y_unc = abs.(y_err) + (; x, y, y_unc) +end +specs = data(df) * mapping(:x, :y) * ( + (visual(Scatter) + mapping(:y_unc) * visual(Errorbars)) * visual(; label = "data") + + mapping(; weights = :y_unc) * ( + linear(; weighttype = :fweights, weighttransform = x -> inv.(x .^ 2)) * visual(; color = colors[1], label = "fweights") + + # Other weights available once GLM v2 is released + #linear(; weighttype = :aweights) * visual(; color = colors[2], label = "aweights") + #linear(; weighttype = :pweights) * visual(; color = colors[3], label = "pweights") + ) +) +draw(specs) +``` + +Note that the usual caveats still apply for working with different kinds of weights. See the [StatsBase.jl documentation](https://juliastats.org/StatsBase.jl/stable/weights/) for more. + ## Smoothing ```@docs diff --git a/src/transformations/linear.jl b/src/transformations/linear.jl index c55e777c8c..44fe4c3bff 100644 --- a/src/transformations/linear.jl +++ b/src/transformations/linear.jl @@ -3,6 +3,9 @@ Base.@kwdef struct LinearAnalysis{I} dropcollinear::Bool = false interval::I = automatic level::Float64 = 0.95 + weighttype::Symbol = :fweights + distr::GLM.Distribution = GLM.Normal() + link::GLM.Link = GLM.canonicallink(distr) end function add_intercept_column(x::AbstractVector{T}) where {T} @@ -12,16 +15,44 @@ function add_intercept_column(x::AbstractVector{T}) where {T} return mat end +function get_weighttype(s::Symbol) + #weighttype = if s == :fweights + # StatsBase.fweights + #else + # throw(ArgumentError("Currently, GLM.jl only supports `StatsBase.fweights`.")) + #end + + # TODO: Can support these weights as well after GLM v2 is released + # https://github.com/JuliaStats/GLM.jl/pull/619 + weighttype = if s == :aweights + StatsBase.aweights + elseif s == :pweights + StatsBase.pweights + elseif s == :fweights + StatsBase.fweights + else + throw(ArgumentError("Currently, GLM.jl only supports `aweights`, `pweights`, and `fweights`.")) + end + + return weighttype +end + # TODO: add multidimensional version function (l::LinearAnalysis)(input::ProcessedLayer) output = map(input) do p, n x, y = p xn = to_numerical(x) - weights = StatsBase.fweights(get(n, :weights, similar(xn, 0))) - default_interval = length(weights) > 0 ? nothing : :confidence - interval = l.interval === automatic ? default_interval : l.interval + weights = get_weighttype(l.weighttype)(get(n, :weights, similar(xn, 0))) + interval = l.interval === automatic ? :confidence : l.interval # FIXME: handle collinear case gracefully - lin_model = GLM.lm(add_intercept_column(xn), y; weights, l.dropcollinear) + lin_model = if isempty(weights) + GLM.lm(add_intercept_column(xn), y; l.dropcollinear) + else + # Supports confidence intervals, while `GLM.lm` currently does not + # TODO: `wts` --> `weights` after GLM v2 is released + # https://github.com/JuliaStats/GLM.jl/pull/631 + GLM.glm(add_intercept_column(xn), y, l.distr, l.link; wts = weights, l.dropcollinear) + end x̂n = collect(range(extrema(xn)..., length = l.npoints)) pred = GLM.predict(lin_model, add_intercept_column(x̂n); interval, l.level) x̂ = from_numerical(x̂n, x) @@ -52,7 +83,7 @@ function (l::LinearAnalysis)(input::ProcessedLayer) end """ - linear(; interval=automatic, level=0.95, dropcollinear=false, npoints=200) + linear(; interval=automatic, level=0.95, dropcollinear=false, npoints=200, weighttype=:fweights, weighttransform=identity, distr=GLM.Normal()) Compute a linear fit of `y ~ 1 + x`. An optional named mapping `weights` determines the weights. Use `interval` to specify what type of interval the shaded band should represent, @@ -65,6 +96,11 @@ it is possible to set `dropcollinear=true`. `npoints` is the number of points used by Makie to draw the shaded band. Weighted data is supported via the keyword `weights` (passed to `mapping`). +Additional weight support is provided via the `weighttype`, `weighttransform`, and `distr` keywords. +`weightype` specifies the `StatsBase.AbstractWeights` type to use. +`weighttransform` accepts an optional function to transform the weights before they are passed to `GLM.glm`. +`distr` is forwarded to `GLM.glm`. +See the GLM.jl documentation for more on working with weighted data. This transformation creates two `ProcessedLayer`s labelled `:prediction` and `:ci`, which can be styled separately with `[subvisual](@ref)`. """