From 92a0d42488742d985fab1ec7fbe921479767f639 Mon Sep 17 00:00:00 2001 From: BKGiwa <146840983+BKGiwa@users.noreply.github.com> Date: Fri, 17 May 2024 17:40:13 +0100 Subject: [PATCH 1/3] Estimate best fit distribution From 047e1b9b373e29c0315e9c1a1755cfb356fa6beb Mon Sep 17 00:00:00 2001 From: BKGiwa <146840983+BKGiwa@users.noreply.github.com> Date: Fri, 17 May 2024 17:42:30 +0100 Subject: [PATCH 2/3] Estimate best fit distribution --- tests/NAMESPACE | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/NAMESPACE diff --git a/tests/NAMESPACE b/tests/NAMESPACE new file mode 100644 index 0000000..89fdec3 --- /dev/null +++ b/tests/NAMESPACE @@ -0,0 +1,15 @@ +# Generated by roxygen2: do not edit by hand + +export(avail_dists) +export(d2n) +export(do_fits) +export(evaldist) +export(fake_leg) +export(fill_scale) +export(find_fits) +export(parse_fitdist) +export(parse_univariateML) +export(plot_best) +export(plot_failed) +export(single_fit) +export(squash_fits) From e06f7e33b5c634197959686e897257e81272ad6c Mon Sep 17 00:00:00 2001 From: BKGiwa <146840983+BKGiwa@users.noreply.github.com> Date: Fri, 24 May 2024 10:13:06 +0100 Subject: [PATCH 3/3] Documentation and NAMESPACE updates for peppwR --- DESCRIPTION | 4 +-- NAMESPACE | 13 +++++++ R/fits.R | 76 +++++++++++++++++++++++++++++++++++++-- R/peppwr.R | 13 ++++++- R/plots.R | 58 ++++++++++++++++++++++++++++++ man/avail_dists.Rd | 14 ++++++++ man/d2n.Rd | 17 +++++++++ man/do_fits.Rd | 17 +++++++++ man/evaldist.Rd | 19 ++++++++++ man/fake_leg.Rd | 17 +++++++++ man/fill_scale.Rd | 17 +++++++++ man/find_fits.Rd | 23 ++++++++++++ man/parse_fitdist.Rd | 17 +++++++++ man/parse_univariateML.Rd | 17 +++++++++ man/plot_best.Rd | 19 ++++++++++ man/plot_failed.Rd | 19 ++++++++++ man/single_fit.Rd | 20 +++++++++++ man/squash_fits.Rd | 17 +++++++++ 18 files changed, 392 insertions(+), 5 deletions(-) create mode 100644 man/avail_dists.Rd create mode 100644 man/d2n.Rd create mode 100644 man/do_fits.Rd create mode 
100644 man/evaldist.Rd create mode 100644 man/fake_leg.Rd create mode 100644 man/fill_scale.Rd create mode 100644 man/find_fits.Rd create mode 100644 man/parse_fitdist.Rd create mode 100644 man/parse_univariateML.Rd create mode 100644 man/plot_best.Rd create mode 100644 man/plot_failed.Rd create mode 100644 man/single_fit.Rd create mode 100644 man/squash_fits.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d97827a..84c2830 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: peppwR Title: Power Analysis For Phosphopeptide Abundance Hypothesis Test -Version: 0.0.0.9000 +Version: 0.0.0.9001 Authors@R: person("Dan", "MacLean", , "dan.maclean@tsl.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-1032-0887")) @@ -8,7 +8,7 @@ Description: Estimate best fit distributions and do power analysis for hypothesi License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 URL: https://github.com/danmaclean/peppwR BugReports: https://github.com/danmaclean/peppwR/issues Imports: diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..89fdec3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,15 @@ # Generated by roxygen2: do not edit by hand +export(avail_dists) +export(d2n) +export(do_fits) +export(evaldist) +export(fake_leg) +export(fill_scale) +export(find_fits) +export(parse_fitdist) +export(parse_univariateML) +export(plot_best) +export(plot_failed) +export(single_fit) +export(squash_fits) diff --git a/R/fits.R b/R/fits.R index c9a3438..82366e8 100644 --- a/R/fits.R +++ b/R/fits.R @@ -1,4 +1,14 @@ - +#' Fit a Single Distribution +#' +#' @description This fits a specified distribution to the first column of a data frame. +#' +#' @param df The data frame containing the data to fit. +#' @param dist The distribution name or a list of distribution models to try fitting. 
One of "nbinom", "gamma", "snorm", "norm", +#' "invgamma", "invgauss", "lnorm", "lgamma", and "pareto" +#' +#' @return A list containing the fit result or the distribution name in case there is an error. +#' +#' @export single_fit <- function(df, dist){ if (dist %in% c("nbinom")){ result <- tryCatch( @@ -16,7 +26,15 @@ single_fit <- function(df, dist){ } - +#' Available Distributions +#' +#' @description This provides a vector of available distributions for fitting. +#' +#' @return A character vector of available distribution names. +#' +#' +#' +#' @export avail_dists <- function() { c("gamma", "norm", "snorm", "invgamma", @@ -25,6 +43,17 @@ avail_dists <- function() { "nbinom") } + +#' Distribution Name to Normalized Name +#' +#' @description This converts distribution tags to names that humans can read and understand. +#' +#' @param tag A character string representing the distribution tag. +#' +#' @return A character string with the human-readable distribution name. +#' +#' +#' @export d2n <- function(tag){ v <- c("Gamma", "Normal", "Skew Normal", "InvGamma", "Inverse Gaussian", "Lognormal", "Log Gamma", "Pareto", "Negative Binomial") @@ -32,6 +61,17 @@ d2n <- function(tag){ v[tag] } + +#' Fit Multiple Distributions +#' +#' @description This fits multiple distributions to the first column of a data frame. +#' +#' @param df A data frame with the data to fit. +#' +#' @return A data frame with the fit results for each distribution. +#' +#' +#' @export do_fits <- function(df) { dists <- avail_dists() @@ -41,6 +81,16 @@ do_fits <- function(df) { } + +#' Squash Fit Results +#' +#' @description This processes fit results into a tidy format. +#' +#' @param fit A list containing the fit result. +#' +#' @return A tibble with the distribution name, log-likelihood, and Akaike Information Criterion (AIC). 
+#' +#' @export squash_fits <- function(fit){ if (length(fit) == 1){ @@ -58,6 +108,18 @@ squash_fits <- function(fit){ } + + +#' Parse Fit Results from fitdistrplus +#' +#' @description This converts fit results from the `fitdistrplus` package into a tidy format. +#' +#' @param fit An object returned by `fitdistrplus::fitdist`. +#' +#' @return A tibble with the distribution name, log-likelihood, and AIC. +#' +#' +#' @export parse_fitdist <- function(fit){ tibble::tibble( dist = fit$distname, @@ -66,6 +128,16 @@ parse_fitdist <- function(fit){ ) } + +#' Parse Fit Results from univariateML +#' +#' @description This converts fit results from the `univariateML` package into a tidy format. +#' +#' @param fit An object returned by `univariateML::model_select`. +#' +#' @return A tibble with the distribution name, log-likelihood, and AIC. +#' +#' @export parse_univariateML <- function(fit){ tibble::tibble( dist = attr(fit, "model"), diff --git a/R/peppwr.R b/R/peppwr.R index dc58b2a..e71a432 100644 --- a/R/peppwr.R +++ b/R/peppwr.R @@ -1,4 +1,15 @@ - +#' Find Fits for Nested Data +#' +#' @description This groups the data frame by specified columns, nests the data, and applies the fitting functions to each group. +#' +#' @param df A data frame containing the data to be fitted. +#' @param id_col A character string specifying the column name for the identifier. Default is "id". +#' @param group_col A character string specifying the column name for the group. Default is "group". +#' @param value_col A character string specifying the column name for the values to be fitted. Default is "value". +#' +#' @return A data frame with nested data and the fit results for each group. 
+#' +#' @export find_fits <- function(df, id_col="id", group_col="group", value_col="value"){ tidyr::nest(df, .by = tidyr::all_of(c(id_col, group_col )), data = {{value_col}} ) |> dplyr::mutate( diff --git a/R/plots.R b/R/plots.R index 8d5869c..730ac5b 100644 --- a/R/plots.R +++ b/R/plots.R @@ -1,3 +1,14 @@ + +#' Fill Scale for Distributions +#' +#' @description This generates a fill scale for distributions using the Set3 palette from RColorBrewer. +#' +#' @param name A character string specifying the name of the fill scale. +#' +#' @return A ggplot2 scale fill manual object. +#' +#' +#' @export fill_scale <- function(name = "name"){ cols <- RColorBrewer::brewer.pal(length(avail_dists()), "Set3") names(cols) <- d2n(avail_dists()) @@ -5,6 +16,17 @@ fill_scale <- function(name = "name"){ } + +#' Plot Best Fitted Models +#' +#' @description This plots the best-fitted models based on Log-Likelihood and AIC from the fits data frame. +#' +#' @param fits_df A data frame containing the fit results. +#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits". +#' +#' @return A ggplot object showing the best-fitted models. +#' +#' @export plot_best <- function(fits_df, fit_col="fits") { #TODO check and test df has fit_col 'fits' @@ -49,6 +71,17 @@ plot_best <- function(fits_df, fit_col="fits") { } + +#' Plot Failed Models +#' +#' @description This plots the count of failed model fits from the fits data frame. +#' +#' @param fits_df A data frame containing the fit results. +#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits". +#' +#' @return A ggplot object showing the count of failed models. +#' +#' @export plot_failed <- function(fits_df, fit_col = "fits"){ if (! 
fit_col %in% colnames(fits_df)){ @@ -79,6 +112,17 @@ plot_failed <- function(fits_df, fit_col = "fits"){ } + + +#' Fake Legend +#' +#' @description This generates a fake legend for the distribution plots. +#' +#' @param name A character string specifying the name of the legend. Default is "Dist". +#' +#' @return A ggplot2 legend object. +#' +#' @export fake_leg <- function(name="Dist") { p <- data.frame( dist = d2n(avail_dists()), @@ -93,6 +137,20 @@ fake_leg <- function(name="Dist") { cowplot::get_legend(p) } + + + +#' Evaluate Distributions +#' +#' @description This evaluates and plots the best-fitted and failed models from the fits data frame. +#' +#' @param fits_df A data frame containing the fit results. +#' +#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits". +#' +#' @return A ggplot object showing the evaluation of fitted models. +#' +#' @export evaldist <- function(fits_df, fit_col="fits"){ a <- plot_best(fits_df, fit_col = fit_col) b <- plot_failed(fits_df, fit_col=fit_col) diff --git a/man/avail_dists.Rd b/man/avail_dists.Rd new file mode 100644 index 0000000..2b0ab3b --- /dev/null +++ b/man/avail_dists.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{avail_dists} +\alias{avail_dists} +\title{Available Distributions} +\usage{ +avail_dists() +} +\value{ +A character vector of available distribution names. +} +\description{ +This provides a vector of available distributions for fitting. 
+} diff --git a/man/d2n.Rd b/man/d2n.Rd new file mode 100644 index 0000000..49d01e3 --- /dev/null +++ b/man/d2n.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{d2n} +\alias{d2n} +\title{Distribution Name to Normalized Name} +\usage{ +d2n(tag) +} +\arguments{ +\item{tag}{A character string representing the distribution tag.} +} +\value{ +A character string with the human-readable distribution name. +} +\description{ +This converts distribution tags to names that humans can read and understand. +} diff --git a/man/do_fits.Rd b/man/do_fits.Rd new file mode 100644 index 0000000..6496f65 --- /dev/null +++ b/man/do_fits.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{do_fits} +\alias{do_fits} +\title{Fit Multiple Distributions} +\usage{ +do_fits(df) +} +\arguments{ +\item{df}{A data frame with the data to fit.} +} +\value{ +A data frame with the fit results for each distribution. +} +\description{ +This fits multiple distributions to the first column of a data frame. +} diff --git a/man/evaldist.Rd b/man/evaldist.Rd new file mode 100644 index 0000000..2bedddd --- /dev/null +++ b/man/evaldist.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots.R +\name{evaldist} +\alias{evaldist} +\title{Evaluate Distributions} +\usage{ +evaldist(fits_df, fit_col = "fits") +} +\arguments{ +\item{fits_df}{A data frame containing the fit results.} + +\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".} +} +\value{ +A ggplot object showing the evaluation of fitted models. +} +\description{ +This evaluates and plots the best-fitted and failed models from the fits data frame. 
+} diff --git a/man/fake_leg.Rd b/man/fake_leg.Rd new file mode 100644 index 0000000..d6a06fb --- /dev/null +++ b/man/fake_leg.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots.R +\name{fake_leg} +\alias{fake_leg} +\title{Fake Legend} +\usage{ +fake_leg(name = "Dist") +} +\arguments{ +\item{name}{A character string specifying the name of the legend. Default is "Dist".} +} +\value{ +A ggplot2 legend object. +} +\description{ +This generates a fake legend for the distribution plots. +} diff --git a/man/fill_scale.Rd b/man/fill_scale.Rd new file mode 100644 index 0000000..5629dc2 --- /dev/null +++ b/man/fill_scale.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots.R +\name{fill_scale} +\alias{fill_scale} +\title{Fill Scale for Distributions} +\usage{ +fill_scale(name = "name") +} +\arguments{ +\item{name}{A character string specifying the name of the fill scale.} +} +\value{ +A ggplot2 scale fill manual object. +} +\description{ +This generates a fill scale for distributions using the Set3 palette from RColorBrewer. +} diff --git a/man/find_fits.Rd b/man/find_fits.Rd new file mode 100644 index 0000000..ec1a2aa --- /dev/null +++ b/man/find_fits.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/peppwr.R +\name{find_fits} +\alias{find_fits} +\title{Find Fits for Nested Data} +\usage{ +find_fits(df, id_col = "id", group_col = "group", value_col = "value") +} +\arguments{ +\item{df}{A data frame containing the data to be fitted.} + +\item{id_col}{A character string specifying the column name for the identifier. Default is "id".} + +\item{group_col}{A character string specifying the column name for the group. Default is "group".} + +\item{value_col}{A character string specifying the column name for the values to be fitted. 
Default is "value".} +} +\value{ +A data frame with nested data and the fit results for each group. +} +\description{ +This groups the data frame by specified columns, nests the data, and applies the fitting functions to each group. +} diff --git a/man/parse_fitdist.Rd b/man/parse_fitdist.Rd new file mode 100644 index 0000000..9b386c0 --- /dev/null +++ b/man/parse_fitdist.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{parse_fitdist} +\alias{parse_fitdist} +\title{Parse Fit Results from fitdistrplus} +\usage{ +parse_fitdist(fit) +} +\arguments{ +\item{fit}{An object returned by \code{fitdistrplus::fitdist}.} +} +\value{ +A tibble with the distribution name, log-likelihood, and AIC. +} +\description{ +This converts fit results from the \code{fitdistrplus} package into a tidy format. +} diff --git a/man/parse_univariateML.Rd b/man/parse_univariateML.Rd new file mode 100644 index 0000000..3168516 --- /dev/null +++ b/man/parse_univariateML.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{parse_univariateML} +\alias{parse_univariateML} +\title{Parse Fit Results from univariateML} +\usage{ +parse_univariateML(fit) +} +\arguments{ +\item{fit}{An object returned by \code{univariateML::model_select}.} +} +\value{ +A tibble with the distribution name, log-likelihood, and AIC. +} +\description{ +This converts fit results from the \code{univariateML} package into a tidy format. 
+} diff --git a/man/plot_best.Rd b/man/plot_best.Rd new file mode 100644 index 0000000..c4c2e74 --- /dev/null +++ b/man/plot_best.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots.R +\name{plot_best} +\alias{plot_best} +\title{Plot Best Fitted Models} +\usage{ +plot_best(fits_df, fit_col = "fits") +} +\arguments{ +\item{fits_df}{A data frame containing the fit results.} + +\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".} +} +\value{ +A ggplot object showing the best-fitted models. +} +\description{ +This plots the best-fitted models based on Log-Likelihood and AIC from the fits data frame. +} diff --git a/man/plot_failed.Rd b/man/plot_failed.Rd new file mode 100644 index 0000000..a75a1a8 --- /dev/null +++ b/man/plot_failed.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plots.R +\name{plot_failed} +\alias{plot_failed} +\title{Plot Failed Models} +\usage{ +plot_failed(fits_df, fit_col = "fits") +} +\arguments{ +\item{fits_df}{A data frame containing the fit results.} + +\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".} +} +\value{ +A ggplot object showing the count of failed models. +} +\description{ +This plots the count of failed model fits from the fits data frame. +} diff --git a/man/single_fit.Rd b/man/single_fit.Rd new file mode 100644 index 0000000..177fb24 --- /dev/null +++ b/man/single_fit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{single_fit} +\alias{single_fit} +\title{Fit a Single Distribution} +\usage{ +single_fit(df, dist) +} +\arguments{ +\item{df}{The data frame containing the data to fit.} + +\item{dist}{The distribution name or a list of distribution models to try fitting. 
One of "nbinom", "gamma", "snorm", "norm", +"invgamma", "invgauss", "lnorm", "lgamma", and "pareto"} +} +\value{ +A list containing the fit result or the distribution name in case there is an error. +} +\description{ +This fits a specified distribution to the first column of a data frame. +} diff --git a/man/squash_fits.Rd b/man/squash_fits.Rd new file mode 100644 index 0000000..b95b07c --- /dev/null +++ b/man/squash_fits.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fits.R +\name{squash_fits} +\alias{squash_fits} +\title{Squash Fit Results} +\usage{ +squash_fits(fit) +} +\arguments{ +\item{fit}{A list containing the fit result.} +} +\value{ +A tibble with the distribution name, log-likelihood, and Akaike Information Criterion (AIC). +} +\description{ +This processes fit results into a tidy format. +}