From 92a0d42488742d985fab1ec7fbe921479767f639 Mon Sep 17 00:00:00 2001
From: BKGiwa <146840983+BKGiwa@users.noreply.github.com>
Date: Fri, 17 May 2024 17:40:13 +0100
Subject: [PATCH 1/3] Estimate best fit distribution


From 047e1b9b373e29c0315e9c1a1755cfb356fa6beb Mon Sep 17 00:00:00 2001
From: BKGiwa <146840983+BKGiwa@users.noreply.github.com>
Date: Fri, 17 May 2024 17:42:30 +0100
Subject: [PATCH 2/3] Estimate best fit distribution

---
 tests/NAMESPACE | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 tests/NAMESPACE

diff --git a/tests/NAMESPACE b/tests/NAMESPACE
new file mode 100644
index 0000000..89fdec3
--- /dev/null
+++ b/tests/NAMESPACE
@@ -0,0 +1,15 @@
+# Generated by roxygen2: do not edit by hand
+
+export(avail_dists)
+export(d2n)
+export(do_fits)
+export(evaldist)
+export(fake_leg)
+export(fill_scale)
+export(find_fits)
+export(parse_fitdist)
+export(parse_univariateML)
+export(plot_best)
+export(plot_failed)
+export(single_fit)
+export(squash_fits)

From e06f7e33b5c634197959686e897257e81272ad6c Mon Sep 17 00:00:00 2001
From: BKGiwa <146840983+BKGiwa@users.noreply.github.com>
Date: Fri, 24 May 2024 10:13:06 +0100
Subject: [PATCH 3/3] Documentation and NAMESPACE updates for peppwR

---
 DESCRIPTION               |  4 +--
 NAMESPACE                 | 13 +++++++
 R/fits.R                  | 76 +++++++++++++++++++++++++++++++++++++--
 R/peppwr.R                | 13 ++++++-
 R/plots.R                 | 58 ++++++++++++++++++++++++++++++
 man/avail_dists.Rd        | 14 ++++++++
 man/d2n.Rd                | 17 +++++++++
 man/do_fits.Rd            | 17 +++++++++
 man/evaldist.Rd           | 19 ++++++++++
 man/fake_leg.Rd           | 17 +++++++++
 man/fill_scale.Rd         | 17 +++++++++
 man/find_fits.Rd          | 23 ++++++++++++
 man/parse_fitdist.Rd      | 17 +++++++++
 man/parse_univariateML.Rd | 17 +++++++++
 man/plot_best.Rd          | 19 ++++++++++
 man/plot_failed.Rd        | 19 ++++++++++
 man/single_fit.Rd         | 20 +++++++++++
 man/squash_fits.Rd        | 17 +++++++++
 18 files changed, 392 insertions(+), 5 deletions(-)
 create mode 100644 man/avail_dists.Rd
 create mode 100644 man/d2n.Rd
 create mode 100644 man/do_fits.Rd
 create mode 100644 man/evaldist.Rd
 create mode 100644 man/fake_leg.Rd
 create mode 100644 man/fill_scale.Rd
 create mode 100644 man/find_fits.Rd
 create mode 100644 man/parse_fitdist.Rd
 create mode 100644 man/parse_univariateML.Rd
 create mode 100644 man/plot_best.Rd
 create mode 100644 man/plot_failed.Rd
 create mode 100644 man/single_fit.Rd
 create mode 100644 man/squash_fits.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index d97827a..84c2830 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: peppwR
 Title: Power Analysis For Phosphopeptide Abundance Hypothesis Test
-Version: 0.0.0.9000
+Version: 0.0.0.9001
 Authors@R: 
     person("Dan", "MacLean", , "dan.maclean@tsl.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-1032-0887"))
@@ -8,7 +8,7 @@ Description: Estimate best fit distributions and do power analysis for hypothesi
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 URL: https://github.com/danmaclean/peppwR
 BugReports: https://github.com/danmaclean/peppwR/issues
 Imports: 
diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..89fdec3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,15 @@
 # Generated by roxygen2: do not edit by hand
 
+export(avail_dists)
+export(d2n)
+export(do_fits)
+export(evaldist)
+export(fake_leg)
+export(fill_scale)
+export(find_fits)
+export(parse_fitdist)
+export(parse_univariateML)
+export(plot_best)
+export(plot_failed)
+export(single_fit)
+export(squash_fits)
diff --git a/R/fits.R b/R/fits.R
index c9a3438..82366e8 100644
--- a/R/fits.R
+++ b/R/fits.R
@@ -1,4 +1,14 @@
-
+#' Fit a Single Distribution
+#'
+#' @description This fits a specified distribution to the first column of a data frame.
+#'
+#' @param df The data frame containing the data to fit.
+#' @param dist The distribution name or a list of distribution models to try fitting. One of "nbinom", "gamma", "snorm", "norm",
+#'        "invgamma", "invgauss", "lnorm", "lgamma", or "pareto".
+#'
+#' @return A list containing the fit result or the distribution name in case there is an error.
+#'
+#' @export
 single_fit <- function(df, dist){
   if (dist %in% c("nbinom")){
     result <- tryCatch(
@@ -16,7 +26,15 @@ single_fit <- function(df, dist){
 
 }
 
-
+#' Available Distributions
+#'
+#' @description This provides a vector of available distributions for fitting.
+#'
+#' @return A character vector of available distribution names.
+#'
+#'
+#'
+#' @export
 avail_dists <- function() {
   c("gamma", "norm",
     "snorm", "invgamma",
@@ -25,6 +43,17 @@ avail_dists <- function() {
     "nbinom")
 }
 
+
+#' Distribution Name to Normalized Name
+#'
+#' @description This converts distribution tags to names that humans can read and understand.
+#'
+#' @param tag A character string representing the distribution tag.
+#'
+#' @return A character string with the human-readable distribution name.
+#'
+#'
+#' @export
 d2n <- function(tag){
   v <- c("Gamma", "Normal", "Skew Normal", "InvGamma", "Inverse Gaussian",
            "Lognormal", "Log Gamma", "Pareto", "Negative Binomial")
@@ -32,6 +61,17 @@ d2n <- function(tag){
   v[tag]
 }
 
+
+#' Fit Multiple Distributions
+#'
+#' @description This fits multiple distributions to the first column of a data frame.
+#'
+#' @param df A data frame with the data to fit.
+#'
+#' @return A data frame with the fit results for each distribution.
+#'
+#'
+#' @export
 do_fits <- function(df) {
   dists <- avail_dists()
 
@@ -41,6 +81,16 @@ do_fits <- function(df) {
 
 }
 
+
+#' Squash Fit Results
+#'
+#' @description This processes fit results into a tidy format.
+#'
+#' @param fit A list containing the fit result.
+#'
+#' @return A tibble with the distribution name, log-likelihood, and Akaike Information Criterion (AIC).
+#'
+#' @export
 squash_fits <- function(fit){
 
   if (length(fit) == 1){
@@ -58,6 +108,18 @@ squash_fits <- function(fit){
 
 }
 
+
+
+#' Parse Fit Results from fitdistrplus
+#'
+#' @description This converts fit results from the `fitdistrplus` package into a tidy format.
+#'
+#' @param fit An object returned by `fitdistrplus::fitdist`.
+#'
+#' @return A tibble with the distribution name, log-likelihood, and AIC.
+#'
+#'
+#' @export
 parse_fitdist <- function(fit){
   tibble::tibble(
     dist = fit$distname,
@@ -66,6 +128,16 @@ parse_fitdist <- function(fit){
   )
 }
 
+
+#' Parse Fit Results from univariateML
+#'
+#' @description This converts fit results from the `univariateML` package into a tidy format.
+#'
+#' @param fit An object returned by `univariateML::model_select`.
+#'
+#' @return A tibble with the distribution name, log-likelihood, and AIC.
+#'
+#' @export
 parse_univariateML <- function(fit){
   tibble::tibble(
     dist = attr(fit, "model"),
diff --git a/R/peppwr.R b/R/peppwr.R
index dc58b2a..e71a432 100644
--- a/R/peppwr.R
+++ b/R/peppwr.R
@@ -1,4 +1,15 @@
-
+#' Find Fits for Nested Data
+#'
+#' @description This groups the data frame by specified columns, nests the data, and applies the fitting functions to each group.
+#'
+#' @param df A data frame containing the data to be fitted.
+#' @param id_col A character string specifying the column name for the identifier. Default is "id".
+#' @param group_col A character string specifying the column name for the group. Default is "group".
+#' @param value_col A character string specifying the column name for the values to be fitted. Default is "value".
+#'
+#' @return A data frame with nested data and the fit results for each group.
+#'
+#' @export
 find_fits <- function(df, id_col="id", group_col="group", value_col="value"){
     tidyr::nest(df, .by = tidyr::all_of(c(id_col, group_col )), data = {{value_col}} ) |>
     dplyr::mutate(
diff --git a/R/plots.R b/R/plots.R
index 8d5869c..730ac5b 100644
--- a/R/plots.R
+++ b/R/plots.R
@@ -1,3 +1,14 @@
+
+#' Fill Scale for Distributions
+#'
+#' @description This generates a fill scale for distributions using the Set3 palette from RColorBrewer.
+#'
+#' @param name A character string specifying the name of the fill scale.
+#'
+#' @return A ggplot2 scale fill manual object.
+#'
+#'
+#' @export
 fill_scale <- function(name = "name"){
   cols <- RColorBrewer::brewer.pal(length(avail_dists()), "Set3")
   names(cols) <- d2n(avail_dists())
@@ -5,6 +16,17 @@ fill_scale <- function(name = "name"){
 }
 
 
+
+#' Plot Best Fitted Models
+#'
+#' @description This plots the best-fitted models based on Log-Likelihood and AIC from the fits data frame.
+#'
+#' @param fits_df A data frame containing the fit results.
+#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits".
+#'
+#' @return A ggplot object showing the best-fitted models.
+#'
+#' @export
 plot_best <- function(fits_df, fit_col="fits") {
   #TODO check and test df has fit_col 'fits'
 
@@ -49,6 +71,17 @@ plot_best <- function(fits_df, fit_col="fits") {
 }
 
 
+
+#' Plot Failed Models
+#'
+#' @description This plots the count of failed model fits from the fits data frame.
+#'
+#' @param fits_df A data frame containing the fit results.
+#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits".
+#'
+#' @return A ggplot object showing the count of failed models.
+#'
+#' @export
 plot_failed <- function(fits_df, fit_col = "fits"){
 
   if (! fit_col %in% colnames(fits_df)){
@@ -79,6 +112,17 @@ plot_failed <- function(fits_df, fit_col = "fits"){
 
 }
 
+
+
+#' Fake Legend
+#'
+#' @description This generates a fake legend for the distribution plots.
+#'
+#' @param name A character string specifying the name of the legend. Default is "Dist".
+#'
+#' @return A ggplot2 legend object.
+#'
+#' @export
 fake_leg <- function(name="Dist") {
   p <- data.frame(
     dist = d2n(avail_dists()),
@@ -93,6 +137,20 @@ fake_leg <- function(name="Dist") {
     cowplot::get_legend(p)
 }
 
+
+
+
+#' Evaluate Distributions
+#'
+#' @description This evaluates and plots the best-fitted and failed models from the fits data frame.
+#'
+#' @param fits_df A data frame containing the fit results.
+#'
+#' @param fit_col A character string specifying the column name that contains the nested fit results. Default is "fits".
+#'
+#' @return A ggplot object showing the evaluation of fitted models.
+#'
+#' @export
 evaldist <- function(fits_df, fit_col="fits"){
   a <- plot_best(fits_df, fit_col = fit_col)
   b <- plot_failed(fits_df, fit_col=fit_col)
diff --git a/man/avail_dists.Rd b/man/avail_dists.Rd
new file mode 100644
index 0000000..2b0ab3b
--- /dev/null
+++ b/man/avail_dists.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{avail_dists}
+\alias{avail_dists}
+\title{Available Distributions}
+\usage{
+avail_dists()
+}
+\value{
+A character vector of available distribution names.
+}
+\description{
+This provides a vector of available distributions for fitting.
+}
diff --git a/man/d2n.Rd b/man/d2n.Rd
new file mode 100644
index 0000000..49d01e3
--- /dev/null
+++ b/man/d2n.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{d2n}
+\alias{d2n}
+\title{Distribution Name to Normalized Name}
+\usage{
+d2n(tag)
+}
+\arguments{
+\item{tag}{A character string representing the distribution tag.}
+}
+\value{
+A character string with the human-readable distribution name.
+}
+\description{
+This converts distribution tags to names that humans can read and understand.
+}
diff --git a/man/do_fits.Rd b/man/do_fits.Rd
new file mode 100644
index 0000000..6496f65
--- /dev/null
+++ b/man/do_fits.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{do_fits}
+\alias{do_fits}
+\title{Fit Multiple Distributions}
+\usage{
+do_fits(df)
+}
+\arguments{
+\item{df}{A data frame with the data to fit.}
+}
+\value{
+A data frame with the fit results for each distribution.
+}
+\description{
+This fits multiple distributions to the first column of a data frame.
+}
diff --git a/man/evaldist.Rd b/man/evaldist.Rd
new file mode 100644
index 0000000..2bedddd
--- /dev/null
+++ b/man/evaldist.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{evaldist}
+\alias{evaldist}
+\title{Evaluate Distributions}
+\usage{
+evaldist(fits_df, fit_col = "fits")
+}
+\arguments{
+\item{fits_df}{A data frame containing the fit results.}
+
+\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".}
+}
+\value{
+A ggplot object showing the evaluation of fitted models.
+}
+\description{
+This evaluates and plots the best-fitted and failed models from the fits data frame.
+}
diff --git a/man/fake_leg.Rd b/man/fake_leg.Rd
new file mode 100644
index 0000000..d6a06fb
--- /dev/null
+++ b/man/fake_leg.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{fake_leg}
+\alias{fake_leg}
+\title{Fake Legend}
+\usage{
+fake_leg(name = "Dist")
+}
+\arguments{
+\item{name}{A character string specifying the name of the legend. Default is "Dist".}
+}
+\value{
+A ggplot2 legend object.
+}
+\description{
+This generates a fake legend for the distribution plots.
+}
diff --git a/man/fill_scale.Rd b/man/fill_scale.Rd
new file mode 100644
index 0000000..5629dc2
--- /dev/null
+++ b/man/fill_scale.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{fill_scale}
+\alias{fill_scale}
+\title{Fill Scale for Distributions}
+\usage{
+fill_scale(name = "name")
+}
+\arguments{
+\item{name}{A character string specifying the name of the fill scale.}
+}
+\value{
+A ggplot2 scale fill manual object.
+}
+\description{
+This generates a fill scale for distributions using the Set3 palette from RColorBrewer.
+}
diff --git a/man/find_fits.Rd b/man/find_fits.Rd
new file mode 100644
index 0000000..ec1a2aa
--- /dev/null
+++ b/man/find_fits.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/peppwr.R
+\name{find_fits}
+\alias{find_fits}
+\title{Find Fits for Nested Data}
+\usage{
+find_fits(df, id_col = "id", group_col = "group", value_col = "value")
+}
+\arguments{
+\item{df}{A data frame containing the data to be fitted.}
+
+\item{id_col}{A character string specifying the column name for the identifier. Default is "id".}
+
+\item{group_col}{A character string specifying the column name for the group. Default is "group".}
+
+\item{value_col}{A character string specifying the column name for the values to be fitted. Default is "value".}
+}
+\value{
+A data frame with nested data and the fit results for each group.
+}
+\description{
+This groups the data frame by specified columns, nests the data, and applies the fitting functions to each group.
+}
diff --git a/man/parse_fitdist.Rd b/man/parse_fitdist.Rd
new file mode 100644
index 0000000..9b386c0
--- /dev/null
+++ b/man/parse_fitdist.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{parse_fitdist}
+\alias{parse_fitdist}
+\title{Parse Fit Results from fitdistrplus}
+\usage{
+parse_fitdist(fit)
+}
+\arguments{
+\item{fit}{An object returned by \code{fitdistrplus::fitdist}.}
+}
+\value{
+A tibble with the distribution name, log-likelihood, and AIC.
+}
+\description{
+This converts fit results from the \code{fitdistrplus} package into a tidy format.
+}
diff --git a/man/parse_univariateML.Rd b/man/parse_univariateML.Rd
new file mode 100644
index 0000000..3168516
--- /dev/null
+++ b/man/parse_univariateML.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{parse_univariateML}
+\alias{parse_univariateML}
+\title{Parse Fit Results from univariateML}
+\usage{
+parse_univariateML(fit)
+}
+\arguments{
+\item{fit}{An object returned by \code{univariateML::model_select}.}
+}
+\value{
+A tibble with the distribution name, log-likelihood, and AIC.
+}
+\description{
+This converts fit results from the \code{univariateML} package into a tidy format.
+}
diff --git a/man/plot_best.Rd b/man/plot_best.Rd
new file mode 100644
index 0000000..c4c2e74
--- /dev/null
+++ b/man/plot_best.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{plot_best}
+\alias{plot_best}
+\title{Plot Best Fitted Models}
+\usage{
+plot_best(fits_df, fit_col = "fits")
+}
+\arguments{
+\item{fits_df}{A data frame containing the fit results.}
+
+\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".}
+}
+\value{
+A ggplot object showing the best-fitted models.
+}
+\description{
+This plots the best-fitted models based on Log-Likelihood and AIC from the fits data frame.
+}
diff --git a/man/plot_failed.Rd b/man/plot_failed.Rd
new file mode 100644
index 0000000..a75a1a8
--- /dev/null
+++ b/man/plot_failed.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{plot_failed}
+\alias{plot_failed}
+\title{Plot Failed Models}
+\usage{
+plot_failed(fits_df, fit_col = "fits")
+}
+\arguments{
+\item{fits_df}{A data frame containing the fit results.}
+
+\item{fit_col}{A character string specifying the column name that contains the nested fit results. Default is "fits".}
+}
+\value{
+A ggplot object showing the count of failed models.
+}
+\description{
+This plots the count of failed model fits from the fits data frame.
+}
diff --git a/man/single_fit.Rd b/man/single_fit.Rd
new file mode 100644
index 0000000..177fb24
--- /dev/null
+++ b/man/single_fit.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{single_fit}
+\alias{single_fit}
+\title{Fit a Single Distribution}
+\usage{
+single_fit(df, dist)
+}
+\arguments{
+\item{df}{The data frame containing the data to fit.}
+
+\item{dist}{The distribution name or a list of distribution models to try fitting. One of "nbinom", "gamma", "snorm", "norm",
+"invgamma", "invgauss", "lnorm", "lgamma", or "pareto".}
+}
+\value{
+A list containing the fit result or the distribution name in case there is an error.
+}
+\description{
+This fits a specified distribution to the first column of a data frame.
+}
diff --git a/man/squash_fits.Rd b/man/squash_fits.Rd
new file mode 100644
index 0000000..b95b07c
--- /dev/null
+++ b/man/squash_fits.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fits.R
+\name{squash_fits}
+\alias{squash_fits}
+\title{Squash Fit Results}
+\usage{
+squash_fits(fit)
+}
+\arguments{
+\item{fit}{A list containing the fit result.}
+}
+\value{
+A tibble with the distribution name, log-likelihood, and Akaike Information Criterion (AIC).
+}
+\description{
+This processes fit results into a tidy format.
+}