pegeler · pegeler · Apr 11, 2026 · Oct 12, 2025 · Oct 12, 2025 · Oct 12, 2025
diff --git a/R/package/DESCRIPTION b/R/package/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: eddington
 Title: Compute a Cyclist's Eddington Number
-Version: 4.2.1
+Version: 4.3.0
 Authors@R: c(
     person('Paul', 'Egeler', email = 'paulegeler@gmail.com', role = c('aut','cre')),
     person('Tashi', 'Reigle', role = 'ctb'))
@@ -15,6 +15,8 @@ Description: Compute a cyclist's Eddington number, including efficiently
     used for computing h-indices for authors, a metric described by Hirsch (2005)
     <doi:10.1073/pnas.0507655102>. Both are specific applications of computing
     the side length of a Durfee square <https://en.wikipedia.org/wiki/Durfee_square>.
+    Some additional author-level metrics such as g-index and i10-index are also
+    included in the package.
 License: GPL (>=2)
 Encoding: UTF-8
 LazyData: true
@@ -32,10 +34,11 @@ Suggests:
     knitr,
     rmarkdown,
     stats,
-    dplyr
+    dplyr,
+    tibble
 SystemRequirements: C++17
 VignetteBuilder: knitr
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Roxygen: list(markdown = TRUE)
 URL: https://github.com/pegeler/eddington2
 BugReports: https://github.com/pegeler/eddington2/issues
diff --git a/R/package/NAMESPACE b/R/package/NAMESPACE
@@ -8,7 +8,12 @@ export(E_req)
 export(E_sat)
 export(Eddington)
 export(EddingtonModule)
+export(durfee)
+export(g_index)
 export(get_haversine_distance)
+export(h_index)
+export(i10_index)
+export(index)
 export(read_gpx)
 importFrom(R6,R6Class)
 importFrom(Rcpp,loadModule)

diff --git a/R/package/NEWS.md b/R/package/NEWS.md
@@ -1,3 +1,13 @@
+# eddington 4.3.0 (Release date: 2026-04-11)
+
+Changes:
+
+- Added bibliometric index calculations `h_index()`, `g_index()`,
+  and `i10_index()` as well as an `index()` function factory for making custom
+  index calculations.
+- Aliased `E_num()` to `durfee()` to aid in search.
+- Created a `daily_totals` dataset that aggregates the `rides` dataset by day.
+
 # eddington 4.2.0 (Release date: 2024-03-23)
 
 Changes:

diff --git a/R/package/R/RcppExports.R b/R/package/R/RcppExports.R
@@ -1,6 +1,10 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
+g_index_ <- function(citations, sorted = FALSE) {  # generated .Call wrapper
+    .Call(`_eddington_g_index_`, citations, sorted)
+}
+
 #' Get the Eddington number for cycling
 #'
 #' Gets the \href{https://en.wikipedia.org/wiki/Arthur_Eddington#Eddington_number_for_cycling}{Eddington number for cycling}.

diff --git a/R/package/R/bibliometrics.R b/R/package/R/bibliometrics.R
@@ -0,0 +1,125 @@
+#' Define a custom bibliometric index function
+#'
+#' @param f A function taking a 1-based rank and returning the threshold for it.
+#' @param cumulative If `TRUE`, compare against the cumulative sum of the counts.
+#' @examples
+#' # NOTE: These will all be less performant than their counterparts exported
+#' # in this package, i.e., `h_index()`, `g_index()`, `i10_index()`.
+#' set.seed(2018)
+#' citations <- rgamma(30, shape = 2, scale = 10)
+#'
+#' # Create an h-index
+#' my_h_index <- index(force)
+#' my_h_index(citations)
+#'
+#' # Create a g-index function
+#' my_g_index <- index(\(i) i * i, cumulative = TRUE)
+#' my_g_index(citations)
+#'
+#' # Create an i10-index
+#' my_i10_index <- index(\(i) 10L)
+#' my_i10_index(citations)
+#'
+#' @returns A function that will compute the specified index.
+#' @seealso [bibliometrics]
+#' @export
+index <- function(f, cumulative = FALSE) {
+  force(f)
+  force(cumulative)
+  function(xs) {
+    # sort() silently drops NA values, so the rank positions must be derived
+    # from the sorted vector; otherwise the thresholds and the counts differ
+    # in length and the comparison below recycles.
+    xs <- sort(xs, decreasing = TRUE)
+    # numeric(1L) also accepts integer results (vapply promotes them), so `f`
+    # may return doubles, e.g. `\(i) i^2`, without a type error.
+    thresholds <- vapply(seq_along(xs), f, numeric(1L))
+    totals <- if (cumulative) cumsum(xs) else xs
+    # Number of ranks whose (cumulative) count meets its threshold.
+    sum(totals >= thresholds)
+  }
+}
+
+#' Compute several bibliometric indices
+#'
+#' Compute bibliometric indices such as the h-index, g-index, and i10-index.
+#'
+#' @section Implicit Type Conversions:
+#' The `h_index()` function implicitly coerces inputs into integer vectors,
+#' which will truncate any floating point inputs. This will usually produce
+#' the expected output, as fractional inputs are not typical in the intended
+#' domain and these indices are explicitly defined on integral thresholds.
+#' However, to maximize the versatility of g-index computation, the
+#' `g_index()` function does not perform this integer coercion. It is
+#' therefore worth noting that floating point input can push the g-index
+#' higher in edge cases. For example,
+#' `g_index(as.integer(daily_totals$total_length)) !=
+#' g_index(daily_totals$total_length)`. Thus, to ensure accurate g-index
+#' results on data that may have a fractional component, it is advised to
+#' first convert the vector to integer before passing it to `g_index()`, or
+#' to otherwise validate inputs.
+#'
+#' This integer conversion will also cause `h_index()` to fail when inputs
+#' contain extremely large values (\eqn{> 2^{31} - 1}{> 2^31 - 1}). The
+#' Eddington number family of functions and `durfee()` do not have this check,
+#' and may return inaccurate outputs for such inputs.
+#'
+#' @param citations A vector of citation counts.
+#' @param sorted Whether the data is pre-sorted in descending order. This may
+#'   speed up computations for some algorithms. The pre-sorted assumption is
+#'   tested and a warning is emitted if unsorted data is detected.
+#' @param na.rm If `TRUE`, `NA` values will be filtered out. Otherwise, any `NA`
+#'   value found in the vector will propagate and `NA` will be returned.
+#' @returns A single number: the value of the requested index.
+#' @references <https://en.wikipedia.org/wiki/Author-level_metrics>,
+#' <https://en.wikipedia.org/wiki/G-index>
+#' @seealso [E_num()], [durfee()]
+#' @name h_index
+#' @aliases bibliometrics
+#' @export
+h_index <- function(citations, na.rm = FALSE) {
+  if (anyNA(citations)) {
+    # Missing values either propagate as NA or are dropped, per `na.rm`.
+    if (!na.rm)
+      return(NA_integer_)
+    citations <- filter_na(citations)
+  }
+
+  # Doubles above the integer range are rejected up front with an error.
+  too_big <- is.double(citations) && any(citations > .Machine$integer.max)
+  if (too_big)
+    stop("Values exceed limits for the integer data type.")
+
+  .Call(`_eddington_E_num`, citations)
+}
+
+#' @rdname h_index
+#' @export
+i10_index <- function(citations, na.rm = FALSE) {
+  # Flag every entry that reaches the fixed threshold of ten, then count them.
+  meets_threshold <- citations >= 10L
+  sum(meets_threshold, na.rm = na.rm)
+}
+
+#' @rdname h_index
+#' @export
+g_index <- function(citations, sorted = FALSE, na.rm = FALSE) {
+  if (anyNA(citations)) {
+    # Missing values either propagate as NA or are dropped, per `na.rm`.
+    if (!na.rm)
+      return(NA_integer_)
+    citations <- filter_na(citations)
+  }
+
+  # No integer coercion happens here; the vector is passed through as given.
+  .Call(`_eddington_g_index_`, citations, sorted)
+}
+
+# Drop missing entries (NA and NaN) with vectorized logical subsetting rather
+# than calling a predicate once per element.
+filter_na <- \(x) x[!is.na(x)]
+
+
+if (FALSE) {  # Quick benchmark; the FALSE guard means this never executes
+  # Closure-based counterparts of the exported functions, built with index().
+  h_alt <- index(force)
+  g_alt <- index(\(i) i * i, cumulative = TRUE)
+  i10_alt <- index(\(i) 10L)
+
+  # data(daily_totals)
+
+  xs <- daily_totals$total_length
+
+  # NOTE(review): microbenchmark is referenced with `::` here; confirm it is
+  # declared in DESCRIPTION, or move this scratch code out of R/.
+  microbenchmark::microbenchmark(
+    h_index(xs),
+    h_alt(xs),
+    g_index(xs),
+    g_alt(xs),
+    i10_index(xs),
+    i10_alt(xs)
+  )
+
+  # FINDING: the dedicated functions are way faster. As fun as the closure is
+  # for generalization, performance demands writing separate functions.
+}
diff --git a/R/package/R/durfee.R b/R/package/R/durfee.R
@@ -0,0 +1,9 @@
+#' Compute the side length of a Durfee square
+#'
+#' @param is An integer vector representing an integer partition.
+#' @returns The side length of the Durfee square for that partition.
+#' @export
+durfee <- function(is) {
+  # Hand the partition straight to the compiled routine, unchanged.
+  .Call(`_eddington_E_num`, is)
+}
+
+# Using .Call instead of just assigning `E_num` to `durfee` so that formals
+# could be renamed.
diff --git a/R/package/R/rides.r b/R/package/R/rides.r
@@ -10,5 +10,22 @@
 #'   \item{ride_date}{date the ride occurred}
 #'   \item{ride_length}{the length in miles}
 #' }
-#'
+#' @seealso [daily_totals]
 "rides"
+
+
+#' A year of simulated bicycle ride mileages, aggregated by day
+#'
+#' Simulated dates and distances of rides occurring in 2009. This is an
+#' aggregation of the \code{\link{rides}} dataset by day.
+#'
+#' The dataset contains a total of 3,419 miles spread across 178
+#' unique days. The Eddington number for the year was 29.
+#'
+#' @format A data frame with 178 rows and 2 variables:
+#' \describe{
+#'   \item{ride_date}{date the ride occurred}
+#'   \item{total_length}{the total length in miles for each day}
+#' }
+#' @seealso [rides]
+"daily_totals"
diff --git a/R/package/data-raw/daily_totals.R b/R/package/data-raw/daily_totals.R
@@ -0,0 +1,10 @@
+## Build the `daily_totals` dataset: the `rides` data summed per day.
+library(dplyr)
+
+data(rides, package = "eddington")
+
+# One row per `ride_date`, holding the sum of that day's `ride_length` values.
+daily_totals <- rides |>
+  group_by(ride_date) |>
+  summarize(total_length = sum(ride_length))
+
+# Store the result as package data, replacing any previously saved copy.
+usethis::use_data(daily_totals, overwrite = TRUE)
diff --git a/R/package/data/daily_totals.rda b/R/package/data/daily_totals.rda
diff --git a/R/package/eddington.Rproj b/R/package/eddington.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: 104c8096-9fd0-4ca1-b7f7-dd6cb0afb1a6
 
 RestoreWorkspace: No
 SaveWorkspace: No

diff --git a/R/package/man/daily_totals.Rd b/R/package/man/daily_totals.Rd
diff --git a/R/package/man/durfee.Rd b/R/package/man/durfee.Rd
diff --git a/R/package/man/h_index.Rd b/R/package/man/h_index.Rd
diff --git a/R/package/man/index.Rd b/R/package/man/index.Rd