diff --git a/DESCRIPTION b/DESCRIPTION
index a9af153..929c13a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: hrmn
 Title: Harmonize Datasets
-Version: 0.0.0.9001
+Version: 0.0.0.9002
 Authors@R:
     person("Jon", "Harmon", , "jonthegeek@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4781-4346"))
@@ -22,4 +22,6 @@ Language: en-US
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.3
 Imports:
-    S7
+    fastmatch,
+    rlang,
+    stbl
diff --git a/NAMESPACE b/NAMESPACE
index 9ebe023..04627fa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,4 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(harmonize_fct)
 export(specify_fct)
-if (getRversion() < "4.3.0") importFrom("S7", "@")
+importFrom(fastmatch,"%fin%")
+importFrom(rlang,"%||%")
diff --git a/R/aaa-shared_params.R b/R/aaa-shared_params.R
new file mode 100644
index 0000000..eabf2a2
--- /dev/null
+++ b/R/aaa-shared_params.R
@@ -0,0 +1,9 @@
+#' Parameters used in multiple functions
+#'
+#' Reused parameter definitions are gathered here for easier editing.
+#'
+#' @param levels (`character`) The allowed values of the factor.
+#'
+#' @name .shared_params
+#' @keywords internal
+NULL
diff --git a/R/harmonize_fct.R b/R/harmonize_fct.R
new file mode 100644
index 0000000..9e1f9c7
--- /dev/null
+++ b/R/harmonize_fct.R
@@ -0,0 +1,59 @@
+#' Harmonize a factor
+#'
+#' @param .data (`character` or coercible to `character`) A vector to harmonize
+#'   to the specified factor.
+#' @inheritParams .shared_params
+#' @inheritParams rlang::args_dots_empty
+#' @param .spec (`hrmn_fct_spec`) A harmonization specification from
+#'   [specify_fct()]. If `NULL` (the default), the levels are derived from the
+#'   recoded data, as `factor()` does by default.
+#' @param .lookup (named `character`) A vector of replacement values. The names
+#'   are the values in `.data` and the values are the target values.
+#'
+#' @returns A harmonized [factor()].
+#' @export
+#'
+#' @examples
+#' # Without a spec, harmonize_fct() acts like base::factor().
+#' harmonize_fct(c("a", "b", "c"))
+#'
+#' # Basic harmonization, dropping levels not in the spec
+#' spec <- specify_fct(levels = c("a", "b"))
+#' harmonize_fct(c("a", "b", "c"), .spec = spec)
+#'
+#' # Using a lookup table to recode values
+#' spec2 <- specify_fct(levels = c("fruit", "citrus"))
+#' lookup <- c(apple = "fruit", banana = "fruit", orange = "citrus")
+#' harmonize_fct(
+#'   c("apple", "banana", "orange"),
+#'   .spec = spec2,
+#'   .lookup = lookup
+#' )
+harmonize_fct <- function(.data, ..., .spec = NULL, .lookup = NULL) {
+  rlang::check_dots_empty()
+  .data <- stbl::to_chr(.data)
+  .data <- .apply_fct_lookup(.data, .lookup = .lookup)
+  if (is.null(.spec)) {
+    # No spec means no target levels to enforce: defer to factor()'s defaults
+    # rather than mapping every value to NA against an empty level set.
+    return(factor(.data))
+  }
+  factor(.data, levels = .spec$levels)
+}
+
+#' Apply a lookup table to a character vector
+#'
+#' @inheritParams harmonize_fct
+#' @returns A character vector with values replaced according to the lookup
+#'   table.
+#' @keywords internal
+.apply_fct_lookup <- function(.data, .lookup = NULL) {
+  if (is.null(.lookup)) {
+    # Nothing to recode.
+    return(.data)
+  }
+  .lookup <- stbl::to_chr(.lookup)
+  matches <- .data %fin% names(.lookup)
+  .data[matches] <- .lookup[.data[matches]]
+  .data
+}
diff --git a/R/hrmn-package.R b/R/hrmn-package.R
index a65cf64..860748b 100644
--- a/R/hrmn-package.R
+++ b/R/hrmn-package.R
@@ -2,5 +2,7 @@
 "_PACKAGE"
 
 ## usethis namespace: start
+#' @importFrom fastmatch %fin%
+#' @importFrom rlang %||%
 ## usethis namespace: end
 NULL
diff --git a/R/specify_fct.R b/R/specify_fct.R
index 02562ad..293aa14 100644
--- a/R/specify_fct.R
+++ b/R/specify_fct.R
@@ -1,34 +1,19 @@
-# Developer note: `specify_fct()` is designed to create a data-less
-# "specification" object. It defines the target state (the levels) for a factor
-# but doesn't hold any actual factor data itself. This is why the constructor
-# internally provides `integer()` as the data component to `S7::new_object()`.
-#
-# In the future, we might use the `hrmn_fct` class to represent actual,
-# harmonized factor data. In that scenario, we would likely create a separate
-# `class_hrmn_fct` object and have `specify_fct()` be a wrapper function that
-# calls the constructor with the empty data. For now, since we don't need the
-# full factor-like class, we are directly defining the `hrmn_fct` class in
-# `specify_fct()`
-
-#' Specify a factor harmonization
+#' Factor specification
 #'
-#' Create a `hrmn_fct` object that specifies the desired levels for a factor
-#' variable. This 'specification' object does not contain any data itself, only
-#' the rules for harmonization.
+#' Create an object that specifies the desired levels for a factor variable.
+#' This specification object does not contain any data itself, only the rules
+#' for harmonization.
 #'
-#' @param levels (`character`) The allowed values of the factor.
-#' @returns A factor specification, an S7 object of class `hrmn::hrmn_fct`.
+#' @inheritParams .shared_params
+#'
+#' @returns A `hrmn_fct_spec` object that acts as a specification.
 #' @export
-specify_fct <- S7::new_class(
-  "hrmn_fct",
-  parent = S7::class_factor,
-  properties = list(
-    levels = S7::class_character
-  ),
-  constructor = function(levels = character()) {
-    S7::new_object(
-      integer(),
-      levels = levels
-    )
-  }
-)
+#'
+#' @examples
+#' specify_fct(levels = c("a", "b", "c"))
+specify_fct <- function(levels = character()) {
+  structure(
+    list(levels = stbl::to_chr(levels)),
+    class = c("hrmn_fct_spec", "hrmn_spec", "list")
+  )
+}
diff --git a/R/zzz.R b/R/zzz.R
deleted file mode 100644
index 2b31dbb..0000000
--- a/R/zzz.R
+++ /dev/null
@@ -1,3 +0,0 @@
-# enable usage of @name in package code
-#' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@")
-NULL
diff --git a/man/dot-apply_fct_lookup.Rd b/man/dot-apply_fct_lookup.Rd
new file mode 100644
index 0000000..e2dfb44
--- /dev/null
+++ b/man/dot-apply_fct_lookup.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/harmonize_fct.R
+\name{.apply_fct_lookup}
+\alias{.apply_fct_lookup}
+\title{Apply a lookup table to a character vector}
+\usage{
+.apply_fct_lookup(.data, .lookup = NULL)
+}
+\arguments{
+\item{.data}{(\code{character} or coercible to \code{character}) A vector to harmonize
+to the specified factor.}
+
+\item{.lookup}{(named \code{character}) A vector of replacement values. The names
+are the values in \code{.data} and the values are the target values.}
+}
+\value{
+A character vector with values replaced according to the lookup
+table.
+}
+\description{
+Apply a lookup table to a character vector
+}
+\keyword{internal}
diff --git a/man/dot-shared_params.Rd b/man/dot-shared_params.Rd
new file mode 100644
index 0000000..8eb0f4b
--- /dev/null
+++ b/man/dot-shared_params.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aaa-shared_params.R
+\name{.shared_params}
+\alias{.shared_params}
+\title{Parameters used in multiple functions}
+\arguments{
+\item{levels}{(\code{character}) The allowed values of the factor.}
+}
+\description{
+Reused parameter definitions are gathered here for easier editing.
+}
+\keyword{internal}
diff --git a/man/harmonize_fct.Rd b/man/harmonize_fct.Rd
new file mode 100644
index 0000000..bb55d28
--- /dev/null
+++ b/man/harmonize_fct.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/harmonize_fct.R
+\name{harmonize_fct}
+\alias{harmonize_fct}
+\title{Harmonize a factor}
+\usage{
+harmonize_fct(.data, ..., .spec = NULL, .lookup = NULL)
+}
+\arguments{
+\item{.data}{(\code{character} or coercible to \code{character}) A vector to harmonize
+to the specified factor.}
+
+\item{...}{These dots are for future extensions and must be empty.}
+
+\item{.spec}{(\code{hrmn_fct_spec}) A harmonization specification from
+\code{\link[=specify_fct]{specify_fct()}}. If \code{NULL} (the default), the levels are derived from the
+recoded data, as \code{factor()} does by default.}
+
+\item{.lookup}{(named \code{character}) A vector of replacement values. The names
+are the values in \code{.data} and the values are the target values.}
+}
+\value{
+A harmonized \code{\link[=factor]{factor()}}.
+}
+\description{
+Harmonize a factor
+}
+\examples{
+# Without a spec, harmonize_fct() acts like base::factor().
+harmonize_fct(c("a", "b", "c"))
+
+# Basic harmonization, dropping levels not in the spec
+spec <- specify_fct(levels = c("a", "b"))
+harmonize_fct(c("a", "b", "c"), .spec = spec)
+
+# Using a lookup table to recode values
+spec2 <- specify_fct(levels = c("fruit", "citrus"))
+lookup <- c(apple = "fruit", banana = "fruit", orange = "citrus")
+harmonize_fct(
+  c("apple", "banana", "orange"),
+  .spec = spec2,
+  .lookup = lookup
+)
+}
diff --git a/man/specify_fct.Rd b/man/specify_fct.Rd
index f460874..ff3a34e 100644
--- a/man/specify_fct.Rd
+++ b/man/specify_fct.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/specify_fct.R
 \name{specify_fct}
 \alias{specify_fct}
-\title{Specify a factor harmonization}
+\title{Factor specification}
 \usage{
 specify_fct(levels = character())
 }
@@ -10,10 +10,13 @@ specify_fct(levels = character())
 \item{levels}{(\code{character}) The allowed values of the factor.}
 }
 \value{
-A factor specification, an S7 object of class \code{hrmn::hrmn_fct}.
+A \code{hrmn_fct_spec} object that acts as a specification.
 }
 \description{
-Create a \code{hrmn_fct} object that specifies the desired levels for a factor
-variable. This 'specification' object does not contain any data itself, only
-the rules for harmonization.
+Create an object that specifies the desired levels for a factor variable.
+This specification object does not contain any data itself, only the rules
+for harmonization.
+}
+\examples{
+specify_fct(levels = c("a", "b", "c"))
 }
diff --git a/tests/testthat/test-harmonize_fct.R b/tests/testthat/test-harmonize_fct.R
new file mode 100644
index 0000000..a99642c
--- /dev/null
+++ b/tests/testthat/test-harmonize_fct.R
@@ -0,0 +1,108 @@
+test_that("harmonize_fct() works with empty vector and returns a factor", {
+  expect_identical(
+    {
+      harmonize_fct(factor())
+    },
+    factor()
+  )
+})
+
+test_that("harmonize_fct() without a spec acts like factor()", {
+  expect_identical(
+    {
+      harmonize_fct(c("b", "a", "b", NA))
+    },
+    factor(c("b", "a", "b", NA))
+  )
+})
+
+test_that("harmonize_fct() without a spec takes levels from recoded data", {
+  expect_identical(
+    {
+      harmonize_fct(c("x", "y"), .lookup = c(x = "a"))
+    },
+    factor(c("a", "y"))
+  )
+})
+
+test_that("harmonize_fct() drops unspecified levels", {
+  expect_identical(
+    {
+      harmonize_fct(factor(c("a", "b")), .spec = specify_fct(levels = "a"))
+    },
+    factor(c("a", NA), levels = "a")
+  )
+})
+
+
+test_that("harmonize_fct() errors if .spec is not named", {
+  expect_error(
+    {
+      harmonize_fct(factor(c("a", "b")), specify_fct(levels = "a"))
+    },
+    class = "rlib_error_dots_nonempty"
+  )
+})
+
+test_that("The first `harmonize_fct()` argument is `.data`", {
+  expect_equal(
+    rlang::fn_fmls_names(harmonize_fct)[1],
+    ".data"
+  )
+})
+
+test_that("harmonize_fct() preserves existing NAs", {
+  expect_equal(
+    {
+      harmonize_fct(factor(c("a", "b", NA)), .spec = specify_fct(levels = "a"))
+    },
+    factor(c("a", NA, NA), levels = "a")
+  )
+})
+
+test_that("harmonize_fct() works with character vectors", {
+  expect_equal(
+    {
+      harmonize_fct(c("a", "b"), .spec = specify_fct(levels = "a"))
+    },
+    factor(c("a", NA), levels = "a")
+  )
+})
+
+test_that("harmonize_fct() works with an empty spec", {
+  expect_equal(
+    {
+      harmonize_fct(
+        factor(c("a", "b")),
+        .spec = specify_fct(levels = character())
+      )
+    },
+    factor(c(NA, NA), levels = character())
+  )
+})
+
+test_that("harmonize_fct() uses .lookup table", {
+  expect_equal(
+    {
+      harmonize_fct(
+        c("x", "y", "z"),
+        .spec = specify_fct(levels = c("a", "b")),
+        .lookup = c(x = "a", y = "a", z = "b")
+      )
+    },
+    factor(c("a", "a", "b"), levels = c("a", "b"))
+  )
+})
+
+test_that("harmonize_fct() .lookup values not in levels become NA", {
+  expect_equal(
+    {
+      harmonize_fct(
+        "x",
+        .spec = specify_fct(levels = "a"),
+        .lookup = c(x = "b")
+      )
+    },
+    factor(NA_character_, levels = "a")
+  )
+})
diff --git a/tests/testthat/test-specify_fct.R b/tests/testthat/test-specify_fct.R
index f966f88..9b93283 100644
--- a/tests/testthat/test-specify_fct.R
+++ b/tests/testthat/test-specify_fct.R
@@ -1,8 +1,7 @@
 test_that("specify_fct() returns an object with the correct class", {
-  spec <- specify_fct()
   expect_s3_class(
-    spec,
-    c("hrmn::hrmn_fct", "factor", "S7_object"),
+    specify_fct(),
+    c("hrmn_fct_spec", "hrmn_spec", "list"),
     exact = TRUE
   )
 })
@@ -10,5 +9,5 @@ test_that("specify_fct() returns an object with the correct class", {
 test_that("specify_fct() stores the levels", {
   lvls <- c("a", "b", "c")
   spec <- specify_fct(levels = lvls)
-  expect_equal(spec@levels, lvls)
+  expect_equal(spec$levels, lvls)
 })