From 70275b45563b4bfc817a6774d33386325a6a9a6c Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Fri, 10 Oct 2025 05:47:25 -0500 Subject: [PATCH 1/3] Implement harmonize_df() Closes #5. --- .Rbuildignore | 1 + DESCRIPTION | 5 +- NAMESPACE | 1 + R/aaa-conditions.R | 18 +- R/aaa-shared_params.R | 8 +- R/harmonize_df.R | 238 ++++++++++++++++++ R/harmonize_fct.R | 11 +- R/specify_df.R | 4 +- R/to_hrmn_spec.R | 54 ++++ man/dot-confirm_spec_subclass.Rd | 23 ++ man/dot-harmonize_col.Rd | 26 ++ man/dot-harmonize_col_names.Rd | 31 +++ man/dot-harmonize_df_data.Rd | 35 +++ man/dot-harmonize_df_dots.Rd | 23 ++ man/dot-harmonize_df_spec.Rd | 23 ++ man/dot-harmonize_extra_col_names.Rd | 31 +++ man/dot-hrmn_abort.Rd | 6 +- man/dot-shared_params.Rd | 11 +- man/dot-stop_col_has_unknown_spec.Rd | 20 ++ ...s_spec.Rd => dot-stop_if_args_not_spec.Rd} | 8 +- ...s_named.Rd => dot-stop_if_args_unnamed.Rd} | 8 +- man/dot-stop_if_missing_col_names.Rd | 23 ++ man/dot-to_hrmn_spec.Rd | 23 ++ man/harmonize_df.Rd | 59 +++++ man/harmonize_fct.Rd | 4 + principles.md | 17 ++ tests/testthat/_snaps/aaa-conditions.md | 14 +- tests/testthat/test-aaa-conditions.R | 45 ++-- tests/testthat/test-harmonize_df.R | 128 ++++++++++ tests/testthat/test-harmonize_fct.R | 11 +- tests/testthat/test-specify_df.R | 4 +- tests/testthat/test-to_hrmn_spec.R | 40 +++ 32 files changed, 876 insertions(+), 77 deletions(-) create mode 100644 R/harmonize_df.R create mode 100644 R/to_hrmn_spec.R create mode 100644 man/dot-confirm_spec_subclass.Rd create mode 100644 man/dot-harmonize_col.Rd create mode 100644 man/dot-harmonize_col_names.Rd create mode 100644 man/dot-harmonize_df_data.Rd create mode 100644 man/dot-harmonize_df_dots.Rd create mode 100644 man/dot-harmonize_df_spec.Rd create mode 100644 man/dot-harmonize_extra_col_names.Rd create mode 100644 man/dot-stop_col_has_unknown_spec.Rd rename man/{dot-check_args_spec.Rd => dot-stop_if_args_not_spec.Rd} (63%) rename man/{dot-check_args_named.Rd => 
dot-stop_if_args_unnamed.Rd} (63%) create mode 100644 man/dot-stop_if_missing_col_names.Rd create mode 100644 man/dot-to_hrmn_spec.Rd create mode 100644 man/harmonize_df.Rd create mode 100644 principles.md create mode 100644 tests/testthat/test-harmonize_df.R create mode 100644 tests/testthat/test-to_hrmn_spec.R diff --git a/.Rbuildignore b/.Rbuildignore index 80cd200..d5f3cb6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^principles\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 024874d..d9d4846 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hrmn Title: Harmonize Datasets -Version: 0.0.0.9004 +Version: 0.0.0.9005 Authors@R: person("Jon", "Harmon", , "jonthegeek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4781-4346")) @@ -17,7 +17,8 @@ Depends: Imports: fastmatch, rlang, - stbl (>= 0.2.0.9002) + stbl (>= 0.2.0.9002), + tibble Suggests: testthat (>= 3.0.0) Remotes: diff --git a/NAMESPACE b/NAMESPACE index 55133e4..06bb585 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(caller_arg) export(caller_env) +export(harmonize_df) export(harmonize_fct) export(specify_df) export(specify_fct) diff --git a/R/aaa-conditions.R b/R/aaa-conditions.R index 8e21c79..bc3548e 100644 --- a/R/aaa-conditions.R +++ b/R/aaa-conditions.R @@ -15,8 +15,8 @@ rlang::caller_env .hrmn_abort <- function( message, subclass, - call = caller_env(), - message_env = call, + .call = caller_env(), + message_env = .call, parent = NULL, ... ) { @@ -24,7 +24,7 @@ rlang::caller_env "hrmn", message = message, subclass = subclass, - call = call, + call = .call, message_env = message_env, parent = parent, ... @@ -37,15 +37,14 @@ rlang::caller_env #' @inheritParams .shared_params #' @returns `NULL`, invisibly. 
#' @keywords internal -.check_args_named <- function(..., call = rlang::caller_env()) { +.stop_if_args_unnamed <- function(..., .call = rlang::caller_env()) { if (...length() && (is.null(...names()) || !all(nzchar(...names())))) { .hrmn_abort( "All arguments must be named.", "args_unnamed", - call = call + .call = .call ) } - invisible(NULL) } #' Check that all args are hrmn_spec objects @@ -54,7 +53,7 @@ rlang::caller_env #' @inheritParams .shared_params #' @returns `NULL`, invisibly. #' @keywords internal -.check_args_spec <- function(..., call = rlang::caller_env()) { +.stop_if_args_not_spec <- function(..., .call = rlang::caller_env()) { dots <- list(...) is_spec <- vapply(dots, inherits, logical(1), "hrmn_spec") if (length(dots) && !all(is_spec)) { @@ -65,10 +64,9 @@ rlang::caller_env "All arguments must be `hrmn_spec` objects.", "x" = "Argument{?s} {.arg {bad_args}} {?is/are} not {?a / }`hrmn_spec` object{?s}." ), - subclass = "args_not_spec", - call = call, + subclass = "not_spec", + .call = .call, message_env = rlang::current_env() ) } - invisible(NULL) } diff --git a/R/aaa-shared_params.R b/R/aaa-shared_params.R index bb5198c..570a590 100644 --- a/R/aaa-shared_params.R +++ b/R/aaa-shared_params.R @@ -2,15 +2,9 @@ #' #' Reused parameter definitions are gathered here for easier editing. #' -#' @param call `(environment)` The execution environment to mention as the +#' @param .call (`environment`) The execution environment to mention as the #' source of error messages. #' @param levels (`character`) The allowed values of the factor. -#' @param message_env (`environment`) The execution environment to use to -#' evaluate variables in error messages. -#' @param parent A parent condition, as you might create during a -#' [rlang::try_fetch()]. See [rlang::abort()] for additional information. -#' @param subclass (`character`) Class(es) to assign to the error. Will be -#' prefixed by "hrmn-error-". 
#' @name .shared_params #' @keywords internal NULL diff --git a/R/harmonize_df.R b/R/harmonize_df.R new file mode 100644 index 0000000..b6da0b1 --- /dev/null +++ b/R/harmonize_df.R @@ -0,0 +1,241 @@ +#' Harmonize a data frame +#' +#' @param .data (`data.frame`) A data frame to harmonize. +#' @param .spec (`hrmn_spec_df`) A data frame harmonization specification. +#' @param .unspecified_columns (`"error"`, `"drop"`, or `"keep"`) How to handle +#' columns in `.data` that are not present in `.spec`. +#' @param ... Named harmonization expressions, evaluated in order with `.data` +#' as the data mask. Each result is stored in the column of the same name. +#' +#' @returns The input `.data` harmonized to a [tibble::tibble()]. +#' @family harmonization functions +#' @examples +#' df <- data.frame( +#' size = c("Small", "Medium", "S", "M", "Large", "Lrg", "Sm"), +#' id = 1:7 +#' ) +#' +#' # This spec will coerce values to NA if they are not "Small", "Medium", +#' # or "Large". +#' spec <- specify_df( +#' size = specify_fct(levels = c("Small", "Medium", "Large")) +#' ) +#' +#' # We can provide harmonization rules to the data before the spec is applied. +#' # Here, we harmonize the input factor to convert "S", "M", "Sm", and "Lrg" to +#' # valid values. +#' harmonize_df( +#' df, +#' size = harmonize_fct( +#' size, +#' .lookup = c("S" = "Small", "M" = "Medium", "Sm" = "Small", "Lrg" = "Large") +#' ), +#' .spec = spec, +#' .unspecified_columns = "keep" +#' ) +#' @export +harmonize_df <- function( + .data, + ..., + .spec = NULL, + .unspecified_columns = c("error", "drop", "keep") +) { + .spec <- .to_hrmn_spec(.spec, "df") + # Match eagerly so an invalid value always errors, not only with extra columns. + .unspecified_columns <- rlang::arg_match(.unspecified_columns) + .data <- .harmonize_df_data( + .data, + ..., + .spec = .spec, + .unspecified_columns = .unspecified_columns + ) + return(.harmonize_df_spec(.data, .spec = .spec)) +} + +#' Harmonize a data frame based on data inputs +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns A `data.frame` with custom harmonizations and column name +#' reconciliation applied. 
+#' @keywords internal +.harmonize_df_data <- function( + .data, + ..., + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) { + .harmonize_col_names( + .harmonize_df_dots(.data, ..., .call = .call), + .spec, + .unspecified_columns, + .call = .call + ) +} + +#' Harmonize a data frame based on a harmonization specification +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns The input `.data` harmonized to a [tibble::tibble()]. +#' @keywords internal +.harmonize_df_spec <- function( + .data, + .spec, + .call = rlang::caller_env() +) { + .data_lst <- rlang::set_names(vector("list", length(.spec)), names(.spec)) + for (col_name in names(.spec)) { + .data_lst[[col_name]] <- .harmonize_col( + .data, + .spec, + col_name, + .call = .call + ) + } + .data_lst <- c(.data_lst, .data[setdiff(names(.data), names(.spec))]) + return(tibble::as_tibble(.data_lst)) +} + +#' Harmonize a data frame based on custom harmonization calls +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns A `data.frame` with custom harmonizations applied. +#' @keywords internal +.harmonize_df_dots <- function( + .data, + ..., + .call = rlang::caller_env() +) { + if (!...length()) { + return(.data) + } + dots <- rlang::enquos(..., .named = TRUE) + for (col_name in names(dots)) { + .data[[col_name]] <- rlang::eval_tidy(dots[[col_name]], data = .data) + } + return(.data) +} + +#' Harmonize data frame column names against a specification +#' +#' This will almost definitely migrate to stbl in the future. +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns A `data.frame`, possibly with columns removed. 
+#' @keywords internal +.harmonize_col_names <- function( + .data, + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) { + .stop_if_missing_col_names(.data, .spec, .call = .call) + .harmonize_extra_col_names(.data, .spec, .unspecified_columns, .call = .call) +} + +#' Stop if data frame is missing columns from specification +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns `NULL` (invisibly) +#' @keywords internal +.stop_if_missing_col_names <- function( + .data, + .spec, + .call = rlang::caller_env() +) { + missing_from_data <- setdiff(names(.spec), names(.data)) + if (length(missing_from_data)) { + .hrmn_abort( + c( + "The data frame is missing columns required by the specification.", + i = "Missing columns: {missing_from_data}." + ), + "col_mismatch", + .call = .call, + message_env = rlang::current_env() + ) + } +} + +#' Harmonize extra data frame column names not in specification +#' +#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns A `data.frame`, possibly with columns removed. +#' @keywords internal +.harmonize_extra_col_names <- function( + .data, + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) { + extra_in_data <- setdiff(names(.data), names(.spec)) + if (length(extra_in_data)) { + .data <- switch( + .unspecified_columns, + error = .hrmn_abort( + c( + "The data frame has columns not present in the specification.", + i = "Extra columns: {extra_in_data}.", + i = "Set {.arg .unspecified_columns} to {.str drop} to remove extra columns.", + i = "Set {.arg .unspecified_columns} to {.str keep} to keep extra columns." + ), + "col_mismatch", + .call = .call, + message_env = rlang::current_env() + ), + drop = .data[names(.spec)], + keep = .data + ) + } + return(.data) +} + +#' Harmonize a specific column within a data.frame +#' +#' @param .col_name (`length-1 character`) The name of the column to harmonize +#' within `.data`. 
+#' @inheritParams harmonize_df +#' @inheritParams .shared_params +#' @returns The harmonized column (a factor when the spec is a `hrmn_spec_fct`). +#' @keywords internal +.harmonize_col <- function( + .data, + .spec, + .col_name, + .call = rlang::caller_env() +) { + this_spec <- .spec[[.col_name]] + if (inherits(this_spec, "hrmn_spec_fct")) { + return(harmonize_fct(.data[[.col_name]], .spec = this_spec)) + } else { + .stop_col_has_unknown_spec(.col_name, class(this_spec), .call = .call) + } +} + +#' Stop if a column has an unknown specification class +#' +#' @param .col_name (`length-1 character`) The name of the column. +#' @param class (`character`) The classes of the column specification. +#' @inheritParams .shared_params +#' @keywords internal +.stop_col_has_unknown_spec <- function( + .col_name, + class, + .call = rlang::caller_env() +) { + .hrmn_abort( + c( + "Column specification must be created with a known `specify_*()` function.", + i = "Column {(.col_name)} has specification type {.cls {class[[1]]}}." + ), + "unknown_spec", + .call = .call, + message_env = rlang::current_env() + ) +} diff --git a/R/harmonize_fct.R b/R/harmonize_fct.R index a1b0bb5..8411cc5 100644 --- a/R/harmonize_fct.R +++ b/R/harmonize_fct.R @@ -31,9 +31,12 @@ harmonize_fct <- function(.data, ..., .spec = NULL, .lookup = NULL) { rlang::check_dots_empty() .data <- stbl::to_chr(.data) - .spec <- .spec %||% specify_fct() - .data <- .apply_fct_lookup(.data, .lookup = .lookup) - return(factor(.data, levels = .spec$levels)) + .spec <- .to_hrmn_spec(.spec, "fct") + .data <- .harmonize_fct_by_lookup(.data, .lookup = .lookup) + if (length(.spec)) { + return(factor(.data, levels = .spec$levels)) + } + return(factor(.data)) } #' Apply a lookup table to a character vector @@ -42,7 +45,7 @@ harmonize_fct <- function(.data, ..., .spec = NULL, .lookup = NULL) { #' @returns A character vector with values replaced according to the lookup #' table. 
#' @keywords internal -.apply_fct_lookup <- function(.data, .lookup = NULL) { +.harmonize_fct_by_lookup <- function(.data, .lookup = NULL) { .lookup <- stbl::to_chr(.lookup) matches <- .data %fin% names(.lookup) .data[matches] <- .lookup[.data[matches]] diff --git a/R/specify_df.R b/R/specify_df.R index 98270aa..d06ba27 100644 --- a/R/specify_df.R +++ b/R/specify_df.R @@ -15,7 +15,7 @@ #' ) #' @export specify_df <- function(...) { - .check_args_named(...) - .check_args_spec(...) + .stop_if_args_unnamed(...) + .stop_if_args_not_spec(...) structure(list(...), class = c("hrmn_spec_df", "hrmn_spec", "list")) } diff --git a/R/to_hrmn_spec.R b/R/to_hrmn_spec.R new file mode 100644 index 0000000..c1892a0 --- /dev/null +++ b/R/to_hrmn_spec.R @@ -0,0 +1,54 @@ +#' Validate a harmonization specification +#' +#' @param .spec (`NULL` or `hrmn_spec`) An object to validate as a harmonization +#' specification object. +#' @param .subclass (`NULL` or `length-1 character`) If provided, check that +#' `.spec` inherits the given subclass. For example, to confirm that `.spec` +#' has class `"hrmn_spec_fct"`, use `.subclass = "fct"`. +#' @returns The validated `.spec` object, or an informative error. 
+#' @keywords internal +.to_hrmn_spec <- function( + .spec, + .subclass = NULL, + .call = rlang::caller_env() +) { + if (!length(.spec)) { + return(NULL) + } + if (inherits(.spec, "hrmn_spec")) { + return(.confirm_spec_subclass(.spec, .subclass, .call = .call)) + } + .hrmn_abort( + "{.arg .spec} must be `NULL` or a `hrmn_spec` object.", + "not_spec", + .call = .call + ) +} + +#' Confirm that a spec has a given spec subclass +#' +#' @inheritParams .to_hrmn_spec +#' @inherit .to_hrmn_spec return +#' @keywords internal +.confirm_spec_subclass <- function( + .spec, + .subclass = NULL, + .call = rlang::caller_env() +) { + if (is.null(.subclass)) { + return(.spec) + } + subclass <- paste0("hrmn_spec_", .subclass) + if (!inherits(.spec, subclass)) { + .hrmn_abort( + c( + "{.arg .spec} must be a {.cls {subclass}} object.", + "x" = "{.arg .spec} is a {.cls {class(.spec)[1]}} object." + ), + "bad_spec", + .call = .call, + message_env = rlang::current_env() + ) + } + return(.spec) +} diff --git a/man/dot-confirm_spec_subclass.Rd b/man/dot-confirm_spec_subclass.Rd new file mode 100644 index 0000000..b66fe3a --- /dev/null +++ b/man/dot-confirm_spec_subclass.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to_hrmn_spec.R +\name{.confirm_spec_subclass} +\alias{.confirm_spec_subclass} +\title{Confirm that a spec has a given spec subclass} +\usage{ +.confirm_spec_subclass(.spec, .subclass = NULL, .call = rlang::caller_env()) +} +\arguments{ +\item{.spec}{(\code{NULL} or \code{hrmn_spec}) An object to validate as a harmonization +specification object.} + +\item{.subclass}{(\code{NULL} or \verb{length-1 character}) If provided, check that +\code{.spec} inherits the given subclass. For example, to confirm that \code{.spec} +has class \verb{"hrmn_spec_fct}, use \code{.subclass = "fct"}.} +} +\value{ +The validated \code{.spec} object, or an informative error. 
+} +\description{ +Confirm that a spec has a given spec subclass +} +\keyword{internal} diff --git a/man/dot-harmonize_col.Rd b/man/dot-harmonize_col.Rd new file mode 100644 index 0000000..c8a9477 --- /dev/null +++ b/man/dot-harmonize_col.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_col} +\alias{.harmonize_col} +\title{Harmonize a specific column within a data.frame} +\usage{ +.harmonize_col(.data, .spec, .col_name, .call = rlang::caller_env()) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.col_name}{(\verb{length-1 character}) The name of the column to harmonize +within \code{.data}.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +The input \code{.data} harmonized to a \code{\link[tibble:tibble]{tibble::tibble()}}. 
+} +\description{ +Harmonize a specific column within a data.frame +} +\keyword{internal} diff --git a/man/dot-harmonize_col_names.Rd b/man/dot-harmonize_col_names.Rd new file mode 100644 index 0000000..186f5b6 --- /dev/null +++ b/man/dot-harmonize_col_names.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_col_names} +\alias{.harmonize_col_names} +\title{Harmonize data frame column names against a specification} +\usage{ +.harmonize_col_names( + .data, + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.unspecified_columns}{(\code{"error"}, \code{"drop"}, or \code{"keep"}) How to handle +columns in \code{.data} that are not present in \code{.spec}.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +A \code{data.frame}, possibly with columns removed. +} +\description{ +This will almost definitely migrate to stbl in the future. 
+} +\keyword{internal} diff --git a/man/dot-harmonize_df_data.Rd b/man/dot-harmonize_df_data.Rd new file mode 100644 index 0000000..50680ea --- /dev/null +++ b/man/dot-harmonize_df_data.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_df_data} +\alias{.harmonize_df_data} +\title{Harmonize a data frame based on data inputs} +\usage{ +.harmonize_df_data( + .data, + ..., + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.unspecified_columns}{(\code{"error"}, \code{"drop"}, or \code{"keep"}) How to handle +columns in \code{.data} that are not present in \code{.spec}.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +A \code{data.frame} with custom harmonizations and column name +reconciliation applied. 
+} +\description{ +Harmonize a data frame based on data inputs +} +\keyword{internal} diff --git a/man/dot-harmonize_df_dots.Rd b/man/dot-harmonize_df_dots.Rd new file mode 100644 index 0000000..d17ba49 --- /dev/null +++ b/man/dot-harmonize_df_dots.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_df_dots} +\alias{.harmonize_df_dots} +\title{Harmonize a data frame based on custom harmonization calls} +\usage{ +.harmonize_df_dots(.data, ..., .call = rlang::caller_env()) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +A \code{data.frame} with custom harmonizations applied. +} +\description{ +Harmonize a data frame based on custom harmonization calls +} +\keyword{internal} diff --git a/man/dot-harmonize_df_spec.Rd b/man/dot-harmonize_df_spec.Rd new file mode 100644 index 0000000..ce4e644 --- /dev/null +++ b/man/dot-harmonize_df_spec.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_df_spec} +\alias{.harmonize_df_spec} +\title{Harmonize a data frame based on a harmonization specification} +\usage{ +.harmonize_df_spec(.data, .spec, .call = rlang::caller_env()) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +The input \code{.data} harmonized to a \code{\link[tibble:tibble]{tibble::tibble()}}. 
+} +\description{ +Harmonize a data frame based on a harmonization specification +} +\keyword{internal} diff --git a/man/dot-harmonize_extra_col_names.Rd b/man/dot-harmonize_extra_col_names.Rd new file mode 100644 index 0000000..a9d8103 --- /dev/null +++ b/man/dot-harmonize_extra_col_names.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.harmonize_extra_col_names} +\alias{.harmonize_extra_col_names} +\title{Harmonize extra data frame column names not in specification} +\usage{ +.harmonize_extra_col_names( + .data, + .spec, + .unspecified_columns, + .call = rlang::caller_env() +) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.unspecified_columns}{(\code{"error"}, \code{"drop"}, or \code{"keep"}) How to handle +columns in \code{.data} that are not present in \code{.spec}.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +A \code{data.frame}, possibly with columns removed. +} +\description{ +Harmonize extra data frame column names not in specification +} +\keyword{internal} diff --git a/man/dot-hrmn_abort.Rd b/man/dot-hrmn_abort.Rd index 34c2b96..a60ee95 100644 --- a/man/dot-hrmn_abort.Rd +++ b/man/dot-hrmn_abort.Rd @@ -7,8 +7,8 @@ .hrmn_abort( message, subclass, - call = caller_env(), - message_env = call, + .call = caller_env(), + message_env = .call, parent = NULL, ... ) @@ -20,7 +20,7 @@ formatted with \code{\link[cli:cli_bullets]{cli::cli_bullets()}}.} \item{subclass}{(\code{character}) Class(es) to assign to the error. 
Will be prefixed by "\{package\}-error-".} -\item{call}{\code{(environment)} The execution environment to mention as the +\item{.call}{\code{(environment)} The execution environment to mention as the source of error messages.} \item{message_env}{(\code{environment}) The execution environment to use to diff --git a/man/dot-shared_params.Rd b/man/dot-shared_params.Rd index e920af7..4dfc105 100644 --- a/man/dot-shared_params.Rd +++ b/man/dot-shared_params.Rd @@ -4,19 +4,10 @@ \alias{.shared_params} \title{Parameters used in multiple functions} \arguments{ -\item{call}{\code{(environment)} The execution environment to mention as the +\item{.call}{(\code{environment}) The execution environment to mention as the source of error messages.} \item{levels}{(\code{character}) The allowed values of the factor.} - -\item{message_env}{(\code{environment}) The execution environment to use to -evaluate variables in error messages.} - -\item{parent}{A parent condition, as you might create during a -\code{\link[rlang:try_fetch]{rlang::try_fetch()}}. See \code{\link[rlang:abort]{rlang::abort()}} for additional information.} - -\item{subclass}{(\code{character}) Class(es) to assign to the error. Will be -prefixed by "hrmn-error-".} } \description{ Reused parameter definitions are gathered here for easier editing. 
diff --git a/man/dot-stop_col_has_unknown_spec.Rd b/man/dot-stop_col_has_unknown_spec.Rd new file mode 100644 index 0000000..f31d2ab --- /dev/null +++ b/man/dot-stop_col_has_unknown_spec.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.stop_col_has_unknown_spec} +\alias{.stop_col_has_unknown_spec} +\title{Stop if a column has an unknown specification class} +\usage{ +.stop_col_has_unknown_spec(.col_name, class, .call = rlang::caller_env()) +} +\arguments{ +\item{.col_name}{(\verb{length-1 character}) The name of the column.} + +\item{class}{(\code{character}) The classes of the column specification.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\description{ +Stop if a column has an unknown specification class +} +\keyword{internal} diff --git a/man/dot-check_args_spec.Rd b/man/dot-stop_if_args_not_spec.Rd similarity index 63% rename from man/dot-check_args_spec.Rd rename to man/dot-stop_if_args_not_spec.Rd index 811a746..7c9483d 100644 --- a/man/dot-check_args_spec.Rd +++ b/man/dot-stop_if_args_not_spec.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/aaa-conditions.R -\name{.check_args_spec} -\alias{.check_args_spec} +\name{.stop_if_args_not_spec} +\alias{.stop_if_args_not_spec} \title{Check that all args are hrmn_spec objects} \usage{ -.check_args_spec(..., call = rlang::caller_env()) +.stop_if_args_not_spec(..., .call = rlang::caller_env()) } \arguments{ \item{...}{Arguments to check.} -\item{call}{\code{(environment)} The execution environment to mention as the +\item{.call}{(\code{environment}) The execution environment to mention as the source of error messages.} } \value{ diff --git a/man/dot-check_args_named.Rd b/man/dot-stop_if_args_unnamed.Rd similarity index 63% rename from man/dot-check_args_named.Rd rename to man/dot-stop_if_args_unnamed.Rd index 29e3de3..1e32ee3 
100644 --- a/man/dot-check_args_named.Rd +++ b/man/dot-stop_if_args_unnamed.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/aaa-conditions.R -\name{.check_args_named} -\alias{.check_args_named} +\name{.stop_if_args_unnamed} +\alias{.stop_if_args_unnamed} \title{Check that all specified args are named} \usage{ -.check_args_named(..., call = rlang::caller_env()) +.stop_if_args_unnamed(..., .call = rlang::caller_env()) } \arguments{ \item{...}{Arguments to check.} -\item{call}{\code{(environment)} The execution environment to mention as the +\item{.call}{(\code{environment}) The execution environment to mention as the source of error messages.} } \value{ diff --git a/man/dot-stop_if_missing_col_names.Rd b/man/dot-stop_if_missing_col_names.Rd new file mode 100644 index 0000000..ea1515a --- /dev/null +++ b/man/dot-stop_if_missing_col_names.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{.stop_if_missing_col_names} +\alias{.stop_if_missing_col_names} +\title{Stop if data frame is missing columns from specification} +\usage{ +.stop_if_missing_col_names(.data, .spec, .call = rlang::caller_env()) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.call}{(\code{environment}) The execution environment to mention as the +source of error messages.} +} +\value{ +\code{NULL} (invisibly) +} +\description{ +Stop if data frame is missing columns from specification +} +\keyword{internal} diff --git a/man/dot-to_hrmn_spec.Rd b/man/dot-to_hrmn_spec.Rd new file mode 100644 index 0000000..0380acc --- /dev/null +++ b/man/dot-to_hrmn_spec.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to_hrmn_spec.R +\name{.to_hrmn_spec} +\alias{.to_hrmn_spec} +\title{Validate a harmonization specification} +\usage{ 
+.to_hrmn_spec(.spec, .subclass = NULL, .call = rlang::caller_env()) +} +\arguments{ +\item{.spec}{(\code{NULL} or \code{hrmn_spec}) An object to validate as a harmonization +specification object.} + +\item{.subclass}{(\code{NULL} or \verb{length-1 character}) If provided, check that +\code{.spec} inherits the given subclass. For example, to confirm that \code{.spec} +has class \verb{"hrmn_spec_fct}, use \code{.subclass = "fct"}.} +} +\value{ +The validated \code{.spec} object, or an informative error. +} +\description{ +Validate a harmonization specification +} +\keyword{internal} diff --git a/man/harmonize_df.Rd b/man/harmonize_df.Rd new file mode 100644 index 0000000..a33c0cd --- /dev/null +++ b/man/harmonize_df.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/harmonize_df.R +\name{harmonize_df} +\alias{harmonize_df} +\title{Harmonize a data frame} +\usage{ +harmonize_df( + .data, + ..., + .spec = NULL, + .unspecified_columns = c("error", "drop", "keep") +) +} +\arguments{ +\item{.data}{(\code{data.frame}) A data frame to harmonize.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{.spec}{(\code{hrmn_spec_df}) A data frame harmonization specification.} + +\item{.unspecified_columns}{(\code{"error"}, \code{"drop"}, or \code{"keep"}) How to handle +columns in \code{.data} that are not present in \code{.spec}.} +} +\value{ +The input \code{.data} harmonized to a \code{\link[tibble:tibble]{tibble::tibble()}}. +} +\description{ +Harmonize a data frame +} +\examples{ +df <- data.frame( + size = c("Small", "Medium", "S", "M", "Large", "Lrg", "Sm"), + id = 1:7 +) + +# This spec will coerce values to NA if they are not "Small", "Medium", +# or "Large". +spec <- specify_df( + size = specify_fct(levels = c("Small", "Medium", "Large")) +) + +# We can provide harmonization rules to the data before the spec is applied. 
+# Here, we harmonize the input factor to convert "S", "M", "Sm", and "Lrg" to +# valid values. +harmonize_df( + df, + size = harmonize_fct( + size, + .lookup = c("S" = "Small", "M" = "Medium", "Sm" = "Small", "Lrg" = "Large") + ), + .spec = spec, + .unspecified_columns = "keep" +) +} +\seealso{ +Other harmonization functions: +\code{\link{harmonize_fct}()} +} +\concept{harmonization functions} diff --git a/man/harmonize_fct.Rd b/man/harmonize_fct.Rd index 9a60789..1d9ed58 100644 --- a/man/harmonize_fct.Rd +++ b/man/harmonize_fct.Rd @@ -41,4 +41,8 @@ harmonize_fct( .lookup = lookup ) } +\seealso{ +Other harmonization functions: +\code{\link{harmonize_df}()} +} \concept{harmonization functions} diff --git a/principles.md b/principles.md new file mode 100644 index 0000000..daf3ae6 --- /dev/null +++ b/principles.md @@ -0,0 +1,17 @@ +# hrmn design principles + +*This is an experiment in making key package design principles explicit, versus only implicit in the code. The goal is to make maintenance easier, when spread out over time and across people. This idea was copied from [usethis](https://github.com/r-lib/usethis/blob/main/principles.md).* + +*These principles are a work in progress.* + +## Function Naming Conventions + +Internal helper functions follow specific naming patterns to make their purpose +clear at a glance. + +- `.*()`: Internal helper functions that are not exported. `.`-prefixed versions of the families below might exist. +- `harmonize_*()`: Functions that take a data object (like a vector or data frame) and (usually) a harmonization specification, apply the harmonization, and return the modified data object. These functions form the core logic of the package. +- `specify_*()`: Functions that create and return a harmonization specification object, which defines the target structure and constraints for data harmonization. These functions are used to set up the harmonization process. 
+- `.stop_if_*()`: Functions that conditionally throw an error based on some unmet criteria. If the error condition is not triggered, they should return `invisible(NULL)` (generally "automatically" via `if ()`). Generally used for input validation. +- `.stop_*()` (not `_if_`): Functions that unconditionally throw a specific, named error. These are used to abstract the call to `.hrmn_abort()` for common error conditions. +- `.to_*()`: Functions that attempt to coerce an object to a specific class, e.g., `.to_hrmn_spec()`. diff --git a/tests/testthat/_snaps/aaa-conditions.md b/tests/testthat/_snaps/aaa-conditions.md index c3918d9..7ede526 100644 --- a/tests/testthat/_snaps/aaa-conditions.md +++ b/tests/testthat/_snaps/aaa-conditions.md @@ -6,18 +6,18 @@ Error: ! A message. -# .check_args_named() works +# .stop_if_args_unnamed() works Code - .check_args_named(1) + .stop_if_args_unnamed(1) Condition Error: ! All arguments must be named. -# .check_args_spec() works +# .stop_if_args_not_spec() works Code - .check_args_spec(a = 1) + .stop_if_args_not_spec(a = 1) Condition Error: ! All arguments must be `hrmn_spec` objects. @@ -26,7 +26,7 @@ --- Code - .check_args_spec(a = 1, b = "B") + .stop_if_args_not_spec(a = 1, b = "B") Condition Error: ! All arguments must be `hrmn_spec` objects. @@ -35,7 +35,7 @@ --- Code - .check_args_spec(a = 1, b = spec) + .stop_if_args_not_spec(a = 1, b = spec) Condition Error: ! All arguments must be `hrmn_spec` objects. @@ -44,7 +44,7 @@ --- Code - .check_args_spec(1) + .stop_if_args_not_spec(1) Condition Error: ! All arguments must be `hrmn_spec` objects. 
diff --git a/tests/testthat/test-aaa-conditions.R b/tests/testthat/test-aaa-conditions.R index 3b6fb80..b4aa50f 100644 --- a/tests/testthat/test-aaa-conditions.R +++ b/tests/testthat/test-aaa-conditions.R @@ -10,34 +10,37 @@ test_that(".hrmn_abort() throws the expected error", { ) }) -test_that(".check_args_named() works", { - expect_no_error(.check_args_named()) - expect_no_error(.check_args_named(a = 1)) - expect_no_error(.check_args_named(a = 1, b = 2)) +test_that(".stop_if_args_unnamed() works", { + expect_no_error(.stop_if_args_unnamed()) + expect_no_error(.stop_if_args_unnamed(a = 1)) + expect_no_error(.stop_if_args_unnamed(a = 1, b = 2)) - expect_error(.check_args_named(1), class = "hrmn-error-args_unnamed") - expect_error(.check_args_named(a = 1, 2), class = "hrmn-error-args_unnamed") - expect_error(.check_args_named(1, 2), class = "hrmn-error-args_unnamed") - expect_snapshot(.check_args_named(1), error = TRUE) + expect_error(.stop_if_args_unnamed(1), class = "hrmn-error-args_unnamed") + expect_error( + .stop_if_args_unnamed(a = 1, 2), + class = "hrmn-error-args_unnamed" + ) + expect_error(.stop_if_args_unnamed(1, 2), class = "hrmn-error-args_unnamed") + expect_snapshot(.stop_if_args_unnamed(1), error = TRUE) }) -test_that(".check_args_spec() works", { - expect_no_error(.check_args_spec()) +test_that(".stop_if_args_not_spec() works", { + expect_no_error(.stop_if_args_not_spec()) spec <- structure(list(), class = "hrmn_spec") - expect_no_error(.check_args_spec(a = spec)) - expect_no_error(.check_args_spec(a = spec, b = spec)) + expect_no_error(.stop_if_args_not_spec(a = spec)) + expect_no_error(.stop_if_args_not_spec(a = spec, b = spec)) - expect_error(.check_args_spec(a = 1), class = "hrmn-error-args_not_spec") + expect_error(.stop_if_args_not_spec(a = 1), class = "hrmn-error-not_spec") expect_error( - .check_args_spec(a = spec, b = "B"), - class = "hrmn-error-args_not_spec" + .stop_if_args_not_spec(a = spec, b = "B"), + class = "hrmn-error-not_spec" ) 
expect_error( - .check_args_spec(a = 1, b = 2), - class = "hrmn-error-args_not_spec" + .stop_if_args_not_spec(a = 1, b = 2), + class = "hrmn-error-not_spec" ) - expect_snapshot(.check_args_spec(a = 1), error = TRUE) - expect_snapshot(.check_args_spec(a = 1, b = "B"), error = TRUE) - expect_snapshot(.check_args_spec(a = 1, b = spec), error = TRUE) - expect_snapshot(.check_args_spec(1), error = TRUE) + expect_snapshot(.stop_if_args_not_spec(a = 1), error = TRUE) + expect_snapshot(.stop_if_args_not_spec(a = 1, b = "B"), error = TRUE) + expect_snapshot(.stop_if_args_not_spec(a = 1, b = spec), error = TRUE) + expect_snapshot(.stop_if_args_not_spec(1), error = TRUE) }) diff --git a/tests/testthat/test-harmonize_df.R b/tests/testthat/test-harmonize_df.R new file mode 100644 index 0000000..5732286 --- /dev/null +++ b/tests/testthat/test-harmonize_df.R @@ -0,0 +1,128 @@ +test_that("harmonize_df() works with an empty data frame", { + expect_identical( + harmonize_df(data.frame()), + tibble::tibble() + ) +}) + +test_that("harmonize_df() errors if .spec is not a hrmn_spec_df", { + expect_error( + harmonize_df(data.frame(x = c("a", "b")), .spec = list(x = "not a spec")), + class = "hrmn-error-not_spec" + ) + expect_error( + harmonize_df(data.frame(x = c("a", "b")), .spec = specify_fct()), + class = "hrmn-error-bad_spec" + ) +}) + +test_that("harmonize_df() errors if spec contains unknown spec class", { + df <- data.frame(x = "x") + spec <- specify_df(x = specify_fct(levels = c("a"))) + class(spec$x)[[1]] <- "hrmn_spec_unknown" + expect_error( + harmonize_df(df, .spec = spec), + class = "hrmn-error-unknown_spec" + ) +}) + +test_that("harmonize_df() errors for missing columns with default .unspecified_columns", { + spec <- specify_df( + x = specify_fct(levels = c("a", "b")) + ) + missing_cols <- data.frame( + y = factor(c("a", "b", "c", NA)) + ) + expect_error( + harmonize_df(missing_cols, .spec = spec), + class = "hrmn-error-col_mismatch" + ) +}) + +test_that("harmonize_df() 
errors for extra columns with default .unspecified_columns", { + spec <- specify_df( + x = specify_fct(levels = c("a", "b")) + ) + extra_cols <- data.frame( + x = factor(c("a", "b", "c", NA)), + y = 1 + ) + expect_error( + harmonize_df(extra_cols, .spec = spec), + class = "hrmn-error-col_mismatch" + ) +}) + +test_that("harmonize_df() works with a single-column data frame", { + df <- data.frame( + x = factor(c("a", "b", "c", NA)) + ) + spec <- specify_df( + x = specify_fct(levels = c("a", "b")) + ) + expected <- tibble::tibble( + x = factor(c("a", "b", NA, NA), levels = c("a", "b")) + ) + expect_identical( + harmonize_df(df, .spec = spec), + expected + ) +}) + +test_that("harmonize_df() can keep unspecified columns", { + df <- data.frame( + x = factor(c("a", "b", "c", NA)), + y = 1:4 + ) + spec <- specify_df( + x = specify_fct(levels = c("a", "b")) + ) + expected <- tibble::tibble( + x = factor(c("a", "b", NA, NA), levels = c("a", "b")), + y = 1:4 + ) + expect_identical( + harmonize_df(df, .spec = spec, .unspecified_columns = "keep"), + expected + ) +}) + +test_that("harmonize_df() can drop unspecified columns", { + df <- data.frame( + x = factor(c("a", "b", "c", NA)), + y = 1:4 + ) + spec <- specify_df( + x = specify_fct(levels = c("a", "b")) + ) + expected <- tibble::tibble( + x = factor(c("a", "b", NA, NA), levels = c("a", "b")) + ) + expect_identical( + harmonize_df(df, .spec = spec, .unspecified_columns = "drop"), + expected + ) +}) + +test_that("harmonize_df() can use custom harmonization calls", { + df <- data.frame( + x = factor(c("a", "b", "c", NA)), + y = 1:4 + ) + spec <- specify_df( + x = specify_fct(levels = c("A", "B")) + ) + expected <- tibble::tibble( + x = factor(c("A", "B", NA, NA), levels = c("A", "B")), + y = 1:4 + ) + expect_identical( + harmonize_df( + df, + .spec = spec, + .unspecified_columns = "keep", + x = harmonize_fct(toupper(x)) + ), + expected + ) +}) diff --git a/tests/testthat/test-harmonize_fct.R b/tests/testthat/test-harmonize_fct.R 
index a99642c..74bc9ec 100644 --- a/tests/testthat/test-harmonize_fct.R +++ b/tests/testthat/test-harmonize_fct.R @@ -1,4 +1,4 @@ -test_that("harmonize_fct() works with empty vector and returns a factor", { +test_that("harmonize_fct() with empty vector returns a factor", { expect_identical( { harmonize_fct(factor()) @@ -7,6 +7,15 @@ test_that("harmonize_fct() works with empty vector and returns a factor", { ) }) +test_that("harmonize_fct() with character vector and no spec returns a factor", { + expect_identical( + { + harmonize_fct(c("a", "b", "c")) + }, + factor(c("a", "b", "c")) + ) +}) + test_that("harmonize_fct() drops unspecified levels", { expect_identical( { diff --git a/tests/testthat/test-specify_df.R b/tests/testthat/test-specify_df.R index 7adc97f..32498af 100644 --- a/tests/testthat/test-specify_df.R +++ b/tests/testthat/test-specify_df.R @@ -25,14 +25,14 @@ test_that("specify_df() errors if dots are unnamed", { test_that("specify_df() errors if arguments are not hrmn_spec objects", { expect_error( specify_df(col1 = "not a spec"), - class = "hrmn-error-args_not_spec" + class = "hrmn-error-not_spec" ) expect_error( specify_df( col1 = specify_fct(levels = c("a", "b")), col2 = 123 ), - class = "hrmn-error-args_not_spec" + class = "hrmn-error-not_spec" ) expect_snapshot( specify_df(col2 = 123), diff --git a/tests/testthat/test-to_hrmn_spec.R b/tests/testthat/test-to_hrmn_spec.R new file mode 100644 index 0000000..55c873f --- /dev/null +++ b/tests/testthat/test-to_hrmn_spec.R @@ -0,0 +1,40 @@ +test_that(".to_hrmn_spec errors if object is not NULL or hrmn_spec", { + expect_error( + .to_hrmn_spec("a"), + class = "hrmn-error-not_spec" + ) +}) + +test_that(".to_hrmn_spec returns NULL if input is NULL", { + expect_null(.to_hrmn_spec(NULL)) +}) + +test_that(".to_hrmn_spec returns NULL input has length 0", { + spec <- structure(list(), class = "hrmn_spec") + expect_identical(.to_hrmn_spec(spec), NULL) +}) + +test_that(".to_hrmn_spec returns input if it is a 
hrmn_spec", { + spec <- structure(list(a = 1), class = "hrmn_spec") + expect_identical(.to_hrmn_spec(spec), spec) +}) + +test_that(".to_hrmn_spec checks spec subclass if .subclass is provided", { + spec_fct <- structure( + list(1), + class = c("hrmn_spec_fct", "hrmn_spec", "list") + ) + spec_df <- structure(list(1), class = c("hrmn_spec_df", "hrmn_spec", "list")) + + expect_identical(.to_hrmn_spec(spec_fct, .subclass = "fct"), spec_fct) + expect_identical(.to_hrmn_spec(spec_df, .subclass = "df"), spec_df) + + expect_error( + .to_hrmn_spec(spec_fct, .subclass = "df"), + class = "hrmn-error-bad_spec" + ) + expect_error( + .to_hrmn_spec(spec_df, .subclass = "fct"), + class = "hrmn-error-bad_spec" + ) +}) From 904755e8b6d9cbec040688f46ad65fbe07c75822 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Fri, 10 Oct 2025 14:27:47 -0500 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- R/harmonize_df.R | 2 +- tests/testthat/test-harmonize_fct.R | 2 +- tests/testthat/test-to_hrmn_spec.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/harmonize_df.R b/R/harmonize_df.R index b6da0b1..69aad2b 100644 --- a/R/harmonize_df.R +++ b/R/harmonize_df.R @@ -180,7 +180,7 @@ harmonize_df <- function( "The data frame has columns not present in the specification.", i = "Extra columns: {extra_in_data}.", i = "Set {.arg .unspecified_columns} to {.str drop} to remove extra columns.", - i = "Set {.arg .unspecified_columns} to {.str keep} to remove extra columns." + i = "Set {.arg .unspecified_columns} to {.str keep} to keep extra columns." 
), "col_mismatch", .call = .call, diff --git a/tests/testthat/test-harmonize_fct.R b/tests/testthat/test-harmonize_fct.R index 74bc9ec..06ae727 100644 --- a/tests/testthat/test-harmonize_fct.R +++ b/tests/testthat/test-harmonize_fct.R @@ -1,4 +1,4 @@ -test_that("harmonize_fct() with empty vector returns a factor", { +test_that("harmonize_fct() with empty vector returns an empty factor with no levels", { expect_identical( { harmonize_fct(factor()) diff --git a/tests/testthat/test-to_hrmn_spec.R b/tests/testthat/test-to_hrmn_spec.R index 55c873f..1bc3a96 100644 --- a/tests/testthat/test-to_hrmn_spec.R +++ b/tests/testthat/test-to_hrmn_spec.R @@ -9,7 +9,7 @@ test_that(".to_hrmn_spec returns NULL if input is NULL", { expect_null(.to_hrmn_spec(NULL)) }) -test_that(".to_hrmn_spec returns NULL input has length 0", { +test_that(".to_hrmn_spec returns NULL if input has length 0", { spec <- structure(list(), class = "hrmn_spec") expect_identical(.to_hrmn_spec(spec), NULL) }) From 0535defff81cef6e17a8963392650585b446ba10 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Fri, 10 Oct 2025 14:29:52 -0500 Subject: [PATCH 3/3] Redocument. 
--- ...t-apply_fct_lookup.Rd => dot-harmonize_fct_by_lookup.Rd} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename man/{dot-apply_fct_lookup.Rd => dot-harmonize_fct_by_lookup.Rd} (84%) diff --git a/man/dot-apply_fct_lookup.Rd b/man/dot-harmonize_fct_by_lookup.Rd similarity index 84% rename from man/dot-apply_fct_lookup.Rd rename to man/dot-harmonize_fct_by_lookup.Rd index e2dfb44..8805e11 100644 --- a/man/dot-apply_fct_lookup.Rd +++ b/man/dot-harmonize_fct_by_lookup.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/harmonize_fct.R -\name{.apply_fct_lookup} -\alias{.apply_fct_lookup} +\name{.harmonize_fct_by_lookup} +\alias{.harmonize_fct_by_lookup} \title{Apply a lookup table to a character vector} \usage{ -.apply_fct_lookup(.data, .lookup = NULL) +.harmonize_fct_by_lookup(.data, .lookup = NULL) } \arguments{ \item{.data}{(\code{character} or coercible to \code{character}) A vector to harmonize