-
Notifications
You must be signed in to change notification settings - Fork 0
Implement harmonize_fct()
#36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,6 @@ | ||
| # Generated by roxygen2: do not edit by hand | ||
|
|
||
| export(harmonize_fct) | ||
| export(specify_fct) | ||
| if (getRversion() < "4.3.0") importFrom("S7", "@") | ||
| importFrom(fastmatch,"%fin%") | ||
| importFrom(rlang,"%||%") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| #' Parameters used in multiple functions | ||
| #' | ||
| #' Reused parameter definitions are gathered here for easier editing. | ||
| #' | ||
| #' @param levels (`character`) The allowed values of the factor. | ||
| #' | ||
| #' @name .shared_params | ||
| #' @keywords internal | ||
| NULL |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| #' Harmonize a factor | ||
| #' | ||
| #' Coerce `.data` to character, optionally recode its values through | ||
| #' `.lookup`, and return a factor restricted to the levels of `.spec`. | ||
| #' | ||
| #' @param .data (`character` or coercible to `character`) A vector to harmonize | ||
| #' to the specified factor. | ||
| #' @inheritParams .shared_params | ||
| #' @inheritParams rlang::args_dots_empty | ||
| #' @param .spec (`hrmn_fct_spec` or `NULL`) A harmonization specification from | ||
| #' [specify_fct()]. If `NULL` (the default), the levels are derived from the | ||
| #' (recoded) data, as in [base::factor()]. | ||
| #' @param .lookup (named `character`) A vector of replacement values. The names | ||
| #' are the values in `.data` and the values are the target values. Values of | ||
| #' `.data` that do not appear among the names are left unchanged. | ||
| #' | ||
| #' @returns A harmonized [factor()]. When `.spec` is supplied, values that are | ||
| #' not among its levels become `NA`. | ||
| #' @export | ||
| #' | ||
| #' @examples | ||
| #' # Without a spec, harmonize_fct() acts like base::factor(). | ||
| #' harmonize_fct(c("a", "b", "c")) | ||
| #' | ||
| #' # Basic harmonization, dropping levels not in the spec | ||
| #' spec <- specify_fct(levels = c("a", "b")) | ||
| #' harmonize_fct(c("a", "b", "c"), .spec = spec) | ||
| #' | ||
| #' # Using a lookup table to recode values | ||
| #' spec2 <- specify_fct(levels = c("fruit", "citrus")) | ||
| #' lookup <- c(apple = "fruit", banana = "fruit", orange = "citrus") | ||
| #' harmonize_fct( | ||
| #' c("apple", "banana", "orange"), | ||
| #' .spec = spec2, | ||
| #' .lookup = lookup | ||
| #' ) | ||
| harmonize_fct <- function(.data, ..., .spec = NULL, .lookup = NULL) { | ||
| rlang::check_dots_empty() | ||
| .data <- stbl::to_chr(.data) | ||
| .data <- .apply_fct_lookup(.data, .lookup = .lookup) | ||
| if (is.null(.spec)) { | ||
| # No spec: derive the levels from the data, as base::factor() does. Falling | ||
| # back to an empty specify_fct() would turn every value into NA instead. | ||
| return(factor(.data)) | ||
| } | ||
| return(factor(.data, levels = .spec$levels)) | ||
| } | ||
|
|
||
| #' Recode a character vector through a lookup table | ||
| #' | ||
| #' Every element of `.data` that appears among the names of `.lookup` is | ||
| #' replaced by the corresponding lookup value; all other elements are kept. | ||
| #' | ||
| #' @inheritParams harmonize_fct | ||
| #' @returns A character vector with values replaced according to the lookup | ||
| #' table. | ||
| #' @keywords internal | ||
| .apply_fct_lookup <- function(.data, .lookup = NULL) { | ||
| replacements <- stbl::to_chr(.lookup) | ||
| # Flag the elements whose current value is a key of the lookup table. | ||
| has_replacement <- .data %fin% names(replacements) | ||
| old_values <- .data[has_replacement] | ||
| .data[has_replacement] <- replacements[old_values] | ||
| .data | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,34 +1,19 @@ | ||
| # Developer note: `specify_fct()` is designed to create a data-less | ||
| # "specification" object. It defines the target state (the levels) for a factor | ||
| # but doesn't hold any actual factor data itself. This is why the constructor | ||
| # internally provides `integer()` as the data component to `S7::new_object()`. | ||
| # | ||
| # In the future, we might use the `hrmn_fct` class to represent actual, | ||
| # harmonized factor data. In that scenario, we would likely create a separate | ||
| # `class_hrmn_fct` object and have `specify_fct()` be a wrapper function that | ||
| # calls the constructor with the empty data. For now, since we don't need the | ||
| # full factor-like class, we are directly defining the `hrmn_fct` class in | ||
| # `specify_fct()`. | ||
|
|
||
| #' Specify a factor harmonization | ||
| #' Factor specification | ||
| #' | ||
| #' Create a `hrmn_fct` object that specifies the desired levels for a factor | ||
| #' variable. This 'specification' object does not contain any data itself, only | ||
| #' the rules for harmonization. | ||
| #' Create an object that specifies the desired levels for a factor variable. | ||
| #' This specification object does not contain any data itself, only the rules | ||
| #' for harmonization. | ||
| #' | ||
| #' @param levels (`character`) The allowed values of the factor. | ||
| #' @returns A factor specification, an S7 object of class `hrmn::hrmn_fct`. | ||
| #' @inheritParams .shared_params | ||
| #' | ||
| #' @returns A `hrmn_fct_spec` object that acts as a specification. | ||
| #' @export | ||
| specify_fct <- S7::new_class( | ||
| name = "hrmn_fct", | ||
| parent = S7::class_factor, | ||
| properties = list(levels = S7::class_character), | ||
| constructor = function(levels = character()) { | ||
| # A specification holds no data: the underlying integer codes are empty and | ||
| # only the allowed levels are stored. | ||
| S7::new_object(integer(), levels = levels) | ||
| } | ||
| ) | ||
| #' | ||
| #' @examples | ||
| #' specify_fct(levels = c("a", "b", "c")) | ||
| specify_fct <- function(levels = character()) { | ||
| # The spec is a plain list holding the allowed levels as character. | ||
| spec <- list(levels = stbl::to_chr(levels)) | ||
| class(spec) <- c("hrmn_fct_spec", "hrmn_spec", "list") | ||
| spec | ||
| } | ||
This file was deleted.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| test_that("harmonize_fct() works with empty vector and returns a factor", { | ||
| # An empty factor in should give an identical empty factor out. | ||
| harmonized <- harmonize_fct(factor()) | ||
| expect_identical(harmonized, factor()) | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() drops unspecified levels", { | ||
| # "b" is not an allowed level, so it is expected to come back as NA. | ||
| only_a <- specify_fct(levels = "a") | ||
| harmonized <- harmonize_fct(factor(c("a", "b")), .spec = only_a) | ||
| expect_identical(harmonized, factor(c("a", NA), levels = "a")) | ||
| }) | ||
|
|
||
|
|
||
| test_that("harmonize_fct() errors if .spec is not named", { | ||
| # A positional second argument lands in `...`, which must stay empty. | ||
| input <- factor(c("a", "b")) | ||
| unnamed_spec <- specify_fct(levels = "a") | ||
| expect_error( | ||
| harmonize_fct(input, unnamed_spec), | ||
| class = "rlib_error_dots_nonempty" | ||
| ) | ||
| }) | ||
|
|
||
| test_that("The first `harmonize_fct()` argument is `.data`", { | ||
| # Inspect the formals so the leading argument name is pinned down. | ||
| first_arg <- rlang::fn_fmls_names(harmonize_fct)[1] | ||
| expect_equal(first_arg, ".data") | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() preserves existing NAs", { | ||
| # The NA already in the input stays NA; "b" is outside the spec. | ||
| only_a <- specify_fct(levels = "a") | ||
| harmonized <- harmonize_fct(factor(c("a", "b", NA)), .spec = only_a) | ||
| expect_equal(harmonized, factor(c("a", NA, NA), levels = "a")) | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() works with character vectors", { | ||
| # Plain character input is expected to behave like factor input. | ||
| only_a <- specify_fct(levels = "a") | ||
| harmonized <- harmonize_fct(c("a", "b"), .spec = only_a) | ||
| expect_equal(harmonized, factor(c("a", NA), levels = "a")) | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() works with an empty spec", { | ||
| # With no allowed levels, every value is expected to come back as NA. | ||
| no_levels <- specify_fct(levels = character()) | ||
| harmonized <- harmonize_fct(factor(c("a", "b")), .spec = no_levels) | ||
| expect_equal(harmonized, factor(c(NA, NA), levels = character())) | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() uses .lookup table", { | ||
| # Raw values are recoded through the lookup before the levels are applied. | ||
| recode <- c(x = "a", y = "a", z = "b") | ||
| target <- specify_fct(levels = c("a", "b")) | ||
| harmonized <- harmonize_fct( | ||
| c("x", "y", "z"), | ||
| .spec = target, | ||
| .lookup = recode | ||
| ) | ||
| expect_equal(harmonized, factor(c("a", "a", "b"), levels = c("a", "b"))) | ||
| }) | ||
|
|
||
| test_that("harmonize_fct() .lookup values not in levels become NA", { | ||
| # "x" is recoded to "b", which the spec does not allow. | ||
| recode <- c(x = "b") | ||
| only_a <- specify_fct(levels = "a") | ||
| harmonized <- harmonize_fct("x", .spec = only_a, .lookup = recode) | ||
| expect_equal(harmonized, factor(NA_character_, levels = "a")) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,13 @@ | ||
| test_that("specify_fct() returns an object with the correct class", { | ||
| spec <- specify_fct() | ||
| expect_s3_class( | ||
| spec, | ||
| c("hrmn::hrmn_fct", "factor", "S7_object"), | ||
| specify_fct(), | ||
| c("hrmn_fct_spec", "hrmn_spec", "list"), | ||
| exact = TRUE | ||
| ) | ||
| }) | ||
|
|
||
| test_that("specify_fct() stores the levels", { | ||
| lvls <- c("a", "b", "c") | ||
| spec <- specify_fct(levels = lvls) | ||
| expect_equal(spec@levels, lvls) | ||
| expect_equal(spec$levels, lvls) | ||
| }) |
Uh oh!
There was an error while loading. Please reload this page.