Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hrmn
Title: Harmonize Datasets
Version: 0.0.0.9001
Version: 0.0.0.9002
Authors@R:
person("Jon", "Harmon", , "jonthegeek@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4781-4346"))
Expand All @@ -22,4 +22,6 @@ Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
Imports:
S7
fastmatch,
rlang,
stbl
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(harmonize_fct)
export(specify_fct)
if (getRversion() < "4.3.0") importFrom("S7", "@")
importFrom(fastmatch,"%fin%")
importFrom(rlang,"%||%")
9 changes: 9 additions & 0 deletions R/aaa-shared_params.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#' Parameters used in multiple functions
#'
#' Reused parameter definitions are gathered here for easier editing.
#'
#' @param levels (`character`) The allowed values of the factor.
#'
#' @name .shared_params
#' @keywords internal
NULL
50 changes: 50 additions & 0 deletions R/harmonize_fct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Harmonize a factor
#'
#' Coerce `.data` to character, optionally recode it through `.lookup`, and
#' return the result as a factor. When a specification is supplied, its levels
#' define the allowed values and anything else becomes `NA`.
#'
#' @param .data (`character` or coercible to `character`) A vector to harmonize
#'   to the specified factor.
#' @inheritParams .shared_params
#' @inheritParams rlang::args_dots_empty
#' @param .spec (`hrmn_fct_spec` or `NULL`) A harmonization specification from
#'   [specify_fct()]. If `NULL` (the default), the levels are derived from the
#'   recoded data, as [base::factor()] does for a character vector.
#' @param .lookup (named `character`) A vector of replacement values. The names
#'   are the values in `.data` and the values are the target values.
#'
#' @returns A harmonized [factor()].
#' @export
#'
#' @examples
#' # Without a spec, harmonize_fct() acts like base::factor().
#' harmonize_fct(c("a", "b", "c"))
#'
#' # Basic harmonization, dropping levels not in the spec
#' spec <- specify_fct(levels = c("a", "b"))
#' harmonize_fct(c("a", "b", "c"), .spec = spec)
#'
#' # Using a lookup table to recode values
#' spec2 <- specify_fct(levels = c("fruit", "citrus"))
#' lookup <- c(apple = "fruit", banana = "fruit", orange = "citrus")
#' harmonize_fct(
#'   c("apple", "banana", "orange"),
#'   .spec = spec2,
#'   .lookup = lookup
#' )
harmonize_fct <- function(.data, ..., .spec = NULL, .lookup = NULL) {
  # `.spec` and `.lookup` must be passed by name; anything in `...` is an error.
  rlang::check_dots_empty()
  .data <- stbl::to_chr(.data)
  .data <- .apply_fct_lookup(.data, .lookup = .lookup)
  if (is.null(.spec)) {
    # No specification: take the levels from the data. Falling back to an empty
    # specification here would turn every value into NA.
    return(factor(.data))
  }
  factor(.data, levels = .spec$levels)
}

#' Recode a character vector through a lookup table
#'
#' Elements of `.data` that appear among the names of `.lookup` are replaced by
#' the corresponding lookup value. All other elements are left as they are.
#'
#' @inheritParams harmonize_fct
#' @returns A character vector with values replaced according to the lookup
#'   table.
#' @keywords internal
.apply_fct_lookup <- function(.data, .lookup = NULL) {
  replacements <- stbl::to_chr(.lookup)
  # Flag the elements that have an entry in the lookup table.
  has_entry <- .data %fin% names(replacements)
  keys <- .data[has_entry]
  # Index the lookup by name to fetch each replacement value.
  .data[has_entry] <- replacements[keys]
  .data
}
2 changes: 2 additions & 0 deletions R/hrmn-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
"_PACKAGE"

## usethis namespace: start
#' @importFrom fastmatch %fin%
#' @importFrom rlang %||%
## usethis namespace: end
NULL
47 changes: 16 additions & 31 deletions R/specify_fct.R
Original file line number Diff line number Diff line change
@@ -1,34 +1,19 @@
# Developer note: `specify_fct()` is designed to create a data-less
# "specification" object. It defines the target state (the levels) for a factor
# but doesn't hold any actual factor data itself. This is why the constructor
# internally provides `integer()` as the data component to `S7::new_object()`.
#
# In the future, we might use the `hrmn_fct` class to represent actual,
# harmonized factor data. In that scenario, we would likely create a separate
# `class_hrmn_fct` object and have `specify_fct()` be a wrapper function that
# calls the constructor with the empty data. For now, since we don't need the
# full factor-like class, we are directly defining the `hrmn_fct` class in
# `specify_fct()`

#' Specify a factor harmonization
#' Factor specification
#'
#' Create a `hrmn_fct` object that specifies the desired levels for a factor
#' variable. This 'specification' object does not contain any data itself, only
#' the rules for harmonization.
#' Create an object that specifies the desired levels for a factor variable.
#' This specification object does not contain any data itself, only the rules
#' for harmonization.
#'
#' @param levels (`character`) The allowed values of the factor.
#' @returns A factor specification, an S7 object of class `hrmn::hrmn_fct`.
#' @inheritParams .shared_params
#'
#' @returns A `hrmn_fct_spec` object that acts as a specification.
#' @export
specify_fct <- S7::new_class(
  name = "hrmn_fct",
  parent = S7::class_factor,
  properties = list(levels = S7::class_character),
  constructor = function(levels = character()) {
    # The object carries no data of its own: an empty integer vector is the
    # underlying value and only `levels` is populated.
    S7::new_object(integer(), levels = levels)
  }
)
#'
#' @examples
#' specify_fct(levels = c("a", "b", "c"))
specify_fct <- function(levels = character()) {
  # The specification is a plain list holding the allowed levels as character.
  spec <- list(levels = stbl::to_chr(levels))
  # Class vector runs from the most specific class to the most general one.
  class(spec) <- c("hrmn_fct_spec", "hrmn_spec", "list")
  spec
}
3 changes: 0 additions & 3 deletions R/zzz.R

This file was deleted.

23 changes: 23 additions & 0 deletions man/dot-apply_fct_lookup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/dot-shared_params.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions man/harmonize_fct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions man/specify_fct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions tests/testthat/test-harmonize_fct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
test_that("harmonize_fct() works with empty vector and returns a factor", {
  # An empty factor in must give an empty factor out (strict identity).
  harmonized <- harmonize_fct(factor())
  expect_identical(harmonized, factor())
})

test_that("harmonize_fct() drops unspecified levels", {
  # "b" is not an allowed level, so it must come back as NA.
  harmonized <- harmonize_fct(
    factor(c("a", "b")),
    .spec = specify_fct(levels = "a")
  )
  expect_identical(harmonized, factor(c("a", NA), levels = "a"))
})


test_that("harmonize_fct() errors if .spec is not named", {
  # A positional second argument lands in `...`, which must stay empty.
  expect_error(
    harmonize_fct(factor(c("a", "b")), specify_fct(levels = "a")),
    class = "rlib_error_dots_nonempty"
  )
})

test_that("The first `harmonize_fct()` argument is `.data`", {
  # Pin the name of the first formal argument.
  first_arg <- rlang::fn_fmls_names(harmonize_fct)[1]
  expect_equal(first_arg, ".data")
})

test_that("harmonize_fct() preserves existing NAs", {
  # The NA already present in the input stays NA; "b" is dropped to NA as well.
  harmonized <- harmonize_fct(
    factor(c("a", "b", NA)),
    .spec = specify_fct(levels = "a")
  )
  expect_equal(harmonized, factor(c("a", NA, NA), levels = "a"))
})

test_that("harmonize_fct() works with character vectors", {
  # Plain character input is accepted, not only factors.
  harmonized <- harmonize_fct(c("a", "b"), .spec = specify_fct(levels = "a"))
  expect_equal(harmonized, factor(c("a", NA), levels = "a"))
})

test_that("harmonize_fct() works with an empty spec", {
  # With no allowed levels at all, every value becomes NA.
  empty_spec <- specify_fct(levels = character())
  harmonized <- harmonize_fct(factor(c("a", "b")), .spec = empty_spec)
  expect_equal(harmonized, factor(c(NA, NA), levels = character()))
})

test_that("harmonize_fct() uses .lookup table", {
  # Each input value is recoded through the lookup before levels are applied.
  recode <- c(x = "a", y = "a", z = "b")
  harmonized <- harmonize_fct(
    c("x", "y", "z"),
    .spec = specify_fct(levels = c("a", "b")),
    .lookup = recode
  )
  expect_equal(harmonized, factor(c("a", "a", "b"), levels = c("a", "b")))
})

test_that("harmonize_fct() .lookup values not in levels become NA", {
  # "x" is recoded to "b", which is not an allowed level, so the result is NA.
  harmonized <- harmonize_fct(
    "x",
    .spec = specify_fct(levels = "a"),
    .lookup = c(x = "b")
  )
  expect_equal(harmonized, factor(NA_character_, levels = "a"))
})
7 changes: 3 additions & 4 deletions tests/testthat/test-specify_fct.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
test_that("specify_fct() returns an object with the correct class", {
  # `exact = TRUE` requires the full class vector to match, in this order.
  expect_s3_class(
    specify_fct(),
    c("hrmn_fct_spec", "hrmn_spec", "list"),
    exact = TRUE
  )
})

test_that("specify_fct() stores the levels", {
  lvls <- c("a", "b", "c")
  spec <- specify_fct(levels = lvls)
  # The specification is a list, so the levels are read as a list element
  # (`[[` avoids the partial matching that `$` allows on lists).
  expect_equal(spec[["levels"]], lvls)
})