Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: datawrap
Title: Final Steps for Dataset Preparation
Version: 0.0.0.9000
Version: 0.0.0.9001
Authors@R:
person("Jon", "Harmon", , "jonthegeek@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4781-4346"))
Expand All @@ -10,17 +10,22 @@ License: MIT + file LICENSE
URL: https://wranglezone.github.io/datawrap/,
https://github.com/wranglezone/datawrap
BugReports: https://github.com/wranglezone/datawrap/issues
Depends:
R (>= 4.1)
Imports:
knitr,
purrr,
rlang,
stbl,
stringr,
tibble,
vctrs
Suggests:
testthat (>= 3.0.0),
usethis,
withr
Remotes:
stbl=wranglezone/stbl
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

export(create_dataset_dictionary)
export(describe_dataset)
export(finalize_integers)
export(write_dataset_dictionary)
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# datawrap (development version)

* `finalize_integers()` converts integerish columns to integer for efficient storage (@copilot & @jonthegeek, #10).

# datawrap 0.0.0.9000

* Initial release.
33 changes: 33 additions & 0 deletions R/finalize_integers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Store integer-like columns as integers
#'
#' Inspects every column (or list element) of `dataset`. Any element that is
#' not logical and that [stbl::is_int_ish()] reports as integer-like is
#' replaced by the result of [stbl::to_int()]. All other elements are returned
#' untouched.
#'
#' @param dataset (`data.frame`, `list`, or `NULL`) The dataset to process.
#'
#' @returns `dataset`, with each integer-like, non-logical element converted to
#'   integer. `NULL` input is returned as `NULL`.
#' @export
#'
#' @examples
#' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3))
#' finalize_integers(df)
finalize_integers <- function(dataset) {
  # Only NULL and list-like inputs (which includes data frames) are accepted.
  is_supported_input <- is.null(dataset) || is.list(dataset)
  if (!is_supported_input) {
    stbl::pkg_abort(
      "datawrap",
      "{.arg dataset} must be a {.cls data.frame}, {.cls list}, or {.cls NULL}.",
      c("invalid_dataset", "invalid_argument")
    )
  }

  # Logical vectors are deliberately excluded so that TRUE/FALSE columns are
  # never rewritten as integers.
  should_downcast <- function(column) {
    !is.logical(column) && stbl::is_int_ish(column)
  }

  # NOTE: dplyr::mutate() + dplyr::across() + dplyr::where() would express this
  # more compactly, but this function on its own does not justify importing
  # dplyr. Revisit if dplyr becomes a dependency for other reasons.
  downcast_mask <- purrr::map_lgl(dataset, should_downcast)
  dataset[downcast_mask] <- purrr::map(dataset[downcast_mask], stbl::to_int)
  dataset
}
2 changes: 1 addition & 1 deletion man/datawrap-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/finalize_integers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 68 additions & 0 deletions tests/testthat/test-finalize_integers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
test_that("finalize_integers() converts integerish double columns to integer (#10)", {
  # `x` holds whole-number doubles; `y` holds fractional doubles.
  input <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3))
  converted <- finalize_integers(input)

  expect_type(converted[["x"]], "integer")
  expect_type(converted[["y"]], "double")
})

test_that("finalize_integers() converts integer-valued double elements in a list to integer (#10)", {
  # Same data as the data frame case, but supplied as a plain list.
  input <- list(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3))
  converted <- finalize_integers(input)

  expect_type(converted[["x"]], "integer")
  expect_type(converted[["y"]], "double")
})

test_that("finalize_integers() converts integerish character columns to integer (#10)", {
  # `x` is text that spells whole numbers; `y` is text that spells fractions.
  whole_text <- as.character(1:3)
  fractional_text <- as.character(c(1.1, 2.2, 3.3))
  input <- data.frame(x = whole_text, y = fractional_text)

  converted <- finalize_integers(input)

  expect_type(converted[["x"]], "integer")
  expect_type(converted[["y"]], "character")
})

test_that("finalize_integers() leaves non-integerish columns unchanged (#10)", {
  # A mix of column types: only `x` is a candidate for conversion.
  input <- data.frame(
    x = c(1.0, 2.0),
    y = letters[1:2],
    z = TRUE,
    stringsAsFactors = FALSE
  )

  converted <- finalize_integers(input)

  # The character and logical columns must keep their original types.
  expect_type(converted[["y"]], "character")
  expect_type(converted[["z"]], "logical")
})

test_that("finalize_integers() preserves NA values in converted columns (#10)", {
  # The missing value sits in the second position of an otherwise whole-number
  # double column.
  input <- data.frame(x = c(1.0, NA, 3.0))
  converted_x <- finalize_integers(input)[["x"]]

  expect_type(converted_x, "integer")
  expect_true(is.na(converted_x[[2]]))
})

test_that("finalize_integers() preserves the class of the input (#10)", {
  # A tibble goes in, so the same class vector must come out.
  input <- tibble::tibble(x = c(1.0, 2.0))
  expected_class <- class(input)

  actual_class <- class(finalize_integers(input))

  expect_identical(actual_class, expected_class)
})

test_that("finalize_integers() errors if dataset is not a data.frame, list, or NULL (#10)", {
  # A bare character vector is neither a list nor NULL, so it must be rejected
  # with the package's classed error.
  stbl::expect_pkg_error_classes(
    finalize_integers("not a dataset"),
    "datawrap", "invalid_dataset", "invalid_argument"
  )
})

test_that("finalize_integers() handles a dataset with no double columns (#10)", {
  # `x` is already integer and `y` is non-numeric text, so the output should
  # be identical to the input.
  input <- data.frame(x = 1L, y = "a", stringsAsFactors = FALSE)
  output <- finalize_integers(input)

  expect_identical(output, input)
})

test_that("finalize_integers() handles an empty data frame (#10)", {
  # With no columns there is nothing to convert; the input comes back as-is.
  empty_input <- data.frame()
  output <- finalize_integers(empty_input)

  expect_identical(output, empty_input)
})

test_that("finalize_integers() handles NULL (#10)", {
  # NULL is an accepted input and must pass straight through.
  output <- finalize_integers(NULL)

  expect_null(output)
})