From 1cff5108e1130bee79e82bfdada3fdd439bad280 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:42:47 +0000 Subject: [PATCH 1/8] Initial plan From 8f8022ac061aefc7e1642c86982eeb7b83a5280f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:50:47 +0000 Subject: [PATCH 2/8] feat: add finalize_doubles() (#10) Co-authored-by: jonthegeek <33983824+jonthegeek@users.noreply.github.com> --- DESCRIPTION | 3 ++ NAMESPACE | 1 + NEWS.md | 4 ++ R/finalize_doubles.R | 22 +++++++++++ man/datawrap-package.Rd | 2 +- man/finalize_doubles.Rd | 25 +++++++++++++ tests/testthat/test-finalize_doubles.R | 51 ++++++++++++++++++++++++++ 7 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 NEWS.md create mode 100644 R/finalize_doubles.R create mode 100644 man/finalize_doubles.Rd create mode 100644 tests/testthat/test-finalize_doubles.R diff --git a/DESCRIPTION b/DESCRIPTION index 08cdde2..759bd67 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,9 +14,12 @@ Imports: knitr, purrr, rlang, + stbl, stringr, tibble, vctrs +Remotes: + stbl=wranglezone/stbl Suggests: testthat (>= 3.0.0), usethis, diff --git a/NAMESPACE b/NAMESPACE index 385e453..9e71e1d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,4 +2,5 @@ export(create_dataset_dictionary) export(describe_dataset) +export(finalize_doubles) export(write_dataset_dictionary) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..3ea0dfc --- /dev/null +++ b/NEWS.md @@ -0,0 +1,4 @@ +# datawrap (development version) + +* `finalize_doubles()` converts integer-valued double columns to integer for + efficient storage (@copilot, #10). diff --git a/R/finalize_doubles.R b/R/finalize_doubles.R new file mode 100644 index 0000000..1bef8ba --- /dev/null +++ b/R/finalize_doubles.R @@ -0,0 +1,22 @@ +#' Downcast integer-valued double columns to integer +#' +#' Iterates over all columns in `dataset` and converts any double column whose +#' values can all be represented as integers (as determined by +#' `stbl::is_int_ish()`) to integer using `stbl::to_int()`. Columns that are +#' not doubles, or that contain non-integer-valued data, are left unchanged. +#' +#' @param dataset (`data.frame`) The dataset to process. +#' +#' @returns The `dataset` with all safely-downcasted double columns converted to +#' integer. +#' @export +#' +#' @examples +#' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) +#' finalize_doubles(df) +finalize_doubles <- function(dataset) { + int_ish_cols <- purrr::map_lgl(dataset, is.double) & + purrr::map_lgl(dataset, stbl::is_int_ish) + dataset[int_ish_cols] <- lapply(dataset[int_ish_cols], stbl::to_int) + dataset +} diff --git a/man/datawrap-package.Rd b/man/datawrap-package.Rd index 82d7860..533de11 100644 --- a/man/datawrap-package.Rd +++ b/man/datawrap-package.Rd @@ -11,8 +11,8 @@ Helpers to create data dictionaries, document package datasets, and apply finish \seealso{ Useful links: \itemize{ - \item \url{https://github.com/wranglezone/datawrap} \item \url{https://wranglezone.github.io/datawrap/} + \item \url{https://github.com/wranglezone/datawrap} \item Report bugs at \url{https://github.com/wranglezone/datawrap/issues} } diff --git a/man/finalize_doubles.Rd b/man/finalize_doubles.Rd new file mode 100644 index 0000000..6293948 --- /dev/null +++ b/man/finalize_doubles.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/finalize_doubles.R +\name{finalize_doubles} +\alias{finalize_doubles} +\title{Downcast integer-valued double columns to integer} +\usage{ +finalize_doubles(dataset) +} +\arguments{ +\item{dataset}{(\code{data.frame}) The dataset to process.} +} +\value{ +The \code{dataset} with all safely-downcasted double columns converted to +integer. +} +\description{ +Iterates over all columns in \code{dataset} and converts any double column whose +values can all be represented as integers (as determined by +\code{stbl::is_int_ish()}) to integer using \code{stbl::to_int()}. Columns that are +not doubles, or that contain non-integer-valued data, are left unchanged. +} +\examples{ +df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) +finalize_doubles(df) +} diff --git a/tests/testthat/test-finalize_doubles.R b/tests/testthat/test-finalize_doubles.R new file mode 100644 index 0000000..48c70ec --- /dev/null +++ b/tests/testthat/test-finalize_doubles.R @@ -0,0 +1,51 @@ +# finalize_doubles() ----------------------------------------------------------- + +test_that("finalize_doubles() converts integer-valued double columns to integer (#10)", { + df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) + result <- finalize_doubles(df) + expect_type(result$x, "integer") + expect_type(result$y, "double") +}) + +test_that("finalize_doubles() leaves non-double columns unchanged (#10)", { + df <- data.frame( + x = c(1.0, 2.0), + y = letters[1:2], + z = TRUE, + stringsAsFactors = FALSE + ) + result <- finalize_doubles(df) + expect_type(result$y, "character") + expect_type(result$z, "logical") +}) + +test_that("finalize_doubles() preserves NA values in converted columns (#10)", { + df <- data.frame(x = c(1.0, NA, 3.0)) + result <- finalize_doubles(df) + expect_type(result$x, "integer") + expect_true(is.na(result$x[[2]])) +}) + +test_that("finalize_doubles() does not convert already-integer columns (#10)", { + df <- data.frame(x = 1L) + result <- finalize_doubles(df) + expect_type(result$x, "integer") +}) + +test_that("finalize_doubles() returns a data frame (#10)", { + df <- data.frame(x = c(1.0, 2.0)) + result <- finalize_doubles(df) + expect_s3_class(result, "data.frame") +}) + +test_that("finalize_doubles() handles a dataset with no double columns (#10)", { + df <- data.frame(x = 1L, y = "a", stringsAsFactors = FALSE) + result <- finalize_doubles(df) + expect_identical(result, df) +}) + +test_that("finalize_doubles() handles an empty data frame (#10)", { + df <- data.frame() + result <- finalize_doubles(df) + expect_identical(result, df) +}) From c19c394b88c2b732ff6d11daeb05dc4a9b46aa47 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 13:06:40 +0000 Subject: [PATCH 3/8] fix: address review feedback on finalize_doubles() Co-authored-by: jonthegeek <33983824+jonthegeek@users.noreply.github.com> --- R/finalize_doubles.R | 9 ++++++++- tests/testthat/test-finalize_doubles.R | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/R/finalize_doubles.R b/R/finalize_doubles.R index 1bef8ba..76cc788 100644 --- a/R/finalize_doubles.R +++ b/R/finalize_doubles.R @@ -15,8 +15,15 @@ #' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) #' finalize_doubles(df) finalize_doubles <- function(dataset) { + if (!is.data.frame(dataset)) { + stbl::pkg_abort( + "datawrap", + "{.arg dataset} must be a {.cls data.frame}.", + "invalid_argument" + ) + } int_ish_cols <- purrr::map_lgl(dataset, is.double) & purrr::map_lgl(dataset, stbl::is_int_ish) - dataset[int_ish_cols] <- lapply(dataset[int_ish_cols], stbl::to_int) + dataset[int_ish_cols] <- purrr::map(dataset[int_ish_cols], stbl::to_int) dataset } diff --git a/tests/testthat/test-finalize_doubles.R b/tests/testthat/test-finalize_doubles.R index 48c70ec..c958b01 100644 --- a/tests/testthat/test-finalize_doubles.R +++ b/tests/testthat/test-finalize_doubles.R @@ -26,16 +26,18 @@ test_that("finalize_doubles() preserves NA values in converted columns (#10)", { expect_true(is.na(result$x[[2]])) }) -test_that("finalize_doubles() does not convert already-integer columns (#10)", { - df <- data.frame(x = 1L) - result <- finalize_doubles(df) - expect_type(result$x, "integer") +test_that("finalize_doubles() preserves the class of the input (#10)", { + tbl <- tibble::tibble(x = c(1.0, 2.0)) + result <- finalize_doubles(tbl) + expect_identical(class(result), class(tbl)) }) -test_that("finalize_doubles() returns a data frame (#10)", { - df <- data.frame(x = c(1.0, 2.0)) - result <- finalize_doubles(df) - expect_s3_class(result, "data.frame") +test_that("finalize_doubles() errors if dataset is not a data.frame (#10)", { + stbl::expect_pkg_error_classes( + finalize_doubles(list(x = 1.0)), + "datawrap", + "invalid_argument" + ) }) test_that("finalize_doubles() handles a dataset with no double columns (#10)", { From 63b5e3d2af1bc2278c2c3e9c362917be768802d6 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 10 Mar 2026 09:17:46 -0500 Subject: [PATCH 4/8] Tweaks that are harder to explain than to implement. --- DESCRIPTION | 4 ++-- R/finalize_doubles.R | 14 +++++++------- tests/testthat/test-finalize_doubles.R | 22 +++++++++++++++------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 759bd67..5bce19f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,12 +18,12 @@ Imports: stringr, tibble, vctrs -Remotes: - stbl=wranglezone/stbl Suggests: testthat (>= 3.0.0), usethis, withr +Remotes: + stbl=wranglezone/stbl Config/testthat/edition: 3 Encoding: UTF-8 Language: en-US diff --git a/R/finalize_doubles.R b/R/finalize_doubles.R index 76cc788..5923b6f 100644 --- a/R/finalize_doubles.R +++ b/R/finalize_doubles.R @@ -1,11 +1,11 @@ #' Downcast integer-valued double columns to integer #' -#' Iterates over all columns in `dataset` and converts any double column whose -#' values can all be represented as integers (as determined by -#' `stbl::is_int_ish()`) to integer using `stbl::to_int()`. Columns that are -#' not doubles, or that contain non-integer-valued data, are left unchanged. +#' Iterates over all columns or elements in `dataset` and converts any double +#' column whose values can all be represented as integers to integers. Columns +#' that are not doubles, or that contain non-integer-valued data, are left +#' unchanged. #' -#' @param dataset (`data.frame`) The dataset to process. +#' @param dataset (`data.frame` or `list`) The dataset to process. #' #' @returns The `dataset` with all safely-downcasted double columns converted to #' integer. @@ -15,11 +15,11 @@ #' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) #' finalize_doubles(df) finalize_doubles <- function(dataset) { - if (!is.data.frame(dataset)) { + if (!is.null(dataset) && !is.list(dataset)) { stbl::pkg_abort( "datawrap", "{.arg dataset} must be a {.cls data.frame}.", - "invalid_argument" + c("invalid_dataset", "invalid_argument") ) } int_ish_cols <- purrr::map_lgl(dataset, is.double) & diff --git a/tests/testthat/test-finalize_doubles.R b/tests/testthat/test-finalize_doubles.R index c958b01..1d9532d 100644 --- a/tests/testthat/test-finalize_doubles.R +++ b/tests/testthat/test-finalize_doubles.R @@ -1,5 +1,3 @@ -# finalize_doubles() ----------------------------------------------------------- - test_that("finalize_doubles() converts integer-valued double columns to integer (#10)", { df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) result <- finalize_doubles(df) @@ -7,6 +5,13 @@ test_that("finalize_doubles() converts integer-valued double columns to integer expect_type(result$y, "double") }) +test_that("finalize_doubles() converts integer-valued double elements in a list to integer (#10)", { + lst <- list(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) + result <- finalize_doubles(lst) + expect_type(result$x, "integer") + expect_type(result$y, "double") +}) + test_that("finalize_doubles() leaves non-double columns unchanged (#10)", { df <- data.frame( x = c(1.0, 2.0), @@ -34,20 +39,23 @@ test_that("finalize_doubles() preserves the class of the input (#10)", { test_that("finalize_doubles() errors if dataset is not a data.frame (#10)", { stbl::expect_pkg_error_classes( - finalize_doubles(list(x = 1.0)), + finalize_doubles("not a dataset"), "datawrap", + "invalid_dataset", "invalid_argument" ) }) test_that("finalize_doubles() handles a dataset with no double columns (#10)", { df <- data.frame(x = 1L, y = "a", stringsAsFactors = FALSE) - result <- finalize_doubles(df) - expect_identical(result, df) + expect_identical(finalize_doubles(df), df) }) test_that("finalize_doubles() handles an empty data frame (#10)", { df <- data.frame() - result <- finalize_doubles(df) - expect_identical(result, df) + expect_identical(finalize_doubles(df), df) +}) + +test_that("finalize_doubles() handles NULL (#10)", { + expect_null(finalize_doubles(NULL)) }) From 352157dd5524655d9df37cb283014ee64e469d60 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 10 Mar 2026 09:29:55 -0500 Subject: [PATCH 5/8] Rename (and reinterpret) as `finalize_integers()`. --- NAMESPACE | 2 +- R/finalize_doubles.R | 29 ----------- R/finalize_integers.R | 29 +++++++++++ man/finalize_doubles.Rd | 25 --------- man/finalize_integers.Rd | 24 +++++++++ tests/testthat/test-finalize_doubles.R | 61 ---------------------- tests/testthat/test-finalize_integers.R | 68 +++++++++++++++++++++++++ 7 files changed, 122 insertions(+), 116 deletions(-) delete mode 100644 R/finalize_doubles.R create mode 100644 R/finalize_integers.R delete mode 100644 man/finalize_doubles.Rd create mode 100644 man/finalize_integers.Rd delete mode 100644 tests/testthat/test-finalize_doubles.R create mode 100644 tests/testthat/test-finalize_integers.R diff --git a/NAMESPACE b/NAMESPACE index 9e71e1d..3437290 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,5 +2,5 @@ export(create_dataset_dictionary) export(describe_dataset) -export(finalize_doubles) +export(finalize_integers) export(write_dataset_dictionary) diff --git a/R/finalize_doubles.R b/R/finalize_doubles.R deleted file mode 100644 index 5923b6f..0000000 --- a/R/finalize_doubles.R +++ /dev/null @@ -1,29 +0,0 @@ -#' Downcast integer-valued double columns to integer -#' -#' Iterates over all columns or elements in `dataset` and converts any double -#' column whose values can all be represented as integers to integers. Columns -#' that are not doubles, or that contain non-integer-valued data, are left -#' unchanged. -#' -#' @param dataset (`data.frame` or `list`) The dataset to process. -#' -#' @returns The `dataset` with all safely-downcasted double columns converted to -#' integer. -#' @export -#' -#' @examples -#' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) -#' finalize_doubles(df) -finalize_doubles <- function(dataset) { - if (!is.null(dataset) && !is.list(dataset)) { - stbl::pkg_abort( - "datawrap", - "{.arg dataset} must be a {.cls data.frame}.", - c("invalid_dataset", "invalid_argument") - ) - } - int_ish_cols <- purrr::map_lgl(dataset, is.double) & - purrr::map_lgl(dataset, stbl::is_int_ish) - dataset[int_ish_cols] <- purrr::map(dataset[int_ish_cols], stbl::to_int) - dataset -} diff --git a/R/finalize_integers.R b/R/finalize_integers.R new file mode 100644 index 0000000..ae511e7 --- /dev/null +++ b/R/finalize_integers.R @@ -0,0 +1,29 @@ +#' Downcast integerish columns to integer +#' +#' Iterates over all columns (or list elements) in `dataset` and converts any +#' non-logical column whose values can all be represented as integers without +#' losing any information to integers. Columns that contain non-integerish data +#' are left unchanged. +#' +#' @param dataset (`data.frame` or `list`) The dataset to process. +#' +#' @returns The `dataset` with all integerish columns converted to integer. +#' @export +#' +#' @examples +#' df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) +#' finalize_integers(df) +finalize_integers <- function(dataset) { + if (!is.null(dataset) && !is.list(dataset)) { + stbl::pkg_abort( + "datawrap", + "{.arg dataset} must be a {.cls data.frame}.", + c("invalid_dataset", "invalid_argument") + ) + } + int_ish_cols <- purrr::map_lgl(dataset, \(x) { + !is.logical(x) && stbl::is_int_ish(x) + }) + dataset[int_ish_cols] <- purrr::map(dataset[int_ish_cols], stbl::to_int) + dataset +} diff --git a/man/finalize_doubles.Rd b/man/finalize_doubles.Rd deleted file mode 100644 index 6293948..0000000 --- a/man/finalize_doubles.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/finalize_doubles.R -\name{finalize_doubles} -\alias{finalize_doubles} -\title{Downcast integer-valued double columns to integer} -\usage{ -finalize_doubles(dataset) -} -\arguments{ -\item{dataset}{(\code{data.frame}) The dataset to process.} -} -\value{ -The \code{dataset} with all safely-downcasted double columns converted to -integer. -} -\description{ -Iterates over all columns in \code{dataset} and converts any double column whose -values can all be represented as integers (as determined by -\code{stbl::is_int_ish()}) to integer using \code{stbl::to_int()}. Columns that are -not doubles, or that contain non-integer-valued data, are left unchanged. -} -\examples{ -df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) -finalize_doubles(df) -} diff --git a/man/finalize_integers.Rd b/man/finalize_integers.Rd new file mode 100644 index 0000000..9612c35 --- /dev/null +++ b/man/finalize_integers.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/finalize_integers.R +\name{finalize_integers} +\alias{finalize_integers} +\title{Downcast integerish columns to integer} +\usage{ +finalize_integers(dataset) +} +\arguments{ +\item{dataset}{(\code{data.frame} or \code{list}) The dataset to process.} +} +\value{ +The \code{dataset} with all integerish columns converted to integer. +} +\description{ +Iterates over all columns (or list elements) in \code{dataset} and converts any +non-logical column whose values can all be represented as integers without +losing any information to integers. Columns that contain non-integerish data +are left unchanged. +} +\examples{ +df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) +finalize_integers(df) +} diff --git a/tests/testthat/test-finalize_doubles.R b/tests/testthat/test-finalize_doubles.R deleted file mode 100644 index 1d9532d..0000000 --- a/tests/testthat/test-finalize_doubles.R +++ /dev/null @@ -1,61 +0,0 @@ -test_that("finalize_doubles() converts integer-valued double columns to integer (#10)", { - df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) - result <- finalize_doubles(df) - expect_type(result$x, "integer") - expect_type(result$y, "double") -}) - -test_that("finalize_doubles() converts integer-valued double elements in a list to integer (#10)", { - lst <- list(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) - result <- finalize_doubles(lst) - expect_type(result$x, "integer") - expect_type(result$y, "double") -}) - -test_that("finalize_doubles() leaves non-double columns unchanged (#10)", { - df <- data.frame( - x = c(1.0, 2.0), - y = letters[1:2], - z = TRUE, - stringsAsFactors = FALSE - ) - result <- finalize_doubles(df) - expect_type(result$y, "character") - expect_type(result$z, "logical") -}) - -test_that("finalize_doubles() preserves NA values in converted columns (#10)", { - df <- data.frame(x = c(1.0, NA, 3.0)) - result <- finalize_doubles(df) - expect_type(result$x, "integer") - expect_true(is.na(result$x[[2]])) -}) - -test_that("finalize_doubles() preserves the class of the input (#10)", { - tbl <- tibble::tibble(x = c(1.0, 2.0)) - result <- finalize_doubles(tbl) - expect_identical(class(result), class(tbl)) -}) - -test_that("finalize_doubles() errors if dataset is not a data.frame (#10)", { - stbl::expect_pkg_error_classes( - finalize_doubles("not a dataset"), - "datawrap", - "invalid_dataset", - "invalid_argument" - ) -}) - -test_that("finalize_doubles() handles a dataset with no double columns (#10)", { - df <- data.frame(x = 1L, y = "a", stringsAsFactors = FALSE) - expect_identical(finalize_doubles(df), df) -}) - -test_that("finalize_doubles() handles an empty data frame (#10)", { - df <- data.frame() - expect_identical(finalize_doubles(df), df) -}) - -test_that("finalize_doubles() handles NULL (#10)", { - expect_null(finalize_doubles(NULL)) -}) diff --git a/tests/testthat/test-finalize_integers.R b/tests/testthat/test-finalize_integers.R new file mode 100644 index 0000000..3a20e97 --- /dev/null +++ b/tests/testthat/test-finalize_integers.R @@ -0,0 +1,68 @@ +test_that("finalize_integers() converts integerish double columns to integer (#10)", { + df <- data.frame(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) + result <- finalize_integers(df) + expect_type(result$x, "integer") + expect_type(result$y, "double") +}) + +test_that("finalize_integers() converts integer-valued double elements in a list to integer (#10)", { + lst <- list(x = c(1.0, 2.0, 3.0), y = c(1.1, 2.2, 3.3)) + result <- finalize_integers(lst) + expect_type(result$x, "integer") + expect_type(result$y, "double") +}) + +test_that("finalize_integers() converts integerish character columns to integer (#10)", { + df <- data.frame(x = as.character(1:3), y = as.character(c(1.1, 2.2, 3.3))) + result <- finalize_integers(df) + expect_type(result$x, "integer") + expect_type(result$y, "character") +}) + +test_that("finalize_integers() leaves non-integerish columns unchanged (#10)", { + df <- data.frame( + x = c(1.0, 2.0), + y = letters[1:2], + z = TRUE, + stringsAsFactors = FALSE + ) + result <- finalize_integers(df) + expect_type(result$y, "character") + expect_type(result$z, "logical") +}) + +test_that("finalize_integers() preserves NA values in converted columns (#10)", { + df <- data.frame(x = c(1.0, NA, 3.0)) + result <- finalize_integers(df) + expect_type(result$x, "integer") + expect_true(is.na(result$x[[2]])) +}) + +test_that("finalize_integers() preserves the class of the input (#10)", { + tbl <- tibble::tibble(x = c(1.0, 2.0)) + result <- finalize_integers(tbl) + expect_identical(class(result), class(tbl)) +}) + +test_that("finalize_integers() errors if dataset is not a data.frame or list (#10)", { + stbl::expect_pkg_error_classes( + finalize_integers("not a dataset"), + "datawrap", + "invalid_dataset", + "invalid_argument" + ) +}) + +test_that("finalize_integers() handles a dataset with no double columns (#10)", { + df <- data.frame(x = 1L, y = "a", stringsAsFactors = FALSE) + expect_identical(finalize_integers(df), df) +}) + +test_that("finalize_integers() handles an empty data frame (#10)", { + df <- data.frame() + expect_identical(finalize_integers(df), df) +}) + +test_that("finalize_integers() handles NULL (#10)", { + expect_null(finalize_integers(NULL)) +}) From 564b2c780a769be37cd1a574cd9d5cc10485c51a Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 10 Mar 2026 09:35:31 -0500 Subject: [PATCH 6/8] Make R CMD check happy about `NEWS.md` --- DESCRIPTION | 4 +++- NEWS.md | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5bce19f..083fc22 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: datawrap Title: Final Steps for Dataset Preparation -Version: 0.0.0.9000 +Version: 0.0.0.9001 Authors@R: person("Jon", "Harmon", , "jonthegeek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4781-4346")) @@ -10,6 +10,8 @@ License: MIT + file LICENSE URL: https://wranglezone.github.io/datawrap/, https://github.com/wranglezone/datawrap BugReports: https://github.com/wranglezone/datawrap/issues +Depends: + R (>= 4.1) Imports: knitr, purrr, diff --git a/NEWS.md b/NEWS.md index 3ea0dfc..d68d0da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ # datawrap (development version) -* `finalize_doubles()` converts integer-valued double columns to integer for - efficient storage (@copilot, #10). +* `finalize_integers()` converts integerish columns to integer for efficient storage (@copilot, #10). + +# datawrap 0.0.0.9000 + +* Initial release. From 1e0e897024333e346d992d104a342b95c3d60c8b Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 10 Mar 2026 09:37:38 -0500 Subject: [PATCH 7/8] Add refactor TODO. --- R/finalize_integers.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/finalize_integers.R b/R/finalize_integers.R index ae511e7..36181a5 100644 --- a/R/finalize_integers.R +++ b/R/finalize_integers.R @@ -21,6 +21,9 @@ finalize_integers <- function(dataset) { c("invalid_dataset", "invalid_argument") ) } + # TODO: This function isn't enough to warrant adding dplyr to imports, but + # this should be refactored to use dplyr::mutate, dplyr::across, and + # dplyr::where when something else necessitates dplyr. int_ish_cols <- purrr::map_lgl(dataset, \(x) { !is.logical(x) && stbl::is_int_ish(x) }) From fffb1c7cedbff3bf0fb0cd27b5f448b3875212dc Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 10 Mar 2026 09:52:31 -0500 Subject: [PATCH 8/8] Apply copilot review suggestions --- NEWS.md | 2 +- R/finalize_integers.R | 13 +++++++------ man/finalize_integers.Rd | 5 +++-- tests/testthat/test-finalize_integers.R | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index d68d0da..729911f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # datawrap (development version) -* `finalize_integers()` converts integerish columns to integer for efficient storage (@copilot, #10). +* `finalize_integers()` converts integerish columns to integer for efficient storage (@copilot & @jonthegeek, #10). # datawrap 0.0.0.9000 diff --git a/R/finalize_integers.R b/R/finalize_integers.R index 36181a5..0d2940c 100644 --- a/R/finalize_integers.R +++ b/R/finalize_integers.R @@ -5,9 +5,10 @@ #' losing any information to integers. Columns that contain non-integerish data #' are left unchanged. #' -#' @param dataset (`data.frame` or `list`) The dataset to process. +#' @param dataset (`data.frame`, `list`, or `NULL`) The dataset to process. #' -#' @returns The `dataset` with all integerish columns converted to integer. +#' @returns The `dataset` with all integerish columns converted to integer (or +#' `NULL` if `dataset` is `NULL`). #' @export #' #' @examples @@ -17,13 +18,13 @@ finalize_integers <- function(dataset) { if (!is.null(dataset) && !is.list(dataset)) { stbl::pkg_abort( "datawrap", - "{.arg dataset} must be a {.cls data.frame}.", + "{.arg dataset} must be a {.cls data.frame}, {.cls list}, or {.cls NULL}.", c("invalid_dataset", "invalid_argument") ) } - # TODO: This function isn't enough to warrant adding dplyr to imports, but - # this should be refactored to use dplyr::mutate, dplyr::across, and - # dplyr::where when something else necessitates dplyr. + # NOTE: This function alone does not warrant adding dplyr to imports, but it + # could be refactored to use dplyr::mutate, dplyr::across, and dplyr::where + # if dplyr is added as a dependency elsewhere in the package. int_ish_cols <- purrr::map_lgl(dataset, \(x) { !is.logical(x) && stbl::is_int_ish(x) }) diff --git a/man/finalize_integers.Rd b/man/finalize_integers.Rd index 9612c35..c34f33a 100644 --- a/man/finalize_integers.Rd +++ b/man/finalize_integers.Rd @@ -7,10 +7,11 @@ finalize_integers(dataset) } \arguments{ -\item{dataset}{(\code{data.frame} or \code{list}) The dataset to process.} +\item{dataset}{(\code{data.frame}, \code{list}, or \code{NULL}) The dataset to process.} } \value{ -The \code{dataset} with all integerish columns converted to integer. +The \code{dataset} with all integerish columns converted to integer (or +\code{NULL} if \code{dataset} is \code{NULL}). } \description{ Iterates over all columns (or list elements) in \code{dataset} and converts any diff --git a/tests/testthat/test-finalize_integers.R b/tests/testthat/test-finalize_integers.R index 3a20e97..2f4e4ad 100644 --- a/tests/testthat/test-finalize_integers.R +++ b/tests/testthat/test-finalize_integers.R @@ -44,7 +44,7 @@ test_that("finalize_integers() preserves the class of the input (#10)", { expect_identical(class(result), class(tbl)) }) -test_that("finalize_integers() errors if dataset is not a data.frame or list (#10)", { +test_that("finalize_integers() errors if dataset is not a data.frame, list, or NULL (#10)", { stbl::expect_pkg_error_classes( finalize_integers("not a dataset"), "datawrap",