From 0ae633c8e50b2d5ab12d65cf65459f4e2da55542 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:40:59 +0000 Subject: [PATCH 1/5] Initial plan From bd5eb7cefbbd1be92a51634c9a5b31c82a815bc3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:50:04 +0000 Subject: [PATCH 2/5] C optimization: inline min/max range checks (#220) Agent-Logs-Url: https://github.com/wranglezone/stbl/sessions/e39da0cc-69c1-4f17-ba28-d9949321656e Co-authored-by: jonthegeek <33983824+jonthegeek@users.noreply.github.com> --- NEWS.md | 1 + R/c_api.R | 10 +++ R/stabilize_dbl.R | 8 ++- src/check_range.c | 131 ++++++++++++++++++++++++++++++++++++ src/init.c | 7 ++ tests/testthat/test-c_api.R | 44 ++++++++++++ 6 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 src/check_range.c diff --git a/NEWS.md b/NEWS.md index 6bf6646..6ef3094 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * New `pkg_inform()` signals classed messages with an opinionated class hierarchy, mirroring `pkg_abort()`. New `expect_pkg_message_classes()` tests that a message with the expected set of classes is thrown, and `expect_pkg_message_snapshot()` snapshot-tests the full message output in one step (#213). * New `pkg_warn()` signals classed warnings with an opinionated class hierarchy, mirroring `pkg_abort()`. New `expect_pkg_warning_classes()` tests that a warning with the expected set of classes is thrown, and `expect_pkg_warning_snapshot()` snapshot-tests the full warning output in one step (#213). * Many `are_*_ish()` and `to_*()` methods are now implemented in C. Benchmarks show a significant speedup (about 3-20x) for large vectors (#217, #218, #219, #221, #226). +* `stabilize_dbl()` and `stabilize_int()` now use a C implementation for min/max range checks, eliminating intermediate logical vector allocations and improving throughput for large vectors (#220). # stbl 0.3.0 diff --git a/R/c_api.R b/R/c_api.R index 1a206b8..7b17f8d 100644 --- a/R/c_api.R +++ b/R/c_api.R @@ -93,3 +93,13 @@ .lst_to_fct <- function(x) { .Call(stbl_lst_to_fct, x) } + +# range checks ---- + +.check_min_dbl <- function(x, min_val) { + .Call(stbl_check_min_dbl, x, min_val) +} + +.check_max_dbl <- function(x, max_val) { + .Call(stbl_check_max_dbl, x, max_val) +} diff --git a/R/stabilize_dbl.R b/R/stabilize_dbl.R index 1c33f9c..dcd53be 100644 --- a/R/stabilize_dbl.R +++ b/R/stabilize_dbl.R @@ -165,8 +165,12 @@ stabilise_double_scalar <- stabilize_dbl_scalar ) { min_value <- to_dbl_scalar(min_value, allow_null = TRUE, call = call) max_value <- to_dbl_scalar(max_value, allow_null = TRUE, call = call) - min_failure_locations <- .find_failures(x, min_value, `<`) - max_failure_locations <- .find_failures(x, max_value, `>`) + min_failure_locations <- if (is.null(min_value)) NULL else { + .Call(stbl_check_min_dbl, x, min_value) + } + max_failure_locations <- if (is.null(max_value)) NULL else { + .Call(stbl_check_max_dbl, x, max_value) + } if (is.null(min_failure_locations) && is.null(max_failure_locations)) { return(invisible(NULL)) } diff --git a/src/check_range.c b/src/check_range.c new file mode 100644 index 0000000..74531ea --- /dev/null +++ b/src/check_range.c @@ -0,0 +1,131 @@ +#include +#include +#include + +/* + * Scan a REALSXP or INTSXP vector for values below (type=0) or above + * (type=1) the threshold. + * + * Writes 1-based failure indices into p_idx[0..n-1] and returns the number + * of failures found. NA values are always treated as passing (they are not + * reported as range failures). + */ +static R_xlen_t check_dbl_real_core( + const double* px, + R_xlen_t n, + double threshold, + int type, + int* p_idx +) { + R_xlen_t n_fail = 0; + if (type == 0) { + /* min check: fail if x < threshold */ + for (R_xlen_t i = 0; i < n; i++) { + double v = px[i]; + if (!ISNAN(v) && v < threshold) { + p_idx[n_fail++] = (int)(i + 1); + } + } + } else { + /* max check: fail if x > threshold */ + for (R_xlen_t i = 0; i < n; i++) { + double v = px[i]; + if (!ISNAN(v) && v > threshold) { + p_idx[n_fail++] = (int)(i + 1); + } + } + } + return n_fail; +} + +static R_xlen_t check_dbl_int_core( + const int* px, + R_xlen_t n, + double threshold, + int type, + int* p_idx +) { + R_xlen_t n_fail = 0; + if (type == 0) { + /* min check: fail if x < threshold */ + for (R_xlen_t i = 0; i < n; i++) { + int v = px[i]; + if (v != NA_INTEGER && (double)v < threshold) { + p_idx[n_fail++] = (int)(i + 1); + } + } + } else { + /* max check: fail if x > threshold */ + for (R_xlen_t i = 0; i < n; i++) { + int v = px[i]; + if (v != NA_INTEGER && (double)v > threshold) { + p_idx[n_fail++] = (int)(i + 1); + } + } + } + return n_fail; +} + +/* + * Shared implementation for stbl_check_min_dbl / stbl_check_max_dbl. + * + * x - a REALSXP or INTSXP vector + * threshold - a length-1 REALSXP scalar + * type - 0 for min check (x < threshold), 1 for max check (x > threshold) + * + * Returns R_NilValue (NULL) if all values pass, otherwise an INTSXP of + * 1-based failure indices. + */ +static SEXP check_range_impl(SEXP x, SEXP threshold_sexp, int type) { + R_xlen_t n = XLENGTH(x); + double threshold = REAL(threshold_sexp)[0]; + + /* Allocate a scratch buffer sized for the worst case */ + SEXP scratch = PROTECT(Rf_allocVector(INTSXP, n)); + int* p_idx = INTEGER(scratch); + + R_xlen_t n_fail; + if (TYPEOF(x) == REALSXP) { + n_fail = check_dbl_real_core(REAL(x), n, threshold, type, p_idx); + } else { + /* INTSXP — stabilize_int() passes integer vectors here */ + n_fail = check_dbl_int_core(INTEGER(x), n, threshold, type, p_idx); + } + + if (n_fail == 0) { + UNPROTECT(1); + return R_NilValue; + } + + SEXP out = PROTECT(Rf_allocVector(INTSXP, n_fail)); + int* p_out = INTEGER(out); + for (R_xlen_t i = 0; i < n_fail; i++) { + p_out[i] = p_idx[i]; + } + UNPROTECT(2); + return out; +} + +/* + * stbl_check_min_dbl: return 1-based integer indices where x < min_val. + * + * x - a REALSXP or INTSXP vector + * min_val - a length-1 REALSXP scalar + * + * Returns R_NilValue (NULL) if all values pass. + */ +SEXP stbl_check_min_dbl(SEXP x, SEXP min_val) { + return check_range_impl(x, min_val, 0); +} + +/* + * stbl_check_max_dbl: return 1-based integer indices where x > max_val. + * + * x - a REALSXP or INTSXP vector + * max_val - a length-1 REALSXP scalar + * + * Returns R_NilValue (NULL) if all values pass. + */ +SEXP stbl_check_max_dbl(SEXP x, SEXP max_val) { + return check_range_impl(x, max_val, 1); +} diff --git a/src/init.c b/src/init.c index 0495a7f..fe04f55 100644 --- a/src/init.c +++ b/src/init.c @@ -67,6 +67,10 @@ SEXP stbl_lst_to_lgl(SEXP x); SEXP stbl_lst_to_chr(SEXP x); SEXP stbl_lst_to_fct(SEXP x); +/* range checks */ +SEXP stbl_check_min_dbl(SEXP x, SEXP min_val); +SEXP stbl_check_max_dbl(SEXP x, SEXP max_val); + static const R_CallMethodDef callMethods[] = { /* chr -> * */ {"ffi_chr_to_lgl", (DL_FUNC) &ffi_chr_to_lgl, 1}, @@ -117,6 +121,9 @@ static const R_CallMethodDef callMethods[] = { {"stbl_lst_to_lgl", (DL_FUNC) &stbl_lst_to_lgl, 1}, {"stbl_lst_to_chr", (DL_FUNC) &stbl_lst_to_chr, 1}, {"stbl_lst_to_fct", (DL_FUNC) &stbl_lst_to_fct, 1}, + /* range checks */ + {"stbl_check_min_dbl", (DL_FUNC) &stbl_check_min_dbl, 2}, + {"stbl_check_max_dbl", (DL_FUNC) &stbl_check_max_dbl, 2}, {NULL, NULL, 0} }; diff --git a/tests/testthat/test-c_api.R b/tests/testthat/test-c_api.R index 6d24939..4ad61da 100644 --- a/tests/testthat/test-c_api.R +++ b/tests/testthat/test-c_api.R @@ -400,3 +400,47 @@ test_that(".lst_to_fct() returns NULL for non-character elements (#226)", { test_that(".lst_to_fct() returns NULL for non-scalar elements (#226)", { expect_null(.lst_to_fct(list(c("a", "b")))) }) + +# range checks ------------------------------------------------------------- ---- + +test_that(".check_min_dbl() returns NULL when all values pass (#220)", { + expect_null(.check_min_dbl(c(1.0, 2.0, 3.0), 1.0)) + expect_null(.check_min_dbl(double(0), 0.0)) +}) + +test_that(".check_min_dbl() returns failure indices for values below min (#220)", { + expect_identical(.check_min_dbl(c(1.0, 2.0, 3.0), 2.0), 1L) + expect_identical(.check_min_dbl(c(0.0, 1.0, 2.0, 3.0), 2.0), c(1L, 2L)) +}) + +test_that(".check_min_dbl() treats NA as passing (#220)", { + expect_null(.check_min_dbl(c(NA_real_, 2.0), 1.0)) + expect_identical(.check_min_dbl(c(NA_real_, 0.5), 1.0), 2L) +}) + +test_that(".check_min_dbl() handles integer input (#220)", { + expect_null(.check_min_dbl(c(2L, 3L), 1.0)) + expect_identical(.check_min_dbl(c(1L, 2L, 3L), 2.0), 1L) + expect_null(.check_min_dbl(c(NA_integer_, 2L), 1.0)) +}) + +test_that(".check_max_dbl() returns NULL when all values pass (#220)", { + expect_null(.check_max_dbl(c(1.0, 2.0, 3.0), 3.0)) + expect_null(.check_max_dbl(double(0), 0.0)) +}) + +test_that(".check_max_dbl() returns failure indices for values above max (#220)", { + expect_identical(.check_max_dbl(c(1.0, 2.0, 3.0), 2.0), 3L) + expect_identical(.check_max_dbl(c(1.0, 2.0, 3.0, 4.0), 2.0), c(3L, 4L)) +}) + +test_that(".check_max_dbl() treats NA as passing (#220)", { + expect_null(.check_max_dbl(c(NA_real_, 1.0), 2.0)) + expect_identical(.check_max_dbl(c(NA_real_, 3.0), 2.0), 2L) +}) + +test_that(".check_max_dbl() handles integer input (#220)", { + expect_null(.check_max_dbl(c(1L, 2L), 3.0)) + expect_identical(.check_max_dbl(c(1L, 2L, 3L), 2.0), 3L) + expect_null(.check_max_dbl(c(NA_integer_, 1L), 2.0)) +}) From d8be19c675e6fb8df2c46424ec97717a9d4969f1 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 7 Apr 2026 16:56:41 -0500 Subject: [PATCH 3/5] Update R/stabilize_dbl.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/stabilize_dbl.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/stabilize_dbl.R b/R/stabilize_dbl.R index dcd53be..47ceebf 100644 --- a/R/stabilize_dbl.R +++ b/R/stabilize_dbl.R @@ -165,7 +165,9 @@ stabilise_double_scalar <- stabilize_dbl_scalar ) { min_value <- to_dbl_scalar(min_value, allow_null = TRUE, call = call) max_value <- to_dbl_scalar(max_value, allow_null = TRUE, call = call) - min_failure_locations <- if (is.null(min_value)) NULL else { + min_failure_locations <- if (is.null(min_value)) { + NULL + } else { .Call(stbl_check_min_dbl, x, min_value) } max_failure_locations <- if (is.null(max_value)) NULL else { From 5cb20dcbbaa198c123ecc25ad438c6a9f507f98b Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 7 Apr 2026 16:56:50 -0500 Subject: [PATCH 4/5] Update R/stabilize_dbl.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/stabilize_dbl.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/stabilize_dbl.R b/R/stabilize_dbl.R index 47ceebf..1433451 100644 --- a/R/stabilize_dbl.R +++ b/R/stabilize_dbl.R @@ -170,7 +170,9 @@ stabilise_double_scalar <- stabilize_dbl_scalar } else { .Call(stbl_check_min_dbl, x, min_value) } - max_failure_locations <- if (is.null(max_value)) NULL else { + max_failure_locations <- if (is.null(max_value)) { + NULL + } else { .Call(stbl_check_max_dbl, x, max_value) } if (is.null(min_failure_locations) && is.null(max_failure_locations)) { From 6c003fe72dc0cb866c4c2b646026e18dee6a239e Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Thu, 9 Apr 2026 08:44:51 -0500 Subject: [PATCH 5/5] Simplify code and news Co-authored-by: Jon Harmon --- NEWS.md | 2 +- R/stabilize_dbl.R | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6ef3094..f589c6b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ * New `pkg_inform()` signals classed messages with an opinionated class hierarchy, mirroring `pkg_abort()`. New `expect_pkg_message_classes()` tests that a message with the expected set of classes is thrown, and `expect_pkg_message_snapshot()` snapshot-tests the full message output in one step (#213). * New `pkg_warn()` signals classed warnings with an opinionated class hierarchy, mirroring `pkg_abort()`. New `expect_pkg_warning_classes()` tests that a warning with the expected set of classes is thrown, and `expect_pkg_warning_snapshot()` snapshot-tests the full warning output in one step (#213). * Many `are_*_ish()` and `to_*()` methods are now implemented in C. Benchmarks show a significant speedup (about 3-20x) for large vectors (#217, #218, #219, #221, #226). -* `stabilize_dbl()` and `stabilize_int()` now use a C implementation for min/max range checks, eliminating intermediate logical vector allocations and improving throughput for large vectors (#220). +* `stabilize_dbl()` and `stabilize_int()` now use a C implementation for min/max range checks, improving throughput for large vectors (#220). # stbl 0.3.0 diff --git a/R/stabilize_dbl.R b/R/stabilize_dbl.R index 1433451..8b5973d 100644 --- a/R/stabilize_dbl.R +++ b/R/stabilize_dbl.R @@ -165,14 +165,10 @@ stabilise_double_scalar <- stabilize_dbl_scalar ) { min_value <- to_dbl_scalar(min_value, allow_null = TRUE, call = call) max_value <- to_dbl_scalar(max_value, allow_null = TRUE, call = call) - min_failure_locations <- if (is.null(min_value)) { - NULL - } else { + min_failure_locations <- if (!is.null(min_value)) { .Call(stbl_check_min_dbl, x, min_value) } - max_failure_locations <- if (is.null(max_value)) { - NULL - } else { + max_failure_locations <- if (!is.null(max_value)) { .Call(stbl_check_max_dbl, x, max_value) } if (is.null(min_failure_locations) && is.null(max_failure_locations)) {