From 0534ed26ea6a7a5579b66b38e639e521483e16e7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 15 Jan 2026 23:22:08 +0100 Subject: [PATCH 01/11] add support for outer transformations --- R/data.table.R | 15 +++++++++++---- inst/tests/tests.Rraw | 10 ++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 06a7f0437..bb8b8d328 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3410,6 +3410,13 @@ is_constantish = function(q, check_singleton=FALSE) { .gforce_ops = c("+", "-", "*", "/", "^", "%%", "%/%") +# Outer transformations that can wrap GForce-optimizable expressions +# e.g., sqrt(min(x)) should be optimized to sqrt(gmin(x)) +# for the moment we only include unary functions +.gforce_outer_trans = c("sqrt", "abs", "sign", "log", "log10", "log2", "log1p", + "exp", "expm1", "cos", "sin", "tan", "acos", "asin", "atan", + "cosh", "sinh", "tanh", "floor", "ceiling") + .unwrap_conversions = function(expr) { while (.is_type_conversion(expr) && length(expr) >= 2L) expr = expr[[2L]] expr @@ -3435,8 +3442,8 @@ is_constantish = function(q, check_singleton=FALSE) { )) } - # check if arithmetic operator -> recursively validate ALL branches (like in AST) - if (is.symbol(q[[1L]]) && q[[1L]] %chin% .gforce_ops) { + # check if arithmetic operator or outer transformation -> recursively validate ALL branches (like in AST) + if (is.symbol(q[[1L]]) && q[[1L]] %chin% c(.gforce_ops, .gforce_outer_trans)) { for (i in 2:length(q)) { if (!.gforce_ok(q[[i]], x, envir)) return(FALSE) } @@ -3467,8 +3474,8 @@ is_constantish = function(q, check_singleton=FALSE) { return(q) } - # if arithmetic operator, recursively substitute its operands. we know what branches are valid from .gforce_ok - if (is.symbol(q[[1L]]) && q[[1L]] %chin% .gforce_ops) { + # if arithmetic operator or outer transformation, recursively substitute its operands. we know what branches are valid from .gforce_ok + if (is.symbol(q[[1L]]) && q[[1L]] %chin% c(.gforce_ops, .gforce_outer_trans)) { for (i in 2:length(q)) { q[[i]] = .gforce_jsub(q[[i]], names_x, envir) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10fd2fc7f..339e87d3a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21484,3 +21484,13 @@ dt = data.table(a=1:4, b=1:2) test(2362.51, optimize=0:2, dt[, c(list()), b, verbose=TRUE], data.table(b=integer(0L)), output="GForce FALSE") test(2362.52, optimize=0:2, dt[, c(lapply(.SD, sum), list()), b, verbose=TRUE], output=out) test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE], output="GForce FALSE") + +# outer transformations with GForce, #7594 +dt = data.table(x = 1:2, y = 1:10) +out = c('GForce FALSE', 'GForce TRUE') +test(2284.01, optimize=1:2, dt[, sqrt(min(y)), by=x, verbose=TRUE], output=out) +test(2284.02, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out) +test(2284.03, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out) +test(2284.04, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) +# Transformation around non-GForce expression should NOT optimize +test(2284.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") From 7b853ba7662877eb0650a64af10324661748a75c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 15 Jan 2026 23:26:33 +0100 Subject: [PATCH 02/11] add examples from issue --- inst/tests/tests.Rraw | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 339e87d3a..5707fce82 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21489,8 +21489,9 @@ test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE dt = data.table(x = 1:2, y = 1:10) out = c('GForce FALSE', 'GForce TRUE') test(2284.01, optimize=1:2, dt[, sqrt(min(y)), by=x, verbose=TRUE], output=out) -test(2284.02, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out) -test(2284.03, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out) -test(2284.04, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) +test(2284.02, optimize=1:2, dt[, mean(y)^2, by=x, verbose=TRUE], output=out) +test(2284.03, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out) +test(2284.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out) +test(2284.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) # Transformation around non-GForce expression should NOT optimize test(2284.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") From 0b8220c87a55789a9b3512ed62720b83639a0b4f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 15 Jan 2026 23:34:20 +0100 Subject: [PATCH 03/11] refine accepted transformations --- R/data.table.R | 8 +++++--- inst/tests/tests.Rraw | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index bb8b8d328..03c2d05ff 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3413,9 +3413,11 @@ is_constantish = function(q, check_singleton=FALSE) { # Outer transformations that can wrap GForce-optimizable expressions # e.g., sqrt(min(x)) should be optimized to sqrt(gmin(x)) # for the moment we only include unary functions -.gforce_outer_trans = c("sqrt", "abs", "sign", "log", "log10", "log2", "log1p", - "exp", "expm1", "cos", "sin", "tan", "acos", "asin", "atan", - "cosh", "sinh", "tanh", "floor", "ceiling") +.gforce_outer_trans = c("sqrt", "abs", "sign", "floor", "ceiling", + "log", "log10", "log2", "log1p", "exp", "expm1", + "cos", "sin", "tan", "acos", "asin", "atan", + "cosh", "sinh", "tanh", "acosh", "asinh", "atanh", + "is.na", "is.nan", "is.finite", "is.infinite") .unwrap_conversions = function(expr) { while (.is_type_conversion(expr) && length(expr) >= 2L) expr = expr[[2L]] diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5707fce82..ca2a1875c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21495,3 +21495,6 @@ test(2284.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=o test(2284.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) # Transformation around non-GForce expression should NOT optimize test(2284.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") +dt = data.table(x = 1:2, y = c(NA, NA, NaN, Inf, 1:4)) +test(2284.21, optimize=1:2, dt[, is.na(sum(y)), by=x, verbose=TRUE], output=out) +test(2284.22, optimize=1:2, dt[, is.finite(sum(y, na.rm=TRUE)), by=x, verbose=TRUE], output=out) From 409f454081a43eb7089906cd57a2648efbd60b9a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 15 Jan 2026 23:35:05 +0100 Subject: [PATCH 04/11] fix test num --- inst/tests/tests.Rraw | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ca2a1875c..a6ecfff2f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21488,13 +21488,13 @@ test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE # outer transformations with GForce, #7594 dt = data.table(x = 1:2, y = 1:10) out = c('GForce FALSE', 'GForce TRUE') -test(2284.01, optimize=1:2, dt[, sqrt(min(y)), by=x, verbose=TRUE], output=out) -test(2284.02, optimize=1:2, dt[, mean(y)^2, by=x, verbose=TRUE], output=out) -test(2284.03, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out) -test(2284.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out) -test(2284.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) +test(2363.01, optimize=1:2, dt[, sqrt(min(y)), by=x, verbose=TRUE], output=out) +test(2363.02, optimize=1:2, dt[, mean(y)^2, by=x, verbose=TRUE], output=out) +test(2363.03, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out) +test(2363.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out) +test(2363.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) # Transformation around non-GForce expression should NOT optimize -test(2284.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") +test(2363.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") dt = data.table(x = 1:2, y = c(NA, NA, NaN, Inf, 1:4)) -test(2284.21, optimize=1:2, dt[, is.na(sum(y)), by=x, verbose=TRUE], output=out) -test(2284.22, optimize=1:2, dt[, is.finite(sum(y, na.rm=TRUE)), by=x, verbose=TRUE], output=out) +test(2363.21, optimize=1:2, dt[, is.na(sum(y)), by=x, verbose=TRUE], output=out) +test(2363.22, optimize=1:2, dt[, is.finite(sum(y, na.rm=TRUE)), by=x, verbose=TRUE], output=out) From 903e8007f89cf6abd4d75b83497cf66c2ee50335 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 15 Jan 2026 23:54:03 +0100 Subject: [PATCH 05/11] adjust test of now optimized fun --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a6ecfff2f..dacd215ee 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21465,7 +21465,7 @@ test(2362.32, optimize=0:2, dt[, .((max(a) - min(a)) / (max(a) + min(a))), by=b, test(2362.33, optimize=0:2, dt[, sum(a) / .N, b, verbose=TRUE], output=out) test(2362.34, optimize=0:2, dt[, mean(a) * 2L + sum(a), b, verbose=TRUE], output=out) test(2362.35, optimize=0:2, dt[, list(range=max(a)-min(a), avg=mean(a)), by=b, verbose=TRUE], output=out) -test(2362.36, optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output="GForce FALSE") +test(2362.36, optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output=out) test(2362.37, optimize=0:2, dt[, sum(a) %% 2, b, verbose=TRUE], output=out) test(2362.38, optimize=0:2, dt[, sum(a) %/% 2, b, verbose=TRUE], output=out) test(2362.39, optimize=0:2, dt[, -sum(a), b, verbose=TRUE], output=out) From 5ca7e41e38f6f100949bad494d85682b6105f696 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 09:30:07 +0100 Subject: [PATCH 06/11] add rounding functions --- R/data.table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 03c2d05ff..9d80fe29b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3417,7 +3417,8 @@ is_constantish = function(q, check_singleton=FALSE) { "log", "log10", "log2", "log1p", "exp", "expm1", "cos", "sin", "tan", "acos", "asin", "atan", "cosh", "sinh", "tanh", "acosh", "asinh", "atanh", - "is.na", "is.nan", "is.finite", "is.infinite") + "is.na", "is.nan", "is.finite", "is.infinite", + "trunc", "round", "signif") .unwrap_conversions = function(expr) { while (.is_type_conversion(expr) && length(expr) >= 2L) expr = expr[[2L]] From 597040e850483a8ec18be0838f739972f8fb1cf9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 09:31:09 +0100 Subject: [PATCH 07/11] be more specific about functions --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 9d80fe29b..43c7833ff 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3412,7 +3412,7 @@ is_constantish = function(q, check_singleton=FALSE) { # Outer transformations that can wrap GForce-optimizable expressions # e.g., sqrt(min(x)) should be optimized to sqrt(gmin(x)) -# for the moment we only include unary functions +# for the moment we only include unary elementwise functions .gforce_outer_trans = c("sqrt", "abs", "sign", "floor", "ceiling", "log", "log10", "log2", "log1p", "exp", "expm1", "cos", "sin", "tan", "acos", "asin", "atan", From 151dc90a594c9d42fd1c4f23ff919863e974ead6 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 09:33:51 +0100 Subject: [PATCH 08/11] add gamma functions --- R/data.table.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/data.table.R b/R/data.table.R index 43c7833ff..6e5d4130e 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3416,7 +3416,9 @@ is_constantish = function(q, check_singleton=FALSE) { .gforce_outer_trans = c("sqrt", "abs", "sign", "floor", "ceiling", "log", "log10", "log2", "log1p", "exp", "expm1", "cos", "sin", "tan", "acos", "asin", "atan", + "cospi", "sinpi", "tanpi", "cosh", "sinh", "tanh", "acosh", "asinh", "atanh", + "gamma" "lgamma" "digamma" "trigamma" "is.na", "is.nan", "is.finite", "is.infinite", "trunc", "round", "signif") From 851ce7877e5ad539570ccc427e751a490a66a6f7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 09:36:27 +0100 Subject: [PATCH 09/11] fix commas --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 6e5d4130e..896503601 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3418,7 +3418,7 @@ is_constantish = function(q, check_singleton=FALSE) { "cos", "sin", "tan", "acos", "asin", "atan", "cospi", "sinpi", "tanpi", "cosh", "sinh", "tanh", "acosh", "asinh", "atanh", - "gamma" "lgamma" "digamma" "trigamma" + "gamma", "lgamma", "digamma", "trigamma", "is.na", "is.nan", "is.finite", "is.infinite", "trunc", "round", "signif") From b40b684b5a96e466ba53c536f915f1b8f7a3dd36 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 10:25:45 +0100 Subject: [PATCH 10/11] escape GForce for old test --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index dacd215ee..82d377de5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14743,7 +14743,7 @@ DT = data.table( Date2 = .Date(c(17459.1561177987, 17451.1086757995, 17449.0820898537, 17443.1175238448, 17461.0463715783, 17448.1033968224)) ) DT[ , DiffTime := abs(difftime(Date1, Date2, units = 'days'))] -test(2042.4, DT[ , round(mean(DiffTime)), by=Group, verbose=TRUE], +test(2042.4, DT[ , base::round(mean(DiffTime)), by=Group, verbose=TRUE], data.table(Group=c("A", "B", "C"), V1=structure(c(16, 8, 12), class="difftime", units="days")), output="Old mean optimization is on, left j unchanged.*GForce.*FALSE") From 89f93f9ad612a91b63ab1facba42d7394e35f3f7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 16 Jan 2026 11:29:50 +0100 Subject: [PATCH 11/11] add test for nesting non-GForce call --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 82d377de5..73c5250e7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21495,6 +21495,7 @@ test(2363.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=o test(2363.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out) # Transformation around non-GForce expression should NOT optimize test(2363.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE") +test(2363.12, optimize=2L, dt[, log1p(abs(y)), by=x, verbose=TRUE], output="GForce FALSE") dt = data.table(x = 1:2, y = c(NA, NA, NaN, Inf, 1:4)) test(2363.21, optimize=1:2, dt[, is.na(sum(y)), by=x, verbose=TRUE], output=out) test(2363.22, optimize=1:2, dt[, is.finite(sum(y, na.rm=TRUE)), by=x, verbose=TRUE], output=out)