From 18537eb3cf4e6910503733c5ec4b2bb410425270 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:51:25 +0100 Subject: [PATCH 01/53] add fix for escaping gforce using [variables in function call --- R/data.table.R | 3 ++- inst/tests/tests.Rraw | 2 +- src/shift.c | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 473cf6e766..20c805bc8d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,7 +1748,8 @@ replace_dot_alias = function(e) { # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE) + vars = all.vars(q_named) + if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]]) && (length(vars) == 0L || !any(vars %chin% ls(envir=parent.frame()))) ) return(TRUE) } if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3cbe67680f..7dfb5c8481 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13608,7 +13608,7 @@ test(1963.07, shift(DT, -1:1), c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L))) ## some coverage tests for good measure test(1963.08, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead') -test(1963.09, shift(as.raw(0:1)), error = "Type 'raw' is not supported") +test(1963.09, shift(as.raw(0:1)), as.raw(c(0,0))) test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223 ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), x_shift_0 = 1:10, diff --git a/src/shift.c b/src/shift.c index dba598fe50..95dd87b73d 100644 --- a/src/shift.c +++ b/src/shift.c @@ -165,6 +165,29 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) copyMostAttrib(elem, tmp); } } break; + case RAWSXP : { + const Rbyte rfill = RAW(thisfill)[0]; + for (int j=0; j= 0) || (stype == LEAD && kd[j] < 0)) { + if (tailk > 0) memmove(dtmp+thisk, delem, tailk*size); + if (cycle) { + if (thisk > 0) memmove(dtmp, delem+tailk, thisk*size); + } else for (int m=0; m 0) memmove(dtmp, delem+thisk, tailk*size); + if (cycle) { + if (thisk > 0) memmove(dtmp+tailk, delem, thisk*size); + } else for (int m=tailk; m Date: Mon, 5 Dec 2022 12:37:20 +0100 Subject: [PATCH 02/53] push --- R/data.table.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 20c805bc8d..e6dd0839e7 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,8 +1748,12 @@ replace_dot_alias = function(e) { # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - vars = all.vars(q_named) - if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]]) && (length(vars) == 0L || !any(vars %chin% ls(envir=parent.frame()))) ) return(TRUE) + jsub = q_named + browser() + jsub[["n"]] = as.integer(eval(jsub[["n"]], parent.frame(n=2))) + # q_named[["n"]] = envir(paren) + if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE) + #&& (length(vars) == 0L || !any(vars %chin% ls(envir=parent.frame()))) } if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments From bcd154f44dd5eca16b0549ad32f786a6d1e888f4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <5801475-ben-schwen@users.noreply.gitlab.com> Date: Wed, 6 Sep 2023 11:23:50 +0200 Subject: [PATCH 03/53] escape gshift --- R/data.table.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e6dd0839e7..bd44ad66d1 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,12 +1748,12 @@ replace_dot_alias = function(e) { # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - jsub = q_named - browser() - jsub[["n"]] = as.integer(eval(jsub[["n"]], parent.frame(n=2))) - # q_named[["n"]] = envir(paren) - if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE) - #&& (length(vars) == 0L || !any(vars %chin% ls(envir=parent.frame()))) + vars = all.vars(q_named) + # check if argument + if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]]) + && (is.null(q_named[["fill"]]) || exists(q_named[["fill"]]))) return(TRUE) + #&& (length(vars) == 0L || all(vars %chin% ls(envir=parent.frame()))) ) return(TRUE) + #return(!is.call(q_named[["fill"]]) && !is.null(q_named[["give.names"]])) } if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments From 2161217ab268cc9f72b705dde30d869cbc715bec Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 13 Sep 2023 09:47:28 +0200 Subject: [PATCH 04/53] update --- R/data.table.R | 10 +++++++--- inst/tests/tests.Rraw | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index bd44ad66d1..3f2b646e6c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1749,9 +1749,13 @@ replace_dot_alias = function(e) { if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) vars = all.vars(q_named) - # check if argument - if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]]) - && (is.null(q_named[["fill"]]) || exists(q_named[["fill"]]))) return(TRUE) + #browser() + # evaluate arguments + if (is.symbol(q_named[["fill"]]) && !(q_named["fill"] %chin% sdvars)) { + pos = which(names(q_named) == "fill") + jsub[[pos]] = eval(jsub[[pos]], parent.frame()) + } + if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE) #&& (length(vars) == 0L || all(vars %chin% ls(envir=parent.frame()))) ) return(TRUE) #return(!is.call(q_named[["fill"]]) && !is.null(q_named[["give.names"]])) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6f3bb828e0..0afb1c3398 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17577,6 +17577,7 @@ for (col in c("a","b","c")) { # DT() functional form, #4872 #5106 #5107 #5129 if (base::getRversion() >= "4.1.0") { + rm(DT) # we have to EVAL "|>" here too otherwise this tests.Rraw file won't parse in R<4.1.0 droprn = function(df) { rownames(df)=NULL; df } # TODO: could retain rownames where droprn is currently used below test(2212.011, EVAL("mtcars |> DT(mpg>20, .(mean_hp=round(mean(hp),2)), by=cyl)"), From 5addb780f5bdfd6918cf2ed2f38009ca408ccc4b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 09:56:47 +0100 Subject: [PATCH 05/53] add fix --- R/data.table.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 73551548d7..d9c11c7a57 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,16 +1748,11 @@ replace_dot_alias = function(e) { # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - vars = all.vars(q_named) - #browser() - # evaluate arguments - if (is.symbol(q_named[["fill"]]) && !(q_named["fill"] %chin% sdvars)) { - pos = which(names(q_named) == "fill") - jsub[[pos]] = eval(jsub[[pos]], parent.frame()) - } - if (!is.call(q_named[["fill"]]) && is.null(q_named[["give.names"]])) return(TRUE) - #&& (length(vars) == 0L || all(vars %chin% ls(envir=parent.frame()))) ) return(TRUE) - #return(!is.call(q_named[["fill"]]) && !is.null(q_named[["give.names"]])) + if (!is.call(q_named[["n"]]) && + !is.call(q_named[["fill"]]) && + !is.call(q_named[["type"]]) && + is.null(q_named[["give.names"]])) + return(TRUE) } if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments From 6eb75ad19f83ab75483092905220b7d0aecba2c4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 10:06:20 +0100 Subject: [PATCH 06/53] add test for coverage --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2d63819e0e..f8a11e8f1b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13618,6 +13618,7 @@ test(1963.07, shift(DT, -1:1), ## some coverage tests for good measure test(1963.08, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead') test(1963.09, shift(as.raw(0:1)), as.raw(c(0,0))) +test(1963.095, shift(list(expression(1))), error = "Type 'expression' is not supported") test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223 ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), x_shift_0 = 1:10, @@ -17589,7 +17590,6 @@ for (col in c("a","b","c")) { # DT() functional form, #4872 #5106 #5107 #5129 if (base::getRversion() >= "4.1.0") { - DT = DTfun # we have to EVAL "|>" here too otherwise this tests.Rraw file won't parse in R<4.1.0 droprn = function(df) { rownames(df)=NULL; df } # TODO: could retain rownames where droprn is currently used below test(2212.011, EVAL("mtcars |> DT(mpg>20, .(mean_hp=round(mean(hp),2)), by=cyl)"), From e9cf88ac53b0372f5b2d6332acb04067ed263450 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 10:08:24 +0100 Subject: [PATCH 07/53] add news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 4ca025d22e..cfeed54923 100644 --- a/NEWS.md +++ b/NEWS.md @@ -170,7 +170,7 @@ 28. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR. -29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. +29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report which was fixed before release. ```R N = 1e7 From 7ace3d56f96c3f7ad5b4e7377e52d5e3182b9403 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 11:12:53 +0100 Subject: [PATCH 08/53] update shift tests --- inst/tests/tests.Rraw | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f8a11e8f1b..246e432260 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17996,6 +17996,11 @@ ydt[, date := seq_len(.N), by = symbol] ydt[, ret := rnorm(.N)] f = shift test(2233.33, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead"), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce optimized j to") +# turn off GForce for shift where arguments are calls, #5547 +test(2233.34, copy(ydt)[, (ycols) := shift(ret, abs(yn), type = "lead"), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce FALSE") +test(2233.35, copy(ydt)[, (ycols) := shift(ret, yn, type = as.character("lead")), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce FALSE") +test(2233.36, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead", fill=1), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead", fill=1), by = symbol], output="GForce optimized j to") +test(2233.37, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead", fill=abs(1)), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead", fill=1), by = symbol], output="GForce FALSE") # optimized := by= with out-of-order groups broke in dev before release; #5307, #5326, #5337, #5345 # verbatim from #5307 From 74970f550ac50f019811fba63989ea9ebba27eba Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 11:28:31 +0100 Subject: [PATCH 09/53] move tests --- inst/tests/tests.Rraw | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 246e432260..6fcfe1b434 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17996,11 +17996,6 @@ ydt[, date := seq_len(.N), by = symbol] ydt[, ret := rnorm(.N)] f = shift test(2233.33, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead"), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce optimized j to") -# turn off GForce for shift where arguments are calls, #5547 -test(2233.34, copy(ydt)[, (ycols) := shift(ret, abs(yn), type = "lead"), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce FALSE") -test(2233.35, copy(ydt)[, (ycols) := shift(ret, yn, type = as.character("lead")), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead"), by = symbol], output="GForce FALSE") -test(2233.36, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead", fill=1), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead", fill=1), by = symbol], output="GForce optimized j to") -test(2233.37, copy(ydt)[, (ycols) := shift(ret, yn, type = "lead", fill=abs(1)), by = symbol, verbose=TRUE], copy(ydt)[, (ycols) := f(ret, yn, type = "lead", fill=1), by = symbol], output="GForce FALSE") # optimized := by= with out-of-order groups broke in dev before release; #5307, #5326, #5337, #5345 # verbatim from #5307 @@ -18064,6 +18059,16 @@ for (opt in c(0,Inf)) { testnum = 2233.44 } options(old) +# turn off GForce for shift where arguments are calls, #5547 +DT = data.table(x=letters[1:4], y=1:2) +yn = c(0,1) +t = "lead" +f = shift +test(2233.51, copy(DT)[, shift(x, yn, type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce optimized j to") +test(2233.52, copy(DT)[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2233.53, copy(DT)[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2233.54, copy(DT)[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") +test(2233.55, copy(DT)[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") # support by=.I; #1732 DT = data.table(V1=1:5, V2=3:7, V3=5:1) From 4e1c43fa6bcf722e697093796601c0f30e652163 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 11:34:13 +0100 Subject: [PATCH 10/53] update tests --- inst/tests/tests.Rraw | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6fcfe1b434..a7aa5f8993 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18059,16 +18059,6 @@ for (opt in c(0,Inf)) { testnum = 2233.44 } options(old) -# turn off GForce for shift where arguments are calls, #5547 -DT = data.table(x=letters[1:4], y=1:2) -yn = c(0,1) -t = "lead" -f = shift -test(2233.51, copy(DT)[, shift(x, yn, type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce optimized j to") -test(2233.52, copy(DT)[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2233.53, copy(DT)[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2233.54, copy(DT)[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") -test(2233.55, copy(DT)[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") # support by=.I; #1732 DT = data.table(V1=1:5, V2=3:7, V3=5:1) @@ -18153,3 +18143,14 @@ DT = data.table(id=1:2, x=1:2) r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) + +# turn off GForce for shift where arguments are calls, #5547 +DT = data.table(x=letters[1:4], y=1:2) +yn = c(0,1) +t = "lead" +f = shift +test(2242.1, copy(DT)[, shift(x, yn, type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce optimized j to") +test(2242.2, copy(DT)[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2242.3, copy(DT)[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2242.4, copy(DT)[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") +test(2242.5, copy(DT)[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") From 9eae0fad81b3d6548f1abf70a340fbc18b9cddf2 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 11:35:54 +0100 Subject: [PATCH 11/53] simplify tests --- inst/tests/tests.Rraw | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a7aa5f8993..bd06d8e63f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18149,8 +18149,8 @@ DT = data.table(x=letters[1:4], y=1:2) yn = c(0,1) t = "lead" f = shift -test(2242.1, copy(DT)[, shift(x, yn, type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce optimized j to") -test(2242.2, copy(DT)[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2242.3, copy(DT)[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2242.4, copy(DT)[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") -test(2242.5, copy(DT)[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], copy(DT)[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") +test(2242.1, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce optimized j to") +test(2242.2, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2242.3, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce FALSE") +test(2242.4, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], DT[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") +test(2242.5, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], DT[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") From 6d9763851d08ee1f52bcf61efd52f9d82261b86d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 11:45:22 +0100 Subject: [PATCH 12/53] simplify --- inst/tests/tests.Rraw | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bd06d8e63f..a8cf358e93 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18145,12 +18145,12 @@ test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) # turn off GForce for shift where arguments are calls, #5547 -DT = data.table(x=letters[1:4], y=1:2) +DT = data.table(x=letters[1:4], y=c(1L,1L,2L,2L)) yn = c(0,1) t = "lead" f = shift -test(2242.1, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce optimized j to") -test(2242.2, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2242.3, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], DT[, f(x, yn, type=t), by=y], output="GForce FALSE") -test(2242.4, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], DT[, f(x, yn, type=t, fill=1), by=y], output="GForce optimized j to") -test(2242.5, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], DT[, f(x, yn, type=t, fill=1), by=y], output="GForce FALSE") +test(2242.1, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce optimized j to") +test(2242.2, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") +test(2242.3, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") +test(2242.4, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce optimized j to") +test(2242.5, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce FALSE") From af6d7bd33ed395d7ae240f8152e9bc27633db492 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 12:08:36 +0100 Subject: [PATCH 13/53] working version --- R/data.table.R | 6 +++++- inst/tests/tests.Rraw | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index d9c11c7a57..5ac21afac2 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1776,7 +1776,11 @@ replace_dot_alias = function(e) { # adding argument to ghead/gtail if none is supplied to g-optimized head/tail if (length(jsub) == 2L && jsub[[1L]] %chin% c("head", "tail")) jsub[["n"]] = 6L jsub[[1L]] = as.name(paste0("g", jsub[[1L]])) - if (length(jsub)>=3L && is.symbol(jsub[[3L]]) && !(jsub[[3L]] %chin% sdvars)) jsub[[3L]] = eval(jsub[[3L]], parent.frame()) # tests 1187.3 & 1187.5 + if (length(jsub)>=3L) { + for (i in 3:length(jsub)) { + if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% sdvars)) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 + } + } } if (verbose) catf("GForce optimized j to '%s'\n", deparse(jsub, width.cutoff=200L, nlines=1L)) } else if (verbose) catf("GForce is on, left j unchanged\n"); diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a8cf358e93..25210a5299 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18145,7 +18145,8 @@ test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) # turn off GForce for shift where arguments are calls, #5547 -DT = data.table(x=letters[1:4], y=c(1L,1L,2L,2L)) +options(datatable.optimize=2L) +DT = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) yn = c(0,1) t = "lead" f = shift From 6cdfc812ddc295812c6f9bf98f2f3c9ec9a0c749 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 12:14:30 +0100 Subject: [PATCH 14/53] add comments --- R/data.table.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/data.table.R b/R/data.table.R index 5ac21afac2..d886f5e043 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1777,6 +1777,7 @@ replace_dot_alias = function(e) { if (length(jsub) == 2L && jsub[[1L]] %chin% c("head", "tail")) jsub[["n"]] = 6L jsub[[1L]] = as.name(paste0("g", jsub[[1L]])) if (length(jsub)>=3L) { + # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok for (i in 3:length(jsub)) { if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% sdvars)) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 } From 3c361c44210ea088d1e18f5c08793c9224a287f8 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 12:15:47 +0100 Subject: [PATCH 15/53] update test info --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 25210a5299..246ece4102 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18144,7 +18144,7 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) -# turn off GForce for shift where arguments are calls, #5547 +# turn off GForce for shift where arguments are calls but still allow variables, #5547 options(datatable.optimize=2L) DT = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) yn = c(0,1) From 63f10fa5721024698bf8235c24c7f375b637ad72 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 25 Dec 2023 12:22:03 +0100 Subject: [PATCH 16/53] add dropped DT --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 246ece4102..b1d9a26cc3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17590,6 +17590,7 @@ for (col in c("a","b","c")) { # DT() functional form, #4872 #5106 #5107 #5129 if (base::getRversion() >= "4.1.0") { + DT = DTfun # we have to EVAL "|>" here too otherwise this tests.Rraw file won't parse in R<4.1.0 droprn = function(df) { rownames(df)=NULL; df } # TODO: could retain rownames where droprn is currently used below test(2212.011, EVAL("mtcars |> DT(mpg>20, .(mean_hp=round(mean(hp),2)), by=cyl)"), From fc8f40bd39d8165f905fdd7b7b33874ba2fbd631 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 26 Dec 2023 17:04:25 +0100 Subject: [PATCH 17/53] add raw tests --- R/data.table.R | 7 ++++++- inst/tests/tests.Rraw | 29 ++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index d886f5e043..184d080d13 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1770,7 +1770,12 @@ replace_dot_alias = function(e) { for (ii in seq_along(jsub)[-1L]) { if (dotN(jsub[[ii]])) next; # For #334 jsub[[ii]][[1L]] = as.name(paste0("g", jsub[[ii]][[1L]])) - if (length(jsub[[ii]])>=3L && is.symbol(jsub[[ii]][[3L]]) && !(jsub[[ii]][[3L]] %chin% sdvars)) jsub[[ii]][[3L]] = eval(jsub[[ii]][[3L]], parent.frame()) # tests 1187.2 & 1187.4 + if (length(jsub[[ii]])>=3L) { + # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok + for (i in 3:length(jsub[[ii]])) { + if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% sdvars)) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 + } + } } else { # adding argument to ghead/gtail if none is supplied to g-optimized head/tail diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b1d9a26cc3..952bdff1d5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6730,6 +6730,18 @@ test(1463.69, shift(x, -6, type="cyclic"), shift(x, -1, type="cyclic")) # test warning test(1463.70, shift(x, 1, fill=1, type="cyclic"), c(5L, 1L:4L), warning="Provided argument fill=1 will be ignored since type='shift'.") +# test raw #5547 +x=as.raw(1:5) +test(1463.71, shift(x,1L), as.raw(c(0L, 1:4))) +test(1463.72, shift(x,1:2), list(as.raw(c(0L, 1:4)), as.raw(c(0L, 0L, 1:3)))) +test(1463.73, shift(x,1L, 0L), as.raw(c(0L, 1:4))) +test(1463.74, shift(x,1L, type="lead"), as.raw(c(2:5, 0L))) +test(1463.75, shift(x,1:2, type="lead"), list(as.raw(c(2:5, 0L)), as.raw(c(3:5, 0L, 0L)))) +test(1463.76, shift(x,1L, 0L,type="lead"), as.raw(c(2:5, 0L))) +test(1463.77, shift(x,1L, type="cyclic"), as.raw(c(5, 1:4))) +test(1463.78, shift(x,1:2, type="cyclic"), list(as.raw(c(5, 1:4)), as.raw(c(4:5, 1:3)))) +test(1463.79, shift(x,-1L, type="cyclic"), as.raw(c(2:5, 1))) +test(1463.80, shift(x,-(1:2),type="cyclic"), list(as.raw(c(2:5, 1)), as.raw(c(3:5,1:2)))) # FR #686 DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)), foo=1:10) @@ -18151,8 +18163,15 @@ DT = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) yn = c(0,1) t = "lead" f = shift -test(2242.1, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce optimized j to") -test(2242.2, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") -test(2242.3, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") -test(2242.4, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce optimized j to") -test(2242.5, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce FALSE") +o1 = DT[, f(x, c(0,1), type="lead"), by=y] +o2 = DT[, f(x, c(0,1), fill=1, type="lead"), by=y] +test(2242.01, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], o1, output="GForce optimized j to") +test(2242.02, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], o1, output="GForce FALSE") +test(2242.03, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], o1, output="GForce FALSE") +test(2242.04, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], o2, output="GForce optimized j to") +test(2242.05, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], o2, output="GForce FALSE") +# test(2242.11, DT[, list(shift(x, yn, type=t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce optimized j to") +# test(2242.12, DT[, .(shift(x, abs(yn), type=t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") +# test(2242.13, DT[, .(shift(x, yn, type=as.character(t))), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") +# test(2242.14, DT[, .(shift(x, yn, type=t, fill=1)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce optimized j to") +# test(2242.15, DT[, .(shift(x, yn, type=t, fill=abs(1))), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce FALSE") From 9cafd8ebb8d47b2423a16892426d653385ca6827 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 01:01:22 +0100 Subject: [PATCH 18/53] update tests --- R/data.table.R | 9 ++++++--- inst/tests/tests.Rraw | 23 +++++++++-------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 184d080d13..00e9c39484 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,9 +1748,10 @@ replace_dot_alias = function(e) { # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - if (!is.call(q_named[["n"]]) && - !is.call(q_named[["fill"]]) && - !is.call(q_named[["type"]]) && + noCall_or_noVars = function(expr) !is.call(expr) || length(all.vars(expr, max.names=1L))==0 + if (noCall_or_noVars(q_named[["n"]]) && + noCall_or_noVars(q_named[["fill"]]) && + noCall_or_noVars(q_named[["type"]]) && is.null(q_named[["give.names"]])) return(TRUE) } @@ -1895,6 +1896,8 @@ replace_dot_alias = function(e) { g = lapply(g, rep.int, times=grplens) } else if (.is_nrows(jsub)) { g = lapply(g, rep.int, times=len__) + # unpack list of lists for nrows functions + # if (all(vapply_1b(ans, is.list))) ans = lapply(ans, transpose) } ans = c(g, ans) } else { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 952bdff1d5..46b2951d05 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18159,19 +18159,14 @@ test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) # turn off GForce for shift where arguments are calls but still allow variables, #5547 options(datatable.optimize=2L) -DT = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) -yn = c(0,1) +dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) +i = c(0,1) t = "lead" f = shift -o1 = DT[, f(x, c(0,1), type="lead"), by=y] -o2 = DT[, f(x, c(0,1), fill=1, type="lead"), by=y] -test(2242.01, DT[, shift(x, yn, type=t), by=y, verbose=TRUE], o1, output="GForce optimized j to") -test(2242.02, DT[, shift(x, abs(yn), type=t), by=y, verbose=TRUE], o1, output="GForce FALSE") -test(2242.03, DT[, shift(x, yn, type=as.character(t)), by=y, verbose=TRUE], o1, output="GForce FALSE") -test(2242.04, DT[, shift(x, yn, type=t, fill=1), by=y, verbose=TRUE], o2, output="GForce optimized j to") -test(2242.05, DT[, shift(x, yn, type=t, fill=abs(1)), by=y, verbose=TRUE], o2, output="GForce FALSE") -# test(2242.11, DT[, list(shift(x, yn, type=t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce optimized j to") -# test(2242.12, DT[, .(shift(x, abs(yn), type=t)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") -# test(2242.13, DT[, .(shift(x, yn, type=as.character(t))), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b",NA,"d",NA)), output="GForce FALSE") -# test(2242.14, DT[, .(shift(x, yn, type=t, fill=1)), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce optimized j to") -# test(2242.15, DT[, .(shift(x, yn, type=t, fill=abs(1))), by=y, verbose=TRUE], data.table(y=c(1L,1L,2L,2L), V1=c("a","b","c","d"), V2=c("b","1","d","1")), output="GForce FALSE") +o1 = dt[, f(x, c(0,1), type="lead"), by=y] +o2 = dt[, f(x, c(0,1), fill=1, type="lead"), by=y] +test(2242.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], o1, output="GForce optimized j to") +test(2242.02, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], o1, output="GForce FALSE") +test(2242.03, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], o1, output="GForce FALSE") +test(2242.04, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], o2, output="GForce optimized j to") +test(2242.05, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], o2, output="GForce FALSE") From e25ecf6bb4fc9a32647b93544af9c93f850953e9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 01:46:03 +0100 Subject: [PATCH 19/53] add more tests for nested jsub --- R/data.table.R | 5 ++++- inst/tests/tests.Rraw | 16 +++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 00e9c39484..f055d35494 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1897,7 +1897,10 @@ replace_dot_alias = function(e) { } else if (.is_nrows(jsub)) { g = lapply(g, rep.int, times=len__) # unpack list of lists for nrows functions - # if (all(vapply_1b(ans, is.list))) ans = lapply(ans, transpose) + zip = function(ll) do.call(mapply, c(list(c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE)) + if (all(vapply_1b(ans, is.list))) { + ans = lapply(ans, zip) + } } ans = c(g, ans) } else { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 46b2951d05..6befd436fd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18161,12 +18161,14 @@ test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) options(datatable.optimize=2L) dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) i = c(0,1) +j = 1L t = "lead" f = shift -o1 = dt[, f(x, c(0,1), type="lead"), by=y] -o2 = dt[, f(x, c(0,1), fill=1, type="lead"), by=y] -test(2242.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], o1, output="GForce optimized j to") -test(2242.02, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], o1, output="GForce FALSE") -test(2242.03, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], o1, output="GForce FALSE") -test(2242.04, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], o2, output="GForce optimized j to") -test(2242.05, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], o2, output="GForce FALSE") +test(2242.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") +test(2242.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") +test(2242.03, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") +test(2242.04, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") +test(2242.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") +test(2242.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") +test(2242.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") +test(2242.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") From 95ca220bd1a592c3a944192e3598a4387a77d6f1 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 02:36:29 +0100 Subject: [PATCH 20/53] add more tests --- R/data.table.R | 14 +++++++------- inst/tests/tests.Rraw | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f055d35494..dd4ec1410a 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1739,26 +1739,26 @@ replace_dot_alias = function(e) { } else { # Apply GForce .gforce_ok = function(q) { + noCall_noVars = function(expr) !is.call(expr) || length(all.vars(expr, max.names=1L))==0 if (dotN(q)) return(TRUE) # For #334 # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na")))) return(TRUE) + if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && noCall_noVars(q[[3L]]))) return(TRUE) # ^^ base::startWith errors on NULL unfortunately if (length(q)>=2L && q[[1L]] == "shift") { q_named = match.call(shift, q) - noCall_or_noVars = function(expr) !is.call(expr) || length(all.vars(expr, max.names=1L))==0 - if (noCall_or_noVars(q_named[["n"]]) && - noCall_or_noVars(q_named[["fill"]]) && - noCall_or_noVars(q_named[["type"]]) && + if (noCall_noVars(q_named[["n"]]) && + noCall_noVars(q_named[["fill"]]) && + noCall_noVars(q_named[["type"]]) && is.null(q_named[["give.names"]])) return(TRUE) } if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments - length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && - ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) ) + length(q)==3L && length(q3 <- q[[3L]])==1L && noCall_noVars(q3) && + ( (q1 %chin% c("head", "tail")) || (q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) ) } if (jsub[[1L]]=="list") { GForce = TRUE diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6befd436fd..3f9c5c0fa7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18172,3 +18172,19 @@ test(2242.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], dt[, test(2242.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") test(2242.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") test(2242.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") +# allow variable as n of head(x, n), "["(x, n) #5547 +dt = data.table(id = c(1L,1L,2L), v = 1:3) +j=1L +test(2242.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2242.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2242.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +# allow variable as na.rm of sum, mean, median, prod, min, max, var, sd #5547 +dt = data.table(id = c(1L,1L,2L), v = 1:3) +j=FALSE +test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") +test(2242.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") +test(2242.23, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") +test(2242.24, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2242.25, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2242.26, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") +test(2242.27, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") From e6ccf13668e4e5f8c2d94bb17b178e2ed9d45291 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 02:58:05 +0100 Subject: [PATCH 21/53] make qforce_ok more robust --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index dd4ec1410a..f6b18ad945 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1739,7 +1739,7 @@ replace_dot_alias = function(e) { } else { # Apply GForce .gforce_ok = function(q) { - noCall_noVars = function(expr) !is.call(expr) || length(all.vars(expr, max.names=1L))==0 + noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L))==0) && !dotN(expr) if (dotN(q)) return(TRUE) # For #334 # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD # is.symbol() is for #1369, #1974 and #2949 @@ -1758,7 +1758,7 @@ replace_dot_alias = function(e) { if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments length(q)==3L && length(q3 <- q[[3L]])==1L && noCall_noVars(q3) && - ( (q1 %chin% c("head", "tail")) || (q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) ) + ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q3, parent.frame())>0L) ) } if (jsub[[1L]]=="list") { GForce = TRUE From af93263ee9e18681fed85db0a9742786c9d02ddc Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 13:41:40 +0100 Subject: [PATCH 22/53] update comments --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3f9c5c0fa7..a74eaeb7df 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18157,7 +18157,7 @@ r = copy(DT)[1L, x:= 5L] test(2241.13, DT, data.table(id=1:2, x=1:2)) test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) -# turn off GForce for shift where arguments are calls but still allow variables, #5547 +# turn off GForce where arguments are calls but still allow variables, #5547 options(datatable.optimize=2L) dt = data.table(x=c("a","b","c","d"), y=c(1L,1L,2L,2L)) i = c(0,1) @@ -18172,13 +18172,13 @@ test(2242.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], dt[, test(2242.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") test(2242.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") test(2242.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") -# allow variable as n of head(x, n), "["(x, n) #5547 +# GForce only var or call without vars as n of head/tail/"["(x, n) dt = data.table(id = c(1L,1L,2L), v = 1:3) j=1L test(2242.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") test(2242.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") test(2242.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") -# allow variable as na.rm of sum, mean, median, prod, min, max, var, sd #5547 +# GForce only var or call without vars as na.rm of sum, mean, median, prod, min, max, var dt = data.table(id = c(1L,1L,2L), v = 1:3) j=FALSE test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") From 46d4824bdab0e5ad7b6fc840fef8b088f9458527 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 28 Dec 2023 13:55:28 +0100 Subject: [PATCH 23/53] simplify logical --- src/shift.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/src/shift.c b/src/shift.c index 06b57ae352..e3e4a2b82b 100644 --- a/src/shift.c +++ b/src/shift.c @@ -42,11 +42,11 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) R_xlen_t xrows = xlength(elem); SEXP thisfill = PROTECT(coerceAs(fill, elem, ScalarLogical(0))); nprotect++; // #4865 use coerceAs for type coercion switch (TYPEOF(elem)) { - case INTSXP : { + case INTSXP: case LGLSXP: { const int ifill = INTEGER(thisfill)[0]; for (int j=0; j= 0) || (stype == LEAD && kd[j] < 0)) { - if (tailk > 0) memmove(ltmp+thisk, lelem, tailk*size); - if (cycle) { - if (thisk > 0) memmove(ltmp, lelem+tailk, thisk*size); - } else for (int m=0; m 0) memmove(ltmp, lelem+thisk, tailk*size); - if (cycle) { - if (thisk > 0) memmove(ltmp+tailk, lelem, thisk*size); - } else for (int m=tailk; m Date: Thu, 28 Dec 2023 14:58:23 +0100 Subject: [PATCH 24/53] remove comment since n is used --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f6b18ad945..6af7b3ac3b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -3016,8 +3016,8 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { gfuns = c("[", "[[", "head", "tail", "first", "last", "sum", "mean", "prod", "median", "min", "max", "var", "sd", ".N", "shift", "weighted.mean") # added .N for #334 `g[` = `g[[` = function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here. -ghead = function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the moment -gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment +ghead = function(x, n) .Call(Cghead, x, as.integer(n)) +gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) gfirst = function(x) .Call(Cgfirst, x) glast = function(x) .Call(Cglast, x) gsum = function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm) From 4d3999158d77dbcb02895bd0f518c86c3d1f466a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 2 Jan 2024 17:08:59 +0100 Subject: [PATCH 25/53] update eval environment --- R/data.table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 486919714d..9577267767 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1758,7 +1758,8 @@ replace_dot_alias = function(e) { if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 # otherwise there must be three arguments length(q)==3L && length(q3 <- q[[3L]])==1L && noCall_noVars(q3) && - ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q3, parent.frame())>0L) ) + ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q3, parent.frame(2L))>0L) ) + # ^ eval outside of [ } if (jsub[[1L]]=="list") { GForce = TRUE From 293b5fc0397e87e96e08c3f907ca321e1e435a1f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 11:33:27 +0100 Subject: [PATCH 26/53] add Jans testcase --- inst/tests/tests.Rraw | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4231103f20..bfd03be0cb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18194,18 +18194,26 @@ test(2242.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, test(2242.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") test(2242.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") # GForce only var or call without vars as n of head/tail/"["(x, n) -dt = data.table(id = c(1L,1L,2L), v = 1:3) -j=1L +dt = data.table(id=c(1L,1L,2L), v=1:3) +j = 1L test(2242.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") test(2242.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") test(2242.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") # GForce only var or call without vars as na.rm of sum, mean, median, prod, min, max, var -dt = data.table(id = c(1L,1L,2L), v = 1:3) -j=FALSE -test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") +j = FALSE +test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") test(2242.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") test(2242.23, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") -test(2242.24, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") -test(2242.25, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") -test(2242.26, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") -test(2242.27, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") +test(2242.24, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2242.25, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2242.26, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") +test(2242.27, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") +dt = data.table(g=1:2, y=1:4) +j = TRUE +test(2242.31, dt[, sum(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)), output="GForce FALSE") +test(2242.32, dt[, mean(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") +test(2242.33, dt[, prod(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)), output="GForce FALSE") +test(2242.34, dt[, min(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)), output="GForce FALSE") +test(2242.35, dt[, max(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)), output="GForce FALSE") +test(2242.36, dt[, var(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)), output="GForce FALSE") +test(2242.37, dt[, sd(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE") From 5f5382e52d67870d8c4754f60a7227c496f69c4e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 12:57:34 +0100 Subject: [PATCH 27/53] add helper functions --- R/data.table.R | 50 ++++++++++++++++++++++++++++++------------- inst/tests/tests.Rraw | 36 +++++++++++++++++++------------ 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9577267767..8b90144f98 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1738,8 +1738,32 @@ replace_dot_alias = function(e) { GForce = FALSE } else { # Apply GForce + noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L))==0) && !dotN(expr) + .gshift_ok = function(q) { + q_named = match.call(shift, q) + noCall_noVars(q_named[["n"]]) && + noCall_noVars(q_named[["fill"]]) && + noCall_noVars(q_named[["type"]]) && + is.null(q_named[["give.names"]]) + } + .ghead_ok = function(q) { + length(q)==3L && + length(q[[3L]])==1L && + noCall_noVars(q[[3L]]) + } + ".g[_ok" = function(q, x) { + length(q)==3L && + length(q[[3L]])==1L && + noCall_noVars(q[[3L]]) && + (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && + eval(q[[3L]], parent.frame(3L))>0L + } + .gweighted.mean_ok = function(q, x) { #3977 + q_named = match.call(function(x, w, ..., na.rm=FALSE) {}, q) + noCall_noVars(q_named[["na.rm"]]) && + (is.null(q_named[["w"]]) || eval(call('is.numeric', q_named[["w"]]), envir=x)) + } .gforce_ok = function(q) { - noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L))==0) && !dotN(expr) if (dotN(q)) return(TRUE) # For #334 # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD # is.symbol() is for #1369, #1974 and #2949 @@ -1747,19 +1771,15 @@ replace_dot_alias = function(e) { if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && noCall_noVars(q[[3L]]))) return(TRUE) # ^^ base::startWith errors on NULL unfortunately - if (length(q)>=2L && q[[1L]] == "shift") { - q_named = match.call(shift, q) - if (noCall_noVars(q_named[["n"]]) && - noCall_noVars(q_named[["fill"]]) && - noCall_noVars(q_named[["type"]]) && - is.null(q_named[["give.names"]])) - return(TRUE) - } - if (length(q)>=3L && q[[1L]] == "weighted.mean") return(TRUE) #3977 - # otherwise there must be three arguments - length(q)==3L && length(q3 <- q[[3L]])==1L && noCall_noVars(q3) && - ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q3, parent.frame(2L))>0L) ) - # ^ eval outside of [ + switch(as.character(q[[1L]]), + "shift" = .gshift_ok(q), + "weighted.mean" = .gweighted.mean_ok(q, x), + "head" = .ghead_ok(q), + "tail" = .ghead_ok(q), + "[" = `.g[_ok`(q, x), + "[[" = `.g[_ok`(q, x), + FALSE + ) } if (jsub[[1L]]=="list") { GForce = TRUE @@ -3026,7 +3046,7 @@ gfirst = function(x) .Call(Cgfirst, x) glast = function(x) .Call(Cglast, x) gsum = function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm) gmean = function(x, na.rm=FALSE) .Call(Cgmean, x, na.rm) -gweighted.mean = function(x, w, na.rm=FALSE) { +gweighted.mean = function(x, w, ..., na.rm=FALSE) { if (missing(w)) gmean(x, na.rm) else { if (na.rm) { # take those indices out of the equation by setting them to 0 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bfd03be0cb..6783f3a02e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17944,6 +17944,12 @@ test(2231.50, DT[, weighted.mean(x, w, na.rm=TRUE), g, verbose=TRUE], data.table DT = data.table(x=c(1L,NA,NaN,3L,4L,5L,5L,6L), w=c(1L,NaN,NA,1L,2L,2L,2L,2L), g=rep(1L:2L, each=4L)) test(2231.51, DT[, weighted.mean(x, w, na.rm=FALSE), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(NA, 5)), output="GForce optimized j to") test(2231.52, DT[, weighted.mean(x, w, na.rm=TRUE), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2, 5)), output="GForce optimized j to") +# let wrongly named arguments get lost in ellipsis #5543 +DT = data.table(x=c(3.7,3.3,3.5,2.8), w=c(5,5,4,1), g=1L) +test(2231.61, DT[, weighted.mean(x, w), g, verbose=TRUE], data.table(g=1L, V1=3.45+1/300), output="GForce optimized j to") +test(2231.62, DT[, weighted.mean(x, weight=w), g, verbose=TRUE], data.table(g=1L, V1=3.325), output="GForce optimized j to") +test(2231.63, DT[, weighted.mean(x, w, na.rm=FALSE), g], DT[, stats::weighted.mean(x, w, na.rm=FALSE), g]) +test(2231.64, DT[, weighted.mean(x, weight=w, na.rm=TRUE)], DT[, stats::weighted.mean(x, weight=w, na.rm=TRUE)]) options(old) # cols argument for unique.data.table, #5243 @@ -18201,19 +18207,21 @@ test(2242.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2 test(2242.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") # GForce only var or call without vars as na.rm of sum, mean, median, prod, min, max, var j = FALSE -test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") -test(2242.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") -test(2242.23, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") -test(2242.24, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") -test(2242.25, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") -test(2242.26, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") -test(2242.27, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") +test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") +test(2242.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") +test(2242.23, dt[, median(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") +test(2242.24, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") +test(2242.25, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2242.26, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2242.27, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") +test(2242.28, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") dt = data.table(g=1:2, y=1:4) j = TRUE -test(2242.31, dt[, sum(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)), output="GForce FALSE") -test(2242.32, dt[, mean(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") -test(2242.33, dt[, prod(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)), output="GForce FALSE") -test(2242.34, dt[, min(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)), output="GForce FALSE") -test(2242.35, dt[, max(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)), output="GForce FALSE") -test(2242.36, dt[, var(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)), output="GForce FALSE") -test(2242.37, dt[, sd(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE") +test(2242.31, dt[, sum(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)), output="GForce FALSE") +test(2242.32, dt[, mean(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") +test(2242.33, dt[, median(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") +test(2242.34, dt[, prod(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)), output="GForce FALSE") +test(2242.35, dt[, min(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)), output="GForce FALSE") +test(2242.36, dt[, max(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)), output="GForce FALSE") +test(2242.37, dt[, var(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)), output="GForce FALSE") +test(2242.38, dt[, sd(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE") From eb10410fa91320f27dccead5ba7ac4e4f26d9dd4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 13:08:02 +0100 Subject: [PATCH 28/53] remove unused assignments --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index dfc64ff8ea..62b56e339d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1768,7 +1768,7 @@ replace_dot_alias = function(e) { if (dotN(q)) return(TRUE) # For #334 # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD # is.symbol() is for #1369, #1974 and #2949 - if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) + if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && noCall_noVars(q[[3L]]))) return(TRUE) # ^^ base::startWith errors on NULL unfortunately @@ -1904,7 +1904,7 @@ replace_dot_alias = function(e) { if (!is.symbol(jsub)) { headTail_arg = function(q) { if (length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && - (q1 <- q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3 + (q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3 else 0 } if (jsub[[1L]] == "list"){ From 1e55ac2b6c23251a45cba8d74297deff3916c377 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 13:08:09 +0100 Subject: [PATCH 29/53] update test nums --- inst/tests/tests.Rraw | 54 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4bb1d7e553..4bc7bf8c67 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18211,37 +18211,37 @@ i = c(0,1) j = 1L t = "lead" f = shift -test(2242.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") -test(2242.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") -test(2242.03, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") -test(2242.04, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") -test(2242.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") -test(2242.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") -test(2242.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") -test(2242.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") +test(2243.01, dt[, shift(x, i, type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") +test(2243.02, dt[, shift(x, abs(c(0,1)), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce optimized j to") +test(2243.03, dt[, shift(x, abs(i), type=t), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") +test(2243.04, dt[, shift(x, i, type=as.character(t)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead"), by=y], output="GForce FALSE") +test(2243.05, dt[, shift(x, i, type=t, fill=1), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") +test(2243.06, dt[, shift(x, i, type=t, fill=abs(1)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce optimized j to") +test(2243.07, dt[, shift(x, i, type=t, fill=abs(j)), by=y, verbose=TRUE], dt[, f(x, c(0,1), type="lead", fill=1), by=y], output="GForce FALSE") +test(2243.08, dt[, .(shift(x, i, type=t)), by=y, verbose=TRUE], dt[, .(f(x, c(0,1), type="lead")), by=y], output="GForce optimized j to") # GForce only var or call without vars as n of head/tail/"["(x, n) dt = data.table(id=c(1L,1L,2L), v=1:3) j = 1L -test(2242.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") -test(2242.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") -test(2242.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2243.11, dt[, head(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2243.12, dt[, tail(v, j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2243.13, dt[, v[j], id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") # GForce only var or call without vars as na.rm of sum, mean, median, prod, min, max, var j = FALSE -test(2242.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") -test(2242.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") -test(2242.23, dt[, median(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") -test(2242.24, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") -test(2242.25, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") -test(2242.26, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") -test(2242.27, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") -test(2242.28, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") +test(2243.21, dt[, sum(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(3L,3L)), output="GForce optimized j to") +test(2243.22, dt[, mean(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") +test(2243.23, dt[, median(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1.5,3)), output="GForce optimized j to") +test(2243.24, dt[, prod(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2,3)), output="GForce optimized j to") +test(2243.25, dt[, min(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(1L,3L)), output="GForce optimized j to") +test(2243.26, dt[, max(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(2L,3L)), output="GForce optimized j to") +test(2243.27, dt[, var(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(0.5,NA)), output="GForce optimized j to") +test(2243.28, dt[, sd(v, na.rm=j), id, verbose=TRUE], data.table(id=c(1L,2L), V1=c(sqrt(0.5),NA)), output="GForce optimized j to") dt = data.table(g=1:2, y=1:4) j = TRUE -test(2242.31, dt[, sum(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)), output="GForce FALSE") -test(2242.32, dt[, mean(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") -test(2242.33, dt[, median(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") -test(2242.34, dt[, prod(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)), output="GForce FALSE") -test(2242.35, dt[, min(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)), output="GForce FALSE") -test(2242.36, dt[, max(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)), output="GForce FALSE") -test(2242.37, dt[, var(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)), output="GForce FALSE") -test(2242.38, dt[, sd(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE") +test(2243.31, dt[, sum(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(4L,6L)), output="GForce FALSE") +test(2243.32, dt[, mean(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") +test(2243.33, dt[, median(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,3)), output="GForce FALSE") +test(2243.34, dt[, prod(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3,8)), output="GForce FALSE") +test(2243.35, dt[, min(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(1L,2L)), output="GForce FALSE") +test(2243.36, dt[, max(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(3L,4L)), output="GForce FALSE") +test(2243.37, dt[, var(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(2,2)), output="GForce FALSE") +test(2243.38, dt[, sd(y, na.rm=as.logical(j)), g, verbose=TRUE], data.table(g=c(1L,2L), V1=c(sqrt(c(2,2)))), output="GForce FALSE") From 6e4338c454b05596716d40ef343757188dca13b9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:24:02 +0100 Subject: [PATCH 30/53] escape evaluating values present int x --- R/data.table.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 62b56e339d..6579d6fe4e 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1795,8 +1795,9 @@ replace_dot_alias = function(e) { jsub[[ii]][[1L]] = as.name(paste0("g", jsub[[ii]][[1L]])) if (length(jsub[[ii]])>=3L) { # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok + # do not evaluate vars present as columns in x for (i in 3:length(jsub[[ii]])) { - if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% sdvars)) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 + if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% c(sdvars, names_x))) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 } } } @@ -1806,8 +1807,9 @@ replace_dot_alias = function(e) { jsub[[1L]] = as.name(paste0("g", jsub[[1L]])) if (length(jsub)>=3L) { # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok + # do not evaluate vars present as columns in x for (i in 3:length(jsub)) { - if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% sdvars)) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 + if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% c(sdvars, names_x))) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 } } } From 1e487ff1066deb505e0eee55b63cdeeecfca0983 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:33:41 +0100 Subject: [PATCH 31/53] update eval of vars --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 6579d6fe4e..5b0c9364fc 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1797,7 +1797,7 @@ replace_dot_alias = function(e) { # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok # do not evaluate vars present as columns in x for (i in 3:length(jsub[[ii]])) { - if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% c(sdvars, names_x))) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 + if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% names_x)) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 } } } @@ -1809,7 +1809,7 @@ replace_dot_alias = function(e) { # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok # do not evaluate vars present as columns in x for (i in 3:length(jsub)) { - if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% c(sdvars, names_x))) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 + if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% names_x)) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 } } } From 2efc34cea4f09780f39b9568236833d1f9f80ba1 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:36:54 +0100 Subject: [PATCH 32/53] add spaces --- R/data.table.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5b0c9364fc..e98abd6fb5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1748,13 +1748,13 @@ replace_dot_alias = function(e) { is.null(q_named[["give.names"]]) } .ghead_ok = function(q) { - length(q)==3L && - length(q[[3L]])==1L && + length(q) == 3L && + length(q[[3L]]) == 1L && noCall_noVars(q[[3L]]) } ".g[_ok" = function(q, x) { - length(q)==3L && - length(q[[3L]])==1L && + length(q) == 3L && + length(q[[3L]]) == 1L && noCall_noVars(q[[3L]]) && (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q[[3L]], parent.frame(3L))>0L From 6f69350665a42e0ed4873c8b3ce44b89e6437769 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:37:59 +0100 Subject: [PATCH 33/53] overwrite call --- R/data.table.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e98abd6fb5..3ca30cb35d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1741,11 +1741,11 @@ replace_dot_alias = function(e) { # Apply GForce noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L))==0) && !dotN(expr) .gshift_ok = function(q) { - q_named = match.call(shift, q) - noCall_noVars(q_named[["n"]]) && - noCall_noVars(q_named[["fill"]]) && - noCall_noVars(q_named[["type"]]) && - is.null(q_named[["give.names"]]) + q = match.call(shift, q) + noCall_noVars(q[["n"]]) && + noCall_noVars(q[["fill"]]) && + noCall_noVars(q[["type"]]) && + is.null(q[["give.names"]]) } .ghead_ok = function(q) { length(q) == 3L && @@ -1760,9 +1760,9 @@ replace_dot_alias = function(e) { eval(q[[3L]], parent.frame(3L))>0L } .gweighted.mean_ok = function(q, x) { #3977 - q_named = match.call(function(x, w, ..., na.rm=FALSE) {}, q) - noCall_noVars(q_named[["na.rm"]]) && - (is.null(q_named[["w"]]) || eval(call('is.numeric', q_named[["w"]]), envir=x)) + q = match.call(function(x, w, ..., na.rm=FALSE) {}, q) + noCall_noVars(q[["na.rm"]]) && + (is.null(q[["w"]]) || eval(call('is.numeric', q[["w"]]), envir=x)) } .gforce_ok = function(q) { if (dotN(q)) return(TRUE) # For #334 From 4eaf67b7cc05f22d5bdec95e1648488a944ccafa Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:39:05 +0100 Subject: [PATCH 34/53] all.vars==0L and unique=FALSE --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 3ca30cb35d..169227a2ab 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1739,7 +1739,7 @@ replace_dot_alias = function(e) { GForce = FALSE } else { # Apply GForce - noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L))==0) && !dotN(expr) + noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && !dotN(expr) .gshift_ok = function(q) { q = match.call(shift, q) noCall_noVars(q[["n"]]) && @@ -1757,7 +1757,7 @@ replace_dot_alias = function(e) { length(q[[3L]]) == 1L && noCall_noVars(q[[3L]]) && (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && - eval(q[[3L]], parent.frame(3L))>0L + eval(q[[3L]], parent.frame(3L)) > 0L } .gweighted.mean_ok = function(q, x) { #3977 q = match.call(function(x, w, ..., na.rm=FALSE) {}, q) From 1dd0ca1f508af90765bdec3727a2da43016f42d2 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 14:53:13 +0100 Subject: [PATCH 35/53] add comment about noCall_noVars --- R/data.table.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/data.table.R b/R/data.table.R index 169227a2ab..930dabdf9f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1739,6 +1739,9 @@ replace_dot_alias = function(e) { GForce = FALSE } else { # Apply GForce + # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547 + # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list + # Unsafe cases: functions containing variables [c(i), abs(i)], .N noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && !dotN(expr) .gshift_ok = function(q) { q = match.call(shift, q) From c31387c6471e618311aa3a9601ef142d8be38e5d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 4 Jan 2024 14:53:38 +0100 Subject: [PATCH 36/53] shorten switch Co-authored-by: Michael Chirico --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 169227a2ab..4172455497 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1775,7 +1775,7 @@ replace_dot_alias = function(e) { switch(as.character(q[[1L]]), "shift" = .gshift_ok(q), "weighted.mean" = .gweighted.mean_ok(q, x), - "head" = .ghead_ok(q), + "tail" = , "head" = .ghead_ok(q), "tail" = .ghead_ok(q), "[" = `.g[_ok`(q, x), "[[" = `.g[_ok`(q, x), From 7f5f9041dc907038ad5373b17249019c5d3eb2b5 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:35:32 +0100 Subject: [PATCH 37/53] add extra check to noCall_noVars --- R/data.table.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e9d2eef4c1..e073534544 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1742,7 +1742,11 @@ replace_dot_alias = function(e) { # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547 # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list # Unsafe cases: functions containing variables [c(i), abs(i)], .N - noCall_noVars = function(expr) (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && !dotN(expr) + noCall_noVars = function(expr, check) { + (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && + !dotN(expr) && + (missing(check) || check(expr)) + } .gshift_ok = function(q) { q = match.call(shift, q) noCall_noVars(q[["n"]]) && @@ -1752,13 +1756,11 @@ replace_dot_alias = function(e) { } .ghead_ok = function(q) { length(q) == 3L && - length(q[[3L]]) == 1L && - noCall_noVars(q[[3L]]) + noCall_noVars(q[[3L]], function(x_) length(x_) == 1L) } ".g[_ok" = function(q, x) { length(q) == 3L && - length(q[[3L]]) == 1L && - noCall_noVars(q[[3L]]) && + noCall_noVars(q[[3L]], function(x_) length(x_) == 1L) && (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q[[3L]], parent.frame(3L)) > 0L } @@ -1779,7 +1781,6 @@ replace_dot_alias = function(e) { "shift" = .gshift_ok(q), "weighted.mean" = .gweighted.mean_ok(q, x), "tail" = , "head" = .ghead_ok(q), - "tail" = .ghead_ok(q), "[" = `.g[_ok`(q, x), "[[" = `.g[_ok`(q, x), FALSE From c0171a08b825407aa8b852ba9f85b011bbd909ed Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:36:15 +0100 Subject: [PATCH 38/53] rename noCall_noVars --- R/data.table.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e073534544..e28431d888 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1742,31 +1742,31 @@ replace_dot_alias = function(e) { # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547 # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list # Unsafe cases: functions containing variables [c(i), abs(i)], .N - noCall_noVars = function(expr, check) { + is_constantish = function(expr, check) { (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && !dotN(expr) && (missing(check) || check(expr)) } .gshift_ok = function(q) { q = match.call(shift, q) - noCall_noVars(q[["n"]]) && - noCall_noVars(q[["fill"]]) && - noCall_noVars(q[["type"]]) && + is_constantish(q[["n"]]) && + is_constantish(q[["fill"]]) && + is_constantish(q[["type"]]) && is.null(q[["give.names"]]) } .ghead_ok = function(q) { length(q) == 3L && - noCall_noVars(q[[3L]], function(x_) length(x_) == 1L) + is_constantish(q[[3L]], function(x_) length(x_) == 1L) } ".g[_ok" = function(q, x) { length(q) == 3L && - noCall_noVars(q[[3L]], function(x_) length(x_) == 1L) && + is_constantish(q[[3L]], function(x_) length(x_) == 1L) && (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && eval(q[[3L]], parent.frame(3L)) > 0L } .gweighted.mean_ok = function(q, x) { #3977 q = match.call(function(x, w, ..., na.rm=FALSE) {}, q) - noCall_noVars(q[["na.rm"]]) && + is_constantish(q[["na.rm"]]) && (is.null(q[["w"]]) || eval(call('is.numeric', q[["w"]]), envir=x)) } .gforce_ok = function(q) { @@ -1775,7 +1775,7 @@ replace_dot_alias = function(e) { # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && noCall_noVars(q[[3L]]))) return(TRUE) + if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && is_constantish(q[[3L]]))) return(TRUE) # ^^ base::startWith errors on NULL unfortunately switch(as.character(q[[1L]]), "shift" = .gshift_ok(q), From 54d9c7f829b44904a1803530f18cd9c55f82afb0 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:37:03 +0100 Subject: [PATCH 39/53] simplify switch --- R/data.table.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e28431d888..2534a9e1cd 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1781,8 +1781,7 @@ replace_dot_alias = function(e) { "shift" = .gshift_ok(q), "weighted.mean" = .gweighted.mean_ok(q, x), "tail" = , "head" = .ghead_ok(q), - "[" = `.g[_ok`(q, x), - "[[" = `.g[_ok`(q, x), + "[[" = , "[" = `.g[_ok`(q, x), FALSE ) } From 35284b3fae02b3bc2f231256bc360983609db6c3 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:42:53 +0100 Subject: [PATCH 40/53] update match.call gweighted.mean --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 2534a9e1cd..971c808012 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1765,7 +1765,7 @@ replace_dot_alias = function(e) { eval(q[[3L]], parent.frame(3L)) > 0L } .gweighted.mean_ok = function(q, x) { #3977 - q = match.call(function(x, w, ..., na.rm=FALSE) {}, q) + q = match.call(gweighted.mean, q) is_constantish(q[["na.rm"]]) && (is.null(q[["w"]]) || eval(call('is.numeric', q[["w"]]), envir=x)) } From f58e1fdc4e473df841270d6779f497baf04de6df Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:47:21 +0100 Subject: [PATCH 41/53] simplify --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 971c808012..119df66a77 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1761,7 +1761,7 @@ replace_dot_alias = function(e) { ".g[_ok" = function(q, x) { length(q) == 3L && is_constantish(q[[3L]], function(x_) length(x_) == 1L) && - (q[[1L]] == "[" || (q[[1L]] == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && + (q[[1L]] != "[[" || eval(call('is.atomic', q[[2L]]), envir=x)) && eval(q[[3L]], parent.frame(3L)) > 0L } .gweighted.mean_ok = function(q, x) { #3977 From bbf11d0535a3e95fa0823925c5939d4b70bd951f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 15:51:27 +0100 Subject: [PATCH 42/53] rename zip and name args --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 119df66a77..7f4fed1f74 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1924,9 +1924,9 @@ replace_dot_alias = function(e) { } else if (.is_nrows(jsub)) { g = lapply(g, rep.int, times=len__) # unpack list of lists for nrows functions - zip = function(ll) do.call(mapply, c(list(c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE)) + zip_items = function(ll) do.call(mapply, c(list(FUN = c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE)) if (all(vapply_1b(ans, is.list))) { - ans = lapply(ans, zip) + ans = lapply(ans, zip_items) } } ans = c(g, ans) From a1bb4588bd7490f4d7a4de852f78b7492e457d12 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 4 Jan 2024 07:20:38 -0800 Subject: [PATCH 43/53] remove redundant switch() entry --- R/data.table.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 4172455497..eeb00a4bae 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1776,7 +1776,6 @@ replace_dot_alias = function(e) { "shift" = .gshift_ok(q), "weighted.mean" = .gweighted.mean_ok(q, x), "tail" = , "head" = .ghead_ok(q), - "tail" = .ghead_ok(q), "[" = `.g[_ok`(q, x), "[[" = `.g[_ok`(q, x), FALSE From feee5356792990cc627eeba7cc221345c3d7da98 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 16:46:39 +0100 Subject: [PATCH 44/53] deduplicate code --- R/data.table.R | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 7f4fed1f74..2e45d5b576 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1791,30 +1791,27 @@ replace_dot_alias = function(e) { if (!.gforce_ok(jsub[[ii]])) {GForce = FALSE; break} } } else GForce = .gforce_ok(jsub) + gforce_jsub = function(x, names_x) { + x[[1L]] = as.name(paste0("g", x[[1L]])) + # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok + # do not evaluate vars present as columns in x + if (length(x)>=3L) { + for (i in 3:length(x)) { + if (is.symbol(x[[i]]) && !(x[[i]] %chin% names_x)) x[[i]] = eval(x[[i]], parent.frame(2L)) # tests 1187.2 & 1187.4 + } + } + x + } if (GForce) { if (jsub[[1L]]=="list") for (ii in seq_along(jsub)[-1L]) { if (dotN(jsub[[ii]])) next; # For #334 - jsub[[ii]][[1L]] = as.name(paste0("g", jsub[[ii]][[1L]])) - if (length(jsub[[ii]])>=3L) { - # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok - # do not evaluate vars present as columns in x - for (i in 3:length(jsub[[ii]])) { - if(is.symbol(jsub[[ii]][[i]]) && !(jsub[[ii]][[i]] %chin% names_x)) jsub[[ii]][[i]] = eval(jsub[[ii]][[i]], parent.frame()) # tests 1187.2 & 1187.4 - } - } + jsub[[ii]] = gforce_jsub(jsub[[ii]], names_x) } else { # adding argument to ghead/gtail if none is supplied to g-optimized head/tail if (length(jsub) == 2L && jsub[[1L]] %chin% c("head", "tail")) jsub[["n"]] = 6L - jsub[[1L]] = as.name(paste0("g", jsub[[1L]])) - if (length(jsub)>=3L) { - # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok - # do not evaluate vars present as columns in x - for (i in 3:length(jsub)) { - if(is.symbol(jsub[[i]]) && !(jsub[[i]] %chin% names_x)) jsub[[i]] = eval(jsub[[i]], parent.frame()) # tests 1187.3 & 1187.5 - } - } + jsub = gforce_jsub(jsub, names_x) } if (verbose) catf("GForce optimized j to '%s'\n", deparse(jsub, width.cutoff=200L, nlines=1L)) } else if (verbose) catf("GForce is on, left j unchanged\n"); From e3938a896bc58a1a72a226968af72165412eff8f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 4 Jan 2024 16:50:50 +0100 Subject: [PATCH 45/53] update g[_ok signature --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 2e45d5b576..7831fd3a5f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1758,7 +1758,7 @@ replace_dot_alias = function(e) { length(q) == 3L && is_constantish(q[[3L]], function(x_) length(x_) == 1L) } - ".g[_ok" = function(q, x) { + `.g[_ok` = function(q, x) { length(q) == 3L && is_constantish(q[[3L]], function(x_) length(x_) == 1L) && (q[[1L]] != "[[" || eval(call('is.atomic', q[[2L]]), envir=x)) && From d68cb36fb9dba743ae943e77ef8a8e2690502823 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 5 Jan 2024 02:03:26 -0800 Subject: [PATCH 46/53] whitespace suggestion --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 7831fd3a5f..3f45dc34a0 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1795,7 +1795,7 @@ replace_dot_alias = function(e) { x[[1L]] = as.name(paste0("g", x[[1L]])) # gforce needs to evaluate arguments before calling C part TODO: move the evaluation into gforce_ok # do not evaluate vars present as columns in x - if (length(x)>=3L) { + if (length(x) >= 3L) { for (i in 3:length(x)) { if (is.symbol(x[[i]]) && !(x[[i]] %chin% names_x)) x[[i]] = eval(x[[i]], parent.frame(2L)) # tests 1187.2 & 1187.4 } From 285d132fce8c0fb9c5bb99f2f479f5ef42da04ad Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 5 Jan 2024 02:04:11 -0800 Subject: [PATCH 47/53] just check if 'give.names' in names --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 3f45dc34a0..701849f7e8 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1752,7 +1752,7 @@ replace_dot_alias = function(e) { is_constantish(q[["n"]]) && is_constantish(q[["fill"]]) && is_constantish(q[["type"]]) && - is.null(q[["give.names"]]) + !"give.names" %chin% names(q) } .ghead_ok = function(q) { length(q) == 3L && From 1a5eacdb27e952c4c0e705c2a79bd1948b866745 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 5 Jan 2024 18:06:43 +0800 Subject: [PATCH 48/53] Change check= to check_singleton= --- R/data.table.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 701849f7e8..27301c5cdc 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1742,10 +1742,10 @@ replace_dot_alias = function(e) { # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547 # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list # Unsafe cases: functions containing variables [c(i), abs(i)], .N - is_constantish = function(expr, check) { - (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE))==0L) && + is_constantish = function(expr, check_singleton = FALSE) { + (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE)) == 0L) && !dotN(expr) && - (missing(check) || check(expr)) + (!check_singleton || length(expr) == 1L) } .gshift_ok = function(q) { q = match.call(shift, q) @@ -1756,11 +1756,11 @@ replace_dot_alias = function(e) { } .ghead_ok = function(q) { length(q) == 3L && - is_constantish(q[[3L]], function(x_) length(x_) == 1L) + is_constantish(q[[3L]], check_singleton = TRUE) } `.g[_ok` = function(q, x) { length(q) == 3L && - is_constantish(q[[3L]], function(x_) length(x_) == 1L) && + is_constantish(q[[3L]], check_singleton = TRUE) && (q[[1L]] != "[[" || eval(call('is.atomic', q[[2L]]), envir=x)) && eval(q[[3L]], parent.frame(3L)) > 0L } From 999448d7170a3b243503cd3279da218b51546ce8 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 5 Jan 2024 12:04:46 +0100 Subject: [PATCH 49/53] name argument --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4bc7bf8c67..cb74b3109e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6740,10 +6740,10 @@ test(1463.70, shift(x, 1, fill=1, type="cyclic"), c(5L, 1L:4L), warning="Provide x=as.raw(1:5) test(1463.71, shift(x,1L), as.raw(c(0L, 1:4))) test(1463.72, shift(x,1:2), list(as.raw(c(0L, 1:4)), as.raw(c(0L, 0L, 1:3)))) -test(1463.73, shift(x,1L, 0L), as.raw(c(0L, 1:4))) +test(1463.73, shift(x,1L, fill=0L), as.raw(c(0L, 1:4))) test(1463.74, shift(x,1L, type="lead"), as.raw(c(2:5, 0L))) test(1463.75, shift(x,1:2, type="lead"), list(as.raw(c(2:5, 0L)), as.raw(c(3:5, 0L, 0L)))) -test(1463.76, shift(x,1L, 0L,type="lead"), as.raw(c(2:5, 0L))) +test(1463.76, shift(x,1L, fill=0L,type="lead"), as.raw(c(2:5, 0L))) test(1463.77, shift(x,1L, type="cyclic"), as.raw(c(5, 1:4))) test(1463.78, shift(x,1:2, type="cyclic"), list(as.raw(c(5, 1:4)), as.raw(c(4:5, 1:3)))) test(1463.79, shift(x,-1L, type="cyclic"), as.raw(c(2:5, 1))) From 091357934fbf3d030e1e90e8d82d0d86734d107e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 5 Jan 2024 12:21:12 +0100 Subject: [PATCH 50/53] update check constantish --- R/data.table.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 27301c5cdc..6e2ebc3330 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1742,10 +1742,15 @@ replace_dot_alias = function(e) { # GForce needs to evaluate all arguments not present in the data.table before calling C part #5547 # Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list # Unsafe cases: functions containing variables [c(i), abs(i)], .N - is_constantish = function(expr, check_singleton = FALSE) { - (!is.call(expr) || length(all.vars(expr, max.names=1L, unique=FALSE)) == 0L) && - !dotN(expr) && - (!check_singleton || length(expr) == 1L) + is_constantish = function(expr, check_singleton=FALSE) { + if (!is.call(expr)) { + return(!dotN(expr)) + } + if (check_singleton) { + return(FALSE) + } + # calls are allowed <=> there's no SYMBOLs in the sub-AST + return(length(all.vars(expr, max.names=1L, unique=FALSE))==0L) } .gshift_ok = function(q) { q = match.call(shift, q) From 4c08cbb8274a24db07eebaa7363c0a15871369aa Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 5 Jan 2024 12:25:42 +0100 Subject: [PATCH 51/53] update NEWS item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b2c6a24c7d..78036ac205 100644 --- a/NEWS.md +++ b/NEWS.md @@ -168,7 +168,7 @@ 28. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR. -29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report which was fixed before release. +29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report, [#5547](https://github.com/Rdatatable/data.table/issues/5547) which was fixed before release. This helped also in improving the logic for when to turn on optimization by group in general, making it more robust. ```R N = 1e7 From e31e9420da004d97a7c7ebb4727b43c2183c097b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 5 Jan 2024 12:34:12 +0100 Subject: [PATCH 52/53] standardize spelling --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 78036ac205..bc7f467cd3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -168,12 +168,12 @@ 28. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR. -29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report, [#5547](https://github.com/Rdatatable/data.table/issues/5547) which was fixed before release. This helped also in improving the logic for when to turn on optimization by group in general, making it more robust. +29. `shift()` is now optimized by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. Thanks to @neovom for testing dev and filing a bug report, [#5547](https://github.com/Rdatatable/data.table/issues/5547) which was fixed before release. This helped also in improving the logic for when to turn on optimization by group in general, making it more robust. ```R N = 1e7 DT = data.table(x=sample(N), y=sample(1e6,N,TRUE)) - shift_no_opt = shift # different name not optimised as a way to compare + shift_no_opt = shift # different name not optimized as a way to compare microbenchmark( DT[, c(NA, head(x,-1)), y], DT[, shift_no_opt(x, 1, type="lag"), y], @@ -263,7 +263,7 @@ # 2: 2 4 b ``` -34. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. +34. `weighted.mean()` is now optimized by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. 35. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. From 9d2f6154c0fc9506b613d0a1a42e2eb7f4292370 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 5 Jan 2024 19:59:46 +0800 Subject: [PATCH 53/53] infix spacing --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 6e2ebc3330..77fabf83be 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1750,7 +1750,7 @@ replace_dot_alias = function(e) { return(FALSE) } # calls are allowed <=> there's no SYMBOLs in the sub-AST - return(length(all.vars(expr, max.names=1L, unique=FALSE))==0L) + return(length(all.vars(expr, max.names=1L, unique=FALSE)) == 0L) } .gshift_ok = function(q) { q = match.call(shift, q)