From 9905f215c87da5fa839225a162200a18d4f96eb3 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 17 Sep 2019 18:35:58 -0700 Subject: [PATCH 1/8] interim --- R/data.table.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5ad0292329..384162ed59 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1284,17 +1284,25 @@ replace_dot_alias = function(e) { setattr(jval,"names",NULL) jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? } else { - if (is.null(jvnames)) jvnames=names(jval) - lenjval = vapply(jval, length, 0L) - nulljval = vapply(jval, is.null, FALSE) - if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) { - jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items - jvnames = jvnames[!nulljval] # fix for #1477 - } else { - # all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C - setDT(jval) - } + #if (is.null(jvnames)) jvnames=names(jval) + #nulljval = vapply(jval, is.null, FALSE) + jval = as.data.table.list(jval) + jvnames = names(jval) + #jvnames = jvnames[!nulljval] # fix for #1477 } + # if (is.null(jvnames)) jvnames=names(jval) + # lenjval = vapply(jval, length, 0L) + # nulljval = vapply(jval, is.null, FALSE) + # if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) { + # cat("CALLING as.data.table.list(jval)\n") + # jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items + # jvnames = jvnames[!nulljval] # fix for #1477 + # } else { + # cat("CALLING setDT(jval)\n") + # # all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C + # setDT(jval) + # } + #} if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) ww = which(jvnames=="") if (any(ww)) jvnames[ww] = paste0("V",ww) From c11d12270246f2f800693f18836be4ccece0895b Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 00:01:04 -0700 Subject: [PATCH 2/8] interim --- R/data.table.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 384162ed59..42d4180431 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1283,11 +1283,19 @@ replace_dot_alias = function(e) { if (is.atomic(jval)) { setattr(jval,"names",NULL) jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? + # setnames(jval, if (is.null(jvnames) || jnames=="") "V1" else jvnames) + if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) + ww = which(jvnames=="") + if (any(ww)) jvnames[ww] = paste0("V",ww) + setnames(jval, jvnames) } else { - #if (is.null(jvnames)) jvnames=names(jval) + #browser() + #if (!is.null(jvnames)) jvnames=names(jval) #nulljval = vapply(jval, is.null, FALSE) - jval = as.data.table.list(jval) - jvnames = names(jval) + if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) + jval = as.data.table.list(jval, .named=NULL) + #jval = as.data.table.list(jval) + #jvnames = names(jval) #jvnames = jvnames[!nulljval] # fix for #1477 } # if (is.null(jvnames)) jvnames=names(jval) @@ -1303,10 +1311,10 @@ replace_dot_alias = function(e) { # setDT(jval) # } #} - if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) - ww = which(jvnames=="") - if (any(ww)) jvnames[ww] = paste0("V",ww) - setnames(jval, jvnames) + #if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) + #ww = which(jvnames=="") + #if (any(ww)) jvnames[ww] = paste0("V",ww) + #setnames(jval, jvnames) } if (is.data.table(jval)) { From 9ff93586607fe80c8d9c9a8f897042dbfe9cfd25 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 10:53:24 -0700 Subject: [PATCH 3/8] interim. passing. --- R/as.data.table.R | 5 +++-- R/data.table.R | 2 +- inst/tests/tests.Rraw | 9 ++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index c077361be2..359e0ec6fe 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -167,9 +167,10 @@ as.data.table.list = function(x, } vnames = character(ncol) k = 1L + n_null = 0L for(i in seq_len(n)) { xi = x[[i]] - if (is.null(xi)) next + if (is.null(xi)) { n_null = n_null+1L; next } if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow warning("Item ", i, " has ", eachnrow[i], " rows but longest item has ", nrow, "; recycled with remainder.") if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) @@ -183,7 +184,7 @@ as.data.table.list = function(x, } } else { nm = names(x)[i] - vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now + vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i-n_null) # i (not k) tested by 2058.14 to be the same as the past for now ans[[k]] = recycle(xi, nrow) k = k+1L } diff --git a/R/data.table.R b/R/data.table.R index 42d4180431..312832dbc5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1289,7 +1289,7 @@ replace_dot_alias = function(e) { if (any(ww)) jvnames[ww] = paste0("V",ww) setnames(jval, jvnames) } else { - #browser() + # browser() #if (!is.null(jvnames)) jvnames=names(jval) #nulljval = vapply(jval, is.null, FALSE) if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c80a64b83e..c28716dc10 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15149,6 +15149,9 @@ L = list(1:3, 4:6, 7:9, 10:12) names(L) = c("","foo","","foo") test(2058.17, as.data.table(L), setnames(data.table(1:3, 4:6, 7:9, 10:12),c("","foo","","foo"))) # no +L = list(1:3, NULL, 4:6) +test(2058.18, length(L), 3L) +test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no # rbindlist improved error message, #3638 DT = data.table(a=1) @@ -15823,8 +15826,10 @@ test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L)) test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L)) # simple queries can create tables with columns sharing the same address, #3766 +# these tests were new in 1.12.4 dev. In late stage before release 2087.1 was changed to avoid the share for #3890 +# when #617 is done it will change back to being the same address x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1) -test(2087.1, x[a == 1L, .(b, b2=b)][ , identical(address(b), address(b2))]) +test(2087.1, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)]) # setkey detects and copies shared address columns, #3496 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE) x$b = x$a @@ -15837,6 +15842,8 @@ x$c = x$a setDT(x) test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"), output='Found and copied 2 columns with a shared memory address') +# follow-up which seems to do with function body, #3890 +# ... # clear '.data.table.locked' even when is.null(irows), #2245 x = data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30)) From ad0d71fbb4a115a5011b7709bd691f3ef340c546 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 11:10:13 -0700 Subject: [PATCH 4/8] more removal. passing --- R/data.table.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 312832dbc5..f3d1c22647 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1281,13 +1281,13 @@ replace_dot_alias = function(e) { )) jval = lapply(jval, `[`, 0L) if (is.atomic(jval)) { - setattr(jval,"names",NULL) + setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? - # setnames(jval, if (is.null(jvnames) || jnames=="") "V1" else jvnames) - if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) - ww = which(jvnames=="") - if (any(ww)) jvnames[ww] = paste0("V",ww) - setnames(jval, jvnames) + setnames(jval, if (is.null(jvnames) || jvnames=="") "V1" else jvnames) # e.g. jvnames=="N" for DT[,.N,] + #if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) + #ww = which(jvnames=="") + #if (any(ww)) jvnames[ww] = paste0("V",ww) + #setnames(jval, jvnames) } else { # browser() #if (!is.null(jvnames)) jvnames=names(jval) From a09641dd25a035cbde2147e3e6d498c63d544308 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 11:12:29 -0700 Subject: [PATCH 5/8] remove old code --- R/data.table.R | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f3d1c22647..e6388dcb5d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1284,37 +1284,10 @@ replace_dot_alias = function(e) { setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? setnames(jval, if (is.null(jvnames) || jvnames=="") "V1" else jvnames) # e.g. jvnames=="N" for DT[,.N,] - #if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) - #ww = which(jvnames=="") - #if (any(ww)) jvnames[ww] = paste0("V",ww) - #setnames(jval, jvnames) } else { - # browser() - #if (!is.null(jvnames)) jvnames=names(jval) - #nulljval = vapply(jval, is.null, FALSE) if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) jval = as.data.table.list(jval, .named=NULL) - #jval = as.data.table.list(jval) - #jvnames = names(jval) - #jvnames = jvnames[!nulljval] # fix for #1477 } - # if (is.null(jvnames)) jvnames=names(jval) - # lenjval = vapply(jval, length, 0L) - # nulljval = vapply(jval, is.null, FALSE) - # if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) { - # cat("CALLING as.data.table.list(jval)\n") - # jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items - # jvnames = jvnames[!nulljval] # fix for #1477 - # } else { - # cat("CALLING setDT(jval)\n") - # # all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C - # setDT(jval) - # } - #} - #if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) - #ww = which(jvnames=="") - #if (any(ww)) jvnames[ww] = paste0("V",ww) - #setnames(jval, jvnames) } if (is.data.table(jval)) { From 372cdcfdc7937c9d8f0dc04535e2c00aad0650b2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 11:22:11 -0700 Subject: [PATCH 6/8] simplified and did the todo --- R/data.table.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e6388dcb5d..a3ae2cc0dd 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1282,12 +1282,10 @@ replace_dot_alias = function(e) { jval = lapply(jval, `[`, 0L) if (is.atomic(jval)) { setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name - jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? - setnames(jval, if (is.null(jvnames) || jvnames=="") "V1" else jvnames) # e.g. jvnames=="N" for DT[,.N,] - } else { - if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) - jval = as.data.table.list(jval, .named=NULL) + jval = list(jval) } + if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) # e.g. jvnames=="N" for DT[,.N,] + jval = as.data.table.list(jval, .named=NULL) } if (is.data.table(jval)) { From a6ed2856379c521ebe929b57e170d907f4874435 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 11:45:00 -0700 Subject: [PATCH 7/8] added tests verbatim from #3890 --- inst/tests/tests.Rraw | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c28716dc10..ce766eb142 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15829,21 +15829,33 @@ test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L)) # these tests were new in 1.12.4 dev. In late stage before release 2087.1 was changed to avoid the share for #3890 # when #617 is done it will change back to being the same address x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1) -test(2087.1, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)]) +test(2087.01, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)]) # setkey detects and copies shared address columns, #3496 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE) x$b = x$a setDT(x) -test(2087.2, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"), - output='Found and copied 1 column with a shared memory address') +test(2087.02, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"), + output='Found and copied 1 column with a shared memory address') x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE) x$b = x$a x$c = x$a setDT(x) -test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"), - output='Found and copied 2 columns with a shared memory address') -# follow-up which seems to do with function body, #3890 -# ... +test(2087.03, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"), + output='Found and copied 2 columns with a shared memory address') +# follow-up from #3890; function body and variable in calling scope +f = function(flag=FALSE) { + dt1 = data.table(a = 1) + dt2 = data.table(a = 1) + dt3 = dt1[, .(a, b = 0)] # (**) + if (flag) dt3[dt2, b := 999, on = "a"] + gsub(" ","",as.character(body(f)[[4L]])) # (**) above; remove spaces just to isolate from potential future formatting changes in R +} +test(2087.10, f(TRUE), c("=","dt3","dt1[,.(a,b=0)]")) # was "dt1[,.(a,b=999)]" in v1.12.2 +value = 0 +dt1 = data.table(a = 1) +dt2 = dt1[, .(a, b = ..value)] +dt2[1, b := 999] +test(2087.11, value, 0) # was 999 in v1.12.2 # clear '.data.table.locked' even when is.null(irows), #2245 x = data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30)) From 055e683f9ee379a6635a3a930ae51a502295220e Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 18 Sep 2019 12:32:09 -0700 Subject: [PATCH 8/8] news item --- NEWS.md | 2 ++ R/as.data.table.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 11a6e69999..14f676a863 100644 --- a/NEWS.md +++ b/NEWS.md @@ -321,6 +321,8 @@ 42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report. +43. `:=` could change a `data.table` creation statement in the body of the function calling it, or a variable in calling scope, [#3890](https://github.com/Rdatatable/data.table/issues/3890). Many thanks to @kirillmayantsev for the detailed reports. + ## NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/as.data.table.R b/R/as.data.table.R index 359e0ec6fe..71cbc310b1 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -161,7 +161,7 @@ as.data.table.list = function(x, # TODO: port this as.data.table.list() to C and use MAYBE_REFERENCED(x) || ALTREP(x) to save some copies. # That saving used to be done by CcopyNamedInList but the copies happened again as well, so removing CcopyNamedInList is # not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED - # again in future. + # again in future, for #617. } if (identical(x,list())) vector("list", nrow) else rep(x, length.out=nrow) # new objects don't need copy }