From 0471c4d53cd5b6f107894d008c0b4c95bc0a660f Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Sun, 17 Dec 2023 23:20:10 +1100 Subject: [PATCH 01/17] Fix UB via int conversion (#5510) Note that some double -> integer64 conversions are collapsed into the one warning, affecting tests, so these have been updated. --- inst/tests/nafill.Rraw | 2 +- inst/tests/tests.Rraw | 12 ++++++------ src/assign.c | 14 +++++++------- src/data.table.h | 1 + src/utils.c | 7 ++++++- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index d2ee592ccc..07a70a78f4 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -187,7 +187,7 @@ if (test_bit64) { test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) - test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) + test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning=c("out-of-range", "precision lost")) test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9d194346ba..0b0765feef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -103,7 +103,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { year = data.table::year # lubridate yearmon = data.table::yearmon # zoo yearqtr = data.table::yearqtr # zoo - + rm_all = function(env=parent.frame()) { tt = setdiff(ls(envir=env), .do_not_rm) rm(list=tt, envir=env) @@ -3886,7 +3886,7 @@ test(1133.75, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5, # on a new column with warning on 2nd assign DT[,new:=NULL] test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(7,7,7,7,7,3,3)), - warning="Group 2 column 'new': 3.4.*double.*at RHS position 1 truncated.*precision lost.*integer") + warning="Group 2 column 'new': 3.4.*double.*at RHS position.*truncated.*precision lost.*integer") # Fix for FR #2496 - catch `{` in `:=` expression in `j`: DT <- data.table(x=c("A", "A", "B", "B"), val =1:4) @@ -5071,7 +5071,7 @@ dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a test(1294.01, dt[, a := 1]$a, rep(1L, 3L)) test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), - warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'") + warning="1.5.*double.*position 1.*truncated.*integer.*column 1 named 'a'") test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("Coercing 'character' RHS to 'integer'.*column 1 named 'a'", @@ -9668,7 +9668,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") { gc() # no longer needed but left in place just in case, no harm } -dt1 = nq_fun(100L) # 400 reduced to 100, #5517 +dt1 = nq_fun(100L) # 400 reduced to 100, #5517 dt2 = nq_fun(50L) x = na.omit(dt1) y = na.omit(dt2) @@ -14365,7 +14365,7 @@ DT[,foo:=factor(c("a","b","c"))] test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") -test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*integer.*column 1 named 'a'") +test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1.*truncated.*integer.*column 1 named 'a'") test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'") test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") @@ -18004,7 +18004,7 @@ test(2233.34, copy(DT)[, same_value:=value[1], by=.(by1, by2), verbose=TRUE], test(2233.35, copy(DT)[, same_value:=value[1], by=.(by2, by1), verbose=TRUE], ans, output=out) test(2233.36, copy(DT)[, same_value:=value[1], keyby=.(by2, by1), verbose=TRUE], setkey(ans,by2,by1), output=out) # similar to #5307 using integer -DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14) +DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14) test(2233.37, copy(DT)[, val:=v[1L], by=.(A,B), verbose=TRUE], copy(DT)[, val:=11:14], output=out) test(2233.38, copy(DT)[, val:=v[1L], keyby=.(A,B), verbose=TRUE], data.table(A=INT(1,1,2,2), B=INT(3,5,4,6), v=INT(14,12,13,11), val=INT(14,12,13,11), key=c("A","B")), output=out) # test from #5326 but with n=100 rather than n=100000; confirmed that n=100 fails tests 2233.403-405 before fix diff --git a/src/assign.c b/src/assign.c index ce2c707dfd..b3981db82b 100644 --- a/src/assign.c +++ b/src/assign.c @@ -239,7 +239,7 @@ int checkOverAlloc(SEXP x) error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x))); if (LENGTH(x) != 1) error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x)); - int ans = isInteger(x) ? INTEGER(x)[0] : (int)REAL(x)[0]; + int ans = asInteger(x); if (ans<0) error(_("getOption('datatable.alloc')==%d. It must be >=0 and not NA."), ans); return ans; @@ -742,7 +742,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const double *sd = REAL(source); for (int i=0; inlevel)) { + if (!ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) { error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } @@ -897,14 +897,14 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (R_FINITE(val.r) && inside_int32_range(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) } break; case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && @@ -1004,8 +1004,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) - else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) - case CPLXSXP: BODY(Rcomplex, COMPLEX, int, ISNAN(val.r) ? NA_INTEGER : (int)val.r, td[i]=cval) + else BODY(double, REAL, int, (ISNAN(val) || !inside_int32_range(val)) ? NA_INTEGER : (int)val, td[i]=cval) + case CPLXSXP: BODY(Rcomplex, COMPLEX, int, (ISNAN(val.r) || !inside_int32_range(val.r)) ? NA_INTEGER : (int)val.r, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; diff --git a/src/data.table.h b/src/data.table.h index 4c9df894c0..6c5c0865cc 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -232,6 +232,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP SEXP coalesce(SEXP x, SEXP inplace); // utils.c +bool inside_int32_range(double x); bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); diff --git a/src/utils.c b/src/utils.c index e5e343ac9f..4e3a0df03a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,11 +1,16 @@ #include "data.table.h" +bool inside_int32_range(double x) { + // N.B. if x = 2147483647.99 then (int)2147483647.99 is not undefined behaviour + return x < 2147483648 && x > -2147483648; +} + static R_xlen_t firstNonInt(SEXP x) { R_xlen_t n=xlength(x), i=0; const double *dx = REAL(x); while (i Date: Sun, 17 Dec 2023 23:21:14 +1100 Subject: [PATCH 02/17] Ensure nan is not coerced. Closes #5510 --- src/gsumm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gsumm.c b/src/gsumm.c index 96d85999b6..63c65d837b 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -1164,7 +1164,7 @@ SEXP gprod(SEXP x, SEXP narmArg) { if (INHERITS(x, char_integer64)) { int64_t *ansd = (int64_t *)REAL(ans); for (int i=0; iINT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i]; + ansd[i] = (ISNAN(s[i]) || s[i]>INT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i]; } } else { double *ansd = REAL(ans); From 90b167e399c6ee3da4a99961d1ce5de327d28ddc Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Sun, 17 Dec 2023 23:43:33 +1100 Subject: [PATCH 03/17] Fix '1' mistakenly removed from earlier commit --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0b0765feef..1a6abaa480 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3886,7 +3886,7 @@ test(1133.75, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5, # on a new column with warning on 2nd assign DT[,new:=NULL] test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(7,7,7,7,7,3,3)), - warning="Group 2 column 'new': 3.4.*double.*at RHS position.*truncated.*precision lost.*integer") + warning="Group 2 column 'new': 3.4.*double.*at RHS position 1.*truncated.*precision lost.*integer") # Fix for FR #2496 - catch `{` in `:=` expression in `j`: DT <- data.table(x=c("A", "A", "B", "B"), val =1:4) From 47c87fdca1b3133bf6b99586a4c13042f1d189f6 Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Mon, 18 Dec 2023 22:39:07 +1100 Subject: [PATCH 04/17] Re #5834 --- src/assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assign.c b/src/assign.c index b3981db82b..dfb46304d9 100644 --- a/src/assign.c +++ b/src/assign.c @@ -904,7 +904,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int64_t)val!=val), "f", "truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && From 772906d132568af4b706d4b88c6df2c5761e57eb Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 00:25:36 +1100 Subject: [PATCH 05/17] Rename function more sensibly --- src/assign.c | 10 +++++----- src/data.table.h | 2 +- src/utils.c | 8 +++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/assign.c b/src/assign.c index dfb46304d9..7572b369bb 100644 --- a/src/assign.c +++ b/src/assign.c @@ -742,7 +742,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const double *sd = REAL(source); for (int i=0; inlevel)) { + if (!ISNAN(val) && (!R_FINITE(val) || !within_int32_repres(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) { error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } @@ -897,9 +897,9 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !inside_int32_range(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && inside_int32_range(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (R_FINITE(val.r) && within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) } break; case REALSXP: switch (TYPEOF(source)) { @@ -1004,8 +1004,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) - else BODY(double, REAL, int, (ISNAN(val) || !inside_int32_range(val)) ? NA_INTEGER : (int)val, td[i]=cval) - case CPLXSXP: BODY(Rcomplex, COMPLEX, int, (ISNAN(val.r) || !inside_int32_range(val.r)) ? NA_INTEGER : (int)val.r, td[i]=cval) + else BODY(double, REAL, int, (ISNAN(val) || !within_int32_repres(val)) ? NA_INTEGER : (int)val, td[i]=cval) + case CPLXSXP: BODY(Rcomplex, COMPLEX, int, (ISNAN(val.r) || !within_int32_repres(val.r)) ? NA_INTEGER : (int)val.r, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; diff --git a/src/data.table.h b/src/data.table.h index 6c5c0865cc..f0b5bb3cad 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -232,7 +232,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP SEXP coalesce(SEXP x, SEXP inplace); // utils.c -bool inside_int32_range(double x); +bool within_int32_repres(double x); bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); diff --git a/src/utils.c b/src/utils.c index 4e3a0df03a..2f3e5fcc56 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,7 +1,9 @@ #include "data.table.h" -bool inside_int32_range(double x) { - // N.B. if x = 2147483647.99 then (int)2147483647.99 is not undefined behaviour +bool within_int32_repres(double x) { + // N.B. (int)2147483647.99 is not undefined behaviour since s 6.3.1.4 of the C + // standard states that behaviour is undefined only if the integral part of a + // finite value of standard floating type cannot be represented. return x < 2147483648 && x > -2147483648; } @@ -10,7 +12,7 @@ static R_xlen_t firstNonInt(SEXP x) { const double *dx = REAL(x); while (i Date: Tue, 19 Dec 2023 00:27:57 +1100 Subject: [PATCH 06/17] Incorporate finite checks into the logic --- src/assign.c | 6 +++--- src/utils.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/assign.c b/src/assign.c index 7572b369bb..755a7b151a 100644 --- a/src/assign.c +++ b/src/assign.c @@ -742,7 +742,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const double *sd = REAL(source); for (int i=0; inlevel)) { + if (!ISNAN(val) && (!within_int32_repres(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) { error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } @@ -897,9 +897,9 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || !within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) } break; case REALSXP: switch (TYPEOF(source)) { diff --git a/src/utils.c b/src/utils.c index 2f3e5fcc56..530a6c6cc6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -4,7 +4,7 @@ bool within_int32_repres(double x) { // N.B. (int)2147483647.99 is not undefined behaviour since s 6.3.1.4 of the C // standard states that behaviour is undefined only if the integral part of a // finite value of standard floating type cannot be represented. - return x < 2147483648 && x > -2147483648; + return R_FINITE(x) && x < 2147483648 && x > -2147483648; } static R_xlen_t firstNonInt(SEXP x) { @@ -12,7 +12,7 @@ static R_xlen_t firstNonInt(SEXP x) { const double *dx = REAL(x); while (i Date: Tue, 19 Dec 2023 00:30:45 +1100 Subject: [PATCH 07/17] Suspend int64 coerce tests subject to #5834 --- inst/tests/nafill.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 07a70a78f4..631df7441a 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -181,14 +181,14 @@ if (test_bit64) { test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA)))) test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) - test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) + # test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) - test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning=c("out-of-range", "precision lost")) - test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) + # test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning=c("out-of-range", "precision lost")) + # test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) } # nan argument to treat NaN as NA in nafill, #4020 From f0238b44ecfbf0ae3e2d75398cdaee0f44c6baea Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 12:29:50 +1100 Subject: [PATCH 08/17] Update CODEOWNERS --- CODEOWNERS | 97 +++++++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 5d98e02422..12329119d9 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,45 +1,52 @@ -# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners -* @mattdowle - -# melt -/R/fmelt.R @tdhock -/src/fmelt.c @tdhock -/man/melt.data.table.Rd @tdhock -/vignettes/datatable-reshape.Rmd @tdhock - -# rolling statistics -/R/froll.R @jangorecki -/man/froll.Rd @jangorecki -/src/froll.c @jangorecki -/src/frollR.c @jangorecki -/src/frolladaptive.c @jangorecki - -# meta-programming -/R/programming.R @jangorecki -/man/substitute2.Rd @jangorecki -/src/programming.c @jangorecki -/vignettes/datatable-programming.Rmd @jangorecki - -# GForce groupby -/src/gsumm.c @ben-schwen -# datetime classes -/R/IDateTime.R @ben-schwen @michaelchirico -/src/idatetime.c @ben-schwen @michaelchirico -/man/IDateTime.Rd @ben-schwen @michaelchirico - -# shift -/R/shift.R @ben-schwen @michaelchirico -/src/shift.c @ben-schwen @michaelchirico -/man/shift.Rd @ben-schwen @michaelchirico - -# translations -/inst/po/ @michaelchirico -/po/ @michaelchirico -/R/translation.R @michaelchirico -/src/po.h @michaelchirico - -# printing -/R/print.data.table.R @michaelchirico - -# .SD vignette -/vignettes/datatable-sd-usage.Rmd @michaelchirico +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +* @mattdowle + +# melt +/R/fmelt.R @tdhock +/src/fmelt.c @tdhock +/man/melt.data.table.Rd @tdhock +/vignettes/datatable-reshape.Rmd @tdhock + +# rolling statistics +/R/froll.R @jangorecki +/man/froll.Rd @jangorecki +/src/froll.c @jangorecki +/src/frollR.c @jangorecki +/src/frolladaptive.c @jangorecki + +# meta-programming +/R/programming.R @jangorecki +/man/substitute2.Rd @jangorecki +/src/programming.c @jangorecki +/vignettes/datatable-programming.Rmd @jangorecki + +# GForce groupby +/src/gsumm.c @ben-schwen +# datetime classes +/R/IDateTime.R @ben-schwen @michaelchirico +/src/idatetime.c @ben-schwen @michaelchirico +/man/IDateTime.Rd @ben-schwen @michaelchirico + +# shift +/R/shift.R @ben-schwen @michaelchirico +/src/shift.c @ben-schwen @michaelchirico +/man/shift.Rd @ben-schwen @michaelchirico + +# translations +/inst/po/ @michaelchirico +/po/ @michaelchirico +/R/translation.R @michaelchirico +/src/po.h @michaelchirico + +# printing +/R/print.data.table.R @michaelchirico + +# .SD vignette +/vignettes/datatable-sd-usage.Rmd @michaelchirico + +# assign +/src/assign.c @HughParsonage + +# utils +/src/utils.c @HughParsonage + From 61ce9dd93ea0260c9e5fca84f246ef023d3e1831 Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 19:13:00 +1100 Subject: [PATCH 09/17] Revert CODEOWNERS --- CODEOWNERS | 97 +++++++++++++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 12329119d9..5d98e02422 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,52 +1,45 @@ -# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners -* @mattdowle - -# melt -/R/fmelt.R @tdhock -/src/fmelt.c @tdhock -/man/melt.data.table.Rd @tdhock -/vignettes/datatable-reshape.Rmd @tdhock - -# rolling statistics -/R/froll.R @jangorecki -/man/froll.Rd @jangorecki -/src/froll.c @jangorecki -/src/frollR.c @jangorecki -/src/frolladaptive.c @jangorecki - -# meta-programming -/R/programming.R @jangorecki -/man/substitute2.Rd @jangorecki -/src/programming.c @jangorecki -/vignettes/datatable-programming.Rmd @jangorecki - -# GForce groupby -/src/gsumm.c @ben-schwen -# datetime classes -/R/IDateTime.R @ben-schwen @michaelchirico -/src/idatetime.c @ben-schwen @michaelchirico -/man/IDateTime.Rd @ben-schwen @michaelchirico - -# shift -/R/shift.R @ben-schwen @michaelchirico -/src/shift.c @ben-schwen @michaelchirico -/man/shift.Rd @ben-schwen @michaelchirico - -# translations -/inst/po/ @michaelchirico -/po/ @michaelchirico -/R/translation.R @michaelchirico -/src/po.h @michaelchirico - -# printing -/R/print.data.table.R @michaelchirico - -# .SD vignette -/vignettes/datatable-sd-usage.Rmd @michaelchirico - -# assign -/src/assign.c @HughParsonage - -# utils -/src/utils.c @HughParsonage - +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +* @mattdowle + +# melt +/R/fmelt.R @tdhock +/src/fmelt.c @tdhock +/man/melt.data.table.Rd @tdhock +/vignettes/datatable-reshape.Rmd @tdhock + +# rolling statistics +/R/froll.R @jangorecki +/man/froll.Rd @jangorecki +/src/froll.c @jangorecki +/src/frollR.c @jangorecki +/src/frolladaptive.c @jangorecki + +# meta-programming +/R/programming.R @jangorecki +/man/substitute2.Rd @jangorecki +/src/programming.c @jangorecki +/vignettes/datatable-programming.Rmd @jangorecki + +# GForce groupby +/src/gsumm.c @ben-schwen +# datetime classes +/R/IDateTime.R @ben-schwen @michaelchirico +/src/idatetime.c @ben-schwen @michaelchirico +/man/IDateTime.Rd @ben-schwen @michaelchirico + +# shift +/R/shift.R @ben-schwen @michaelchirico +/src/shift.c @ben-schwen @michaelchirico +/man/shift.Rd @ben-schwen @michaelchirico + +# translations +/inst/po/ @michaelchirico +/po/ @michaelchirico +/R/translation.R @michaelchirico +/src/po.h @michaelchirico + +# printing +/R/print.data.table.R @michaelchirico + +# .SD vignette +/vignettes/datatable-sd-usage.Rmd @michaelchirico From e00a553ae849ceb87e24a4ad4dcf6a54636b378b Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 19:16:13 +1100 Subject: [PATCH 10/17] Update CODEOWNERS With extant file endings --- CODEOWNERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 5d98e02422..7d7a5ecaac 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -43,3 +43,9 @@ # .SD vignette /vignettes/datatable-sd-usage.Rmd @michaelchirico + +# assign +/src/assign.c @HughParsonage + +# utils +/src/utils.c @HughParsonage From 9d05feb0f41a98c535568ad7ec1f328b45546916 Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 19:25:47 +1100 Subject: [PATCH 11/17] Simplify logic for integer check --- src/assign.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/assign.c b/src/assign.c index 755a7b151a..0174c623a0 100644 --- a/src/assign.c +++ b/src/assign.c @@ -742,7 +742,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const double *sd = REAL(source); for (int i=0; inlevel)) { + // Since nlevel is an int, val < 1 || val > nlevel will deflect UB guarded against in PR #5832 + if (!ISNAN(val) && (val < 1 || val > nlevel || val != (int)val)) { error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } From 50056ef0e1da52d2d0291f3f63f82fb8d8470162 Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 19 Dec 2023 21:56:39 +1100 Subject: [PATCH 12/17] Check for NA_INTEGER superfluous --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index 530a6c6cc6..7999b532f0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -12,7 +12,7 @@ static R_xlen_t firstNonInt(SEXP x) { const double *dx = REAL(x); while (i Date: Tue, 19 Dec 2023 21:57:12 +1100 Subject: [PATCH 13/17] Test 6.53 should emit warning on -2^31 --- inst/tests/nafill.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 98b1acbb94..b72c0b5063 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -187,7 +187,7 @@ if (test_bit64) { test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) - test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) + test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning="precision lost") test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning="precision lost") } From 46cf79ce43136f206170e8abe4dec98967e6dc86 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 22 Dec 2023 16:49:16 +0000 Subject: [PATCH 14/17] C++ toolkit included by default --- .devcontainer/r-devel-clang-ubsan/devcontainer.json | 7 ++++++- .devcontainer/r-devel-gcc/devcontainer.json | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.devcontainer/r-devel-clang-ubsan/devcontainer.json b/.devcontainer/r-devel-clang-ubsan/devcontainer.json index f1d985f31e..de21d3dfe1 100644 --- a/.devcontainer/r-devel-clang-ubsan/devcontainer.json +++ b/.devcontainer/r-devel-clang-ubsan/devcontainer.json @@ -1,4 +1,9 @@ { "build": { "dockerfile": "Dockerfile", "context": "../.." }, - "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } + "customizations": { "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + }} } diff --git a/.devcontainer/r-devel-gcc/devcontainer.json b/.devcontainer/r-devel-gcc/devcontainer.json index f1d985f31e..de21d3dfe1 100644 --- a/.devcontainer/r-devel-gcc/devcontainer.json +++ b/.devcontainer/r-devel-gcc/devcontainer.json @@ -1,4 +1,9 @@ { "build": { "dockerfile": "Dockerfile", "context": "../.." }, - "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } + "customizations": { "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + }} } From 3e2f1c3000fb21f052783138a375f774dfda8773 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 22 Dec 2023 16:49:51 +0000 Subject: [PATCH 15/17] first pass at fix --- src/assign.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/assign.c b/src/assign.c index 0174c623a0..ec53ba5dc7 100644 --- a/src/assign.c +++ b/src/assign.c @@ -905,7 +905,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int64_t)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && + (!R_FINITE(val) || val < (double)INT64_MIN || val > (double)INT64_MAX || (int64_t)val!=val), "f", "truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && @@ -1022,7 +1023,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (mc) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) - } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + } else BODY(double, REAL, int64_t, R_FINITE(val) && val > (double)INT64_MIN && val < (double)INT64_MAX ? val : NA_INTEGER64, td[i]=cval) case CPLXSXP: BODY(Rcomplex, COMPLEX, int64_t, ISNAN(val.r) ? NA_INTEGER64 : (int64_t)val.r, td[i]=cval) default: COERCE_ERROR("integer64"); } From 77017388800e728ffa85e7f4af53d867a6257a5d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 22 Dec 2023 17:09:10 +0000 Subject: [PATCH 16/17] Simplify/unite with within_int64_repres() --- inst/tests/tests.Rraw | 2 +- src/assign.c | 5 ++--- src/data.table.h | 1 + src/utils.c | 4 ++++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 89239d7b76..42d85cf06d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14417,7 +14417,7 @@ test(2006.2, rbindlist(list(data.table(x = as.raw(1:2), y=as.raw(5:6)), data.tab if (test_bit64) { test(2007.1, rbindlist(list( list(a=as.integer64(1), b=3L), list(a=2L, b=4L) )), data.table(a=as.integer64(1:2), b=3:4)) test(2007.2, rbindlist(list( list(a=3.4, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6), - warning="Column 1 of item 1: 3.4.*double.*position 1 truncated.*precision lost.*when assigning.*integer64.*column 1 named 'a'") + warning="Column 1 of item 1: 3.4.*double.*position 1.*truncated.*precision lost.*when assigning.*integer64.*column 1 named 'a'") test(2007.3, rbindlist(list( list(a=3.0, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6)) test(2007.4, rbindlist(list( list(b=5:6), list(a=as.integer64(4), b=7L)), fill=TRUE), data.table(b=5:7, a=as.integer64(c(NA,NA,4)))) # tests writeNA of integer64 test(2007.5, rbindlist(list( list(a=INT(1,NA,-2)), list(a=as.integer64(c(3,NA))) )), data.table(a=as.integer64(c(1,NA,-2,3,NA)))) # int NAs combined with int64 NA diff --git a/src/assign.c b/src/assign.c index ec53ba5dc7..ef49fd230b 100644 --- a/src/assign.c +++ b/src/assign.c @@ -905,8 +905,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && - (!R_FINITE(val) || val < (double)INT64_MIN || val > (double)INT64_MAX || (int64_t)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && @@ -1023,7 +1022,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (mc) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) - } else BODY(double, REAL, int64_t, R_FINITE(val) && val > (double)INT64_MIN && val < (double)INT64_MAX ? val : NA_INTEGER64, td[i]=cval) + } else BODY(double, REAL, int64_t, within_int64_repres(val) ? val : NA_INTEGER64, td[i]=cval) case CPLXSXP: BODY(Rcomplex, COMPLEX, int64_t, ISNAN(val.r) ? NA_INTEGER64 : (int64_t)val.r, td[i]=cval) default: COERCE_ERROR("integer64"); } diff --git a/src/data.table.h b/src/data.table.h index f0b5bb3cad..0a6eb207a8 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -233,6 +233,7 @@ SEXP coalesce(SEXP x, SEXP inplace); // utils.c bool within_int32_repres(double x); +bool within_int64_repres(double x); bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); diff --git a/src/utils.c b/src/utils.c index 7999b532f0..ca7cf3c5ac 100644 --- a/src/utils.c +++ b/src/utils.c @@ -7,6 +7,10 @@ bool within_int32_repres(double x) { return R_FINITE(x) && x < 2147483648 && x > -2147483648; } +bool within_int64_repres(double x) { + return R_FINITE(x) && x < (double)INT64_MAX && x > (double)INT64_MIN; +} + static R_xlen_t firstNonInt(SEXP x) { R_xlen_t n=xlength(x), i=0; const double *dx = REAL(x); From b269b39841f6961de43504c5b155beb9723f8d14 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 22 Dec 2023 18:23:33 -0800 Subject: [PATCH 17/17] weak inequality --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index ca7cf3c5ac..1fba47cac3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -8,7 +8,7 @@ bool within_int32_repres(double x) { } bool within_int64_repres(double x) { - return R_FINITE(x) && x < (double)INT64_MAX && x > (double)INT64_MIN; + return R_FINITE(x) && x <= (double)INT64_MAX && x >= (double)INT64_MIN; } static R_xlen_t firstNonInt(SEXP x) {