diff --git a/NEWS.md b/NEWS.md index f6d2f971cb..3db78f46c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ 1. `nafill()` now applies `fill=` to the front/back of the vector when `type="locf|nocb"`, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for the feature request. It also now returns a named object based on the input names. Note that if you are considering joining and then using `nafill(...,type='locf|nocb')` afterwards, please review `roll=`/`rollends=` which should achieve the same result in one step more efficiently. `nafill()` is for when filling-while-joining (i.e. `roll=`/`rollends=`/`nomatch=`) cannot be applied. +2. `mean(na.rm=TRUE)` by group is now GForce optimized, [#4849](https://github.com/Rdatatable/data.table/issues/4849). Thanks to the [h2oai/db-benchmark](https://github.com/h2oai/db-benchmark) project for spotting this issue. The 1 billion row example in the issue shows 48s reduced to 14s. The optimization also applies to type `integer64` resulting in a difference to the `bit64::mean.integer64` method: `data.table` returns a `double` result whereas `bit64` rounds the mean to the nearest integer. + ## BUG FIXES ## NOTES diff --git a/R/data.table.R b/R/data.table.R index 961d9eb857..1334a612d0 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2856,7 +2856,7 @@ ghead = function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the mo gtail = function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment gfirst = function(x) .Call(Cgfirst, x) glast = function(x) .Call(Cglast, x) -gsum = function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm, TRUE) # warnOverflow=TRUE, #986 +gsum = function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm) gmean = function(x, na.rm=FALSE) .Call(Cgmean, x, na.rm) gprod = function(x, na.rm=FALSE) .Call(Cgprod, x, na.rm) gmedian = function(x, na.rm=FALSE) .Call(Cgmedian, x, na.rm) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 95b8ba4492..8f32278523 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17263,3 +17263,13 @@ if (identical(x, enc2native(x))) { # fintersect now preserves order of first argument like intersect, #4716 test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) + +# mean na.rm=TRUE GForce, #4849 +d = data.table(a=1, b=list(1,2)) +test(2164.1, d[, mean(b), by=a], error="not supported by GForce mean") +if (test_bit64) { + d = data.table(a=INT(1,1,2,2), b=as.integer64(c(2,3,4,NA))) + test(2164.2, d[, mean(b), by=a], data.table(a=INT(1,2), V1=c(2.5, NA))) + test(2164.3, d[, mean(b, na.rm=TRUE), by=a], data.table(a=INT(1,2), V1=c(2.5, 4))) +} + diff --git a/src/gsumm.c b/src/gsumm.c index ed34e76207..9c31f4a761 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -337,11 +337,10 @@ void *gather(SEXP x, bool *anyNA) return gx; } -SEXP gsum(SEXP x, SEXP narmArg, SEXP warnOverflowArg) +SEXP gsum(SEXP x, SEXP narmArg) { if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE")); const bool narm = LOGICAL(narmArg)[0]; - const bool warnOverflow = LOGICAL(warnOverflowArg)[0]; if (inherits(x, "factor")) error(_("sum is not meaningful for factors.")); const int n = (irowslen == -1) ? length(x) : irowslen; double started = wallclock(); @@ -401,7 +400,7 @@ SEXP gsum(SEXP x, SEXP narmArg, SEXP warnOverflowArg) //Rprintf(_("gsum int took %.3f\n"), wallclock()-started); if (overflow) { UNPROTECT(1); // discard the result with overflow - if (warnOverflow) warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience.")); + warning(_("The sum of an integer column for a group was more than type 'integer' can hold so the result has been coerced to 'numeric' automatically for convenience.")); ans = PROTECT(allocVector(REALSXP, ngrp)); double *restrict ansp = REAL(ans); memset(ansp, 0, ngrp*sizeof(double)); @@ -570,113 +569,147 @@ SEXP gsum(SEXP x, SEXP narmArg, SEXP warnOverflowArg) return(ans); } -SEXP gmean(SEXP x, SEXP narm) +SEXP gmean(SEXP x, SEXP narmArg) { - SEXP ans=R_NilValue; - //clock_t start = clock(); - if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE")); - if (!isVectorAtomic(x)) error(_("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table.")); if (inherits(x, "factor")) error(_("mean is not meaningful for factors.")); - if (!LOGICAL(narm)[0]) { - int protecti=0; - ans = PROTECT(gsum(x, narm, /*#986, warnOverflow=*/ScalarLogical(FALSE))); protecti++; - switch(TYPEOF(ans)) { - case LGLSXP: case INTSXP: - ans = PROTECT(coerceVector(ans, REALSXP)); protecti++; - case REALSXP: { - double *xd = REAL(ans); - for (int i=0; ii /= grpsize[i]; - xd->r /= grpsize[i]; - xd++; - } - } break; - default : - error(_("Internal error: gsum returned type '%s'. typeof(x) is '%s'"), type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov - } - UNPROTECT(protecti); - return(ans); - } - // na.rm=TRUE. Similar to gsum, but we need to count the non-NA as well for the divisor + if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE")); + const bool narm = LOGICAL(narmArg)[0]; const int n = (irowslen == -1) ? length(x) : irowslen; - if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gsum"); - - long double *s = calloc(ngrp, sizeof(long double)), *si=NULL; // s = sum; si = sum imaginary just for complex - if (!s) error(_("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE"), ngrp, sizeof(long double)); - - int *c = calloc(ngrp, sizeof(int)); - if (!c) error(_("Unable to allocate %d * %d bytes for counts in gmean na.rm=TRUE"), ngrp, sizeof(int)); - + double started = wallclock(); + const bool verbose=GetVerbose(); + if (verbose) Rprintf(_("This gmean took (narm=%s) ... "), narm?"TRUE":"FALSE"); // narm=TRUE only at this point + if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmean"); + bool anyNA=false; + SEXP ans=R_NilValue; + int protecti=0; switch(TYPEOF(x)) { - case LGLSXP: case INTSXP: { - const int *xd = INTEGER(x); - for (int i=0; iDBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); + const double *restrict gx = gather(x, &anyNA); + ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++; + double *restrict ansp = REAL(ans); + memset(ansp, 0, ngrp*sizeof(double)); + if (!narm || !anyNA) { + #pragma omp parallel for num_threads(getDTthreads(highSize, false)) + for (int h=0; hDBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); - ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]); + const Rcomplex *restrict gx = gather(x, &anyNA); + ans = PROTECT(allocVector(CPLXSXP, ngrp)); protecti++; + Rcomplex *restrict ansp = COMPLEX(ans); + memset(ansp, 0, ngrp*sizeof(Rcomplex)); + if (!narm || !anyNA) { + #pragma omp parallel for num_threads(getDTthreads(highSize, false)) + for (int h=0; h