From 119489f5e2e10562a47f5a132da7fd93e5bf9901 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Jul 2024 18:50:07 -0700 Subject: [PATCH 01/15] Consistently error when allocation request fails --- src/assign.c | 2 ++ src/bmerge.c | 6 +++++- src/chmatch.c | 4 +++- src/forder.c | 43 +++++++++++++++++++++++++++++++++++-------- src/fread.c | 17 +++++++++++++---- src/fsort.c | 4 ++-- src/fwrite.c | 7 +++++-- src/gsumm.c | 16 +++++++++++----- src/ijoin.c | 2 ++ src/rbindlist.c | 17 +++++++++++++---- src/uniqlist.c | 7 ++++++- 11 files changed, 97 insertions(+), 28 deletions(-) diff --git a/src/assign.c b/src/assign.c index 3b7ac5faa6..f7a6e6f636 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1248,6 +1248,8 @@ void savetl_init(void) { saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); if (saveds==NULL || savedtl==NULL) { + if (saveds) free(saveds); + if (savedtl) free(savedtl); savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov } diff --git a/src/bmerge.c b/src/bmerge.c index 2ce69904e0..8eb1e0abbe 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -129,8 +129,12 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP retFirst = Calloc(anslen, int); // anslen is set above retLength = Calloc(anslen, int); retIndex = Calloc(anslen, int); - if (retFirst==NULL || retLength==NULL || retIndex==NULL) + if (retFirst==NULL || retLength==NULL || retIndex==NULL) { + if (retFirst) Free(retFirst); + if (retLength) Free(retLength); + if (retIndex) Free(retIndex); error(_("Internal error in allocating memory for non-equi join")); // # nocov + } // initialise retIndex here directly, as next loop is meant for both equi and non-equi joins for (int j=0; j A(TL=1),B(2),C(3),D(4),E(5) => dupMap 1 2 3 5 6 | 8 7 4 // dupLink 7 8 | 6 (blank=0) - int *counts = (int *)calloc(nuniq, sizeof(int)); unsigned int mapsize = tablelen+nuniq; // lto compilation warning #5760 // +nuniq to store a 0 at the end of each group + int *counts = (int *)calloc(nuniq, sizeof(int)); int *map = (int *)calloc(mapsize, sizeof(int)); if (!counts || !map) { + if (counts) free(counts); + if (map) free(map); // # nocov start for (int i=0; i0) savetl(s); diff --git a/src/uniqlist.c b/src/uniqlist.c index 48da706a2c..c67d307774 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -23,6 +23,8 @@ SEXP uniqlist(SEXP l, SEXP order) SEXP v, ans; R_len_t len, thisi, previ, isize=1000; int *iidx = Calloc(isize, int); // for 'idx' + if (!iidx) + error(_("Failed to allocate %d bytes for 'iidx'."), (int)(isize * sizeof(int))); len = 1; iidx[0] = 1; // first row is always the first of the first group @@ -259,7 +261,10 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul R_len_t nrows = length(VECTOR_ELT(l,0)), ncols = length(cols); if (nrows==0) return(allocVector(INTSXP, 0)); R_len_t thisi, previ, ansgrpsize=1000, nansgrp=0; - R_len_t *ansgrp = Calloc(ansgrpsize, R_len_t), starts, grplen; // #3401 fix. Needs to be Calloc due to Realloc below .. else segfaults. + R_len_t *ansgrp = Calloc(ansgrpsize, R_len_t); + if (!ansgrp) + error(_("Failed to allocate %d bytes for '%s'."), (int)(ansgrpsize * sizeof(R_len_t)), "ansgrpsize"); + R_len_t starts, grplen; // #3401 fix. Needs to be Calloc due to Realloc below .. else segfaults. R_len_t ngrps = length(grps); bool *i64 = (bool *)R_alloc(ncols, sizeof(bool)); if (ngrps==0) error(_("Internal error: nrows[%d]>0 but ngrps==0"), nrows); // # nocov From 97e684bce7fb3688d253e7311d132303ab1d5405 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Jul 2024 18:56:56 -0700 Subject: [PATCH 02/15] Just free() unconditionally --- src/assign.c | 3 +-- src/bmerge.c | 4 +--- src/chmatch.c | 3 +-- src/forder.c | 17 +++++------------ src/fread.c | 3 +-- src/gsumm.c | 6 ++---- src/rbindlist.c | 9 ++------- 7 files changed, 13 insertions(+), 32 deletions(-) diff --git a/src/assign.c b/src/assign.c index f7a6e6f636..b475c6f638 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1248,8 +1248,7 @@ void savetl_init(void) { saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); if (saveds==NULL || savedtl==NULL) { - if (saveds) free(saveds); - if (savedtl) free(savedtl); + free(saveds); free(savedtl); savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov } diff --git a/src/bmerge.c b/src/bmerge.c index 8eb1e0abbe..98a4b7b707 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -130,9 +130,7 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP retLength = Calloc(anslen, int); retIndex = Calloc(anslen, int); if (retFirst==NULL || retLength==NULL || retIndex==NULL) { - if (retFirst) Free(retFirst); - if (retLength) Free(retLength); - if (retIndex) Free(retIndex); + Free(retFirst); Free(retLength); Free(retIndex); error(_("Internal error in allocating memory for non-equi join")); // # nocov } // initialise retIndex here directly, as next loop is meant for both equi and non-equi joins diff --git a/src/chmatch.c b/src/chmatch.c index f54eda96a7..878699e6ae 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -98,8 +98,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch int *counts = (int *)calloc(nuniq, sizeof(int)); int *map = (int *)calloc(mapsize, sizeof(int)); if (!counts || !map) { - if (counts) free(counts); - if (map) free(map); + free(counts); free(map); // # nocov start for (int i=0; i Date: Mon, 15 Jul 2024 19:01:35 -0700 Subject: [PATCH 03/15] paren mismatch --- src/ijoin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ijoin.c b/src/ijoin.c index 129cda9114..da7c5ab765 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -143,7 +143,7 @@ SEXP lookup(SEXP ux, SEXP xlen, SEXP indices, SEXP gaps, SEXP overlaps, SEXP mul start = clock(); idx = Calloc(uxrows, R_len_t); // resets bits, =0 if (!idx) - error(_("Failed to allocate %d bytes for idx.", (int)(uxrows * sizeof(R_len_t)))); + error(_("Failed to allocate %d bytes for idx."), (int)(uxrows * sizeof(R_len_t))); switch (type) { case ANY: case START: case END: case WITHIN: for (int i=0; i Date: Mon, 15 Jul 2024 19:06:49 -0700 Subject: [PATCH 04/15] More consistency for easier i18n --- src/forder.c | 2 +- src/fread.c | 4 ++-- src/ijoin.c | 2 +- src/uniqlist.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/forder.c b/src/forder.c index e1b60976ff..70b7070e3c 100644 --- a/src/forder.c +++ b/src/forder.c @@ -1157,7 +1157,7 @@ void radix_r(const int from, const int to, const int radix) { int *starts = calloc(nBatch*256, sizeof(int)); // keep starts the same shape and ugrp order as counts if (!starts) - STOP(_("Unable to allocate 'starts' array, %d bytes."), (int)(nBatch*256*sizeof(int))); + STOP(_("Failed to allocate %d bytes for '%s'."), (int)(nBatch*256*sizeof(int)), "starts"); for (int j=0, sum=0; j Date: Mon, 15 Jul 2024 19:13:16 -0700 Subject: [PATCH 05/15] Calloc() very intentionally _does not need_ error checking (handled by R) --- src/bmerge.c | 4 ---- src/ijoin.c | 2 -- src/uniqlist.c | 4 ---- 3 files changed, 10 deletions(-) diff --git a/src/bmerge.c b/src/bmerge.c index 98a4b7b707..6cb20c683f 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -129,10 +129,6 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP retFirst = Calloc(anslen, int); // anslen is set above retLength = Calloc(anslen, int); retIndex = Calloc(anslen, int); - if (retFirst==NULL || retLength==NULL || retIndex==NULL) { - Free(retFirst); Free(retLength); Free(retIndex); - error(_("Internal error in allocating memory for non-equi join")); // # nocov - } // initialise retIndex here directly, as next loop is meant for both equi and non-equi joins for (int j=0; j Date: Mon, 15 Jul 2024 19:16:12 -0700 Subject: [PATCH 06/15] Smaller diff thanks to Calloc() behavior --- src/uniqlist.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/uniqlist.c b/src/uniqlist.c index 42882b39cd..48da706a2c 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -259,8 +259,7 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul R_len_t nrows = length(VECTOR_ELT(l,0)), ncols = length(cols); if (nrows==0) return(allocVector(INTSXP, 0)); R_len_t thisi, previ, ansgrpsize=1000, nansgrp=0; - R_len_t *ansgrp = Calloc(ansgrpsize, R_len_t); - R_len_t starts, grplen; // #3401 fix. Needs to be Calloc due to Realloc below .. else segfaults. + R_len_t *ansgrp = Calloc(ansgrpsize, R_len_t), starts, grplen; // #3401 fix. Needs to be Calloc due to Realloc below .. else segfaults. R_len_t ngrps = length(grps); bool *i64 = (bool *)R_alloc(ncols, sizeof(bool)); if (ngrps==0) error(_("Internal error: nrows[%d]>0 but ngrps==0"), nrows); // # nocov From 3fa2bdb77ac7fac862471e0552acee7267421097 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 05:57:51 +0000 Subject: [PATCH 07/15] consistency: !x, not x==NULL --- src/assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assign.c b/src/assign.c index b475c6f638..6eb0866e43 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1247,7 +1247,7 @@ void savetl_init(void) { nalloc = 100; saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); - if (saveds==NULL || savedtl==NULL) { + if (!saveds || !savedtl) { free(saveds); free(savedtl); savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov From 4d02d0cd7cc69c8136ae9bf494113dc9f3be0eb0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:03:15 +0000 Subject: [PATCH 08/15] initialize alloc_linter() checker --- .ci/linters/alloc_linter.R | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .ci/linters/alloc_linter.R diff --git a/.ci/linters/alloc_linter.R b/.ci/linters/alloc_linter.R new file mode 100644 index 0000000000..a94bc27a92 --- /dev/null +++ b/.ci/linters/alloc_linter.R @@ -0,0 +1,36 @@ +# Ensure that we check the result of malloc()/calloc() for success +# More specifically, given that this is an AST-ignorant checker, +# 1. Find groups of malloc()/calloc() calls +# 2. Check the next line for a check like 'if (!x || !y)' +alloc_linter = function(c_file) { + lines <- readLines(c_file) + # Be a bit more precise to avoid mentions in comments + alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines) + if (!length(alloc_lines)) return() + # int *tmp=(int*)malloc(...); or just int tmp=malloc(...); + alloc_keys <- lines[alloc_lines] |> + strsplit(R"(\s*=\s*)") |> + vapply(head, 1L, FUN.VALUE="") |> + trimws() |> + # just normalize the more exotic assignments, namely 'type *restrict key = ...' + gsub(pattern = "[*]\\s*(restrict\\s*)?", replacement = "*") |> + strsplit("*", fixed=TRUE) |> + vapply(tail, 1L, FUN.VALUE="") + alloc_grp_id <- cumsum(c(TRUE, diff(alloc_lines) != 1L)) + + # execute by group + tapply(seq_along(alloc_lines), alloc_grp_id, function(grp_idx) { + keys_regex <- paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]") + check_regex <- paste0("if\\s*\\(", keys_regex) + check_line <- lines[alloc_lines[tail(grp_idx, 1L)] + 1L] + # Rarely (once in fread.c as of initialization), error checking is handled + # but not immediately, so use 'NOCHECK' to escape. + if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) { + bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L) + cat("FILE: ", c_file, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="") + writeLines(lines[bad_lines_idx]) + cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="") + stop("Expected the malloc()/calloc() usage above to be followed immediately by error checking.", call.=FALSE) + } + }) +} From 1d9389a6cf33f310eead0af62a14284f10e406fb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:03:26 +0000 Subject: [PATCH 09/15] one valid //NOCHECK usage --- src/fread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fread.c b/src/fread.c index 5a14253e0f..e301e8cd1e 100644 --- a/src/fread.c +++ b/src/fread.c @@ -2254,6 +2254,7 @@ int freadMain(freadMainArgs _args) { .buff8 = malloc(rowSize8 * myBuffRows + 8), .buff4 = malloc(rowSize4 * myBuffRows + 4), .buff1 = malloc(rowSize1 * myBuffRows + 1), + // NOCHECK .rowSize8 = rowSize8, .rowSize4 = rowSize4, .rowSize1 = rowSize1, From a8d1004739822cfd7f4ee4ccf5e7d29056026766 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:03:42 +0000 Subject: [PATCH 10/15] check in CI --- .../workflows/{lint.yaml => code-quality.yaml} | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) rename .github/workflows/{lint.yaml => code-quality.yaml} (65%) diff --git a/.github/workflows/lint.yaml b/.github/workflows/code-quality.yaml similarity index 65% rename from .github/workflows/lint.yaml rename to .github/workflows/code-quality.yaml index d5a4ef4662..20222b606c 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/code-quality.yaml @@ -6,10 +6,10 @@ on: branches: - master -name: lint +name: code-quality jobs: - lint: + lint-r: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -33,3 +33,14 @@ jobs: env: LINTR_ERROR_ON_LINT: true R_LINTR_LINTER_FILE: .ci/.lintr + lint-c: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + - name: Lint + run: | + for (f in list.files('.ci/linters', full.names=TRUE)) source(f) + for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) { + alloc_linter(f) + } From 40cb39a103fb4331d122558d56eb9ebcbf0fb8b4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:09:28 +0000 Subject: [PATCH 11/15] TODO for later --- .github/workflows/code-quality.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 20222b606c..611e828275 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -43,4 +43,5 @@ jobs: for (f in list.files('.ci/linters', full.names=TRUE)) source(f) for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) { alloc_linter(f) + # TODO(#6272): Incorporate more checks from CRAN_Release } From bdfa6f0a4d0fe5506d7cc96bbeaf542e2d1f2c1a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Jul 2024 23:11:38 -0700 Subject: [PATCH 12/15] Need to specify run -- as R --- .github/workflows/code-quality.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 611e828275..1a8376c55e 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -45,3 +45,4 @@ jobs: alloc_linter(f) # TODO(#6272): Incorporate more checks from CRAN_Release } + shell: Rscript {0} From c94d42d15cd283aaf9a0a706f63d53a080eaa3a5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:21:28 +0000 Subject: [PATCH 13/15] move linters to own subdirectories --- .ci/linters/{ => c}/alloc_linter.R | 0 .ci/linters/{ => r}/dt_lengths_linter.R | 0 .ci/linters/{ => r}/dt_test_literal_linter.R | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .ci/linters/{ => c}/alloc_linter.R (100%) rename .ci/linters/{ => r}/dt_lengths_linter.R (100%) rename .ci/linters/{ => r}/dt_test_literal_linter.R (100%) diff --git a/.ci/linters/alloc_linter.R b/.ci/linters/c/alloc_linter.R similarity index 100% rename from .ci/linters/alloc_linter.R rename to .ci/linters/c/alloc_linter.R diff --git a/.ci/linters/dt_lengths_linter.R b/.ci/linters/r/dt_lengths_linter.R similarity index 100% rename from .ci/linters/dt_lengths_linter.R rename to .ci/linters/r/dt_lengths_linter.R diff --git a/.ci/linters/dt_test_literal_linter.R b/.ci/linters/r/dt_test_literal_linter.R similarity index 100% rename from .ci/linters/dt_test_literal_linter.R rename to .ci/linters/r/dt_test_literal_linter.R From 1cb8e1a10bb4622f2f802cf82af203f16cc13df7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 06:22:15 +0000 Subject: [PATCH 14/15] reflect new directories where needed --- .ci/.lintr.R | 2 +- .github/workflows/code-quality.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index 6659efbc82..2481ecec97 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -1,5 +1,5 @@ dt_linters = new.env() -for (f in list.files('.ci/linters', full.names=TRUE)) sys.source(f, dt_linters) +for (f in list.files('.ci/linters/r', full.names=TRUE)) sys.source(f, dt_linters) rm(f) # NB: Could do this inside the linter definition, this separation makes those files more standardized diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 1a8376c55e..f9066eacd0 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -40,7 +40,7 @@ jobs: - uses: r-lib/actions/setup-r@v2 - name: Lint run: | - for (f in list.files('.ci/linters', full.names=TRUE)) source(f) + for (f in list.files('.ci/linters/c', full.names=TRUE)) source(f) for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) { alloc_linter(f) # TODO(#6272): Incorporate more checks from CRAN_Release From a4aea85dd1d12fd51d5ff20ef1ff31ea82f57111 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 16 Jul 2024 07:14:11 -0700 Subject: [PATCH 15/15] NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 262aace175..ab412e0524 100644 --- a/NEWS.md +++ b/NEWS.md @@ -116,6 +116,8 @@ 21. Refactored some non-API calls to R macros for S4 objects (#6180)[https://github.com/Rdatatable/data.table/issues/6180]. There should be no user-visible change. Thanks to various R users & R core for pushing to have a clearer definition of "API" for R, and thanks @MichaelChirico for implementing here. +22. C code was unified more in how failures to allocate memory (`malloc()`/`calloc()`) are handled, (#1115)[https://github.com/Rdatatable/data.table/issues/1115]. No OOM issues were reported, as these regions of code typically request relatively small blocks of memory, but it is good to handle memory pressure consistently. Thanks @elfring for the report and @MichaelChirico for the clean-up effort and future-proofing linter. + ## TRANSLATIONS 1. Fix a typo in a Mandarin translation of an error message that was hiding the actual error message, [#6172](https://github.com/Rdatatable/data.table/issues/6172). Thanks @trafficfan for the report and @MichaelChirico for the fix.