diff --git a/src/cunumeric/index/advanced_indexing.cu b/src/cunumeric/index/advanced_indexing.cu index fde5590fda..a7d3f2f945 100644 --- a/src/cunumeric/index/advanced_indexing.cu +++ b/src/cunumeric/index/advanced_indexing.cu @@ -94,7 +94,7 @@ struct AdvancedIndexingImplBody { const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t); + size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t); if (blocks >= MAX_REDUCTION_CTAS) { const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS; diff --git a/src/cunumeric/index/repeat.cu b/src/cunumeric/index/repeat.cu index 30f0c2aff8..1b658874a0 100644 --- a/src/cunumeric/index/repeat.cu +++ b/src/cunumeric/index/repeat.cu @@ -139,7 +139,7 @@ struct RepeatImplBody { DeviceScalarReductionBuffer> sum(stream); const size_t blocks_count = (extent + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - const size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t); + const size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t); if (blocks_count > MAX_REDUCTION_CTAS) { const size_t iters = (blocks_count + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS; diff --git a/src/cunumeric/search/nonzero.cuh b/src/cunumeric/search/nonzero.cuh index 1b777b34cb..e9af925787 100644 --- a/src/cunumeric/search/nonzero.cuh +++ b/src/cunumeric/search/nonzero.cuh @@ -63,7 +63,7 @@ int64_t compute_offsets(const AccessorRO& in, DeviceScalarReductionBuffer> size(stream); const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t); + size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t); if (blocks >= MAX_REDUCTION_CTAS) { const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS;