|
14 | 14 | namespace cuopt { |
15 | 15 | namespace test { |
16 | 16 |
|
// Minimal empty kernels: set_shmem_of_kernel only needs a distinct kernel
// symbol per test case so that per-kernel state (e.g. a cached attribute
// result) from one test cannot leak into another.
/// @brief Dummy kernel used to test a zero-byte shared-memory request.
__global__ void kernel_zero() {}
/// @brief Dummy kernel used to test a normal (within-limit) shared-memory request.
__global__ void kernel_normal() {}
/// @brief Dummy kernel used to test a too-large shared-memory request (first call).
__global__ void kernel_too_large_a() {}
/// @brief Dummy kernel used to test a too-large shared-memory request (repeated call).
__global__ void kernel_too_large_b() {}
/// @brief Dummy kernel used to verify that a failed request leaves no sticky CUDA error.
__global__ void kernel_sticky_error() {}
22 | 27 |
|
23 | | -// Zero request is a no-op and must return true. |
/// @brief Zero request is a no-op and must return true.
TEST(set_shmem_of_kernel, zero_request)
{
  const bool accepted = set_shmem_of_kernel(kernel_zero, 0);
  EXPECT_TRUE(accepted);
  // A no-op must not leave any CUDA error pending.
  EXPECT_EQ(cudaSuccess, cudaGetLastError());
}
29 | 34 |
|
30 | | -// A modest request well within device limits must succeed. |
/// @brief A modest request well within device limits must succeed.
TEST(set_shmem_of_kernel, normal_request)
{
  // 4 KiB is far below the opt-in shared-memory limit of any supported device.
  constexpr size_t modest_bytes = 4096;
  const bool accepted = set_shmem_of_kernel(kernel_normal, modest_bytes);
  EXPECT_TRUE(accepted);
  EXPECT_EQ(cudaSuccess, cudaGetLastError());
}
36 | 41 |
|
37 | | -// Requesting more shared memory than the device supports must return false. |
/// @brief Requesting more shared memory than the device supports must return false.
TEST(set_shmem_of_kernel, too_large_returns_false)
{
  // Query the limit of the device the test is actually running on instead of
  // hardcoding device 0 — under CUDA_VISIBLE_DEVICES remapping or a multi-GPU
  // runner the current device need not be ordinal 0, and device limits differ.
  int device{};
  ASSERT_EQ(cudaSuccess, cudaGetDevice(&device)) << "cudaGetDevice failed";
  int shmem_max{};
  ASSERT_EQ(cudaSuccess,
            cudaDeviceGetAttribute(&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device))
    << "cudaDeviceGetAttribute(cudaDevAttrMaxSharedMemoryPerBlockOptin) failed";
  size_t too_large = static_cast<size_t>(shmem_max) + 1024;

  // The oversized request must be rejected gracefully: false return value,
  // and no CUDA error left pending for later calls to trip over.
  EXPECT_FALSE(set_shmem_of_kernel(kernel_too_large_a, too_large));
  EXPECT_EQ(cudaSuccess, cudaGetLastError());
}
47 | 54 |
|
48 | | -// A second call with the same too-large size must still return false |
/// @brief A second call with the same too-large size must still return false.
TEST(set_shmem_of_kernel, cache_not_poisoned_on_failure)
{
  // Query the limit of the device the test is actually running on instead of
  // hardcoding device 0 — the current device need not be ordinal 0.
  int device{};
  ASSERT_EQ(cudaSuccess, cudaGetDevice(&device)) << "cudaGetDevice failed";
  int shmem_max{};
  ASSERT_EQ(cudaSuccess,
            cudaDeviceGetAttribute(&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device))
    << "cudaDeviceGetAttribute(cudaDevAttrMaxSharedMemoryPerBlockOptin) failed";
  size_t too_large = static_cast<size_t>(shmem_max) + 1024;

  // If the first failure were cached as a success, the second call would
  // wrongly return true — both calls must report failure.
  EXPECT_FALSE(set_shmem_of_kernel(kernel_too_large_b, too_large));
  EXPECT_FALSE(set_shmem_of_kernel(kernel_too_large_b, too_large));  // must not return true
  EXPECT_EQ(cudaSuccess, cudaGetLastError());
}
59 | 68 |
|
60 | | -// A failed call must not leave a sticky CUDA error that would be caught |
61 | | -// later by an unrelated RAFT_CHECK_CUDA. |
/// @brief A failed call must not leave a sticky CUDA error that would be caught
/// later by an unrelated RAFT_CHECK_CUDA.
TEST(set_shmem_of_kernel, no_sticky_error_after_failure)
{
  // Query the limit of the device the test is actually running on instead of
  // hardcoding device 0 — the current device need not be ordinal 0.
  int device{};
  ASSERT_EQ(cudaSuccess, cudaGetDevice(&device)) << "cudaGetDevice failed";
  int shmem_max{};
  ASSERT_EQ(cudaSuccess,
            cudaDeviceGetAttribute(&shmem_max, cudaDevAttrMaxSharedMemoryPerBlockOptin, device))
    << "cudaDeviceGetAttribute(cudaDevAttrMaxSharedMemoryPerBlockOptin) failed";
  size_t too_large = static_cast<size_t>(shmem_max) + 1024;

  EXPECT_FALSE(set_shmem_of_kernel(kernel_sticky_error, too_large));  // confirm failure branch taken
  // The key assertion: the failed request must have cleared/consumed its own
  // error so later unrelated error checks do not report it.
  EXPECT_EQ(cudaSuccess, cudaGetLastError());
}
71 | 82 |
|
|
0 commit comments