From 074299615cf0d64ed37d786320130602875f1674 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Thu, 5 Jun 2025 13:00:22 +0200 Subject: [PATCH 01/22] WIP commit (meeting interrupt), apologies --- src/transition/solver.cpp | 61 +++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index b52baca3d..9b279b05b 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -16,12 +16,13 @@ */ -#include #include #include #include +#include + #include "solver.h" /** @@ -136,6 +137,20 @@ class CG_Data { public: + /** + * The size, in bytes, required as a work space for the PCG algorithm. + * + * @param[in] n The linear system size. + * + * @returns The required size, in bytes. + * + * \internal Note that the space for two additional integers is required as + * per both the C and C++ specifications. + */ + static size_t workspaceSize( const size_t n ) { + return 3 * sizeof( T ) + 2 * sizeof( int ); + } + /** Disable default constructor. */ CG_Data() = delete; @@ -149,16 +164,26 @@ class CG_Data { * * The matrix defined by \a a, \a ja, \a ia must be symmetric positive * definite. + * + * @param[in] buffer The workspace required by this algorithm. This must + * point to a valid memory region that can be used + * exclusively by ALP. The size of the memory region (in + * bytes) must be greater or equal to that returned by + * #workspaceSize( n ) with \a n equal to the value passed + * to this constructor. + * + * @param[in] buffer_size The size of the given \a buffer. Used for sanity + * checking the use of the buffer. + * + * \warning The sanity check is weaker if not compiled in debug mode. */ CG_Data( const size_t n, - const T * const a, const RSI * const ja, const NZI * const ia + const T * const a, const RSI * const ja, const NZI * const ia, + void * const buffer, const size_t buffer_size ) : size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ), residual( std::numeric_limits< T >::infinity() ), iters( 0 ), - workspace( { - grb::Vector< T >( n ), grb::Vector< T >( n ), grb::Vector< T >( n ) - } ), precond_workspace( grb::Vector< T >( 0 ) ), preconditioner( nullptr ), preconditioner_data( nullptr ) { @@ -166,8 +191,34 @@ class CG_Data { assert( a != nullptr ); assert( ja != nullptr ); assert( ia != nullptr ); + if( buffer_size >= workspaceSize( n ) ) { + throw std::invalid_argument( "The given buffer size is too small" ); + } Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n ); std::swap( A, matrix ); + char * workspace_ptr = buffer; + assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); + { + T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); + grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + std::swap( workspace[ 0 ], tmp ); + } + workspace_ptr += n * sizeof( T ); + workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int))); + assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); + { + T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); + grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + std::swap( workspace[ 1 ], tmp ); + } + workspace_ptr += n * sizeof( T ); + workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int))); + assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); + { + T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); + grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + std::swap( workspace[ 2 ], tmp ); + } } /** @returns The system size. */ From 5a494d4c15d7c4b5249809101b37dd8c4685d72d Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Thu, 5 Jun 2025 14:25:57 +0200 Subject: [PATCH 02/22] This compiles, now need to add in additional workspace for the buffer --- src/transition/solver.cpp | 54 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 9b279b05b..92089a02b 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -148,7 +148,7 @@ class CG_Data { * per both the C and C++ specifications. */ static size_t workspaceSize( const size_t n ) { - return 3 * sizeof( T ) + 2 * sizeof( int ); + return 3 * n * sizeof( T ) + 2 * sizeof( int ); } /** Disable default constructor. */ @@ -196,27 +196,29 @@ class CG_Data { } Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n ); std::swap( A, matrix ); - char * workspace_ptr = buffer; + char * workspace_ptr = static_cast< char * >(buffer); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); { T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); - grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); std::swap( workspace[ 0 ], tmp ); } workspace_ptr += n * sizeof( T ); - workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int))); + workspace_ptr += + (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); { T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); - grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); std::swap( workspace[ 1 ], tmp ); } workspace_ptr += n * sizeof( T ); - workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int))); + workspace_ptr += + (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); { T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); - grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector ); + grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); std::swap( workspace[ 2 ], tmp ); } } @@ -328,7 +330,8 @@ class CG_Data { template< typename T, typename NZI, typename RSI > static sparse_err_t sparse_cg_init_impl( sparse_cg_handle_t * const handle, const size_t n, - const T * const a, const RSI * const ja, const NZI * const ia + const T * const a, const RSI * const ja, const NZI * const ia, + void * const buffer, const size_t bufferSize ) { if( n == 0 ) { return ILLEGAL_ARGUMENT; } if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) { @@ -336,7 +339,7 @@ static sparse_err_t sparse_cg_init_impl( } try { *handle = static_cast< void * >( - new CG_Data< T, NZI, RSI >( n, a, ja, ia ) ); + new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize ) ); } catch( std::exception &e ) { // the grb::Matrix constructor may only throw on out of memory errors std::cerr << "Error: " << e.what() << "\n"; @@ -346,46 +349,67 @@ static sparse_err_t sparse_cg_init_impl( return NO_ERROR; } +template< typename T, typename NZI, typename RSI > +static sparse_err_t sparse_cg_init_impl_no_buffer( + sparse_cg_handle_t * const handle, const size_t n, + const T * const a, const RSI * const ja, const NZI * const ia +) { + const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n ); + void * buffer = nullptr; + try { + buffer = static_cast< void * >(new char[ allocSize ]); + } catch( ... ) { + std::cerr << "Error allocating workspace buffer\n"; + return OUT_OF_MEMORY; + } + const sparse_err_t rc = sparse_cg_init_impl( + handle, n, a, ja, ia, buffer, allocSize ); + if( rc != NO_ERROR ) { + delete [] static_cast< char * >(buffer); + } + return rc; +} + sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia ) { - return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia ); } sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const int * const ia ) { - return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia ); } sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const size_t * const ia ) { - return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, ia ); } sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const size_t * const ia ) { - return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, ia ); } sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const size_t * const ja, const size_t * const ia ) { - return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, ia ); } sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const size_t * const ja, const size_t * const ia ) { - return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, ia ); } template< typename T, typename NZI, typename RSI > From 6648919e7a0471215730b816c0ade808a1cc39c6 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Thu, 5 Jun 2025 14:31:21 +0200 Subject: [PATCH 03/22] Now also accounts for the preconditioner workspace out of the box --- src/transition/solver.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 92089a02b..da6d4e087 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -17,6 +17,7 @@ #include +#include #include #include @@ -141,14 +142,20 @@ class CG_Data { * The size, in bytes, required as a work space for the PCG algorithm. * * @param[in] n The linear system size. + * @param[in] preconditioned Whether the solver may be called with a + * preconditioner. * * @returns The required size, in bytes. * * \internal Note that the space for two additional integers is required as * per both the C and C++ specifications. */ - static size_t workspaceSize( const size_t n ) { - return 3 * n * sizeof( T ) + 2 * sizeof( int ); + static size_t workspaceSize( const size_t n, const bool preconditioned ) { + if( preconditioned ) { + return 4 * n * sizeof( T ) + 3 * sizeof( int ); + } else { + return 3 * n * sizeof( T ) + 2 * sizeof( int ); + } } /** Disable default constructor. */ @@ -191,7 +198,7 @@ class CG_Data { assert( a != nullptr ); assert( ja != nullptr ); assert( ia != nullptr ); - if( buffer_size >= workspaceSize( n ) ) { + if( buffer_size < workspaceSize( n, false ) ) { throw std::invalid_argument( "The given buffer size is too small" ); } Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n ); @@ -221,6 +228,15 @@ class CG_Data { grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); std::swap( workspace[ 2 ], tmp ); } + if( buffer_size >= workspaceSize( n, true ) ) { + workspace_ptr += n * sizeof( T ); + workspace_ptr += + (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); + assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); + T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); + grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); + std::swap( precond_workspace, tmp ); + } } /** @returns The system size. */ @@ -354,7 +370,9 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( sparse_cg_handle_t * const handle, const size_t n, const T * const a, const RSI * const ja, const NZI * const ia ) { - const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n ); + // we pass true here, since we want to support the entire solver transition + // path API out of the box + const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n, true ); void * buffer = nullptr; try { buffer = static_cast< void * >(new char[ allocSize ]); From 978e694e5a0b06173a7def1d59e4301899f3a647 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Mon, 14 Jul 2025 13:47:06 +0200 Subject: [PATCH 04/22] The CG algorithm was not always applying the descriptor it receives -- herewith fixed --- .../algorithms/conjugate_gradient.hpp | 80 ++++++++++++++----- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp index 11e910566..157afd4cf 100644 --- a/include/graphblas/algorithms/conjugate_gradient.hpp +++ b/include/graphblas/algorithms/conjugate_gradient.hpp @@ -314,43 +314,77 @@ namespace grb { std::cerr << "Error: at least one CG iteration must be requested\n"; return grb::ILLEGAL; } + + // dense descriptor + if( descr & grb::descriptors::dense ) { + if( grb::nnz( x ) != grb::size( x ) ) { + std::cerr << "Error: x was sparse while the dense descriptor was given\n"; + return grb::ILLEGAL; + } + if( grb::nnz( b ) != grb::size( b ) ) { + std::cerr << "Error: b was sparse while the dense descriptor was given\n"; + return grb::ILLEGAL; + } + if( grb::nnz( r ) != grb::size( r ) ) { + std::cerr << "Error: r was sparse while the dense descriptor was given\n"; + return grb::ILLEGAL; + } + if( grb::nnz( temp ) != grb::size( temp ) ) { + std::cerr << "Error: temp was sparse while the dense descriptor was " + << "given\n"; + return grb::ILLEGAL; + } + if( preconditioned && + (grb::nnz( temp_precond ) != grb::size( temp_precond )) + ) { + std::cerr << "Error: temp_precond was sparse while the dense descriptor " + << "was given\n"; + return grb::ILLEGAL; + } + } } // set pure output fields to neutral defaults iterations = 0; residual = std::numeric_limits< double >::infinity(); - // make x and b structurally dense (if not already) so that the remainder + // declare internal scalars + IOType sigma, bnorm, alpha, beta; + + // make x structurally dense (if not already) so that the remainder // algorithm can safely use the dense descriptor for faster operations - { - RC rc = grb::SUCCESS; + grb::RC ret = grb::SUCCESS; + if( !(descr & grb::descriptors::dense) ) { if( nnz( x ) != n ) { - rc = grb::set< descriptors::invert_mask | descriptors::structural >( + ret = grb::set< descriptors::invert_mask | descriptors::structural >( x, x, zero ); + assert( ret == grb::SUCCESS ); } - if( rc != grb::SUCCESS ) { return rc; } assert( nnz( x ) == n ); } - IOType sigma, bnorm, alpha, beta; - // r = b - temp; - grb::RC ret = grb::set( temp, 0 ); assert( ret == grb::SUCCESS ); + ret = ret ? ret : grb::set< descr >( temp, 0 ); + assert( ret == grb::SUCCESS ); ret = ret ? ret : grb::mxv< descr_dense >( temp, A, x, ring ); assert( ret == grb::SUCCESS ); - ret = ret ? ret : grb::set( r, zero ); assert( ret == grb::SUCCESS ); - // note: no dense descriptor since we actually allow sparse b - ret = ret ? ret : grb::foldl( r, b, ring.getAdditiveMonoid() ); - // from here onwards, r, temp, x are dense and will remain so - assert( nnz( r ) == n ); - assert( nnz( temp ) == n ); + ret = ret ? ret : grb::set< descr >( r, zero ); + assert( ret == grb::SUCCESS ); + // note: no forced dense descriptor since we actually allow sparse b + // but if b was already dense and the dense descriptor was given, we + // will respect that + ret = ret ? ret : grb::foldl< descr >( r, b, ring.getAdditiveMonoid() ); + // note: at this point, r and temp are guaranteed dense, so we force a dense + // descriptor ret = ret ? ret : grb::foldl< descr_dense >( r, temp, minus ); assert( ret == grb::SUCCESS ); // bnorm = b' * b; + // Note that b can be structurally sparse (unless otherwise guaranteed by the + // user). bnorm = zero; - ret = ret ? ret : grb::dot< descr_dense >( + ret = ret ? ret : grb::dot< descr >( bnorm, b, b, ring.getAdditiveMonoid(), @@ -381,7 +415,9 @@ namespace grb { // z = M^-1r if( preconditioned ) { - ret = ret ? ret : grb::set( z, 0 ); // also ensures z is dense, henceforth + ret = ret ? ret : grb::set< descr >( z, 0 ); + // henceforth, also z is structurally dense and we can force a dense + // descriptor for it if( preconditioned == 2 ) { ret = ret ? ret : grb::wait( z, r ); } @@ -389,10 +425,12 @@ namespace grb { } // else, z equals r (by reference) // u = z; - ret = ret ? ret : grb::set( u, z ); + ret = ret ? ret : grb::set< descr >( u, z ); assert( ret == grb::SUCCESS ); - // from here onwards, u is dense; i.e., all vectors are dense from now on, - // and we can freely use the dense descriptor in the subsequent + // henceforth, also u is structurally dense -- in fact, henceforth, all + // vectors in this algorithm except b are now guaranteed dense. We may + // hence liberally force the use of the dense descriptor in the main while + // loop of the CG (since it does not refer to b). // sigma = r' * z; sigma = zero; @@ -435,7 +473,7 @@ namespace grb { assert( ret == grb::SUCCESS ); // alpha = sigma / beta; - ret = ret ? ret : grb::apply( alpha, sigma, beta, divide ); + ret = ret ? ret : grb::apply< descr >( alpha, sigma, beta, divide ); assert( ret == grb::SUCCESS ); // x = x + alpha * u; @@ -489,7 +527,7 @@ namespace grb { } // alpha = beta / sigma; - ret = ret ? ret : grb::apply( alpha, beta, sigma, divide ); + ret = ret ? ret : grb::apply< descr >( alpha, beta, sigma, divide ); assert( ret == grb::SUCCESS ); // u_next = z + beta * u_previous; From 9bf7bfb03d072e857f26b066ea48071962fe6ab1 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Mon, 14 Jul 2025 15:19:15 +0200 Subject: [PATCH 05/22] Now allow for getting CG solver handles that do not support preconditioning, with the gain that those will require less memory. Also fix one memory leak --- include/transition/solver.h | 119 +++++++++++++++++++++++++++++ src/transition/kml_iss.cpp | 8 ++ src/transition/solver.cpp | 148 ++++++++++++++++++++++++++++++------ 3 files changed, 251 insertions(+), 24 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index ba1afa008..1bc58f2d7 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -132,6 +132,11 @@ typedef enum { */ ILLEGAL_ARGUMENT, + /** + * Illegal method was called. + */ + ILLEGAL_METHOD, + /** * Out of memory error detected during call. */ @@ -250,6 +255,25 @@ sparse_err_t sparse_cg_init_sii( const float * const a, const int * const ja, const int * const ia ); +/** + * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_sii. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_sii( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -263,6 +287,25 @@ sparse_err_t sparse_cg_init_dii( const double * const a, const int * const ja, const int * const ia ); +/** + * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_dii. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_dii( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const int * const ia +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -277,6 +320,25 @@ sparse_err_t sparse_cg_init_siz( const float * const a, const int * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_siz. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_siz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const size_t * const ia +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -291,6 +353,25 @@ sparse_err_t sparse_cg_init_diz( const double * const a, const int * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_diz. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_diz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const size_t * const ia +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -305,6 +386,25 @@ sparse_err_t sparse_cg_init_szz( const float * const a, const size_t * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_szz. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_szz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const size_t * const ja, const size_t * const ia +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -319,6 +419,25 @@ sparse_err_t sparse_cg_init_dzz( const double * const a, const size_t * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t that + * does not support preconditioned solves. Handles constructed using this call + * require less memory. + * + * To have a handle returned by this function be used for a preconditioned solve + * anyway, the handle must first be destroyed and then re-created using a call + * to #sparse_cg_init_dzz. + * + * \note Rationale: the transition path API opted to have this feature + * implemented via a separate constructor call to make the fact that + * resulting handles cannot be used for preconditioned solves as explicit + * as possible. + */ +sparse_err_t sparse_cg_init_nop_dzz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const size_t * const ja, const size_t * const ia +); + // Note that szi and dzi are skipped on purpose. Such variants would not seem // sensible, though could easily be provided if they do turn out to be needed diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp index 2722f4450..53a1b286b 100644 --- a/src/transition/kml_iss.cpp +++ b/src/transition/kml_iss.cpp @@ -116,6 +116,14 @@ static int sparse_err_t_2_int( const sparse_err_t err ) { result = KMLSS_BAD_N; break; } + case ILLEGAL_METHOD: + { + // there is no case where this error should be exposed to a KML API user; + // rather, if encountered, it indicates an error in translating the KML API + // to the solver transition path + result = KMLSS_INTERNAL_ERROR; + break; + } case OUT_OF_MEMORY: { result = KMLSS_NO_MEMORY; diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index da6d4e087..37f62275a 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -35,6 +35,25 @@ * @date 26/01/2024 */ +/** + * Implements the various ways the CG workspace data may be freed. + * + * At the moment, always assumes that the workspace buffer is allocated by the + * transition path itself, using the standard C++ array allocation mechanism. + * The matching deleter action employs the matching standard C++ array deleter. + */ +class CGWorkspaceDeleter { + + private: + + public: + + void operator()( const void * ptr ) { + delete [] static_cast(ptr); + } + +}; + /** * @tparam T The nonzero value type. * @tparam NZI The nonzero index type. @@ -108,6 +127,14 @@ class CG_Data { /** Any required data for the \a preconditioner. */ void * preconditioner_data; + // destructor data + + /** The given buffer during construction. */ + void * _buffer; + + /** How to handle \a _buffer on destruction. */ + CGWorkspaceDeleter _buffer_deleter; + protected: @@ -182,17 +209,25 @@ class CG_Data { * @param[in] buffer_size The size of the given \a buffer. Used for sanity * checking the use of the buffer. * + * + * @param[in] buffer_deleter How the given \a buffer should be deleted on + * destruction of this instance. + * + * \note If the buffer is not to be owned by this instance, then a no-op + * \a buffer_deleter should be given. + * * \warning The sanity check is weaker if not compiled in debug mode. */ CG_Data( const size_t n, const T * const a, const RSI * const ja, const NZI * const ia, - void * const buffer, const size_t buffer_size + void * const buffer, const size_t buffer_size, const D &buffer_deleter ) : size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ), residual( std::numeric_limits< T >::infinity() ), iters( 0 ), precond_workspace( grb::Vector< T >( 0 ) ), - preconditioner( nullptr ), preconditioner_data( nullptr ) + preconditioner( nullptr ), preconditioner_data( nullptr ), + _buffer( buffer ), _buffer_deleter( buffer_deleter ) { assert( n > 0 ); assert( a != nullptr ); @@ -239,6 +274,12 @@ class CG_Data { } } + /** Destroys this CG solve handle. */ + ~CG_Data() { + // call the deleter on the buffer pointer (which may be a no-op) + _buffer_deleter( _buffer ); + } + /** @returns The system size. */ size_t getSize() const noexcept { return size; } @@ -296,13 +337,12 @@ class CG_Data { * allocation. */ void setPreconditioner( const preconditioner_t in, void * const data ) { + if( grb::size( precond_workspace ) == 0 ) { + throw std::logic_error( "CG handle has no preconditioned solve support" ); + } preconditioner = in; preconditioner_data = data; assert( !( !preconditioner && preconditioner_data ) ); - if( grb::size( precond_workspace ) == 0 ) { - grb::Vector< T > replace( size ); - std::swap( replace, precond_workspace ); - } assert( grb::size( precond_workspace ) == size ); } @@ -347,15 +387,15 @@ template< typename T, typename NZI, typename RSI > static sparse_err_t sparse_cg_init_impl( sparse_cg_handle_t * const handle, const size_t n, const T * const a, const RSI * const ja, const NZI * const ia, - void * const buffer, const size_t bufferSize + void * const buffer, const size_t bufferSize, const CGWorkspaceDeleter &deleter ) { if( n == 0 ) { return ILLEGAL_ARGUMENT; } if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) { return NULL_ARGUMENT; } try { - *handle = static_cast< void * >( - new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize ) ); + *handle = static_cast< void * >( new CG_Data< T, NZI, RSI, D >( + n, a, ja, ia, buffer, bufferSize, deleter ) ); } catch( std::exception &e ) { // the grb::Matrix constructor may only throw on out of memory errors std::cerr << "Error: " << e.what() << "\n"; @@ -368,11 +408,13 @@ static sparse_err_t sparse_cg_init_impl( template< typename T, typename NZI, typename RSI > static sparse_err_t sparse_cg_init_impl_no_buffer( sparse_cg_handle_t * const handle, const size_t n, - const T * const a, const RSI * const ja, const NZI * const ia + const T * const a, const RSI * const ja, const NZI * const ia, + bool support_preconditioning ) { // we pass true here, since we want to support the entire solver transition // path API out of the box - const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n, true ); + const size_t allocSize = CG_Data< T, NZI, RSI >:: + workspaceSize( n, support_preconditioning ); void * buffer = nullptr; try { buffer = static_cast< void * >(new char[ allocSize ]); @@ -381,53 +423,107 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( return OUT_OF_MEMORY; } const sparse_err_t rc = sparse_cg_init_impl( - handle, n, a, ja, ia, buffer, allocSize ); + handle, n, a, ja, ia, buffer, allocSize, CGWorkspaceDeleter() ); if( rc != NO_ERROR ) { delete [] static_cast< char * >(buffer); } return rc; } +sparse_err_t sparse_cg_init_nop_sii( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia +) { + return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, + false ); +} + +sparse_err_t sparse_cg_init_nop_dii( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const int * const ia +) { + return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, + false ); +} + +sparse_err_t sparse_cg_init_nop_siz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const size_t * const ia +) { + return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, + ia, false ); +} + +sparse_err_t sparse_cg_init_nop_diz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const size_t * const ia +) { + return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, + ia, false ); +} + +sparse_err_t sparse_cg_init_nop_szz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const size_t * const ja, const size_t * const ia +) { + return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, + ia, false ); +} + +sparse_err_t sparse_cg_init_nop_dzz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const size_t * const ja, const size_t * const ia +) { + return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, + ia, false ); +} + sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia ) { - return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, + true ); } sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const int * const ia ) { - return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, + true ); } sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const size_t * const ia ) { - return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, + ia, true ); } sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const size_t * const ia ) { - return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, + ia, true ); } sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const size_t * const ja, const size_t * const ia ) { - return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, + ia, true ); } sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const size_t * const ja, const size_t * const ia ) { - return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, ia ); + return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, + ia, true ); } template< typename T, typename NZI, typename RSI > @@ -618,7 +714,8 @@ static sparse_err_t sparse_cg_get_iter_count_impl( const sparse_cg_handle_t handle, size_t * const iters ) { if( handle == nullptr || iters == nullptr ) { return NULL_ARGUMENT; } - *iters = static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters(); + *iters = + static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters(); return NO_ERROR; } @@ -739,9 +836,13 @@ static sparse_err_t sparse_cg_set_preconditioner_impl( try { static_cast< CG_Data< T, NZI, RSI > * >( handle )-> setPreconditioner( c_precond_p, c_precond_data_p ); - } catch(...) { - // spec says ALP vector allocation can only throw due to out-of-memory - return OUT_OF_MEMORY; + } catch( std::logic_error &e ) { + std::cerr << e.what() << "\n"; + return ILLEGAL_METHOD; + } catch( std::exception &e ) { + std::cerr << e.what() << "\n"; + std::cerr << "This is an unexpected error; please submit a bug report\n"; + return UNKNOWN; } return NO_ERROR; } @@ -805,8 +906,7 @@ static sparse_err_t sparse_cg_set_max_iter_count_impl( sparse_cg_handle_t handle, const size_t max_iters ) { if( handle == nullptr ) { return NULL_ARGUMENT; } - static_cast< CG_Data< T, NZI, RSI > * >( handle )-> - setMaxIters( max_iters ); + static_cast< CG_Data< T, NZI, RSI > * >( handle )->setMaxIters( max_iters ); return NO_ERROR; } From 4c39734528262cff28fe435890167a771a64ef26 Mon Sep 17 00:00:00 2001 From: Christos Konstantinos Matzoros Date: Mon, 14 Jul 2025 16:26:36 +0200 Subject: [PATCH 06/22] No type error fix --- src/transition/solver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 37f62275a..f2b28ab01 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -221,7 +221,7 @@ class CG_Data { CG_Data( const size_t n, const T * const a, const RSI * const ja, const NZI * const ia, - void * const buffer, const size_t buffer_size, const D &buffer_deleter + void * const buffer, const size_t buffer_size, const CGWorkspaceDeleter &buffer_deleter ) : size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ), residual( std::numeric_limits< T >::infinity() ), iters( 0 ), @@ -394,7 +394,7 @@ static sparse_err_t sparse_cg_init_impl( return NULL_ARGUMENT; } try { - *handle = static_cast< void * >( new CG_Data< T, NZI, RSI, D >( + *handle = static_cast< void * >( new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize, deleter ) ); } catch( std::exception &e ) { // the grb::Matrix constructor may only throw on out of memory errors From bbe96c48e2de438039e587101225c048c40d4af1 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Mon, 14 Jul 2025 17:09:58 +0200 Subject: [PATCH 07/22] Add NUMA-aware workspace allocation to the transition path --- include/transition/solver.h | 40 ++++++++---- src/transition/kml_iss.cpp | 6 +- src/transition/solver.cpp | 124 ++++++++++++++++++++++++++---------- 3 files changed, 120 insertions(+), 50 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 1bc58f2d7..e61628b1e 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -230,6 +230,10 @@ typedef int (*sparse_cg_preconditioner_dxx_t) ( * @param[in] a The nonzero values of the system matrix. * @param[in] ja The column indices of the nonzeroes of the system matrix. * @param[in] ia The row offset array of the system matrix. + * @param[in] numa Whether the CG handle should employ more than one NUMA + * domain, in which case any memory allocated during + * construction of this handle will attempt to employ NUMA- + * aware allocation. * * This variant is for single-precision floating point nonzeroes and integer * \a ja and \a ia arrays, as also indicated by the sii postfix. @@ -252,7 +256,8 @@ typedef int (*sparse_cg_preconditioner_dxx_t) ( */ sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia + const float * const a, const int * const ja, const int * const ia, + const bool numa ); /** @@ -271,7 +276,8 @@ sparse_err_t sparse_cg_init_sii( */ sparse_err_t sparse_cg_init_nop_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia + const float * const a, const int * const ja, const int * const ia, + const bool numa ); /** @@ -284,7 +290,8 @@ sparse_err_t sparse_cg_init_nop_sii( */ sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia + const double * const a, const int * const ja, const int * const ia, + const bool numa ); /** @@ -303,7 +310,8 @@ sparse_err_t sparse_cg_init_dii( */ sparse_err_t sparse_cg_init_nop_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia + const double * const a, const int * const ja, const int * const ia, + const bool numa ); /** @@ -317,7 +325,8 @@ sparse_err_t sparse_cg_init_nop_dii( */ sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia + const float * const a, const int * const ja, const size_t * const ia, + const bool numa ); /** @@ -336,7 +345,8 @@ sparse_err_t sparse_cg_init_siz( */ sparse_err_t sparse_cg_init_nop_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia + const float * const a, const int * const ja, const size_t * const ia, + const bool numa ); /** @@ -350,7 +360,8 @@ sparse_err_t sparse_cg_init_nop_siz( */ sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia + const double * const a, const int * const ja, const size_t * const ia, + const bool numa ); /** @@ -369,7 +380,8 @@ sparse_err_t sparse_cg_init_diz( */ sparse_err_t sparse_cg_init_nop_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia + const double * const a, const int * const ja, const size_t * const ia, + const bool numa ); /** @@ -383,7 +395,8 @@ sparse_err_t sparse_cg_init_nop_diz( */ sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia + const float * const a, const size_t * const ja, const size_t * const ia, + const bool numa ); /** @@ -402,7 +415,8 @@ sparse_err_t sparse_cg_init_szz( */ sparse_err_t sparse_cg_init_nop_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia + const float * const a, const size_t * const ja, const size_t * const ia, + const bool numa ); /** @@ -416,7 +430,8 @@ sparse_err_t sparse_cg_init_nop_szz( */ sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia + const double * const a, const size_t * const ja, const size_t * const ia, + const bool numa ); /** @@ -435,7 +450,8 @@ sparse_err_t sparse_cg_init_dzz( */ sparse_err_t sparse_cg_init_nop_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia + const double * const a, const size_t * const ja, const size_t * const ia, + const bool numa ); // Note that szi and dzi are skipped on purpose. Such variants would not seem diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp index 53a1b286b..d507587ea 100644 --- a/src/transition/kml_iss.cpp +++ b/src/transition/kml_iss.cpp @@ -148,9 +148,9 @@ int KML_CG_PREFIXED( InitSI )( KmlSolverTask **handle, int n, const float *a, const int *ja, const int *ia ) { - // negative numbers become positive when casted to size_t + // negative numbers become positive when cast to size_t if( n <= 0 ) { return KMLSS_BAD_N; } - sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia ); + sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia, true ); return sparse_err_t_2_int( err ); } @@ -160,7 +160,7 @@ int KML_CG_PREFIXED( InitDI )( ) { // negative numbers become positive when casted to size_t if( n <= 0 ) { return KMLSS_BAD_N; } - sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia ); + sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia, true ); return sparse_err_t_2_int( err ); } diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index f2b28ab01..a976bceb7 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include "solver.h" @@ -46,10 +47,37 @@ class CGWorkspaceDeleter { private: + /** Whether we own the workspace data. */ + const bool owning; + + /** + * Whether the workspace data was allocated NUMA-aware, and if so, what the + * allocation size was. + */ + const size_t numa; + + public: - void operator()( const void * ptr ) { - delete [] static_cast(ptr); + CGWorkspaceDeleter() = delete; + + CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) : + owning( owning_in ), numa( numa_in ) + {} + + void operator()( void * ptr ) { + assert( !( !owning && numa ) ); + if( owning ) { +#ifndef _GRB_NO_LIBNUMA + if( numa ) { + numa_free( ptr, numa ); + } else { +#endif + delete [] static_cast< char * >(ptr); +#ifndef _GRB_NO_LIBNUMA + } +#endif + } } }; @@ -409,21 +437,35 @@ template< typename T, typename NZI, typename RSI > static sparse_err_t sparse_cg_init_impl_no_buffer( sparse_cg_handle_t * const handle, const size_t n, const T * const a, const RSI * const ja, const NZI * const ia, - bool support_preconditioning + const bool support_preconditioning, const bool numa ) { - // we pass true here, since we want to support the entire solver transition - // path API out of the box const size_t allocSize = CG_Data< T, NZI, RSI >:: workspaceSize( n, support_preconditioning ); void * buffer = nullptr; - try { - buffer = static_cast< void * >(new char[ allocSize ]); - } catch( ... ) { - std::cerr << "Error allocating workspace buffer\n"; - return OUT_OF_MEMORY; +#ifdef _GRB_NO_LIBNUMA + if( numa ) { + std::cerr << "Warning: solver transition path was requested to perform " + << "NUMA-aware allocation, but ALP was compiled without libnuma.\n"; + } +#else + if( numa ) { + buffer = numa_alloc_interleaved( allocSize ); + if( buffer == NULL ) { return OUT_OF_MEMORY; } + } else { +#endif + try { + buffer = static_cast< void * >(new char[ allocSize ]); + } catch( ... ) { + std::cerr << "Error allocating workspace buffer\n"; + return OUT_OF_MEMORY; + } +#ifndef _GRB_NO_LIBNUMA } +#endif const sparse_err_t rc = sparse_cg_init_impl( - handle, n, a, ja, ia, buffer, allocSize, CGWorkspaceDeleter() ); + handle, n, a, ja, ia, buffer, allocSize, + CGWorkspaceDeleter( true, numa ? allocSize : 0 ) + ); if( rc != NO_ERROR ) { delete [] static_cast< char * >(buffer); } @@ -432,98 +474,110 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( sparse_err_t sparse_cg_init_nop_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia + const float * const a, const int * const ja, const int * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, - false ); + false, numa ); } sparse_err_t sparse_cg_init_nop_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia + const double * const a, const int * const ja, const int * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, - false ); + false, numa ); } sparse_err_t sparse_cg_init_nop_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia + const float * const a, const int * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, - ia, false ); + ia, false, numa ); } sparse_err_t sparse_cg_init_nop_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia + const double * const a, const int * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, - ia, false ); + ia, false, numa ); } sparse_err_t sparse_cg_init_nop_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia + const float * const a, const size_t * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, - ia, false ); + ia, false, numa ); } sparse_err_t sparse_cg_init_nop_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia + const double * const a, const size_t * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, - ia, false ); + ia, false, numa ); } sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia + const float * const a, const int * const ja, const int * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, - true ); + true, numa ); } sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia + const double * const a, const int * const ja, const int * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, - true ); + true, numa ); } sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia + const float * const a, const int * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, - ia, true ); + ia, true, numa ); } sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia + const double * const a, const int * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, - ia, true ); + ia, true, numa ); } sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia + const float * const a, const size_t * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, - ia, true ); + ia, true, numa ); } sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia + const double * const a, const size_t * const ja, const size_t * const ia, + const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, - ia, true ); + ia, true, numa ); } template< typename T, typename NZI, typename RSI > From 8706b69208c7a7b1de289a4c7c0221476813aa27 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Tue, 15 Jul 2025 10:44:48 +0200 Subject: [PATCH 08/22] Test false sharing effects --- src/transition/solver.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index a976bceb7..314b67cff 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -237,7 +237,6 @@ class CG_Data { * @param[in] buffer_size The size of the given \a buffer. Used for sanity * checking the use of the buffer. * - * * @param[in] buffer_deleter How the given \a buffer should be deleted on * destruction of this instance. * @@ -261,6 +260,9 @@ class CG_Data { assert( a != nullptr ); assert( ja != nullptr ); assert( ia != nullptr ); + constexpr size_t align = (64 % sizeof(int) == 0) + ? 64 + : (64 + (sizeof(int) - (64 % sizeof(int)))); if( buffer_size < workspaceSize( n, false ) ) { throw std::invalid_argument( "The given buffer size is too small" ); } @@ -275,7 +277,7 @@ class CG_Data { } workspace_ptr += n * sizeof( T ); workspace_ptr += - (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); + (align - (reinterpret_cast(workspace_ptr) % align)); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); { T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); @@ -284,7 +286,7 @@ class CG_Data { } workspace_ptr += n * sizeof( T ); workspace_ptr += - (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); + (align - (reinterpret_cast(workspace_ptr) % align)); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); { T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); @@ -294,7 +296,7 @@ class CG_Data { if( buffer_size >= workspaceSize( n, true ) ) { workspace_ptr += n * sizeof( T ); workspace_ptr += - (sizeof(int) - (reinterpret_cast(workspace_ptr) % sizeof(int))); + (align - (reinterpret_cast(workspace_ptr) % align)); assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n ); T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr); grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector ); @@ -440,7 +442,7 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( const bool support_preconditioning, const bool numa ) { const size_t allocSize = CG_Data< T, NZI, RSI >:: - workspaceSize( n, support_preconditioning ); + workspaceSize( n, support_preconditioning ) + 256; // TODO hide this const void * buffer = nullptr; #ifdef _GRB_NO_LIBNUMA if( numa ) { From a9224f26b2f025ecfed6b12d8339e76928d3ee73 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 14:28:55 +0200 Subject: [PATCH 09/22] Going to take that last suggestion from the MR -- modify header file accordingly --- include/transition/solver.h | 154 +++++++++++++----------------------- 1 file changed, 56 insertions(+), 98 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index e61628b1e..95e1a1743 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -230,10 +230,22 @@ typedef int (*sparse_cg_preconditioner_dxx_t) ( * @param[in] a The nonzero values of the system matrix. * @param[in] ja The column indices of the nonzeroes of the system matrix. * @param[in] ia The row offset array of the system matrix. - * @param[in] numa Whether the CG handle should employ more than one NUMA - * domain, in which case any memory allocated during - * construction of this handle will attempt to employ NUMA- - * aware allocation. + * @param[in] precon Whether the CG handle is expected to execute solves with + * preconditioning. + * @param[in] numa Whether the CG handle is expected to execute solves that + * employ more than one NUMA domain, in which case any memory + * allocated during construction of this handle may attempt + * to employ NUMA-aware allocation. + * + * The Boolean flags \a precon and \a numa indicated above are \em hints to the + * underlying ALP implementation. + * + * \note This means that even if \a precon was set to false during + * handle creation, a user may still request preconditioning and expect + * no error. Similarly, even if \a numa was set true during + * handle creation, the underlying ALP implementation may still opt to + * \em not employ NUMA-aware allocation, for example, if it was configured + * without libnuma support. * * This variant is for single-precision floating point nonzeroes and integer * \a ja and \a ia arrays, as also indicated by the sii postfix. @@ -254,30 +266,21 @@ typedef int (*sparse_cg_preconditioner_dxx_t) ( * successfully. Only in this case shall \a handle * henceforth be a \em valid handle. */ -sparse_err_t sparse_cg_init_sii( +sparse_err_t sparse_cg_init_opt_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. - * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_sii. + * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_sii for full details. */ -sparse_err_t sparse_cg_init_nop_sii( +sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, - const bool numa + const float * const a, const int * const ja, const int * const ia ); /** @@ -288,30 +291,21 @@ sparse_err_t sparse_cg_init_nop_sii( * * @see #sparse_cg_init_sii for full documentation. */ -sparse_err_t sparse_cg_init_dii( +sparse_err_t sparse_cg_init_opt_dii( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const int * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. - * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_dii. + * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_dii for full details. */ -sparse_err_t sparse_cg_init_nop_dii( +sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia, - const bool numa + const double * const a, const int * const ja, const int * const ia ); /** @@ -323,30 +317,21 @@ sparse_err_t sparse_cg_init_nop_dii( * * @see #sparse_cg_init_sii for full documentation. */ -sparse_err_t sparse_cg_init_siz( +sparse_err_t sparse_cg_init_opt_siz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const size_t * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. + * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_siz. - * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_siz for full details. */ -sparse_err_t sparse_cg_init_nop_siz( +sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia, - const bool numa + const float * const a, const int * const ja, const size_t * const ia ); /** @@ -358,30 +343,21 @@ sparse_err_t sparse_cg_init_nop_siz( * * @see #sparse_cg_init_sii for full documentation. */ -sparse_err_t sparse_cg_init_diz( +sparse_err_t sparse_cg_init_opt_diz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const size_t * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. - * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_diz. + * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_diz for full details. */ -sparse_err_t sparse_cg_init_nop_diz( +sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia, - const bool numa + const double * const a, const int * const ja, const size_t * const ia ); /** @@ -396,27 +372,18 @@ sparse_err_t sparse_cg_init_nop_diz( sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. + * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_szz. - * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_szz for full details. */ -sparse_err_t sparse_cg_init_nop_szz( +sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const float * const a, const size_t * const ja, const size_t * const ia ); /** @@ -428,30 +395,21 @@ sparse_err_t sparse_cg_init_nop_szz( * * @see #sparse_cg_init_sii for full documentation. */ -sparse_err_t sparse_cg_init_dzz( +sparse_err_t sparse_cg_init_opt_dzz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const bool precon, const bool numa ); /** - * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t that - * does not support preconditioned solves. Handles constructed using this call - * require less memory. + * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t with + * default hints (with preconditioning and with NUMA). * - * To have a handle returned by this function be used for a preconditioned solve - * anyway, the handle must first be destroyed and then re-created using a call - * to #sparse_cg_init_dzz. - * - * \note Rationale: the transition path API opted to have this feature - * implemented via a separate constructor call to make the fact that - * resulting handles cannot be used for preconditioned solves as explicit - * as possible. + * @see sparse_cg_init_opt_dzz for full details. */ sparse_err_t sparse_cg_init_nop_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const double * const a, const size_t * const ja, const size_t * const ia ); // Note that szi and dzi are skipped on purpose. Such variants would not seem From b7caeb3adf0b1430482160b8f3e06eff1f7529d8 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 14:54:28 +0200 Subject: [PATCH 10/22] Corresponding cpp changes to previous commit --- src/transition/solver.cpp | 66 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 314b67cff..c1441138b 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -474,112 +474,106 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( return rc; } -sparse_err_t sparse_cg_init_nop_sii( +sparse_err_t sparse_cg_init_opt_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, - false, numa ); + precond, numa ); } -sparse_err_t sparse_cg_init_nop_dii( +sparse_err_t sparse_cg_init_opt_dii( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const int * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, - false, numa ); + precond, numa ); } -sparse_err_t sparse_cg_init_nop_siz( +sparse_err_t sparse_cg_init_opt_siz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const size_t * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, - ia, false, numa ); + ia, precond, numa ); } -sparse_err_t sparse_cg_init_nop_diz( +sparse_err_t sparse_cg_init_opt_diz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const int * const ja, const size_t * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, - ia, false, numa ); + ia, precond, numa ); } -sparse_err_t sparse_cg_init_nop_szz( +sparse_err_t sparse_cg_init_opt_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, - ia, false, numa ); + ia, precond, numa ); } -sparse_err_t sparse_cg_init_nop_dzz( +sparse_err_t sparse_cg_init_opt_dzz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const bool precond, const bool numa ) { return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, - ia, false, numa ); + ia, precond, numa ); } sparse_err_t sparse_cg_init_sii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, - const bool numa + const float * const a, const int * const ja, const int * const ia ) { return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia, - true, numa ); + true, true ); } sparse_err_t sparse_cg_init_dii( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const int * const ia, - const bool numa + const double * const a, const int * const ja, const int * const ia ) { return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia, - true, numa ); + true, true ); } sparse_err_t sparse_cg_init_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const size_t * const ia, - const bool numa + const float * const a, const int * const ja, const size_t * const ia ) { return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, - ia, true, numa ); + ia, true, true ); } sparse_err_t sparse_cg_init_diz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const int * const ja, const size_t * const ia, - const bool numa + const double * const a, const int * const ja, const size_t * const ia ) { return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, - ia, true, numa ); + ia, true, true ); } sparse_err_t sparse_cg_init_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const float * const a, const size_t * const ja, const size_t * const ia ) { return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, - ia, true, numa ); + ia, true, true ); } sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, - const double * const a, const size_t * const ja, const size_t * const ia, - const bool numa + const double * const a, const size_t * const ja, const size_t * const ia ) { return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, - ia, true, numa ); + ia, true, true ); } template< typename T, typename NZI, typename RSI > From c3efc055b1e00244e31de768f5ab57af17f9e81e Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 14:55:43 +0200 Subject: [PATCH 11/22] Bugfix --- include/transition/solver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 95e1a1743..1291005f4 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -369,7 +369,7 @@ sparse_err_t sparse_cg_init_diz( * * @see #sparse_cg_init_sii for full documentation. */ -sparse_err_t sparse_cg_init_szz( +sparse_err_t sparse_cg_init_opt_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const size_t * const ja, const size_t * const ia, const bool precon, const bool numa From e9f9e4afb6a9ec4cfbf9350d2dd3ffd08e5d2031 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 14:56:23 +0200 Subject: [PATCH 12/22] Update KML_Solver with those changes --- src/transition/kml_iss.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp index d507587ea..b348b4d85 100644 --- a/src/transition/kml_iss.cpp +++ b/src/transition/kml_iss.cpp @@ -150,7 +150,7 @@ int KML_CG_PREFIXED( InitSI )( ) { // negative numbers become positive when cast to size_t if( n <= 0 ) { return KMLSS_BAD_N; } - sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia, true ); + sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia ); return sparse_err_t_2_int( err ); } @@ -160,7 +160,7 @@ int KML_CG_PREFIXED( InitDI )( ) { // negative numbers become positive when casted to size_t if( n <= 0 ) { return KMLSS_BAD_N; } - sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia, true ); + sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia ); return sparse_err_t_2_int( err ); } From 612cc5b5e3f4ed33423b22784af01465a33f9eae Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 15:02:26 +0200 Subject: [PATCH 13/22] Restore default behaviour --- src/transition/solver.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index c1441138b..b72a64554 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -367,12 +367,20 @@ class CG_Data { * allocation. */ void setPreconditioner( const preconditioner_t in, void * const data ) { - if( grb::size( precond_workspace ) == 0 ) { - throw std::logic_error( "CG handle has no preconditioned solve support" ); - } preconditioner = in; preconditioner_data = data; assert( !( !preconditioner && preconditioner_data ) ); + if( grb::size( precond_workspace ) == 0 ) { + // note -- using the ALP default vector allocation here is the only + // convenient option: otherwise has to implement buffer management + // for this vector only. If it is important that this vector be allocated + // without a SPA, then the user should set the precond hint to true. + grb::Vector< T > replace( size ); + std::swap( replace, precond_workspace ); + std::cerr << "Warning: allocating additional workspace to handle CG solves " + << "with preconditioning. To prevent this on-demand allocation, set the " + << "precond option during CG solver handle creation to true.\n"; + } assert( grb::size( precond_workspace ) == size ); } @@ -886,9 +894,6 @@ static sparse_err_t sparse_cg_set_preconditioner_impl( try { static_cast< CG_Data< T, NZI, RSI > * >( handle )-> setPreconditioner( c_precond_p, c_precond_data_p ); - } catch( std::logic_error &e ) { - std::cerr << e.what() << "\n"; - return ILLEGAL_METHOD; } catch( std::exception &e ) { std::cerr << e.what() << "\n"; std::cerr << "This is an unexpected error; please submit a bug report\n"; From e3436449c35ce745bcf1a55a0a9dfdd482cfae37 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 15:02:55 +0200 Subject: [PATCH 14/22] Remove now-again unnecesary error code --- include/transition/solver.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 1291005f4..a45ba8aa1 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -132,11 +132,6 @@ typedef enum { */ ILLEGAL_ARGUMENT, - /** - * Illegal method was called. - */ - ILLEGAL_METHOD, - /** * Out of memory error detected during call. */ From cf33c8a5c8bc071ba93d95bb6a45e28bf4cbc555 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 15:03:53 +0200 Subject: [PATCH 15/22] Bring KML_Solver impl up to date with previous change --- src/transition/kml_iss.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp index b348b4d85..5c0fa2d0e 100644 --- a/src/transition/kml_iss.cpp +++ b/src/transition/kml_iss.cpp @@ -116,14 +116,6 @@ static int sparse_err_t_2_int( const sparse_err_t err ) { result = KMLSS_BAD_N; break; } - case ILLEGAL_METHOD: - { - // there is no case where this error should be exposed to a KML API user; - // rather, if encountered, it indicates an error in translating the KML API - // to the solver transition path - result = KMLSS_INTERNAL_ERROR; - break; - } case OUT_OF_MEMORY: { result = KMLSS_NO_MEMORY; From 2ff5b7097b4d4a04e0a10e0b38e79890ba375493 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 18:35:42 +0200 Subject: [PATCH 16/22] Specify constructor variants that take a user-supplied buffer instead of allocating the required workspace automatically --- include/transition/solver.h | 177 ++++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index a45ba8aa1..3bc75a636 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -260,6 +260,13 @@ typedef int (*sparse_cg_preconditioner_dxx_t) ( * @returns #NO_ERROR If initialisation of the handle proceeded * successfully. Only in this case shall \a handle * henceforth be a \em valid handle. + * + * On returning #NO_ERROR, \a handle shall correspond to an initialised (P)CG + * solver instance. Note that this handle contains workspace memory that are + * necessary for auxiliary vectors required by the CG algorithm. When + * preconditioning is employed, four such workspace vectors of size \a n are + * required; otherwise, only three such vectors are required. This memory will + * be freed on a call to #sparse_cg_destroy_sii. */ sparse_err_t sparse_cg_init_opt_sii( sparse_cg_handle_t * const handle, const size_t n, @@ -268,7 +275,7 @@ sparse_err_t sparse_cg_init_opt_sii( ); /** - * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_sii that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_sii for full details. @@ -278,6 +285,56 @@ sparse_err_t sparse_cg_init_sii( const float * const a, const int * const ja, const int * const ia ); +/** + * Variant of #sparse_cg_init_opt_sii that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * This variant of initialisation guarantees no additional dynamic memory + * allocation will be performed. A call to #sparse_cg_destroy_sii will likewise + * \em not free up the given \a workspace memory. + * + * @param[in,out] workspace The user-supplied memory that will be used as the + * work space for the returned PCG solver \a handle. + * + * Previous contents of the given \a workspace will be ignored, and, when given + * to this function, should be considered lost. On a successful call, the given + * memory should be considered to be under the sole custody of the PCG solver + * \a handle until the time the handle is destroyed; at that point, the + * ownership of the given memory will be returned to the callee. If the call to + * this function is not successful, the ownership of the memory region returns + * to the callee immediately. For as long as ownership of the memory region is + * not with the callee, the contents of the memory region are undefined. + * + * @param[in] workspace_size The size of the memory pointed to by \a workspace. + * + * The given workspace must have byte size greater than or equal to the value + * returned by a call to #sparse_cg_workspace_size_s. + * + * \note For variants that use double-precision values, the workspace must have + * byte size greater than or equal to the value returned by a call to + * #sparse_cg_workspace_size_d. + * + * \note Note that the hints that can be supplied to #sparse_cg_init_opt_sii + * affect its internal workspace allocation mechanisms. Therefore, those + * hints do not apply to this PCG solver initialisation variant. + * + * In addition to the possible return codes defined by #sparse_cg_init_opt_sii, + * a call to this variant may furthermore return: + * + * @returns #NULL_ARGUMENT If \a workspace equals NULL. + * @returns #ILLEGAL_ARGUMENT If \a workspace_size is smaller than required. + * + * Different from the specification of #sparse_cg_init_opt_sii, this variant + * may never return #OUT_OF_MEMORY. + * + * For further details, see #sparse_cg_init_opt_sii. + */ +sparse_err_t sparse_cg_manual_init_sii( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -293,7 +350,7 @@ sparse_err_t sparse_cg_init_opt_dii( ); /** - * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_dii that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_dii for full details. @@ -303,6 +360,18 @@ sparse_err_t sparse_cg_init_dii( const double * const a, const int * const ja, const int * const ia ); +/** + * Variant of #sparse_cg_init_opt_dii that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * @see #sparse_cg_manual_init_sii for full details. + */ +sparse_err_t sparse_cg_manual_init_dii( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -319,7 +388,7 @@ sparse_err_t sparse_cg_init_opt_siz( ); /** - * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_siz that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_siz for full details. @@ -329,6 +398,18 @@ sparse_err_t sparse_cg_init_siz( const float * const a, const int * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * @see #sparse_cg_manual_init_sii for full details. + */ +sparse_err_t sparse_cg_manual_init_siz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -345,7 +426,7 @@ sparse_err_t sparse_cg_init_opt_diz( ); /** - * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_diz that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_diz for full details. @@ -355,6 +436,18 @@ sparse_err_t sparse_cg_init_diz( const double * const a, const int * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * @see #sparse_cg_manual_init_sii for full details. + */ +sparse_err_t sparse_cg_manual_init_diz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -371,7 +464,7 @@ sparse_err_t sparse_cg_init_opt_szz( ); /** - * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_szz that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_szz for full details. @@ -381,6 +474,18 @@ sparse_err_t sparse_cg_init_szz( const float * const a, const size_t * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * @see #sparse_cg_manual_init_sii for full details. + */ +sparse_err_t sparse_cg_manual_init_szz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + /** * Initialises a #sparse_cg_handle_t object. * @@ -397,16 +502,74 @@ sparse_err_t sparse_cg_init_opt_dzz( ); /** - * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t with + * Variant of #sparse_cg_init_opt_dzz that results in a #sparse_cg_handle_t with * default hints (with preconditioning and with NUMA). * * @see sparse_cg_init_opt_dzz for full details. */ -sparse_err_t sparse_cg_init_nop_dzz( +sparse_err_t sparse_cg_init_dzz( sparse_cg_handle_t * const handle, const size_t n, const double * const a, const size_t * const ja, const size_t * const ia ); +/** + * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but + * instead employs given user memory as its workspace memory. + * + * @see #sparse_cg_manual_init_sii for full details. + */ +sparse_err_t sparse_cg_manual_init_dzz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + const void * workspace, const size_t workspace_size +); + +/** + * Returns the minimum workspace size required when manually supplying a + * workspace for PCG solvers to initialise with. + * + * Those so-called manual initialisation functions are the following: + * - #sparse_cg_manual_init_sii, + * - #sparse_cg_manual_init_siz, and + * - #sparse_cg_manual_init_szz. + * + * @param[in] precon Whether the solver is expected to use a preconditioner. + * + * \note If preconditioning is required, the solver requires a larger work + * space. Note, however, that even if not enough workspace is supplied + * for a preconditioned solve, nonetheless requesting a preconditioned + * solve will still execute -- it will then dynamically allocate memory + * on the fly. + * + * A call to this function never fails. + * + * @returns The workspace size in bytes. + */ +size_t sparse_cg_workspace_size_s( const bool precon ); + +/** + * Returns the minimum workspace size required when manually supplying a + * workspace for PCG solvers to initialise with. + * + * Those so-called manual initialisation functions are the following: + * - #sparse_cg_manual_init_dii, + * - #sparse_cg_manual_init_diz, and + * - #sparse_cg_manual_init_dzz. + * + * @param[in] precon Whether the solver is expected to use a preconditioner. + * + * \note If preconditioning is required, the solver requires a larger work + * space. Note, however, that even if not enough workspace is supplied + * for a preconditioned solve, nonetheless requesting a preconditioned + * solve will still execute -- it will then dynamically allocate memory + * on the fly. + * + * A call to this function never fails. + * + * @returns The workspace size in bytes. + */ +size_t sparse_cg_workspace_size_d( const bool precon ); + // Note that szi and dzi are skipped on purpose. Such variants would not seem // sensible, though could easily be provided if they do turn out to be needed From 1ad71e3c4a9c889fcf0c0b14582959f98e6e810b Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 18:47:29 +0200 Subject: [PATCH 17/22] Implement manual initialisation API --- include/transition/solver.h | 12 ++++---- src/transition/solver.cpp | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 3bc75a636..968054f94 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -332,7 +332,7 @@ sparse_err_t sparse_cg_init_sii( sparse_err_t sparse_cg_manual_init_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** @@ -369,7 +369,7 @@ sparse_err_t sparse_cg_init_dii( sparse_err_t sparse_cg_manual_init_dii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** @@ -407,7 +407,7 @@ sparse_err_t sparse_cg_init_siz( sparse_err_t sparse_cg_manual_init_siz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** @@ -445,7 +445,7 @@ sparse_err_t sparse_cg_init_diz( sparse_err_t sparse_cg_manual_init_diz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** @@ -483,7 +483,7 @@ sparse_err_t sparse_cg_init_szz( sparse_err_t sparse_cg_manual_init_szz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** @@ -521,7 +521,7 @@ sparse_err_t sparse_cg_init_dzz( sparse_err_t sparse_cg_manual_init_dzz( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, - const void * workspace, const size_t workspace_size + void * workspace, const size_t workspace_size ); /** diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index b72a64554..9b1fc9860 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -431,6 +431,8 @@ static sparse_err_t sparse_cg_init_impl( if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) { return NULL_ARGUMENT; } + if( buffer == nullptr ) { return NULL_ARGUMENT; } + // TODO bounds-check bufferSize try { *handle = static_cast< void * >( new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize, deleter ) ); @@ -482,6 +484,60 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( return rc; } +sparse_err_t sparse_cg_manual_init_sii( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const int * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + +sparse_err_t sparse_cg_manual_init_dii( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const int * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + +sparse_err_t sparse_cg_manual_init_siz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const int * const ja, const size_t * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + +sparse_err_t sparse_cg_manual_init_diz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const int * const ja, const size_t * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + +sparse_err_t sparse_cg_manual_init_szz( + sparse_cg_handle_t * const handle, const size_t n, + const float * const a, const size_t * const ja, const size_t * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + +sparse_err_t sparse_cg_manual_init_dzz( + sparse_cg_handle_t * const handle, const size_t n, + const double * const a, const size_t * const ja, const size_t * const ia, + void * workspace, const size_t workspace_size +) { + return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia, + workspace, workspace_size, CGWorkspaceDeleter( false, false ) ); +} + sparse_err_t sparse_cg_init_opt_sii( sparse_cg_handle_t * const handle, const size_t n, const float * const a, const int * const ja, const int * const ia, From 5e31461b8b750d2ae7f4fe7d87f782cae128aad2 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 18:57:50 +0200 Subject: [PATCH 18/22] Spec-fix workspace size computation function, and implement it --- include/transition/solver.h | 8 ++++++-- src/transition/solver.cpp | 23 +++++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 968054f94..dc12ec59a 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -533,6 +533,8 @@ sparse_err_t sparse_cg_manual_init_dzz( * - #sparse_cg_manual_init_siz, and * - #sparse_cg_manual_init_szz. * + * @param[in] n The maximum system size the solver will be initialised + * with. * @param[in] precon Whether the solver is expected to use a preconditioner. * * \note If preconditioning is required, the solver requires a larger work @@ -545,7 +547,7 @@ sparse_err_t sparse_cg_manual_init_dzz( * * @returns The workspace size in bytes. */ -size_t sparse_cg_workspace_size_s( const bool precon ); +size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ); /** * Returns the minimum workspace size required when manually supplying a @@ -556,6 +558,8 @@ size_t sparse_cg_workspace_size_s( const bool precon ); * - #sparse_cg_manual_init_diz, and * - #sparse_cg_manual_init_dzz. * + * @param[in] n The maximum system size the solver will be initialised + * with. * @param[in] precon Whether the solver is expected to use a preconditioner. * * \note If preconditioning is required, the solver requires a larger work @@ -568,7 +572,7 @@ size_t sparse_cg_workspace_size_s( const bool precon ); * * @returns The workspace size in bytes. */ -size_t sparse_cg_workspace_size_d( const bool precon ); +size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ); // Note that szi and dzi are skipped on purpose. Such variants would not seem // sensible, though could easily be provided if they do turn out to be needed diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 9b1fc9860..079ea8c54 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -206,10 +206,12 @@ class CG_Data { * per both the C and C++ specifications. */ static size_t workspaceSize( const size_t n, const bool preconditioned ) { + static_assert( grb::config::CACHE_LINE_SIZE::value() >= sizeof(int), + "Unhandled padding case; please submit a bug report" ); if( preconditioned ) { - return 4 * n * sizeof( T ) + 3 * sizeof( int ); + return 4 * n * sizeof( T ) + 3 * grb::config::CACHE_LINE_SIZE::value(); } else { - return 3 * n * sizeof( T ) + 2 * sizeof( int ); + return 3 * n * sizeof( T ) + 2 * grb::config::CACHE_LINE_SIZE::value(); } } @@ -260,9 +262,10 @@ class CG_Data { assert( a != nullptr ); assert( ja != nullptr ); assert( ia != nullptr ); - constexpr size_t align = (64 % sizeof(int) == 0) - ? 64 - : (64 + (sizeof(int) - (64 % sizeof(int)))); + constexpr size_t L = grb::config::CACHE_LINE_SIZE::value(); + constexpr size_t align = (L % sizeof(int) == 0) + ? L + : (L + (sizeof(int) - (L % sizeof(int)))); if( buffer_size < workspaceSize( n, false ) ) { throw std::invalid_argument( "The given buffer size is too small" ); } @@ -445,6 +448,14 @@ static sparse_err_t sparse_cg_init_impl( return NO_ERROR; } +size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ) { + return CG_Data< float, size_t, size_t >::workspaceSize( n, precon ); +} + +size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ) { + return CG_Data< double, size_t, size_t >::workspaceSize( n, precon ); +} + template< typename T, typename NZI, typename RSI > static sparse_err_t sparse_cg_init_impl_no_buffer( sparse_cg_handle_t * const handle, const size_t n, @@ -452,7 +463,7 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( const bool support_preconditioning, const bool numa ) { const size_t allocSize = CG_Data< T, NZI, RSI >:: - workspaceSize( n, support_preconditioning ) + 256; // TODO hide this const + workspaceSize( n, support_preconditioning ); void * buffer = nullptr; #ifdef _GRB_NO_LIBNUMA if( numa ) { From b07f383ad64f22c153888fc2b1fabe242b72ccf3 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 18:59:35 +0200 Subject: [PATCH 19/22] Sanity check given buffer sizes on initialisation --- src/transition/solver.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 079ea8c54..6e029b858 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -435,7 +435,9 @@ static sparse_err_t sparse_cg_init_impl( return NULL_ARGUMENT; } if( buffer == nullptr ) { return NULL_ARGUMENT; } - // TODO bounds-check bufferSize + if( bufferSize < CG_Data< T, NZI, RSI >::workspaceSize( n, false ) ) { + return ILLEGAL_ARGUMENT; + } try { *handle = static_cast< void * >( new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize, deleter ) ); From 43fc263856cd280860fda5f423f4e069f305f84a Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 23 Jul 2025 19:08:42 +0200 Subject: [PATCH 20/22] Partial code review -- will finish up tomorrow --- include/transition/solver.h | 20 ++++++++++---------- src/transition/solver.cpp | 4 +++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index dc12ec59a..58b9cdae6 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -341,7 +341,7 @@ sparse_err_t sparse_cg_manual_init_sii( * This variant is for double-precision floating point nonzeroes and integer * \a ja and \a ia arrays, as also indicated by the dii postfix. * - * @see #sparse_cg_init_sii for full documentation. + * @see #sparse_cg_init_opt_sii for full documentation. */ sparse_err_t sparse_cg_init_opt_dii( sparse_cg_handle_t * const handle, const size_t n, @@ -368,7 +368,7 @@ sparse_err_t sparse_cg_init_dii( */ sparse_err_t sparse_cg_manual_init_dii( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, + const double * const a, const int * const ja, const int * const ia, void * workspace, const size_t workspace_size ); @@ -379,7 +379,7 @@ sparse_err_t sparse_cg_manual_init_dii( * size_t-valued \a ja, and integer-valued \a ia, as also indicated by * the siz postfix. * - * @see #sparse_cg_init_sii for full documentation. + * @see #sparse_cg_init_opt_sii for full documentation. */ sparse_err_t sparse_cg_init_opt_siz( sparse_cg_handle_t * const handle, const size_t n, @@ -406,7 +406,7 @@ sparse_err_t sparse_cg_init_siz( */ sparse_err_t sparse_cg_manual_init_siz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, + const float * const a, const int * const ja, const size_t * const ia, void * workspace, const size_t workspace_size ); @@ -417,7 +417,7 @@ sparse_err_t sparse_cg_manual_init_siz( * size_t-valued \a ja, and integer-valued \a ia, as also indicated by * the diz postfix. * - * @see #sparse_cg_init_sii for full documentation. + * @see #sparse_cg_init_opt_sii for full documentation. */ sparse_err_t sparse_cg_init_opt_diz( sparse_cg_handle_t * const handle, const size_t n, @@ -444,7 +444,7 @@ sparse_err_t sparse_cg_init_diz( */ sparse_err_t sparse_cg_manual_init_diz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, + const double * const a, const int * const ja, const size_t * const ia, void * workspace, const size_t workspace_size ); @@ -455,7 +455,7 @@ sparse_err_t sparse_cg_manual_init_diz( * size_t-valued \a ja and \a ia, as also indicated by the szz * postfix. * - * @see #sparse_cg_init_sii for full documentation. + * @see #sparse_cg_init_opt_sii for full documentation. */ sparse_err_t sparse_cg_init_opt_szz( sparse_cg_handle_t * const handle, const size_t n, @@ -482,7 +482,7 @@ sparse_err_t sparse_cg_init_szz( */ sparse_err_t sparse_cg_manual_init_szz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, + const float * const a, const size_t * const ja, const size_t * const ia, void * workspace, const size_t workspace_size ); @@ -493,7 +493,7 @@ sparse_err_t sparse_cg_manual_init_szz( * size_t-valued \a ja and \a ia, as also indicated by the dzz * postfix. * - * @see #sparse_cg_init_sii for full documentation. + * @see #sparse_cg_init_opt_sii for full documentation. */ sparse_err_t sparse_cg_init_opt_dzz( sparse_cg_handle_t * const handle, const size_t n, @@ -520,7 +520,7 @@ sparse_err_t sparse_cg_init_dzz( */ sparse_err_t sparse_cg_manual_init_dzz( sparse_cg_handle_t * const handle, const size_t n, - const float * const a, const int * const ja, const int * const ia, + const double * const a, const size_t * const ja, const size_t * const ia, void * workspace, const size_t workspace_size ); diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 6e029b858..68b946ba8 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -205,7 +205,9 @@ class CG_Data { * \internal Note that the space for two additional integers is required as * per both the C and C++ specifications. */ - static size_t workspaceSize( const size_t n, const bool preconditioned ) { + static size_t workspaceSize( + const size_t n, const bool preconditioned + ) noexcept { static_assert( grb::config::CACHE_LINE_SIZE::value() >= sizeof(int), "Unhandled padding case; please submit a bug report" ); if( preconditioned ) { From 616d4d785cf2dd8493dbcd187a63ba4a413dbf7b Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Thu, 24 Jul 2025 11:52:05 +0200 Subject: [PATCH 21/22] Code review on solver.h --- include/transition/solver.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/include/transition/solver.h b/include/transition/solver.h index 58b9cdae6..09e4f2acc 100644 --- a/include/transition/solver.h +++ b/include/transition/solver.h @@ -303,7 +303,8 @@ sparse_err_t sparse_cg_init_sii( * ownership of the given memory will be returned to the callee. If the call to * this function is not successful, the ownership of the memory region returns * to the callee immediately. For as long as ownership of the memory region is - * not with the callee, the contents of the memory region are undefined. + * not with the callee, the contents of the memory region are undefined while + * code not controlled by ALP is not allowed to write into the memory region. * * @param[in] workspace_size The size of the memory pointed to by \a workspace. * @@ -316,7 +317,7 @@ sparse_err_t sparse_cg_init_sii( * * \note Note that the hints that can be supplied to #sparse_cg_init_opt_sii * affect its internal workspace allocation mechanisms. Therefore, those - * hints do not apply to this PCG solver initialisation variant. + * hints do not apply to this manual PCG solver initialisation variant. * * In addition to the possible return codes defined by #sparse_cg_init_opt_sii, * a call to this variant may furthermore return: @@ -532,20 +533,21 @@ sparse_err_t sparse_cg_manual_init_dzz( * - #sparse_cg_manual_init_sii, * - #sparse_cg_manual_init_siz, and * - #sparse_cg_manual_init_szz. + * This is the variant for PCG solvers that operate on single-precision values. * * @param[in] n The maximum system size the solver will be initialised * with. * @param[in] precon Whether the solver is expected to use a preconditioner. * * \note If preconditioning is required, the solver requires a larger work - * space. Note, however, that even if not enough workspace is supplied - * for a preconditioned solve, nonetheless requesting a preconditioned - * solve will still execute -- it will then dynamically allocate memory - * on the fly. + * space. Note, however, that even if initially not enough workspace is + * supplied for a preconditioned solve, nonetheless requesting a + * preconditioned solve will still execute -- it will then dynamically + * allocate memory on the fly. * * A call to this function never fails. * - * @returns The workspace size in bytes. + * @returns The required workspace size in bytes. */ size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ); @@ -557,20 +559,21 @@ size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ); * - #sparse_cg_manual_init_dii, * - #sparse_cg_manual_init_diz, and * - #sparse_cg_manual_init_dzz. + * This is the variant for PCG solvers that operate on double-precision values. * * @param[in] n The maximum system size the solver will be initialised * with. * @param[in] precon Whether the solver is expected to use a preconditioner. * * \note If preconditioning is required, the solver requires a larger work - * space. Note, however, that even if not enough workspace is supplied - * for a preconditioned solve, nonetheless requesting a preconditioned - * solve will still execute -- it will then dynamically allocate memory - * on the fly. + * space. Note, however, that even if initially not enough workspace is + * supplied for a preconditioned solve, nonetheless requesting a + * preconditioned solve will still execute -- it will then dynamically + * allocate memory on the fly. * * A call to this function never fails. * - * @returns The workspace size in bytes. + * @returns The required workspace size in bytes. */ size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ); From efa742b852d4568510c379b303b2180af951fbae Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Thu, 24 Jul 2025 12:16:12 +0200 Subject: [PATCH 22/22] Code review solver.cpp --- src/transition/solver.cpp | 88 +++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 17 deletions(-) diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp index 68b946ba8..b0a93da36 100644 --- a/src/transition/solver.cpp +++ b/src/transition/solver.cpp @@ -37,11 +37,15 @@ */ /** - * Implements the various ways the CG workspace data may be freed. + * Implements the various ways the CG workspace data may (or may not) be freed. * - * At the moment, always assumes that the workspace buffer is allocated by the - * transition path itself, using the standard C++ array allocation mechanism. - * The matching deleter action employs the matching standard C++ array deleter. + * This deleter at present considers that + * -# the solver does not own the workspace data (and hence may not free it); + * -# the solver has allocated the workspace data using standard C++ array + * allocation mechanisms, for which this class provides the matching + * standard C++ array deleter; and + * -# the solver has allocated the workspace data using libNUMA, for which this + * class provides a call to the matching libNUMA free. */ class CGWorkspaceDeleter { @@ -53,18 +57,44 @@ class CGWorkspaceDeleter { /** * Whether the workspace data was allocated NUMA-aware, and if so, what the * allocation size was. + * + * \note The allocation size is required for a call to the libNUMA free + * function. */ const size_t numa; public: + /** Default construction is not allowed. */ CGWorkspaceDeleter() = delete; - CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) : + /** + * Explicit constructor that requires all information necessary to determine + * how the workspace should be deleted. + * + * @param[in] owning_in Whether the solver owns the workspace buffer. + * @param[in] numa_in Zero if libNUMA was not used for allocating the + * workspace bufer, and the allocation size otherwise. + * + * If \a owning_in is false, the input to \a numa_in is ignored. + * + * \note In that case, the user is reponsible for freeing memory, which indeed + * could have been done in a myriad of other ways besides those this + * deleter knows about. + * + * \internal In case ALP is compiled without libNUMA support, the standard C++ + * array mechanism will be assumed always. + */ + CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) noexcept : owning( owning_in ), numa( numa_in ) {} + /** + * Implements the correct action for freeing the workspace data. + * + * @param[in] ptr Pointer to the workspace memory to be deleted. + */ void operator()( void * ptr ) { assert( !( !owning && numa ) ); if( owning ) { @@ -83,6 +113,11 @@ class CGWorkspaceDeleter { }; /** + * The class behind the PCG solver handle. + * + * More precisely, a valid PCG solver handle is a C void pointer to an instance + * of this class. + * * @tparam T The nonzero value type. * @tparam NZI The nonzero index type. * @tparam RSI The row and column index type. @@ -197,13 +232,19 @@ class CG_Data { * The size, in bytes, required as a work space for the PCG algorithm. * * @param[in] n The linear system size. - * @param[in] preconditioned Whether the solver may be called with a - * preconditioner. + * @param[in] preconditioned Whether the solver is expected to be called with + * a preconditioner. * * @returns The required size, in bytes. * * \internal Note that the space for two additional integers is required as - * per both the C and C++ specifications. + * per both the C and C++ specifications. However, we here also + * prevent false sharing between different workspace vectors, and so + * actually align on the cache line size boundary. + * + * \warning The current implementation assumes that the cache line size is a + * multiple of sizeof(int). If this assertion ever fails, + * an error will be thrown at compile time (static_assert). */ static size_t workspaceSize( const size_t n, const bool preconditioned @@ -239,15 +280,19 @@ class CG_Data { * to this constructor. * * @param[in] buffer_size The size of the given \a buffer. Used for sanity - * checking the use of the buffer. + * checking the intended use of the buffer. * * @param[in] buffer_deleter How the given \a buffer should be deleted on * destruction of this instance. * - * \note If the buffer is not to be owned by this instance, then a no-op - * \a buffer_deleter should be given. + * \note If the buffer is not to be owned by this instance, then a non-owning + * \a buffer_deleter should be given; see #CGWorkspaceDeleter for + * details. + * + * \internal The sanity check is weaker if not compiled in debug mode so + * should be done explicitly \em before entering this constructor. * - * \warning The sanity check is weaker if not compiled in debug mode. + * \internal This constructor may throw exceptions. */ CG_Data( const size_t n, @@ -494,7 +539,15 @@ static sparse_err_t sparse_cg_init_impl_no_buffer( CGWorkspaceDeleter( true, numa ? allocSize : 0 ) ); if( rc != NO_ERROR ) { - delete [] static_cast< char * >(buffer); +#ifndef _GRB_NO_LIBNUMA + if( numa ) { + numa_free( buffer, allocSize ); + } else { +#endif + delete [] static_cast< char * >(buffer); +#ifndef _GRB_NO_LIBNUMA + } +#endif } return rc; } @@ -965,10 +1018,11 @@ static sparse_err_t sparse_cg_set_preconditioner_impl( try { static_cast< CG_Data< T, NZI, RSI > * >( handle )-> setPreconditioner( c_precond_p, c_precond_data_p ); - } catch( std::exception &e ) { - std::cerr << e.what() << "\n"; - std::cerr << "This is an unexpected error; please submit a bug report\n"; - return UNKNOWN; + } catch( ... ) { + // by virtue of the ALP interface guarantees, the only possible exception is + // due to out-of-memory conditions (when allocating an on-demand vector + // workspace for preconditioned solves). + return OUT_OF_MEMORY; } return NO_ERROR; }