From 074299615cf0d64ed37d786320130602875f1674 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Thu, 5 Jun 2025 13:00:22 +0200
Subject: [PATCH 01/22] WIP commit (meeting interrupt), apologies

---
 src/transition/solver.cpp | 61 +++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 5 deletions(-)
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index b52baca3d..9b279b05b 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -16,12 +16,13 @@
  */
 
 
-#include <assert.h>
 #include <array>
 
 #include <graphblas.hpp>
 #include <graphblas/algorithms/conjugate_gradient.hpp>
 
+#include <assert.h>
+
 #include "solver.h"
 
 /**
@@ -136,6 +137,20 @@ class CG_Data {
 
 	public:
 
+		/**
+		 * The size, in bytes, required as a work space for the PCG algorithm.
+		 *
+		 * @param[in] n The linear system size.
+		 *
+		 * @returns The required size, in bytes.
+		 *
+		 * \internal Note that the space for two additional integers is required as
+		 *           per both the C and C++ specifications.
+		 */
+		static size_t workspaceSize( const size_t n ) {
+			return 3 * sizeof( T ) + 2 * sizeof( int );
+		}
+
 		/** Disable default constructor. */
 		CG_Data() = delete;
 
@@ -149,16 +164,26 @@ class CG_Data {
 		 *
 		 * The matrix defined by \a a, \a ja, \a ia must be symmetric positive
 		 * definite.
+		 *
+		 * @param[in] buffer The workspace required by this algorithm. This must
+		 *                   point to a valid memory region that can be used
+		 *                   exclusively by ALP. The size of the memory region (in
+		 *                   bytes) must be greater or equal to that returned by
+		 *                   #workspaceSize( n ) with \a n equal to the value passed
+		 *                   to this constructor.
+		 *
+		 * @param[in] buffer_size The size of the given \a buffer. Used for sanity
+		 *                        checking the use of the buffer.
+		 *
+		 * \warning The sanity check is weaker if not compiled in debug mode.
 		 */
 		CG_Data(
 			const size_t n,
-			const T * const a, const RSI * const ja, const NZI * const ia
+			const T * const a, const RSI * const ja, const NZI * const ia,
+			void * const buffer, const size_t buffer_size
 		) :
 			size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ),
 			residual( std::numeric_limits< T >::infinity() ), iters( 0 ),
-			workspace( {
-				grb::Vector< T >( n ), grb::Vector< T >( n ), grb::Vector< T >( n )
-			} ),
 			precond_workspace( grb::Vector< T >( 0 ) ),
 			preconditioner( nullptr ), preconditioner_data( nullptr )
 		{
@@ -166,8 +191,34 @@ class CG_Data {
 			assert( a != nullptr );
 			assert( ja != nullptr );
 			assert( ia != nullptr );
+			if( buffer_size >= workspaceSize( n ) ) {
+				throw std::invalid_argument( "The given buffer size is too small" );
+			}
 			Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n );
 			std::swap( A, matrix );
+			char * workspace_ptr = buffer;
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				std::swap( workspace[ 0 ], tmp );
+			}
+			workspace_ptr += n * sizeof( T );
+			workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int)));
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				std::swap( workspace[ 1 ], tmp );
+			}
+			workspace_ptr += n * sizeof( T );
+			workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int)));
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				std::swap( workspace[ 2 ], tmp );
+			}
 		}
 
 		/** @returns The system size. */

From 5a494d4c15d7c4b5249809101b37dd8c4685d72d Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Thu, 5 Jun 2025 14:25:57 +0200
Subject: [PATCH 02/22] This compiles, now need to add in additional workspace
 for the buffer

---
 src/transition/solver.cpp | 54 ++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 9b279b05b..92089a02b 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -148,7 +148,7 @@ class CG_Data {
 		 *           per both the C and C++ specifications.
 		 */
 		static size_t workspaceSize( const size_t n ) {
-			return 3 * sizeof( T ) + 2 * sizeof( int );
+			return 3 * n * sizeof( T ) + 2 * sizeof( int );
 		}
 
 		/** Disable default constructor. */
@@ -196,27 +196,29 @@ class CG_Data {
 			}
 			Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n );
 			std::swap( A, matrix );
-			char * workspace_ptr = buffer;
+			char * workspace_ptr = static_cast< char * >(buffer);
 			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 			{
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
-				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
 				std::swap( workspace[ 0 ], tmp );
 			}
 			workspace_ptr += n * sizeof( T );
-			workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int)));
+			workspace_ptr +=
+				(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
 			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 			{
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
-				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
 				std::swap( workspace[ 1 ], tmp );
 			}
 			workspace_ptr += n * sizeof( T );
-			workspace_ptr += (sizeof(int) - (workspace_ptr % sizeof(int)));
+			workspace_ptr +=
+				(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
 			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 			{
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
-				grb::Vector< T > tmp = grb::internal::wrapVector( workspace_vector );
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
 				std::swap( workspace[ 2 ], tmp );
 			}
 		}
@@ -328,7 +330,8 @@ class CG_Data {
 template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const T * const a, const RSI * const ja, const NZI * const ia
+	const T * const a, const RSI * const ja, const NZI * const ia,
+	void * const buffer, const size_t bufferSize
 ) {
 	if( n == 0 ) { return ILLEGAL_ARGUMENT; }
 	if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) {
@@ -336,7 +339,7 @@ static sparse_err_t sparse_cg_init_impl(
 	}
 	try {
 		*handle = static_cast< void * >(
-			new CG_Data< T, NZI, RSI >( n, a, ja, ia ) );
+			new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize ) );
 	} catch( std::exception &e ) {
 		// the grb::Matrix constructor may only throw on out of memory errors
 		std::cerr << "Error: " << e.what() << "\n";
@@ -346,46 +349,67 @@ static sparse_err_t sparse_cg_init_impl(
 	return NO_ERROR;
 }
 
+template< typename T, typename NZI, typename RSI >
+static sparse_err_t sparse_cg_init_impl_no_buffer(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const T * const a, const RSI * const ja, const NZI * const ia
+) {
+	const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n );
+	void * buffer = nullptr;
+	try {
+		buffer = static_cast< void * >(new char[ allocSize ]);
+	} catch( ... ) {
+		std::cerr << "Error allocating workspace buffer\n";
+		return OUT_OF_MEMORY;
+	}
+	const sparse_err_t rc = sparse_cg_init_impl(
+		handle, n, a, ja, ia, buffer, allocSize );
+	if( rc != NO_ERROR ) {
+		delete [] static_cast< char * >(buffer);
+	}
+	return rc;
+}
+
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia );
 }
 
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia );
 }
 
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, ia );
 }
 
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, ia );
 }
 
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, ia );
 }
 
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, ia );
 }
 
 template< typename T, typename NZI, typename RSI >

From 6648919e7a0471215730b816c0ade808a1cc39c6 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Thu, 5 Jun 2025 14:31:21 +0200
Subject: [PATCH 03/22] Now also accounts for the preconditioner workspace out
 of the box

---
 src/transition/solver.cpp | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 92089a02b..da6d4e087 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -17,6 +17,7 @@
 
 
 #include <array>
+#include <stdexcept>
 
 #include <graphblas.hpp>
 #include <graphblas/algorithms/conjugate_gradient.hpp>
@@ -141,14 +142,20 @@ class CG_Data {
 		 * The size, in bytes, required as a work space for the PCG algorithm.
 		 *
 		 * @param[in] n The linear system size.
+		 * @param[in] preconditioned Whether the solver may be called with a
+		 *                           preconditioner.
 		 *
 		 * @returns The required size, in bytes.
 		 *
 		 * \internal Note that the space for two additional integers is required as
 		 *           per both the C and C++ specifications.
 		 */
-		static size_t workspaceSize( const size_t n ) {
-			return 3 * n * sizeof( T ) + 2 * sizeof( int );
+		static size_t workspaceSize( const size_t n, const bool preconditioned ) {
+			if( preconditioned ) {
+				return 4 * n * sizeof( T ) + 3 * sizeof( int );
+			} else {
+				return 3 * n * sizeof( T ) + 2 * sizeof( int );
+			}
 		}
 
 		/** Disable default constructor. */
@@ -191,7 +198,7 @@ class CG_Data {
 			assert( a != nullptr );
 			assert( ja != nullptr );
 			assert( ia != nullptr );
-			if( buffer_size >= workspaceSize( n ) ) {
+			if( buffer_size < workspaceSize( n, false ) ) {
 				throw std::invalid_argument( "The given buffer size is too small" );
 			}
 			Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n );
@@ -221,6 +228,15 @@ class CG_Data {
 				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
 				std::swap( workspace[ 2 ], tmp );
 			}
+			if( buffer_size >= workspaceSize( n, true ) ) {
+				workspace_ptr += n * sizeof( T );
+				workspace_ptr +=
+					(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
+				assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+				std::swap( precond_workspace, tmp );
+			}
 		}
 
 		/** @returns The system size. */
@@ -354,7 +370,9 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const T * const a, const RSI * const ja, const NZI * const ia
 ) {
-	const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n );
+	// we pass true here, since we want to support the entire solver transition
+	// path API out of the box
+	const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n, true );
 	void * buffer = nullptr;
 	try {
 		buffer = static_cast< void * >(new char[ allocSize ]);

From 978e694e5a0b06173a7def1d59e4301899f3a647 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Mon, 14 Jul 2025 13:47:06 +0200
Subject: [PATCH 04/22] The CG algorithm was not always applying the descriptor
 it receives -- herewith fixed

---
 .../algorithms/conjugate_gradient.hpp         | 80 ++++++++++++++-----
 1 file changed, 59 insertions(+), 21 deletions(-)

diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp
index 11e910566..157afd4cf 100644
--- a/include/graphblas/algorithms/conjugate_gradient.hpp
+++ b/include/graphblas/algorithms/conjugate_gradient.hpp
@@ -314,43 +314,77 @@ namespace grb {
 					std::cerr << "Error: at least one CG iteration must be requested\n";
 					return grb::ILLEGAL;
 				}
+
+				// dense descriptor
+				if( descr & grb::descriptors::dense ) {
+					if( grb::nnz( x ) != grb::size( x ) ) {
+						std::cerr << "Error: x was sparse while the dense descriptor was given\n";
+						return grb::ILLEGAL;
+					}
+					if( grb::nnz( b ) != grb::size( b ) ) {
+						std::cerr << "Error: b was sparse while the dense descriptor was given\n";
+						return grb::ILLEGAL;
+					}
+					if( grb::nnz( r ) != grb::size( r ) ) {
+						std::cerr << "Error: r was sparse while the dense descriptor was given\n";
+						return grb::ILLEGAL;
+					}
+					if( grb::nnz( temp ) != grb::size( temp ) ) {
+						std::cerr << "Error: temp was sparse while the dense descriptor was "
+							<< "given\n";
+						return grb::ILLEGAL;
+					}
+					if( preconditioned &&
+						(grb::nnz( temp_precond ) != grb::size( temp_precond ))
+					) {
+						std::cerr << "Error: temp_precond was sparse while the dense descriptor "
+							<< "was given\n";
+						return grb::ILLEGAL;
+					}
+				}
 			}
 
 			// set pure output fields to neutral defaults
 			iterations = 0;
 			residual = std::numeric_limits< double >::infinity();
 
-			// make x and b structurally dense (if not already) so that the remainder
+			// declare internal scalars
+			IOType sigma, bnorm, alpha, beta;
+
+			// make x structurally dense (if not already) so that the remainder
 			// algorithm can safely use the dense descriptor for faster operations
-			{
-				RC rc = grb::SUCCESS;
+			grb::RC ret = grb::SUCCESS;
+			if( !(descr & grb::descriptors::dense) ) {
 				if( nnz( x ) != n ) {
-					rc = grb::set< descriptors::invert_mask | descriptors::structural >(
+					ret = grb::set< descriptors::invert_mask | descriptors::structural >(
 						x, x, zero
 					);
+					assert( ret == grb::SUCCESS );
 				}
-				if( rc != grb::SUCCESS ) { return rc; }
 				assert( nnz( x ) == n );
 			}
 
-			IOType sigma, bnorm, alpha, beta;
-
 			// r = b - temp;
-			grb::RC ret = grb::set( temp, 0 ); assert( ret == grb::SUCCESS );
+			ret = ret ? ret : grb::set< descr >( temp, 0 );
+			assert( ret == grb::SUCCESS );
 			ret = ret ? ret : grb::mxv< descr_dense >( temp, A, x, ring );
 			assert( ret == grb::SUCCESS );
-			ret = ret ? ret : grb::set( r, zero ); assert( ret == grb::SUCCESS );
-			// note: no dense descriptor since we actually allow sparse b
-			ret = ret ? ret : grb::foldl( r, b, ring.getAdditiveMonoid() );
-			// from here onwards, r, temp, x are dense and will remain so
-			assert( nnz( r ) == n );
-			assert( nnz( temp ) == n );
+			ret = ret ? ret : grb::set< descr >( r, zero );
+			assert( ret == grb::SUCCESS );
+			// note: no forced dense descriptor since we actually allow sparse b
+			//       but if b was already dense and the dense descriptor was given, we
+			//       will respect that
+			ret = ret ? ret : grb::foldl< descr >( r, b, ring.getAdditiveMonoid() );
+			// note: at this point, r and temp are guaranteed dense, so we force a dense
+			//       descriptor
 			ret = ret ? ret : grb::foldl< descr_dense >( r, temp, minus );
 			assert( ret == grb::SUCCESS );
 
 			// bnorm = b' * b;
+			// Note that b can be structurally sparse (unless otherwise guaranteed by the
+			// user).
 			bnorm = zero;
-			ret = ret ? ret : grb::dot< descr_dense >(
+			ret = ret ? ret : grb::dot< descr >(
 					bnorm,
 					b, b,
 					ring.getAdditiveMonoid(),
@@ -381,7 +415,9 @@ namespace grb {
 
 			// z = M^-1r
 			if( preconditioned ) {
-				ret = ret ? ret : grb::set( z, 0 ); // also ensures z is dense, henceforth
+				ret = ret ? ret : grb::set< descr >( z, 0 );
+				// henceforth, also z is structurally dense and we can force a dense
+				// descriptor for it
 				if( preconditioned == 2 ) {
 					ret = ret ? ret : grb::wait( z, r );
 				}
@@ -389,10 +425,12 @@ namespace grb {
 			} // else, z equals r (by reference)
 
 			// u = z;
-			ret = ret ? ret : grb::set( u, z );
+			ret = ret ? ret : grb::set< descr >( u, z );
 			assert( ret == grb::SUCCESS );
-			// from here onwards, u is dense; i.e., all vectors are dense from now on,
-			// and we can freely use the dense descriptor in the subsequent
+			// henceforth, also u is structurally dense -- in fact, henceforth, all
+			// vectors in this algorithm except b are now guaranteed dense. We may
+			// hence liberally force the use of the dense descriptor in the main while
+			// loop of the CG (since it does not refer to b).
 
 			// sigma = r' * z;
 			sigma = zero;
@@ -435,7 +473,7 @@ namespace grb {
 				assert( ret == grb::SUCCESS );
 
 				// alpha = sigma / beta;
-				ret = ret ? ret : grb::apply( alpha, sigma, beta, divide );
+				ret = ret ? ret : grb::apply< descr >( alpha, sigma, beta, divide );
 				assert( ret == grb::SUCCESS );
 
 				// x = x + alpha * u;
@@ -489,7 +527,7 @@ namespace grb {
 				}
 
 				// alpha = beta / sigma;
-				ret = ret ? ret : grb::apply( alpha, beta, sigma, divide );
+				ret = ret ? ret : grb::apply< descr >( alpha, beta, sigma, divide );
 				assert( ret == grb::SUCCESS );
 
 				// u_next = z + beta * u_previous;

From 9bf7bfb03d072e857f26b066ea48071962fe6ab1 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Mon, 14 Jul 2025 15:19:15 +0200
Subject: [PATCH 05/22] Now allow for getting CG solver handles that do not
 support preconditioning, with the gain that those will require less memory.
 Also fix one memory leak

---
 include/transition/solver.h | 119 +++++++++++++++++++++++++++++
 src/transition/kml_iss.cpp  |   8 ++
 src/transition/solver.cpp   | 148 ++++++++++++++++++++++++++++++------
 3 files changed, 251 insertions(+), 24 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index ba1afa008..1bc58f2d7 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -132,6 +132,11 @@ typedef enum {
 	 */
 	ILLEGAL_ARGUMENT,
 
+	/**
+	 * Illegal method was called.
+	 */
+	ILLEGAL_METHOD,
+
 	/**
 	 * Out of memory error detected during call.
 	 */
@@ -250,6 +255,25 @@ sparse_err_t sparse_cg_init_sii(
 	const float * const a, const int * const ja, const int * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_sii.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -263,6 +287,25 @@ sparse_err_t sparse_cg_init_dii(
 	const double * const a, const int * const ja, const int * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_dii.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -277,6 +320,25 @@ sparse_err_t sparse_cg_init_siz(
 	const float * const a, const int * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_siz.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -291,6 +353,25 @@ sparse_err_t sparse_cg_init_diz(
 	const double * const a, const int * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_diz.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -305,6 +386,25 @@ sparse_err_t sparse_cg_init_szz(
 	const float * const a, const size_t * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_szz.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -319,6 +419,25 @@ sparse_err_t sparse_cg_init_dzz(
 	const double * const a, const size_t * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t that
+ * does not support preconditioned solves. Handles constructed using this call
+ * require less memory.
+ *
+ * To have a handle returned by this function be used for a preconditioned solve
+ * anyway, the handle must first be destroyed and then re-created using a call
+ * to #sparse_cg_init_dzz.
+ *
+ * \note Rationale: the transition path API opted to have this feature
+ *       implemented via a separate constructor call to make the fact that
+ *       resulting handles cannot be used for preconditioned solves as explicit
+ *       as possible.
+ */
+sparse_err_t sparse_cg_init_nop_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia
+);
+
 // Note that szi and dzi are skipped on purpose. Such variants would not seem
 // sensible, though could easily be provided if they do turn out to be needed
 
diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index 2722f4450..53a1b286b 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -116,6 +116,14 @@ static int sparse_err_t_2_int( const sparse_err_t err ) {
 			result = KMLSS_BAD_N;
 			break;
 		}
+		case ILLEGAL_METHOD:
+		{
+			// there is no case where this error should be exposed to a KML API user;
+			// rather, if encountered, it indicates an error in translating the KML API
+			// to the solver transition path
+			result = KMLSS_INTERNAL_ERROR;
+			break;
+		}
 		case OUT_OF_MEMORY:
 		{
 			result = KMLSS_NO_MEMORY;
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index da6d4e087..37f62275a 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -35,6 +35,25 @@
  * @date 26/01/2024
  */
 
+/**
+ * Implements the various ways the CG workspace data may be freed.
+ *
+ * At the moment, always assumes that the workspace buffer is allocated by the
+ * transition path itself, using the standard C++ array allocation mechanism.
+ * The matching deleter action employs the matching standard C++ array deleter.
+ */
+class CGWorkspaceDeleter {
+
+	private:
+
+	public:
+
+		void operator()( const void * ptr ) {
+			delete [] static_cast<const char*>(ptr);
+		}
+
+};
+
 /**
  * @tparam T   The nonzero value type.
  * @tparam NZI The nonzero index type.
@@ -108,6 +127,14 @@ class CG_Data {
 		/** Any required data for the \a preconditioner. */
 		void * preconditioner_data;
 
+		// destructor data
+
+		/** The given buffer during construction. */
+		void * _buffer;
+
+		/** How to handle \a _buffer on destruction. */
+		CGWorkspaceDeleter _buffer_deleter;
+
 
 	protected:
 
@@ -182,17 +209,25 @@ class CG_Data {
 		 * @param[in] buffer_size The size of the given \a buffer. Used for sanity
 		 *                        checking the use of the buffer.
 		 *
+		 *
+		 * @param[in] buffer_deleter How the given \a buffer should be deleted on
+		 *                           destruction of this instance.
+		 *
+		 * \note If the buffer is not to be owned by this instance, then a no-op
+		 *       \a buffer_deleter should be given.
+		 *
 		 * \warning The sanity check is weaker if not compiled in debug mode.
 		 */
 		CG_Data(
 			const size_t n,
 			const T * const a, const RSI * const ja, const NZI * const ia,
-			void * const buffer, const size_t buffer_size
+			void * const buffer, const size_t buffer_size, const D &buffer_deleter
 		) :
 			size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ),
 			residual( std::numeric_limits< T >::infinity() ), iters( 0 ),
 			precond_workspace( grb::Vector< T >( 0 ) ),
-			preconditioner( nullptr ), preconditioner_data( nullptr )
+			preconditioner( nullptr ), preconditioner_data( nullptr ),
+			_buffer( buffer ), _buffer_deleter( buffer_deleter )
 		{
 			assert( n > 0 );
 			assert( a != nullptr );
@@ -239,6 +274,12 @@ class CG_Data {
 			}
 		}
 
+		/** Destroys this CG solve handle. */
+		~CG_Data() {
+			// call the deleter on the buffer pointer (which may be a no-op)
+			_buffer_deleter( _buffer );
+		}
+
 		/** @returns The system size. */
 		size_t getSize() const noexcept { return size; }
 
@@ -296,13 +337,12 @@ class CG_Data {
 		 *         allocation.
 		 */
 		void setPreconditioner( const preconditioner_t in, void * const data ) {
+			if( grb::size( precond_workspace ) == 0 ) {
+				throw std::logic_error( "CG handle has no preconditioned solve support" );
+			}
 			preconditioner = in;
 			preconditioner_data = data;
 			assert( !( !preconditioner && preconditioner_data ) );
-			if( grb::size( precond_workspace ) == 0 ) {
-				grb::Vector< T > replace( size );
-				std::swap( replace, precond_workspace );
-			}
 			assert( grb::size( precond_workspace ) == size );
 		}
 
@@ -347,15 +387,15 @@ template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const T * const a, const RSI * const ja, const NZI * const ia,
-	void * const buffer, const size_t bufferSize
+	void * const buffer, const size_t bufferSize, const CGWorkspaceDeleter &deleter
 ) {
 	if( n == 0 ) { return ILLEGAL_ARGUMENT; }
 	if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) {
 		return NULL_ARGUMENT;
 	}
 	try {
-		*handle = static_cast< void * >(
-			new CG_Data< T, NZI, RSI >( n, a, ja, ia, buffer, bufferSize ) );
+		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI, D >(
+			n, a, ja, ia, buffer, bufferSize, deleter ) );
 	} catch( std::exception &e ) {
 		// the grb::Matrix constructor may only throw on out of memory errors
 		std::cerr << "Error: " << e.what() << "\n";
@@ -368,11 +408,13 @@ static sparse_err_t sparse_cg_init_impl(
 template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl_no_buffer(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const T * const a, const RSI * const ja, const NZI * const ia
+	const T * const a, const RSI * const ja, const NZI * const ia,
+	bool support_preconditioning
 ) {
 	// we pass true here, since we want to support the entire solver transition
 	// path API out of the box
-	const size_t allocSize = CG_Data< T, NZI, RSI >::workspaceSize( n, true );
+	const size_t allocSize = CG_Data< T, NZI, RSI >::
+		workspaceSize( n, support_preconditioning );
 	void * buffer = nullptr;
 	try {
 		buffer = static_cast< void * >(new char[ allocSize ]);
@@ -381,53 +423,107 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 		return OUT_OF_MEMORY;
 	}
 	const sparse_err_t rc = sparse_cg_init_impl(
-		handle, n, a, ja, ia, buffer, allocSize );
+		handle, n, a, ja, ia, buffer, allocSize, CGWorkspaceDeleter() );
 	if( rc != NO_ERROR ) {
 		delete [] static_cast< char * >(buffer);
 	}
 	return rc;
 }
 
+sparse_err_t sparse_cg_init_nop_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+		false );
+}
+
+sparse_err_t sparse_cg_init_nop_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+		false );
+}
+
+sparse_err_t sparse_cg_init_nop_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+		ia, false );
+}
+
+sparse_err_t sparse_cg_init_nop_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+		ia, false );
+}
+
+sparse_err_t sparse_cg_init_nop_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+		ia, false );
+}
+
+sparse_err_t sparse_cg_init_nop_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia
+) {
+	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+		ia, false );
+}
+
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+		true );
 }
 
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+		true );
 }
 
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+		ia, true );
 }
 
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+		ia, true );
 }
 
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+		ia, true );
 }
 
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+		ia, true );
 }
 
 template< typename T, typename NZI, typename RSI >
@@ -618,7 +714,8 @@ static sparse_err_t sparse_cg_get_iter_count_impl(
 	const sparse_cg_handle_t handle, size_t * const iters
 ) {
 	if( handle == nullptr || iters == nullptr ) { return NULL_ARGUMENT; }
-	*iters = static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
+	*iters =
+		static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
 	return NO_ERROR;
 }
 
@@ -739,9 +836,13 @@ static sparse_err_t sparse_cg_set_preconditioner_impl(
 	try {
 		static_cast< CG_Data< T, NZI, RSI > * >( handle )->
 			setPreconditioner( c_precond_p, c_precond_data_p );
-	} catch(...) {
-		// spec says ALP vector allocation can only throw due to out-of-memory
-		return OUT_OF_MEMORY;
+	} catch( std::logic_error &e ) {
+		std::cerr << e.what() << "\n";
+		return ILLEGAL_METHOD;
+	} catch( std::exception &e ) {
+		std::cerr << e.what() << "\n";
+		std::cerr << "This is an unexpected error; please submit a bug report\n";
+		return UNKNOWN;
 	}
 	return NO_ERROR;
 }
@@ -805,8 +906,7 @@ static sparse_err_t sparse_cg_set_max_iter_count_impl(
 	sparse_cg_handle_t handle, const size_t max_iters
 ) {
 	if( handle == nullptr ) { return NULL_ARGUMENT; }
-	static_cast< CG_Data< T, NZI, RSI > * >( handle )->
-		setMaxIters( max_iters );
+	static_cast< CG_Data< T, NZI, RSI > * >( handle )->setMaxIters( max_iters );
 	return NO_ERROR;
 }
 

From 4c39734528262cff28fe435890167a771a64ef26 Mon Sep 17 00:00:00 2001
From: Christos Konstantinos Matzoros
 <christos.konstantinos.matzoros@h-partners.com>
Date: Mon, 14 Jul 2025 16:26:36 +0200
Subject: [PATCH 06/22] No type error fix

---
 src/transition/solver.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 37f62275a..f2b28ab01 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -221,7 +221,7 @@ class CG_Data {
 		CG_Data(
 			const size_t n,
 			const T * const a, const RSI * const ja, const NZI * const ia,
-			void * const buffer, const size_t buffer_size, const D &buffer_deleter
+			void * const buffer, const size_t buffer_size, const CGWorkspaceDeleter &buffer_deleter
 		) :
 			size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ),
 			residual( std::numeric_limits< T >::infinity() ), iters( 0 ),
@@ -394,7 +394,7 @@ static sparse_err_t sparse_cg_init_impl(
 		return NULL_ARGUMENT;
 	}
 	try {
-		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI, D >(
+		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI >(
 			n, a, ja, ia, buffer, bufferSize, deleter ) );
 	} catch( std::exception &e ) {
 		// the grb::Matrix constructor may only throw on out of memory errors

From bbe96c48e2de438039e587101225c048c40d4af1 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Mon, 14 Jul 2025 17:09:58 +0200
Subject: [PATCH 07/22] Add NUMA-aware workspace allocation to the transition
 path

---
 include/transition/solver.h |  40 ++++++++----
 src/transition/kml_iss.cpp  |   6 +-
 src/transition/solver.cpp   | 124 ++++++++++++++++++++++++++----------
 3 files changed, 120 insertions(+), 50 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 1bc58f2d7..e61628b1e 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -230,6 +230,10 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
  * @param[in]  a      The nonzero values of the system matrix.
  * @param[in]  ja     The column indices of the nonzeroes of the system matrix.
  * @param[in]  ia     The row offset array of the system matrix.
+ * @param[in]  numa   Whether the CG handle should employ more than one NUMA
+ *                    domain, in which case any memory allocated during
+ *                    construction of this handle will attempt to employ NUMA-
+ *                    aware allocation.
  *
  * This variant is for single-precision floating point nonzeroes and integer
  * \a ja and \a ia arrays, as also indicated by the <tt>sii</tt> postfix.
@@ -252,7 +256,8 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
  */
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia
+	const float * const a, const int * const ja, const int * const ia,
+	const bool numa
 );
 
 /**
@@ -271,7 +276,8 @@ sparse_err_t sparse_cg_init_sii(
  */
 sparse_err_t sparse_cg_init_nop_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia
+	const float * const a, const int * const ja, const int * const ia,
+	const bool numa
 );
 
 /**
@@ -284,7 +290,8 @@ sparse_err_t sparse_cg_init_nop_sii(
  */
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia
+	const double * const a, const int * const ja, const int * const ia,
+	const bool numa
 );
 
 /**
@@ -303,7 +310,8 @@ sparse_err_t sparse_cg_init_dii(
  */
 sparse_err_t sparse_cg_init_nop_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia
+	const double * const a, const int * const ja, const int * const ia,
+	const bool numa
 );
 
 /**
@@ -317,7 +325,8 @@ sparse_err_t sparse_cg_init_nop_dii(
  */
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -336,7 +345,8 @@ sparse_err_t sparse_cg_init_siz(
  */
 sparse_err_t sparse_cg_init_nop_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -350,7 +360,8 @@ sparse_err_t sparse_cg_init_nop_siz(
  */
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -369,7 +380,8 @@ sparse_err_t sparse_cg_init_diz(
  */
 sparse_err_t sparse_cg_init_nop_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -383,7 +395,8 @@ sparse_err_t sparse_cg_init_nop_diz(
  */
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -402,7 +415,8 @@ sparse_err_t sparse_cg_init_szz(
  */
 sparse_err_t sparse_cg_init_nop_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -416,7 +430,8 @@ sparse_err_t sparse_cg_init_nop_szz(
  */
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 );
 
 /**
@@ -435,7 +450,8 @@ sparse_err_t sparse_cg_init_dzz(
  */
 sparse_err_t sparse_cg_init_nop_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 );
 
 // Note that szi and dzi are skipped on purpose. Such variants would not seem
diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index 53a1b286b..d507587ea 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -148,9 +148,9 @@ int KML_CG_PREFIXED( InitSI )(
 	KmlSolverTask **handle, int n, const float *a,
 	const int *ja, const int *ia
 ) {
-	// negative numbers become positive when casted to size_t
+	// negative numbers become positive when cast to size_t
 	if( n <= 0 ) { return KMLSS_BAD_N; }
-	sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia );
+	sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia, true );
 	return sparse_err_t_2_int( err );
 }
 
@@ -160,7 +160,7 @@ int KML_CG_PREFIXED( InitDI )(
 ) {
 	// negative numbers become positive when casted to size_t
 	if( n <= 0 ) { return KMLSS_BAD_N; }
-	sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia );
+	sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia, true );
 	return sparse_err_t_2_int( err );
 }
 
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index f2b28ab01..a976bceb7 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -22,6 +22,7 @@
 #include <graphblas.hpp>
 #include <graphblas/algorithms/conjugate_gradient.hpp>
 
+#include <numa.h>
 #include <assert.h>
 
 #include "solver.h"
@@ -46,10 +47,37 @@ class CGWorkspaceDeleter {
 
 	private:
 
+		/** Whether we own the workspace data. */
+		const bool owning;
+
+		/**
+		 * Whether the workspace data was allocated NUMA-aware, and if so, what the
+		 * allocation size was.
+		 */
+		const size_t numa;
+
+
 	public:
 
-		void operator()( const void * ptr ) {
-			delete [] static_cast<const char*>(ptr);
+		CGWorkspaceDeleter() = delete;
+
+		CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) :
+			owning( owning_in ), numa( numa_in )
+		{}
+
+		void operator()( void * ptr ) {
+			assert( !( !owning && numa ) );
+			if( owning ) {
+#ifndef _GRB_NO_LIBNUMA
+				if( numa ) {
+					numa_free( ptr, numa );
+				} else {
+#endif
+					delete [] static_cast< char * >(ptr);
+#ifndef _GRB_NO_LIBNUMA
+				}
+#endif
+			}
 		}
 
 };
@@ -409,21 +437,35 @@ template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl_no_buffer(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const T * const a, const RSI * const ja, const NZI * const ia,
-	bool support_preconditioning
+	const bool support_preconditioning, const bool numa
 ) {
-	// we pass true here, since we want to support the entire solver transition
-	// path API out of the box
 	const size_t allocSize = CG_Data< T, NZI, RSI >::
 		workspaceSize( n, support_preconditioning );
 	void * buffer = nullptr;
-	try {
-		buffer = static_cast< void * >(new char[ allocSize ]);
-	} catch( ... ) {
-		std::cerr << "Error allocating workspace buffer\n";
-		return OUT_OF_MEMORY;
+#ifdef _GRB_NO_LIBNUMA
+	if( numa ) {
+		std::cerr << "Warning: solver transition path was requested to perform "
+			<< "NUMA-aware allocation, but ALP was compiled without libnuma.\n";
+	}
+#else
+	if( numa ) {
+		buffer = numa_alloc_interleaved( allocSize );
+		if( buffer == NULL ) { return OUT_OF_MEMORY; }
+	} else {
+#endif
+		try {
+			buffer = static_cast< void * >(new char[ allocSize ]);
+		} catch( ... ) {
+			std::cerr << "Error allocating workspace buffer\n";
+			return OUT_OF_MEMORY;
+		}
+#ifndef _GRB_NO_LIBNUMA
 	}
+#endif
 	const sparse_err_t rc = sparse_cg_init_impl(
-		handle, n, a, ja, ia, buffer, allocSize, CGWorkspaceDeleter() );
+		handle, n, a, ja, ia, buffer, allocSize,
+		CGWorkspaceDeleter( true, numa ? allocSize : 0 )
+	);
 	if( rc != NO_ERROR ) {
 		delete [] static_cast< char * >(buffer);
 	}
@@ -432,98 +474,110 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 
 sparse_err_t sparse_cg_init_nop_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia
+	const float * const a, const int * const ja, const int * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
-		false );
+		false, numa );
 }
 
 sparse_err_t sparse_cg_init_nop_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia
+	const double * const a, const int * const ja, const int * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
-		false );
+		false, numa );
 }
 
 sparse_err_t sparse_cg_init_nop_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
-		ia, false );
+		ia, false, numa );
 }
 
 sparse_err_t sparse_cg_init_nop_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
-		ia, false );
+		ia, false, numa );
 }
 
 sparse_err_t sparse_cg_init_nop_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
-		ia, false );
+		ia, false, numa );
 }
 
 sparse_err_t sparse_cg_init_nop_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
-		ia, false );
+		ia, false, numa );
 }
 
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia
+	const float * const a, const int * const ja, const int * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
-		true );
+		true, numa );
 }
 
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia
+	const double * const a, const int * const ja, const int * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
-		true );
+		true, numa );
 }
 
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
-		ia, true );
+		ia, true, numa );
 }
 
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
-		ia, true );
+		ia, true, numa );
 }
 
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
-		ia, true );
+		ia, true, numa );
 }
 
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
-		ia, true );
+		ia, true, numa );
 }
 
 template< typename T, typename NZI, typename RSI >

From 8706b69208c7a7b1de289a4c7c0221476813aa27 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Tue, 15 Jul 2025 10:44:48 +0200
Subject: [PATCH 08/22] Test false sharing effects

---
 src/transition/solver.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index a976bceb7..314b67cff 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -237,7 +237,6 @@ class CG_Data {
 		 * @param[in] buffer_size The size of the given \a buffer. Used for sanity
 		 *                        checking the use of the buffer.
 		 *
-		 *
 		 * @param[in] buffer_deleter How the given \a buffer should be deleted on
 		 *                           destruction of this instance.
 		 *
@@ -261,6 +260,9 @@ class CG_Data {
 			assert( a != nullptr );
 			assert( ja != nullptr );
 			assert( ia != nullptr );
+			constexpr size_t align = (64 % sizeof(int) == 0)
+				?  64
+				: (64 + (sizeof(int) - (64 % sizeof(int))));
 			if( buffer_size < workspaceSize( n, false ) ) {
 				throw std::invalid_argument( "The given buffer size is too small" );
 			}
@@ -275,7 +277,7 @@ class CG_Data {
 			}
 			workspace_ptr += n * sizeof( T );
 			workspace_ptr +=
-				(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
+				(align - (reinterpret_cast<uintptr_t>(workspace_ptr) % align));
 			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 			{
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
@@ -284,7 +286,7 @@ class CG_Data {
 			}
 			workspace_ptr += n * sizeof( T );
 			workspace_ptr +=
-				(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
+				(align - (reinterpret_cast<uintptr_t>(workspace_ptr) % align));
 			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 			{
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
@@ -294,7 +296,7 @@ class CG_Data {
 			if( buffer_size >= workspaceSize( n, true ) ) {
 				workspace_ptr += n * sizeof( T );
 				workspace_ptr +=
-					(sizeof(int) - (reinterpret_cast<uintptr_t>(workspace_ptr) % sizeof(int)));
+					(align - (reinterpret_cast<uintptr_t>(workspace_ptr) % align));
 				assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n );
 				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
 				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
@@ -440,7 +442,7 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 	const bool support_preconditioning, const bool numa
 ) {
 	const size_t allocSize = CG_Data< T, NZI, RSI >::
-		workspaceSize( n, support_preconditioning );
+		workspaceSize( n, support_preconditioning ) + 256; // TODO hide this const
 	void * buffer = nullptr;
 #ifdef _GRB_NO_LIBNUMA
 	if( numa ) {

From a9224f26b2f025ecfed6b12d8339e76928d3ee73 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 14:28:55 +0200
Subject: [PATCH 09/22] Going to take that last suggestion from the MR --
 modify header file accordingly

---
 include/transition/solver.h | 154 +++++++++++++-----------------------
 1 file changed, 56 insertions(+), 98 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index e61628b1e..95e1a1743 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -230,10 +230,22 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
  * @param[in]  a      The nonzero values of the system matrix.
  * @param[in]  ja     The column indices of the nonzeroes of the system matrix.
  * @param[in]  ia     The row offset array of the system matrix.
- * @param[in]  numa   Whether the CG handle should employ more than one NUMA
- *                    domain, in which case any memory allocated during
- *                    construction of this handle will attempt to employ NUMA-
- *                    aware allocation.
+ * @param[in]  precon Whether the CG handle is expected to execute solves with
+ *                    preconditioning.
+ * @param[in]  numa   Whether the CG handle is expected to execute solves that
+ *                    employ more than one NUMA domain, in which case any memory
+ *                    allocated during construction of this handle may attempt
+ *                    to employ NUMA-aware allocation.
+ *
+ * The Boolean flags \a precon and \a numa indicated above are \em hints to the
+ * underlying ALP implementation.
+ *
+ * \note This means that even if \a precon was set to <tt>false</tt> during
+ *       handle creation, a user may still request preconditioning and expect
+ *       no error. Similarly, even if \a numa was set <tt>true</tt> during
+ *       handle creation, the underlying ALP implementation may still opt to
+ *       \em not employ NUMA-aware allocation, for example, if it was configured
+ *       without libnuma support.
  *
  * This variant is for single-precision floating point nonzeroes and integer
  * \a ja and \a ia arrays, as also indicated by the <tt>sii</tt> postfix.
@@ -254,30 +266,21 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
  *                            successfully. Only in this case shall \a handle
  *                            henceforth be a \em valid handle.
  */
-sparse_err_t sparse_cg_init_sii(
+sparse_err_t sparse_cg_init_opt_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
- *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_sii.
+ * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_sii for full details.
  */
-sparse_err_t sparse_cg_init_nop_sii(
+sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const float * const a, const int * const ja, const int * const ia
 );
 
 /**
@@ -288,30 +291,21 @@ sparse_err_t sparse_cg_init_nop_sii(
  *
  * @see #sparse_cg_init_sii for full documentation.
  */
-sparse_err_t sparse_cg_init_dii(
+sparse_err_t sparse_cg_init_opt_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
- *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_dii.
+ * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_dii for full details.
  */
-sparse_err_t sparse_cg_init_nop_dii(
+sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const double * const a, const int * const ja, const int * const ia
 );
 
 /**
@@ -323,30 +317,21 @@ sparse_err_t sparse_cg_init_nop_dii(
  *
  * @see #sparse_cg_init_sii for full documentation.
  */
-sparse_err_t sparse_cg_init_siz(
+sparse_err_t sparse_cg_init_opt_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
+ * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_siz.
- *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_siz for full details.
  */
-sparse_err_t sparse_cg_init_nop_siz(
+sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const float * const a, const int * const ja, const size_t * const ia
 );
 
 /**
@@ -358,30 +343,21 @@ sparse_err_t sparse_cg_init_nop_siz(
  *
  * @see #sparse_cg_init_sii for full documentation.
  */
-sparse_err_t sparse_cg_init_diz(
+sparse_err_t sparse_cg_init_opt_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
- *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_diz.
+ * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_diz for full details.
  */
-sparse_err_t sparse_cg_init_nop_diz(
+sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const double * const a, const int * const ja, const size_t * const ia
 );
 
 /**
@@ -396,27 +372,18 @@ sparse_err_t sparse_cg_init_nop_diz(
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
+ * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_szz.
- *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_szz for full details.
  */
-sparse_err_t sparse_cg_init_nop_szz(
+sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const float * const a, const size_t * const ja, const size_t * const ia
 );
 
 /**
@@ -428,30 +395,21 @@ sparse_err_t sparse_cg_init_nop_szz(
  *
  * @see #sparse_cg_init_sii for full documentation.
  */
-sparse_err_t sparse_cg_init_dzz(
+sparse_err_t sparse_cg_init_opt_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const bool precon, const bool numa
 );
 
 /**
- * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t that
- * does not support preconditioned solves. Handles constructed using this call
- * require less memory.
+ * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
  *
- * To have a handle returned by this function be used for a preconditioned solve
- * anyway, the handle must first be destroyed and then re-created using a call
- * to #sparse_cg_init_dzz.
- *
- * \note Rationale: the transition path API opted to have this feature
- *       implemented via a separate constructor call to make the fact that
- *       resulting handles cannot be used for preconditioned solves as explicit
- *       as possible.
+ * @see sparse_cg_init_opt_dzz for full details.
  */
 sparse_err_t sparse_cg_init_nop_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const double * const a, const size_t * const ja, const size_t * const ia
 );
 
 // Note that szi and dzi are skipped on purpose. Such variants would not seem

From b7caeb3adf0b1430482160b8f3e06eff1f7529d8 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 14:54:28 +0200
Subject: [PATCH 10/22] Corresponding cpp changes to previous commit

---
 src/transition/solver.cpp | 66 ++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 314b67cff..c1441138b 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -474,112 +474,106 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 	return rc;
 }
 
-sparse_err_t sparse_cg_init_nop_sii(
+sparse_err_t sparse_cg_init_opt_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
-		false, numa );
+		precond, numa );
 }
 
-sparse_err_t sparse_cg_init_nop_dii(
+sparse_err_t sparse_cg_init_opt_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
-		false, numa );
+		precond, numa );
 }
 
-sparse_err_t sparse_cg_init_nop_siz(
+sparse_err_t sparse_cg_init_opt_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
-		ia, false, numa );
+		ia, precond, numa );
 }
 
-sparse_err_t sparse_cg_init_nop_diz(
+sparse_err_t sparse_cg_init_opt_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
-		ia, false, numa );
+		ia, precond, numa );
 }
 
-sparse_err_t sparse_cg_init_nop_szz(
+sparse_err_t sparse_cg_init_opt_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
-		ia, false, numa );
+		ia, precond, numa );
 }
 
-sparse_err_t sparse_cg_init_nop_dzz(
+sparse_err_t sparse_cg_init_opt_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const bool precond, const bool numa
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
-		ia, false, numa );
+		ia, precond, numa );
 }
 
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const float * const a, const int * const ja, const int * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
-		true, numa );
+		true, true );
 }
 
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const int * const ia,
-	const bool numa
+	const double * const a, const int * const ja, const int * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
-		true, numa );
+		true, true );
 }
 
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const float * const a, const int * const ja, const size_t * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
-		ia, true, numa );
+		ia, true, true );
 }
 
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const int * const ja, const size_t * const ia,
-	const bool numa
+	const double * const a, const int * const ja, const size_t * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
-		ia, true, numa );
+		ia, true, true );
 }
 
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const float * const a, const size_t * const ja, const size_t * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
-		ia, true, numa );
+		ia, true, true );
 }
 
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const double * const a, const size_t * const ja, const size_t * const ia,
-	const bool numa
+	const double * const a, const size_t * const ja, const size_t * const ia
 ) {
 	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
-		ia, true, numa );
+		ia, true, true );
 }
 
 template< typename T, typename NZI, typename RSI >

From c3efc055b1e00244e31de768f5ab57af17f9e81e Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 14:55:43 +0200
Subject: [PATCH 11/22] Bugfix

---
 include/transition/solver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 95e1a1743..1291005f4 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -369,7 +369,7 @@ sparse_err_t sparse_cg_init_diz(
  *
  * @see #sparse_cg_init_sii for full documentation.
  */
-sparse_err_t sparse_cg_init_szz(
+sparse_err_t sparse_cg_init_opt_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia,
 	const bool precon, const bool numa

From e9f9e4afb6a9ec4cfbf9350d2dd3ffd08e5d2031 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 14:56:23 +0200
Subject: [PATCH 12/22] Update KML_Solver with those changes

---
 src/transition/kml_iss.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index d507587ea..b348b4d85 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -150,7 +150,7 @@ int KML_CG_PREFIXED( InitSI )(
 ) {
 	// negative numbers become positive when cast to size_t
 	if( n <= 0 ) { return KMLSS_BAD_N; }
-	sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia, true );
+	sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia );
 	return sparse_err_t_2_int( err );
 }
 
@@ -160,7 +160,7 @@ int KML_CG_PREFIXED( InitDI )(
 ) {
 	// negative numbers become positive when casted to size_t
 	if( n <= 0 ) { return KMLSS_BAD_N; }
-	sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia, true );
+	sparse_err_t err = sparse_cg_init_dii( handle, n, a, ja, ia );
 	return sparse_err_t_2_int( err );
 }
 

From 612cc5b5e3f4ed33423b22784af01465a33f9eae Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 15:02:26 +0200
Subject: [PATCH 13/22] Restore default behaviour

---
 src/transition/solver.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index c1441138b..b72a64554 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -367,12 +367,20 @@ class CG_Data {
 		 *         allocation.
 		 */
 		void setPreconditioner( const preconditioner_t in, void * const data ) {
-			if( grb::size( precond_workspace ) == 0 ) {
-				throw std::logic_error( "CG handle has no preconditioned solve support" );
-			}
 			preconditioner = in;
 			preconditioner_data = data;
 			assert( !( !preconditioner && preconditioner_data ) );
+			if( grb::size( precond_workspace ) == 0 ) {
+				// note -- using the ALP default vector allocation here is the only
+				// convenient option: otherwise has to implement buffer management
+				// for this vector only. If it is important that this vector be allocated
+				// without a SPA, then the user should set the precond hint to true.
+				grb::Vector< T > replace( size );
+				std::swap( replace, precond_workspace );
+				std::cerr << "Warning: allocating additional workspace to handle CG solves "
+					<< "with preconditioning. To prevent this on-demand allocation, set the "
+					<< "precond option during CG solver handle creation to true.\n";
+			}
 			assert( grb::size( precond_workspace ) == size );
 		}
 
@@ -886,9 +894,6 @@ static sparse_err_t sparse_cg_set_preconditioner_impl(
 	try {
 		static_cast< CG_Data< T, NZI, RSI > * >( handle )->
 			setPreconditioner( c_precond_p, c_precond_data_p );
-	} catch( std::logic_error &e ) {
-		std::cerr << e.what() << "\n";
-		return ILLEGAL_METHOD;
 	} catch( std::exception &e ) {
 		std::cerr << e.what() << "\n";
 		std::cerr << "This is an unexpected error; please submit a bug report\n";

From e3436449c35ce745bcf1a55a0a9dfdd482cfae37 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 15:02:55 +0200
Subject: [PATCH 14/22] Remove now-again unnecesary error code

---
 include/transition/solver.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 1291005f4..a45ba8aa1 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -132,11 +132,6 @@ typedef enum {
 	 */
 	ILLEGAL_ARGUMENT,
 
-	/**
-	 * Illegal method was called.
-	 */
-	ILLEGAL_METHOD,
-
 	/**
 	 * Out of memory error detected during call.
 	 */

From cf33c8a5c8bc071ba93d95bb6a45e28bf4cbc555 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 15:03:53 +0200
Subject: [PATCH 15/22] Bring KML_Solver impl up to date with previous change

---
 src/transition/kml_iss.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index b348b4d85..5c0fa2d0e 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -116,14 +116,6 @@ static int sparse_err_t_2_int( const sparse_err_t err ) {
 			result = KMLSS_BAD_N;
 			break;
 		}
-		case ILLEGAL_METHOD:
-		{
-			// there is no case where this error should be exposed to a KML API user;
-			// rather, if encountered, it indicates an error in translating the KML API
-			// to the solver transition path
-			result = KMLSS_INTERNAL_ERROR;
-			break;
-		}
 		case OUT_OF_MEMORY:
 		{
 			result = KMLSS_NO_MEMORY;

From 2ff5b7097b4d4a04e0a10e0b38e79890ba375493 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 18:35:42 +0200
Subject: [PATCH 16/22] Specify constructor variants that take a user-supplied
 buffer instead of allocating the required workspace automatically

---
 include/transition/solver.h | 177 ++++++++++++++++++++++++++++++++++--
 1 file changed, 170 insertions(+), 7 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index a45ba8aa1..3bc75a636 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -260,6 +260,13 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
  * @returns #NO_ERROR         If initialisation of the handle proceeded
  *                            successfully. Only in this case shall \a handle
  *                            henceforth be a \em valid handle.
+ *
+ * On returning #NO_ERROR, \a handle shall correspond to an initialised (P)CG
+ * solver instance. Note that this handle contains workspace memory that are
+ * necessary for auxiliary vectors required by the CG algorithm. When
+ * preconditioning is employed, four such workspace vectors of size \a n are
+ * required; otherwise, only three such vectors are required. This memory will
+ * be freed on a call to #sparse_cg_destroy_sii.
  */
 sparse_err_t sparse_cg_init_opt_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -268,7 +275,7 @@ sparse_err_t sparse_cg_init_opt_sii(
 );
 
 /**
- * Variant of #sparse_cg_init_sii that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_sii that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_sii for full details.
@@ -278,6 +285,56 @@ sparse_err_t sparse_cg_init_sii(
 	const float * const a, const int * const ja, const int * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_sii that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * This variant of initialisation guarantees no additional dynamic memory
+ * allocation will be performed. A call to #sparse_cg_destroy_sii will likewise
+ * \em not free up the given \a workspace memory.
+ *
+ * @param[in,out] workspace The user-supplied memory that will be used as the
+ *                          work space for the returned PCG solver \a handle.
+ *
+ * Previous contents of the given \a workspace will be ignored, and, when given
+ * to this function, should be considered lost. On a successful call, the given
+ * memory should be considered to be under the sole custody of the PCG solver
+ * \a handle until the time the handle is destroyed; at that point, the
+ * ownership of the given memory will be returned to the callee. If the call to
+ * this function is not successful, the ownership of the memory region returns
+ * to the callee immediately. For as long as ownership of the memory region is
+ * not with the callee, the contents of the memory region are undefined.
+ *
+ * @param[in] workspace_size The size of the memory pointed to by \a workspace.
+ *
+ * The given workspace must have byte size greater than or equal to the value
+ * returned by a call to #sparse_cg_workspace_size_s.
+ *
+ * \note For variants that use double-precision values, the workspace must have
+ *       byte size greater than or equal to the value returned by a call to
+ *       #sparse_cg_workspace_size_d.
+ *
+ * \note Note that the hints that can be supplied to #sparse_cg_init_opt_sii
+ *       affect its internal workspace allocation mechanisms. Therefore, those
+ *       hints do not apply to this PCG solver initialisation variant.
+ *
+ * In addition to the possible return codes defined by #sparse_cg_init_opt_sii,
+ * a call to this variant may furthermore return:
+ *
+ * @returns #NULL_ARGUMENT    If \a workspace equals <tt>NULL</tt>.
+ * @returns #ILLEGAL_ARGUMENT If \a workspace_size is smaller than required.
+ *
+ * Different from the specification of #sparse_cg_init_opt_sii, this variant
+ * may never return #OUT_OF_MEMORY.
+ *
+ * For further details, see #sparse_cg_init_opt_sii.
+ */
+sparse_err_t sparse_cg_manual_init_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -293,7 +350,7 @@ sparse_err_t sparse_cg_init_opt_dii(
 );
 
 /**
- * Variant of #sparse_cg_init_dii that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_dii that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_dii for full details.
@@ -303,6 +360,18 @@ sparse_err_t sparse_cg_init_dii(
 	const double * const a, const int * const ja, const int * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_dii that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -319,7 +388,7 @@ sparse_err_t sparse_cg_init_opt_siz(
 );
 
 /**
- * Variant of #sparse_cg_init_siz that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_siz that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_siz for full details.
@@ -329,6 +398,18 @@ sparse_err_t sparse_cg_init_siz(
 	const float * const a, const int * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -345,7 +426,7 @@ sparse_err_t sparse_cg_init_opt_diz(
 );
 
 /**
- * Variant of #sparse_cg_init_diz that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_diz that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_diz for full details.
@@ -355,6 +436,18 @@ sparse_err_t sparse_cg_init_diz(
 	const double * const a, const int * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -371,7 +464,7 @@ sparse_err_t sparse_cg_init_opt_szz(
 );
 
 /**
- * Variant of #sparse_cg_init_szz that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_szz that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_szz for full details.
@@ -381,6 +474,18 @@ sparse_err_t sparse_cg_init_szz(
 	const float * const a, const size_t * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -397,16 +502,74 @@ sparse_err_t sparse_cg_init_opt_dzz(
 );
 
 /**
- * Variant of #sparse_cg_init_dzz that results in a #sparse_cg_handle_t with
+ * Variant of #sparse_cg_init_opt_dzz that results in a #sparse_cg_handle_t with
  * default hints (with preconditioning and with NUMA).
  *
  * @see sparse_cg_init_opt_dzz for full details.
  */
-sparse_err_t sparse_cg_init_nop_dzz(
+sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia
 );
 
+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const void * workspace, const size_t workspace_size
+);
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace for PCG solvers to initialise with.
+ *
+ * Those so-called manual initialisation functions are the following:
+ *  - #sparse_cg_manual_init_sii,
+ *  - #sparse_cg_manual_init_siz, and
+ *  - #sparse_cg_manual_init_szz.
+ *
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ *       space. Note, however, that even if not enough workspace is supplied
+ *       for a preconditioned solve, nonetheless requesting a preconditioned
+ *       solve will still execute -- it will then dynamically allocate memory
+ *       on the fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_s( const bool precon );
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace for PCG solvers to initialise with.
+ *
+ * Those so-called manual initialisation functions are the following:
+ *  - #sparse_cg_manual_init_dii,
+ *  - #sparse_cg_manual_init_diz, and
+ *  - #sparse_cg_manual_init_dzz.
+ *
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ *       space. Note, however, that even if not enough workspace is supplied
+ *       for a preconditioned solve, nonetheless requesting a preconditioned
+ *       solve will still execute -- it will then dynamically allocate memory
+ *       on the fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_d( const bool precon );
+
 // Note that szi and dzi are skipped on purpose. Such variants would not seem
 // sensible, though could easily be provided if they do turn out to be needed
 

From 1ad71e3c4a9c889fcf0c0b14582959f98e6e810b Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 18:47:29 +0200
Subject: [PATCH 17/22] Implement manual initialisation API

---
 include/transition/solver.h | 12 ++++----
 src/transition/solver.cpp   | 56 +++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 3bc75a636..968054f94 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -332,7 +332,7 @@ sparse_err_t sparse_cg_init_sii(
 sparse_err_t sparse_cg_manual_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
@@ -369,7 +369,7 @@ sparse_err_t sparse_cg_init_dii(
 sparse_err_t sparse_cg_manual_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
@@ -407,7 +407,7 @@ sparse_err_t sparse_cg_init_siz(
 sparse_err_t sparse_cg_manual_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
@@ -445,7 +445,7 @@ sparse_err_t sparse_cg_init_diz(
 sparse_err_t sparse_cg_manual_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
@@ -483,7 +483,7 @@ sparse_err_t sparse_cg_init_szz(
 sparse_err_t sparse_cg_manual_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
@@ -521,7 +521,7 @@ sparse_err_t sparse_cg_init_dzz(
 sparse_err_t sparse_cg_manual_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,
-	const void * workspace, const size_t workspace_size
+	void * workspace, const size_t workspace_size
 );
 
 /**
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index b72a64554..9b1fc9860 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -431,6 +431,8 @@ static sparse_err_t sparse_cg_init_impl(
 	if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) {
 		return NULL_ARGUMENT;
 	}
+	if( buffer == nullptr ) { return NULL_ARGUMENT; }
+	// TODO bounds-check bufferSize
 	try {
 		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI >(
 			n, a, ja, ia, buffer, bufferSize, deleter ) );
@@ -482,6 +484,60 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 	return rc;
 }
 
+sparse_err_t sparse_cg_manual_init_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
+sparse_err_t sparse_cg_manual_init_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
+sparse_err_t sparse_cg_manual_init_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
+sparse_err_t sparse_cg_manual_init_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, false ) );
+}
+
 sparse_err_t sparse_cg_init_opt_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia,

From 5e31461b8b750d2ae7f4fe7d87f782cae128aad2 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 18:57:50 +0200
Subject: [PATCH 18/22] Spec-fix workspace size computation function, and
 implement it

---
 include/transition/solver.h |  8 ++++++--
 src/transition/solver.cpp   | 23 +++++++++++++++++------
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 968054f94..dc12ec59a 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -533,6 +533,8 @@ sparse_err_t sparse_cg_manual_init_dzz(
  *  - #sparse_cg_manual_init_siz, and
  *  - #sparse_cg_manual_init_szz.
  *
+ * @param[in] n      The maximum system size the solver will be initialised
+ *                   with.
  * @param[in] precon Whether the solver is expected to use a preconditioner.
  *
  * \note If preconditioning is required, the solver requires a larger work
@@ -545,7 +547,7 @@ sparse_err_t sparse_cg_manual_init_dzz(
  *
  * @returns The workspace size in bytes.
  */
-size_t sparse_cg_workspace_size_s( const bool precon );
+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon );
 
 /**
  * Returns the minimum workspace size required when manually supplying a
@@ -556,6 +558,8 @@ size_t sparse_cg_workspace_size_s( const bool precon );
  *  - #sparse_cg_manual_init_diz, and
  *  - #sparse_cg_manual_init_dzz.
  *
+ * @param[in] n      The maximum system size the solver will be initialised
+ *                   with.
  * @param[in] precon Whether the solver is expected to use a preconditioner.
  *
  * \note If preconditioning is required, the solver requires a larger work
@@ -568,7 +572,7 @@ size_t sparse_cg_workspace_size_s( const bool precon );
  *
  * @returns The workspace size in bytes.
  */
-size_t sparse_cg_workspace_size_d( const bool precon );
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon );
 
 // Note that szi and dzi are skipped on purpose. Such variants would not seem
 // sensible, though could easily be provided if they do turn out to be needed
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 9b1fc9860..079ea8c54 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -206,10 +206,12 @@ class CG_Data {
 		 *           per both the C and C++ specifications.
 		 */
 		static size_t workspaceSize( const size_t n, const bool preconditioned ) {
+			static_assert( grb::config::CACHE_LINE_SIZE::value() >= sizeof(int),
+				"Unhandled padding case; please submit a bug report" );
 			if( preconditioned ) {
-				return 4 * n * sizeof( T ) + 3 * sizeof( int );
+				return 4 * n * sizeof( T ) + 3 * grb::config::CACHE_LINE_SIZE::value();
 			} else {
-				return 3 * n * sizeof( T ) + 2 * sizeof( int );
+				return 3 * n * sizeof( T ) + 2 * grb::config::CACHE_LINE_SIZE::value();
 			}
 		}
 
@@ -260,9 +262,10 @@ class CG_Data {
 			assert( a != nullptr );
 			assert( ja != nullptr );
 			assert( ia != nullptr );
-			constexpr size_t align = (64 % sizeof(int) == 0)
-				?  64
-				: (64 + (sizeof(int) - (64 % sizeof(int))));
+			constexpr size_t L = grb::config::CACHE_LINE_SIZE::value();
+			constexpr size_t align = (L % sizeof(int) == 0)
+				?  L
+				: (L + (sizeof(int) - (L % sizeof(int))));
 			if( buffer_size < workspaceSize( n, false ) ) {
 				throw std::invalid_argument( "The given buffer size is too small" );
 			}
@@ -445,6 +448,14 @@ static sparse_err_t sparse_cg_init_impl(
 	return NO_ERROR;
 }
 
+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ) {
+	return CG_Data< float, size_t, size_t >::workspaceSize( n, precon );
+}
+
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ) {
+	return CG_Data< double, size_t, size_t >::workspaceSize( n, precon );
+}
+
 template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl_no_buffer(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -452,7 +463,7 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 	const bool support_preconditioning, const bool numa
 ) {
 	const size_t allocSize = CG_Data< T, NZI, RSI >::
-		workspaceSize( n, support_preconditioning ) + 256; // TODO hide this const
+		workspaceSize( n, support_preconditioning );
 	void * buffer = nullptr;
 #ifdef _GRB_NO_LIBNUMA
 	if( numa ) {

From b07f383ad64f22c153888fc2b1fabe242b72ccf3 Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 18:59:35 +0200
Subject: [PATCH 19/22] Sanity check given buffer sizes on initialisation

---
 src/transition/solver.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 079ea8c54..6e029b858 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -435,7 +435,9 @@ static sparse_err_t sparse_cg_init_impl(
 		return NULL_ARGUMENT;
 	}
 	if( buffer == nullptr ) { return NULL_ARGUMENT; }
-	// TODO bounds-check bufferSize
+	if( bufferSize < CG_Data< T, NZI, RSI >::workspaceSize( n, false ) ) {
+		return ILLEGAL_ARGUMENT;
+	}
 	try {
 		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI >(
 			n, a, ja, ia, buffer, bufferSize, deleter ) );

From 43fc263856cd280860fda5f423f4e069f305f84a Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Wed, 23 Jul 2025 19:08:42 +0200
Subject: [PATCH 20/22] Partial code review -- will finish up tomorrow

---
 include/transition/solver.h | 20 ++++++++++----------
 src/transition/solver.cpp   |  4 +++-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index dc12ec59a..58b9cdae6 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -341,7 +341,7 @@ sparse_err_t sparse_cg_manual_init_sii(
  * This variant is for double-precision floating point nonzeroes and integer
  * \a ja and \a ia arrays, as also indicated by the <tt>dii</tt> postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
  */
 sparse_err_t sparse_cg_init_opt_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -368,7 +368,7 @@ sparse_err_t sparse_cg_init_dii(
  */
 sparse_err_t sparse_cg_manual_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
+	const double * const a, const int * const ja, const int * const ia,
 	void * workspace, const size_t workspace_size
 );
 
@@ -379,7 +379,7 @@ sparse_err_t sparse_cg_manual_init_dii(
  * <tt>size_t</tt>-valued \a ja, and integer-valued \a ia, as also indicated by
  * the <tt>siz</tt> postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
  */
 sparse_err_t sparse_cg_init_opt_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -406,7 +406,7 @@ sparse_err_t sparse_cg_init_siz(
  */
 sparse_err_t sparse_cg_manual_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
+	const float * const a, const int * const ja, const size_t * const ia,
 	void * workspace, const size_t workspace_size
 );
 
@@ -417,7 +417,7 @@ sparse_err_t sparse_cg_manual_init_siz(
  * <tt>size_t</tt>-valued \a ja, and integer-valued \a ia, as also indicated by
  * the <tt>diz</tt> postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
  */
 sparse_err_t sparse_cg_init_opt_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -444,7 +444,7 @@ sparse_err_t sparse_cg_init_diz(
  */
 sparse_err_t sparse_cg_manual_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
+	const double * const a, const int * const ja, const size_t * const ia,
 	void * workspace, const size_t workspace_size
 );
 
@@ -455,7 +455,7 @@ sparse_err_t sparse_cg_manual_init_diz(
  * <tt>size_t</tt>-valued \a ja and \a ia, as also indicated by the <tt>szz</tt>
  * postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
  */
 sparse_err_t sparse_cg_init_opt_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -482,7 +482,7 @@ sparse_err_t sparse_cg_init_szz(
  */
 sparse_err_t sparse_cg_manual_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
+	const float * const a, const size_t * const ja, const size_t * const ia,
 	void * workspace, const size_t workspace_size
 );
 
@@ -493,7 +493,7 @@ sparse_err_t sparse_cg_manual_init_szz(
  * <tt>size_t</tt>-valued \a ja and \a ia, as also indicated by the <tt>dzz</tt>
  * postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
  */
 sparse_err_t sparse_cg_init_opt_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
@@ -520,7 +520,7 @@ sparse_err_t sparse_cg_init_dzz(
  */
 sparse_err_t sparse_cg_manual_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const float * const a, const int * const ja, const int * const ia,
+	const double * const a, const size_t * const ja, const size_t * const ia,
 	void * workspace, const size_t workspace_size
 );
 
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 6e029b858..68b946ba8 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -205,7 +205,9 @@ class CG_Data {
 		 * \internal Note that the space for two additional integers is required as
 		 *           per both the C and C++ specifications.
 		 */
-		static size_t workspaceSize( const size_t n, const bool preconditioned ) {
+		static size_t workspaceSize(
+			const size_t n, const bool preconditioned
+		) noexcept {
 			static_assert( grb::config::CACHE_LINE_SIZE::value() >= sizeof(int),
 				"Unhandled padding case; please submit a bug report" );
 			if( preconditioned ) {

From 616d4d785cf2dd8493dbcd187a63ba4a413dbf7b Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Thu, 24 Jul 2025 11:52:05 +0200
Subject: [PATCH 21/22] Code review on solver.h

---
 include/transition/solver.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/transition/solver.h b/include/transition/solver.h
index 58b9cdae6..09e4f2acc 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -303,7 +303,8 @@ sparse_err_t sparse_cg_init_sii(
  * ownership of the given memory will be returned to the callee. If the call to
  * this function is not successful, the ownership of the memory region returns
  * to the callee immediately. For as long as ownership of the memory region is
- * not with the callee, the contents of the memory region are undefined.
+ * not with the callee, the contents of the memory region are undefined while
+ * code not controlled by ALP is not allowed to write into the memory region.
  *
  * @param[in] workspace_size The size of the memory pointed to by \a workspace.
  *
@@ -316,7 +317,7 @@ sparse_err_t sparse_cg_init_sii(
  *
  * \note Note that the hints that can be supplied to #sparse_cg_init_opt_sii
  *       affect its internal workspace allocation mechanisms. Therefore, those
- *       hints do not apply to this PCG solver initialisation variant.
+ *       hints do not apply to this manual PCG solver initialisation variant.
  *
  * In addition to the possible return codes defined by #sparse_cg_init_opt_sii,
  * a call to this variant may furthermore return:
@@ -532,20 +533,21 @@ sparse_err_t sparse_cg_manual_init_dzz(
  *  - #sparse_cg_manual_init_sii,
  *  - #sparse_cg_manual_init_siz, and
  *  - #sparse_cg_manual_init_szz.
+ * This is the variant for PCG solvers that operate on single-precision values.
  *
  * @param[in] n      The maximum system size the solver will be initialised
  *                   with.
  * @param[in] precon Whether the solver is expected to use a preconditioner.
  *
  * \note If preconditioning is required, the solver requires a larger work
- *       space. Note, however, that even if not enough workspace is supplied
- *       for a preconditioned solve, nonetheless requesting a preconditioned
- *       solve will still execute -- it will then dynamically allocate memory
- *       on the fly.
+ *       space. Note, however, that even if initially not enough workspace is
+ *       supplied for a preconditioned solve, nonetheless requesting a
+ *       preconditioned solve will still execute -- it will then dynamically
+ *       allocate memory on the fly.
  *
  * A call to this function never fails.
  *
- * @returns The workspace size in bytes.
+ * @returns The required workspace size in bytes.
  */
 size_t sparse_cg_workspace_size_s( const size_t n, const bool precon );
 
@@ -557,20 +559,21 @@ size_t sparse_cg_workspace_size_s( const size_t n, const bool precon );
  *  - #sparse_cg_manual_init_dii,
  *  - #sparse_cg_manual_init_diz, and
  *  - #sparse_cg_manual_init_dzz.
+ * This is the variant for PCG solvers that operate on double-precision values.
  *
  * @param[in] n      The maximum system size the solver will be initialised
  *                   with.
  * @param[in] precon Whether the solver is expected to use a preconditioner.
  *
  * \note If preconditioning is required, the solver requires a larger work
- *       space. Note, however, that even if not enough workspace is supplied
- *       for a preconditioned solve, nonetheless requesting a preconditioned
- *       solve will still execute -- it will then dynamically allocate memory
- *       on the fly.
+ *       space. Note, however, that even if initially not enough workspace is
+ *       supplied for a preconditioned solve, nonetheless requesting a
+ *       preconditioned solve will still execute -- it will then dynamically
+ *       allocate memory on the fly.
  *
  * A call to this function never fails.
  *
- * @returns The workspace size in bytes.
+ * @returns The required workspace size in bytes.
  */
 size_t sparse_cg_workspace_size_d( const size_t n, const bool precon );
 

From efa742b852d4568510c379b303b2180af951fbae Mon Sep 17 00:00:00 2001
From: "Albert-Jan N. Yzelman" <albertjan.yzelman@huawei.com>
Date: Thu, 24 Jul 2025 12:16:12 +0200
Subject: [PATCH 22/22] Code review solver.cpp

---
 src/transition/solver.cpp | 88 +++++++++++++++++++++++++++++++--------
 1 file changed, 71 insertions(+), 17 deletions(-)

diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index 68b946ba8..b0a93da36 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -37,11 +37,15 @@
  */
 
 /**
- * Implements the various ways the CG workspace data may be freed.
+ * Implements the various ways the CG workspace data may (or may not) be freed.
  *
- * At the moment, always assumes that the workspace buffer is allocated by the
- * transition path itself, using the standard C++ array allocation mechanism.
- * The matching deleter action employs the matching standard C++ array deleter.
+ * This deleter at present considers that
+ *  -# the solver does not own the workspace data (and hence may not free it);
+ *  -# the solver has allocated the workspace data using standard C++ array
+ *     allocation mechanisms, for which this class provides the matching
+ *     standard C++ array deleter; and
+ *  -# the solver has allocated the workspace data using libNUMA, for which this
+ *     class provides a call to the matching libNUMA free.
  */
 class CGWorkspaceDeleter {
 
@@ -53,18 +57,44 @@ class CGWorkspaceDeleter {
 		/**
 		 * Whether the workspace data was allocated NUMA-aware, and if so, what the
 		 * allocation size was.
+		 *
+		 * \note The allocation size is required for a call to the libNUMA free
+		 *       function.
 		 */
 		const size_t numa;
 
 
 	public:
 
+		/** Default construction is not allowed. */
 		CGWorkspaceDeleter() = delete;
 
-		CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) :
+		/**
+		 * Explicit constructor that requires all information necessary to determine
+		 * how the workspace should be deleted.
+		 *
+		 * @param[in] owning_in Whether the solver owns the workspace buffer.
+		 * @param[in] numa_in   Zero if libNUMA was not used for allocating the
+		 *                      workspace bufer, and the allocation size otherwise.
+		 *
+		 * If \a owning_in is <tt>false</tt>, the input to \a numa_in is ignored.
+		 *
+		 * \note In that case, the user is reponsible for freeing memory, which indeed
+		 *       could have been done in a myriad of other ways besides those this
+		 *       deleter knows about.
+		 *
+		 * \internal In case ALP is compiled without libNUMA support, the standard C++
+		 *           array mechanism will be assumed always.
+		 */
+		CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) noexcept :
 			owning( owning_in ), numa( numa_in )
 		{}
 
+		/**
+		 * Implements the correct action for freeing the workspace data.
+		 *
+		 * @param[in] ptr Pointer to the workspace memory to be deleted.
+		 */
 		void operator()( void * ptr ) {
 			assert( !( !owning && numa ) );
 			if( owning ) {
@@ -83,6 +113,11 @@ class CGWorkspaceDeleter {
 };
 
 /**
+ * The class behind the PCG solver handle.
+ *
+ * More precisely, a valid PCG solver handle is a C void pointer to an instance
+ * of this class.
+ *
  * @tparam T   The nonzero value type.
  * @tparam NZI The nonzero index type.
  * @tparam RSI The row and column index type.
@@ -197,13 +232,19 @@ class CG_Data {
 		 * The size, in bytes, required as a work space for the PCG algorithm.
 		 *
 		 * @param[in] n The linear system size.
-		 * @param[in] preconditioned Whether the solver may be called with a
-		 *                           preconditioner.
+		 * @param[in] preconditioned Whether the solver is expected to be called with
+		 *                           a preconditioner.
 		 *
 		 * @returns The required size, in bytes.
 		 *
 		 * \internal Note that the space for two additional integers is required as
-		 *           per both the C and C++ specifications.
+		 *           per both the C and C++ specifications. However, we here also
+		 *           prevent false sharing between different workspace vectors, and so
+		 *           actually align on the cache line size boundary.
+		 *
+		 * \warning The current implementation assumes that the cache line size is a
+		 *          multiple of <tt>sizeof(int)</tt>. If this assertion ever fails,
+		 *          an error will be thrown at compile time (<tt>static_assert</tt>).
 		 */
 		static size_t workspaceSize(
 			const size_t n, const bool preconditioned
@@ -239,15 +280,19 @@ class CG_Data {
 		 *                   to this constructor.
 		 *
 		 * @param[in] buffer_size The size of the given \a buffer. Used for sanity
-		 *                        checking the use of the buffer.
+		 *                        checking the intended use of the buffer.
 		 *
 		 * @param[in] buffer_deleter How the given \a buffer should be deleted on
 		 *                           destruction of this instance.
 		 *
-		 * \note If the buffer is not to be owned by this instance, then a no-op
-		 *       \a buffer_deleter should be given.
+		 * \note If the buffer is not to be owned by this instance, then a non-owning
+		 *       \a buffer_deleter should be given; see #CGWorkspaceDeleter for
+		 *       details.
+		 *
+		 * \internal The sanity check is weaker if not compiled in debug mode so
+		 *           should be done explicitly \em before entering this constructor.
 		 *
-		 * \warning The sanity check is weaker if not compiled in debug mode.
+		 * \internal This constructor may throw exceptions.
 		 */
 		CG_Data(
 			const size_t n,
@@ -494,7 +539,15 @@ static sparse_err_t sparse_cg_init_impl_no_buffer(
 		CGWorkspaceDeleter( true, numa ? allocSize : 0 )
 	);
 	if( rc != NO_ERROR ) {
-		delete [] static_cast< char * >(buffer);
+#ifndef _GRB_NO_LIBNUMA
+		if( numa ) {
+			numa_free( buffer, allocSize );
+		} else {
+#endif
+			delete [] static_cast< char * >(buffer);
+#ifndef _GRB_NO_LIBNUMA
+		}
+#endif
 	}
 	return rc;
 }
@@ -965,10 +1018,11 @@ static sparse_err_t sparse_cg_set_preconditioner_impl(
 	try {
 		static_cast< CG_Data< T, NZI, RSI > * >( handle )->
 			setPreconditioner( c_precond_p, c_precond_data_p );
-	} catch( std::exception &e ) {
-		std::cerr << e.what() << "\n";
-		std::cerr << "This is an unexpected error; please submit a bug report\n";
-		return UNKNOWN;
+	} catch( ... ) {
+		// by virtue of the ALP interface guarantees, the only possible exception is
+		// due to out-of-memory conditions (when allocating an on-demand vector
+		// workspace for preconditioned solves).
+		return OUT_OF_MEMORY;
 	}
 	return NO_ERROR;
 }