diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp
index 11e910566..157afd4cf 100644
--- a/include/graphblas/algorithms/conjugate_gradient.hpp
+++ b/include/graphblas/algorithms/conjugate_gradient.hpp
@@ -314,43 +314,77 @@ namespace grb {
 				std::cerr << "Error: at least one CG iteration must be requested\n";
 				return grb::ILLEGAL;
 			}
+
+			// dense descriptor
+			if( descr & grb::descriptors::dense ) {
+				if( grb::nnz( x ) != grb::size( x ) ) {
+					std::cerr << "Error: x was sparse while the dense descriptor was given\n";
+					return grb::ILLEGAL;
+				}
+				if( grb::nnz( b ) != grb::size( b ) ) {
+					std::cerr << "Error: b was sparse while the dense descriptor was given\n";
+					return grb::ILLEGAL;
+				}
+				if( grb::nnz( r ) != grb::size( r ) ) {
+					std::cerr << "Error: r was sparse while the dense descriptor was given\n";
+					return grb::ILLEGAL;
+				}
+				if( grb::nnz( temp ) != grb::size( temp ) ) {
+					std::cerr << "Error: temp was sparse while the dense descriptor was "
+						<< "given\n";
+					return grb::ILLEGAL;
+				}
+				if( preconditioned &&
+					(grb::nnz( temp_precond ) != grb::size( temp_precond ))
+				) {
+					std::cerr << "Error: temp_precond was sparse while the dense descriptor "
+						<< "was given\n";
+					return grb::ILLEGAL;
+				}
+			}
 		}

 		// set pure output fields to neutral defaults
 		iterations = 0;
 		residual = std::numeric_limits< double >::infinity();

-		// make x and b structurally dense (if not already) so that the remainder
+		// declare internal scalars
+		IOType sigma, bnorm, alpha, beta;
+
+		// make x structurally dense (if not already) so that the remainder
 		// algorithm can safely use the dense descriptor for faster operations
-		{
-			RC rc = grb::SUCCESS;
+		grb::RC ret = grb::SUCCESS;
+		if( !(descr & grb::descriptors::dense) ) {
 			if( nnz( x ) != n ) {
-				rc = grb::set< descriptors::invert_mask | descriptors::structural >(
+				ret = grb::set< descriptors::invert_mask | descriptors::structural >(
 					x, x, zero );
+				assert( ret == grb::SUCCESS );
 			}
-			if( rc != grb::SUCCESS ) { return rc; }
 			assert( nnz( x ) == n );
 		}
-		IOType sigma, bnorm, alpha, beta;
-
 		// r = b - temp;
-		grb::RC ret = grb::set( temp, 0 ); assert( ret == grb::SUCCESS );
+		ret = ret ? ret : grb::set< descr >( temp, 0 );
+		assert( ret == grb::SUCCESS );
 		ret = ret ? ret : grb::mxv< descr_dense >( temp, A, x, ring );
 		assert( ret == grb::SUCCESS );
-		ret = ret ? ret : grb::set( r, zero ); assert( ret == grb::SUCCESS );
-		// note: no dense descriptor since we actually allow sparse b
-		ret = ret ? ret : grb::foldl( r, b, ring.getAdditiveMonoid() );
-		// from here onwards, r, temp, x are dense and will remain so
-		assert( nnz( r ) == n );
-		assert( nnz( temp ) == n );
+		ret = ret ? ret : grb::set< descr >( r, zero );
+		assert( ret == grb::SUCCESS );
+		// note: no forced dense descriptor since we actually allow sparse b,
+		// but if b was already dense and the dense descriptor was given, we
+		// will respect that
+		ret = ret ? ret : grb::foldl< descr >( r, b, ring.getAdditiveMonoid() );
+		// note: at this point, r and temp are guaranteed dense, so we force a dense
+		// descriptor
 		ret = ret ? ret : grb::foldl< descr_dense >( r, temp, minus );
 		assert( ret == grb::SUCCESS );

 		// bnorm = b' * b;
+		// Note that b can be structurally sparse (unless otherwise guaranteed by the
+		// user).
 		bnorm = zero;
-		ret = ret ? ret : grb::dot< descr_dense >(
+		ret = ret ? ret : grb::dot< descr >(
 			bnorm, b, b,
 			ring.getAdditiveMonoid(),
@@ -381,7 +415,9 @@ namespace grb {

 		// z = M^-1r
 		if( preconditioned ) {
-			ret = ret ? ret : grb::set( z, 0 ); // also ensures z is dense, henceforth
+			ret = ret ? ret : grb::set< descr >( z, 0 );
+			// henceforth, z is also structurally dense and we can force a dense
+			// descriptor for it
 			if( preconditioned == 2 ) {
 				ret = ret ? ret : grb::wait( z, r );
 			}
@@ -389,10 +425,12 @@ namespace grb {
 		}
 		// else, z equals r (by reference)

 		// u = z;
-		ret = ret ? ret : grb::set( u, z );
+		ret = ret ? ret : grb::set< descr >( u, z );
 		assert( ret == grb::SUCCESS );
-		// from here onwards, u is dense; i.e., all vectors are dense from now on,
-		// and we can freely use the dense descriptor in the subsequent
+		// henceforth, u is also structurally dense -- in fact, all vectors in this
+		// algorithm except b are now guaranteed dense. We may hence liberally force
+		// the use of the dense descriptor in the main while loop of the CG (since
+		// it does not refer to b).

 		// sigma = r' * z;
 		sigma = zero;
@@ -435,7 +473,7 @@ namespace grb {
 		assert( ret == grb::SUCCESS );

 		// alpha = sigma / beta;
-		ret = ret ? ret : grb::apply( alpha, sigma, beta, divide );
+		ret = ret ? ret : grb::apply< descr >( alpha, sigma, beta, divide );
 		assert( ret == grb::SUCCESS );

 		// x = x + alpha * u;
@@ -489,7 +527,7 @@ namespace grb {
 		}

 		// alpha = beta / sigma;
-		ret = ret ? ret : grb::apply( alpha, beta, sigma, divide );
+		ret = ret ? ret : grb::apply< descr >( alpha, beta, sigma, divide );
 		assert( ret == grb::SUCCESS );

 		// u_next = z + beta * u_previous;
diff --git a/include/transition/solver.h b/include/transition/solver.h
index ba1afa008..09e4f2acc 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -225,6 +225,22 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
 * @param[in] a  The nonzero values of the system matrix.
 * @param[in] ja The column indices of the nonzeroes of the system matrix.
 * @param[in] ia The row offset array of the system matrix.
+ * @param[in] precon Whether the CG handle is expected to execute solves with
+ *                   preconditioning.
+ * @param[in] numa   Whether the CG handle is expected to execute solves that
+ *                   employ more than one NUMA domain, in which case any memory
+ *                   allocated during the construction of this handle may be
+ *                   allocated in a NUMA-aware fashion.
+ *
+ * The Boolean flags \a precon and \a numa indicated above are \em hints to the
+ * underlying ALP implementation.
+ *
+ * \note This means that even if \a precon was set to false during
+ *       handle creation, a user may still request preconditioning and expect
+ *       no error. Similarly, even if \a numa was set to true during
+ *       handle creation, the underlying ALP implementation may still opt to
+ *       \em not employ NUMA-aware allocation, for example, if it was configured
+ *       without libnuma support.
 *
 * This variant is for single-precision floating point nonzeroes and integer
 * \a ja and \a ia arrays, as also indicated by the sii postfix.
@@ -244,25 +260,119 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
 * @returns #NO_ERROR          If initialisation of the handle proceeded
 *                             successfully. Only in this case shall \a handle
 *                             henceforth be a \em valid handle.
+ *
+ * On returning #NO_ERROR, \a handle shall correspond to an initialised (P)CG
+ * solver instance. Note that this handle contains workspace memory that is
+ * necessary for auxiliary vectors required by the CG algorithm. When
+ * preconditioning is employed, four such workspace vectors of size \a n are
+ * required; otherwise, only three such vectors are required. This memory will
+ * be freed on a call to #sparse_cg_destroy_sii.
+ */
+sparse_err_t sparse_cg_init_opt_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_sii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_sii for full details.
 */
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_sii that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * This variant of initialisation guarantees no additional dynamic memory
+ * allocation will be performed. A call to #sparse_cg_destroy_sii will likewise
+ * \em not free up the given \a workspace memory.
+ *
+ * @param[in,out] workspace The user-supplied memory that will be used as the
+ *                          work space for the returned PCG solver \a handle.
+ *
+ * Previous contents of the given \a workspace will be ignored, and, when given
+ * to this function, should be considered lost. On a successful call, the given
+ * memory should be considered to be under the sole custody of the PCG solver
+ * \a handle until the time the handle is destroyed; at that point, the
+ * ownership of the given memory will be returned to the caller. If the call to
+ * this function is not successful, the ownership of the memory region returns
+ * to the caller immediately. For as long as ownership of the memory region is
+ * not with the caller, the contents of the memory region are undefined, and
+ * code not controlled by ALP is not allowed to write into the memory region.
+ *
+ * @param[in] workspace_size The size of the memory pointed to by \a workspace.
+ *
+ * The given workspace must have byte size greater than or equal to the value
+ * returned by a call to #sparse_cg_workspace_size_s.
+ *
+ * \note For variants that use double-precision values, the workspace must have
+ *       byte size greater than or equal to the value returned by a call to
+ *       #sparse_cg_workspace_size_d.
+ *
+ * \note The hints that can be supplied to #sparse_cg_init_opt_sii affect its
+ *       internal workspace allocation mechanisms. Therefore, those hints do
+ *       not apply to this manual PCG solver initialisation variant.
+ *
+ * In addition to the possible return codes defined by #sparse_cg_init_opt_sii,
+ * a call to this variant may furthermore return:
+ *
+ * @returns #NULL_ARGUMENT    If \a workspace equals NULL.
+ * @returns #ILLEGAL_ARGUMENT If \a workspace_size is smaller than required.
+ *
+ * Different from the specification of #sparse_cg_init_opt_sii, this variant
+ * shall never return #OUT_OF_MEMORY.
+ *
+ * For further details, see #sparse_cg_init_opt_sii.
+ */
+sparse_err_t sparse_cg_manual_init_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
  * This variant is for double-precision floating point nonzeroes and integer
  * \a ja and \a ia arrays, as also indicated by the dii postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_dii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_dii for full details.
 */
 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_dii that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -270,13 +380,37 @@ sparse_err_t sparse_cg_init_dii(
  * size_t-valued \a ja, and integer-valued \a ia, as also indicated by
  * the siz postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_siz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_siz for full details.
 */
 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -284,13 +418,37 @@ sparse_err_t sparse_cg_init_siz(
  * size_t-valued \a ja, and integer-valued \a ia, as also indicated by
  * the diz postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_diz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_diz for full details.
 */
 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_diz that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -298,13 +456,37 @@ sparse_err_t sparse_cg_init_diz(
  * size_t-valued \a ja and \a ia, as also indicated by the szz
  * postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_szz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_szz for full details.
 */
 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_szz that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+);
+
 /**
  * Initialises a #sparse_cg_handle_t object.
  *
@@ -312,13 +494,89 @@ sparse_err_t sparse_cg_init_szz(
  * size_t-valued \a ja and \a ia, as also indicated by the dzz
  * postfix.
  *
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_dzz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_dzz for full details.
 */
 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia
 );

+/**
+ * Variant of #sparse_cg_init_opt_dzz that does not allocate workspace, but
+ * instead employs user-supplied memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+);
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace for PCG solvers to initialise with.
+ *
+ * These so-called manual initialisation functions are the following:
+ *   - #sparse_cg_manual_init_sii,
+ *   - #sparse_cg_manual_init_siz, and
+ *   - #sparse_cg_manual_init_szz.
+ * This is the variant for PCG solvers that operate on single-precision values.
+ *
+ * @param[in] n      The maximum system size the solver will be initialised
+ *                   with.
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ *       space. Note, however, that even if not enough workspace was initially
+ *       supplied for a preconditioned solve, requesting such a solve will
+ *       still execute -- the solver will then dynamically allocate the missing
+ *       memory on the fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The required workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon );
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace for PCG solvers to initialise with.
+ *
+ * These so-called manual initialisation functions are the following:
+ *   - #sparse_cg_manual_init_dii,
+ *   - #sparse_cg_manual_init_diz, and
+ *   - #sparse_cg_manual_init_dzz.
+ * This is the variant for PCG solvers that operate on double-precision values.
+ *
+ * @param[in] n      The maximum system size the solver will be initialised
+ *                   with.
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ *       space. Note, however, that even if not enough workspace was initially
+ *       supplied for a preconditioned solve, requesting such a solve will
+ *       still execute -- the solver will then dynamically allocate the missing
+ *       memory on the fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The required workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon );
+
 // Note that szi and dzi are skipped on purpose. Such variants would not seem
 // sensible, though could easily be provided if they do turn out to be needed
diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index 2722f4450..5c0fa2d0e 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -140,7 +140,7 @@ int KML_CG_PREFIXED( InitSI )(
 	KmlSolverTask **handle, int n,
 	const float *a, const int *ja, const int *ia
 ) {
-	// negative numbers become positive when casted to size_t
+	// negative numbers become positive when cast to size_t
 	if( n <= 0 ) { return KMLSS_BAD_N; }
 	sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia );
 	return sparse_err_t_2_int( err );
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index b52baca3d..b0a93da36 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -16,12 +16,15 @@
  */

-#include
 #include
+#include

 #include
 #include
+
+#include
+#include
+
 #include "solver.h"

 /**
@@ -34,6 +37,87 @@
  */

 /**
+ * Implements the various ways the CG workspace data may (or may not) be freed.
+ *
+ * This deleter at present considers that
+ *  -# the solver does not own the workspace data (and hence may not free it);
+ *  -# the solver has allocated the workspace data using standard C++ array
+ *     allocation mechanisms, for which this class provides the matching
+ *     standard C++ array deleter; and
+ *  -# the solver has allocated the workspace data using libNUMA, for which this
+ *     class provides a call to the matching libNUMA free.
+ */
+class CGWorkspaceDeleter {

+	private:

+		/** Whether we own the workspace data. */
+		const bool owning;

+		/**
+		 * Whether the workspace data was allocated NUMA-aware, and if so, what the
+		 * allocation size was.
+		 *
+		 * \note The allocation size is required for a call to the libNUMA free
+		 *       function.
+		 */
+		const size_t numa;


+	public:

+		/** Default construction is not allowed. */
+		CGWorkspaceDeleter() = delete;

+		/**
+		 * Explicit constructor that requires all information necessary to determine
+		 * how the workspace should be deleted.
+		 *
+		 * @param[in] owning_in Whether the solver owns the workspace buffer.
+		 * @param[in] numa_in   Zero if libNUMA was not used for allocating the
+		 *                      workspace buffer, and the allocation size otherwise.
+		 *
+		 * If \a owning_in is false, the input to \a numa_in is ignored.
+		 *
+		 * \note In that case, the user is responsible for freeing memory, which
+		 *       indeed could have been done in a myriad of other ways besides those
+		 *       this deleter knows about.
+		 *
+		 * \internal In case ALP is compiled without libNUMA support, the standard C++
+		 *           array mechanism will always be assumed.
+		 */
+		CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) noexcept :
+			owning( owning_in ), numa( numa_in )
+		{}

+		/**
+		 * Implements the correct action for freeing the workspace data.
+		 *
+		 * @param[in] ptr Pointer to the workspace memory to be deleted.
+		 */
+		void operator()( void * ptr ) {
+			assert( !( !owning && numa ) );
+			if( owning ) {
+#ifndef _GRB_NO_LIBNUMA
+				if( numa ) {
+					numa_free( ptr, numa );
+				} else {
+#endif
+					delete [] static_cast< char * >(ptr);
+#ifndef _GRB_NO_LIBNUMA
+				}
+#endif
+			}
+		}

+};

+/**
+ * The class behind the PCG solver handle.
+ *
+ * More precisely, a valid PCG solver handle is a C void pointer to an instance
+ * of this class.
+ *
  * @tparam T   The nonzero value type.
  * @tparam NZI The nonzero index type.
  * @tparam RSI The row and column index type.
@@ -106,6 +190,14 @@ class CG_Data {
 		/** Any required data for the \a preconditioner. */
 		void * preconditioner_data;

+		// destructor data
+
+		/** The given buffer during construction. */
+		void * _buffer;
+
+		/** How to handle \a _buffer on destruction. */
+		CGWorkspaceDeleter _buffer_deleter;
+

 	protected:

@@ -136,6 +228,36 @@ class CG_Data {

 	public:

+		/**
+		 * The size, in bytes, required as a work space for the PCG algorithm.
+		 *
+		 * @param[in] n              The linear system size.
+		 * @param[in] preconditioned Whether the solver is expected to be called with
+		 *                           a preconditioner.
+		 *
+		 * @returns The required size, in bytes.
+		 *
+		 * \internal Note that the space for two additional integers is required as
+		 *           per both the C and C++ specifications. However, we here also
+		 *           prevent false sharing between different workspace vectors, and so
+		 *           actually align on the cache line size boundary.
+		 *
+		 * \warning The current implementation assumes that the cache line size is a
+		 *          multiple of sizeof(int). If this assumption is ever violated,
+		 *          an error will be thrown at compile time (static_assert).
+		 */
+		static size_t workspaceSize(
+			const size_t n, const bool preconditioned
+		) noexcept {
+			static_assert( grb::config::CACHE_LINE_SIZE::value() % sizeof(int) == 0,
+				"Unhandled padding case; please submit a bug report" );
+			if( preconditioned ) {
+				return 4 * n * sizeof( T ) + 3 * grb::config::CACHE_LINE_SIZE::value();
+			} else {
+				return 3 * n * sizeof( T ) + 2 * grb::config::CACHE_LINE_SIZE::value();
+			}
+		}
+
 		/** Disable default constructor. */
 		CG_Data() = delete;

@@ -149,25 +271,93 @@ class CG_Data {
 		 *
 		 * The matrix defined by \a a, \a ja, \a ia must be symmetric positive
 		 * definite.
+		 *
+		 * @param[in] buffer The workspace required by this algorithm. This must
+		 *                   point to a valid memory region that can be used
+		 *                   exclusively by ALP. The size of the memory region (in
+		 *                   bytes) must be greater than or equal to that returned by
+		 *                   #workspaceSize( n ) with \a n equal to the value passed
+		 *                   to this constructor.
+		 *
+		 * @param[in] buffer_size The size of the given \a buffer. Used for sanity
+		 *                        checking the intended use of the buffer.
+		 *
+		 * @param[in] buffer_deleter How the given \a buffer should be deleted on
+		 *                           destruction of this instance.
+		 *
+		 * \note If the buffer is not to be owned by this instance, then a non-owning
+		 *       \a buffer_deleter should be given; see #CGWorkspaceDeleter for
+		 *       details.
+		 *
+		 * \internal The sanity check is weaker when not compiled in debug mode, and
+		 *           should hence be done explicitly \em before entering this
+		 *           constructor.
+		 *
+		 * \internal This constructor may throw exceptions.
 		 */
 		CG_Data(
 			const size_t n,
-			const T * const a, const RSI * const ja, const NZI * const ia
+			const T * const a, const RSI * const ja, const NZI * const ia,
+			void * const buffer, const size_t buffer_size, const CGWorkspaceDeleter &buffer_deleter
 		) :
 			size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ),
 			residual( std::numeric_limits< T >::infinity() ), iters( 0 ),
-			workspace( {
-				grb::Vector< T >( n ), grb::Vector< T >( n ), grb::Vector< T >( n )
-			} ), precond_workspace( grb::Vector< T >( 0 ) ),
-			preconditioner( nullptr ), preconditioner_data( nullptr )
+			preconditioner( nullptr ), preconditioner_data( nullptr ),
+			_buffer( buffer ), _buffer_deleter( buffer_deleter )
 		{
 			assert( n > 0 );
 			assert( a != nullptr );
 			assert( ja != nullptr );
 			assert( ia != nullptr );
+			constexpr size_t L = grb::config::CACHE_LINE_SIZE::value();
+			constexpr size_t align = (L % sizeof(int) == 0)
+				? L
+				: (L + (sizeof(int) - (L % sizeof(int))));
+			if( buffer_size < workspaceSize( n, false ) ) {
+				throw std::invalid_argument( "The given buffer size is too small" );
+			}
 			Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n );
 			std::swap( A, matrix );
+			char * workspace_ptr = static_cast< char * >(buffer);
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n * sizeof( T ) );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+				std::swap( workspace[ 0 ], tmp );
+			}
+			workspace_ptr += n * sizeof( T );
+			workspace_ptr +=
+				(align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n * sizeof( T ) );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+				std::swap( workspace[ 1 ], tmp );
+			}
+			workspace_ptr += n * sizeof( T );
+			workspace_ptr +=
+				(align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+			assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n * sizeof( T ) );
+			{
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+				std::swap( workspace[ 2 ], tmp );
+			}
+			if( buffer_size >= workspaceSize( n, true ) ) {
+				workspace_ptr += n * sizeof( T );
+				workspace_ptr +=
+					(align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+				assert( static_cast< char * >(buffer) + buffer_size >= workspace_ptr + n * sizeof( T ) );
+				T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+				grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+				std::swap( precond_workspace, tmp );
+			}
+		}
+
+		/** Destroys this CG solve handle. */
+		~CG_Data() {
+			// call the deleter on the buffer pointer (which may be a no-op)
+			_buffer_deleter( _buffer );
 		}

 		/** @returns The system size. */
@@ -231,8 +421,15 @@ class CG_Data {
 			preconditioner_data = data;
 			assert( !( !preconditioner && preconditioner_data ) );
 			if( grb::size( precond_workspace ) == 0 ) {
+				// note: using the default ALP vector allocation here is the only
+				// convenient option -- otherwise, buffer management would have to be
+				// implemented for this vector alone. If it is important that this
+				// vector be allocated without a SPA, then the user should set the
+				// precond hint to true.
 				grb::Vector< T > replace( size );
 				std::swap( replace, precond_workspace );
+				std::cerr << "Warning: allocating additional workspace to handle CG solves "
+					<< "with preconditioning. To prevent this on-demand allocation, set the "
+					<< "precond option during CG solver handle creation to true.\n";
 			}
 			assert( grb::size( precond_workspace ) == size );
 		}
@@ -277,15 +474,20 @@ class CG_Data {
 template< typename T, typename NZI, typename RSI >
 static sparse_err_t sparse_cg_init_impl(
 	sparse_cg_handle_t * const handle, const size_t n,
-	const T * const a, const RSI * const ja, const NZI * const ia
+	const T * const a, const RSI * const ja, const NZI * const ia,
+	void * const buffer, const size_t bufferSize, const CGWorkspaceDeleter &deleter
 ) {
 	if( n == 0 ) { return ILLEGAL_ARGUMENT; }
 	if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) {
 		return NULL_ARGUMENT;
 	}
+	if( buffer == nullptr ) { return NULL_ARGUMENT; }
+	if( bufferSize < CG_Data< T, NZI, RSI >::workspaceSize( n, false ) ) {
+		return ILLEGAL_ARGUMENT;
+	}
 	try {
-		*handle = static_cast< void * >(
-			new CG_Data< T, NZI, RSI >( n, a, ja, ia ) );
+		*handle = static_cast< void * >( new CG_Data< T, NZI, RSI >(
+			n, a, ja, ia, buffer, bufferSize, deleter ) );
 	} catch( std::exception &e ) {
 		// the grb::Matrix constructor may only throw on out of memory errors
 		std::cerr << "Error: " << e.what() << "\n";
@@ -295,46 +497,215 @@ static sparse_err_t sparse_cg_init_impl(
 	return NO_ERROR;
 }

+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ) {
+	return CG_Data< float, size_t, size_t >::workspaceSize( n, precon );
+}
+
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ) {
+	return CG_Data< double, size_t, size_t >::workspaceSize( n, precon );
+}
+
+template< typename T, typename NZI, typename RSI >
+static sparse_err_t sparse_cg_init_impl_no_buffer(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const T * const a, const RSI * const ja, const NZI * const ia,
+	const bool support_preconditioning, const bool numa
+) {
+	const size_t allocSize = CG_Data< T, NZI, RSI >::
+		workspaceSize( n, support_preconditioning );
+	void * buffer = nullptr;
+#ifdef _GRB_NO_LIBNUMA
+	if( numa ) {
+		std::cerr << "Warning: solver transition path was requested to perform "
+			<< "NUMA-aware allocation, but ALP was compiled without libnuma.\n";
+	}
+#else
+	if( numa ) {
+		buffer = numa_alloc_interleaved( allocSize );
+		if( buffer == NULL ) { return OUT_OF_MEMORY; }
+	} else {
+#endif
+		try {
+			buffer = static_cast< void * >(new char[ allocSize ]);
+		} catch( ... ) {
+			std::cerr << "Error allocating workspace buffer\n";
+			return OUT_OF_MEMORY;
+		}
+#ifndef _GRB_NO_LIBNUMA
+	}
+#endif
+	const sparse_err_t rc = sparse_cg_init_impl(
+		handle, n, a, ja, ia, buffer, allocSize,
+		CGWorkspaceDeleter( true, numa ? allocSize : 0 )
+	);
+	if( rc != NO_ERROR ) {
+#ifndef _GRB_NO_LIBNUMA
+		if( numa ) {
+			numa_free( buffer, allocSize );
+		} else {
+#endif
+			delete [] static_cast< char * >(buffer);
+#ifndef _GRB_NO_LIBNUMA
+		}
+#endif
+	}
+	return rc;
+}
+
+sparse_err_t sparse_cg_manual_init_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	void * workspace, const size_t workspace_size
+) {
+	return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia,
+		workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_init_opt_sii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const int * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+		precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_dii(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const int * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+		precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_siz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const int * const ja, const size_t * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+		ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_diz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const int * const ja, const size_t * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+		ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_szz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const float * const a, const size_t * const ja, const size_t * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+		ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_dzz(
+	sparse_cg_handle_t * const handle, const size_t n,
+	const double * const a, const size_t * const ja, const size_t * const ia,
+	const bool precond, const bool numa
+) {
+	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+		ia, precond, numa );
+}
+
 sparse_err_t sparse_cg_init_sii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+		true, true );
 }

 sparse_err_t sparse_cg_init_dii(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const int * const ia
 ) {
-	return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+		true, true );
 }

 sparse_err_t sparse_cg_init_siz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+		ia, true, true );
 }

 sparse_err_t sparse_cg_init_diz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const int * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+		ia, true, true );
 }

 sparse_err_t sparse_cg_init_szz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const float * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+		ia, true, true );
 }

 sparse_err_t sparse_cg_init_dzz(
 	sparse_cg_handle_t * const handle, const size_t n,
 	const double * const a, const size_t * const ja, const size_t * const ia
 ) {
-	return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia );
+	return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+		ia, true, true );
 }

 template< typename T, typename NZI, typename RSI >
@@ -525,7 +896,8 @@ static sparse_err_t sparse_cg_get_iter_count_impl(
 	const sparse_cg_handle_t handle, size_t * const iters
 ) {
 	if( handle == nullptr || iters == nullptr ) { return NULL_ARGUMENT; }
-	*iters = static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
+	*iters =
+		static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
 	return NO_ERROR;
 }

@@ -646,8 +1018,10 @@ static sparse_err_t sparse_cg_set_preconditioner_impl(
 	try {
 		static_cast< CG_Data< T, NZI, RSI > * >( handle )->
 			setPreconditioner( c_precond_p, c_precond_data_p );
-	} catch(...) {
-		// spec says ALP vector allocation can only throw due to out-of-memory
+	} catch( ... ) {
+		// by virtue of the ALP interface guarantees, the only possible exception is
+		// due to out-of-memory conditions (when allocating an on-demand vector
+		// workspace for preconditioned solves).
 		return OUT_OF_MEMORY;
 	}
 	return NO_ERROR;
@@ -712,8 +1086,7 @@ static sparse_err_t sparse_cg_set_max_iter_count_impl(
 	sparse_cg_handle_t handle, const size_t max_iters
 ) {
 	if( handle == nullptr ) { return NULL_ARGUMENT; }
-	static_cast< CG_Data< T, NZI, RSI > * >( handle )->
-		setMaxIters( max_iters );
+	static_cast< CG_Data< T, NZI, RSI > * >( handle )->setMaxIters( max_iters );
 	return NO_ERROR;
 }
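
For reference, a minimal C sketch of how the two new initialisation paths introduced by this diff compose from the caller's side. The 3-by-3 SPD CRS system, the error handling, and the call to sparse_cg_destroy_dii (assumed here to follow the naming and signature conventions of the existing destroy functions, such as sparse_cg_destroy_sii) are illustrative assumptions only and not part of this diff:

#include <stdbool.h>
#include <stdlib.h>

#include "solver.h"

int example( void ) {
	/* an illustrative 3-by-3 SPD tridiagonal system in CRS format */
	const size_t n = 3;
	const double a[] = { 2.0, -1.0, -1.0, 2.0, -1.0, -1.0, 2.0 };
	const int ja[] = { 0, 1, 0, 1, 2, 1, 2 };
	const int ia[] = { 0, 2, 5, 7 };
	sparse_cg_handle_t handle;

	/* path 1: hinted initialisation -- ALP allocates the workspace itself;
	 * here we hint that no preconditioning and no NUMA-aware allocation are
	 * needed, so only three auxiliary vectors of size n are set up */
	sparse_err_t err = sparse_cg_init_opt_dii( &handle, n, a, ja, ia,
		false, false );
	if( err != NO_ERROR ) { return 1; }
	/* ...solver calls... */
	(void) sparse_cg_destroy_dii( handle ); /* assumed destroy variant */

	/* path 2: manual initialisation -- the caller supplies the workspace;
	 * no dynamic allocation then occurs inside the handle */
	const size_t ws_size = sparse_cg_workspace_size_d( n, false );
	void * const ws = malloc( ws_size );
	if( ws == NULL ) { return 2; }
	err = sparse_cg_manual_init_dii( &handle, n, a, ja, ia, ws, ws_size );
	if( err != NO_ERROR ) { free( ws ); return 3; }
	/* ...solver calls... */
	(void) sparse_cg_destroy_dii( handle ); /* does not free ws */
	free( ws );                             /* ownership is back with the caller */
	return 0;
}

Note that because the manual path above requests the non-preconditioned workspace size, a later call that sets a preconditioner would trigger the documented on-demand allocation (and its warning); passing sparse_cg_workspace_size_d( n, true ) instead avoids this.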