diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp
index 11e910566..157afd4cf 100644
--- a/include/graphblas/algorithms/conjugate_gradient.hpp
+++ b/include/graphblas/algorithms/conjugate_gradient.hpp
@@ -314,43 +314,77 @@ namespace grb {
std::cerr << "Error: at least one CG iteration must be requested\n";
return grb::ILLEGAL;
}
+
+ // dense descriptor
+ if( descr & grb::descriptors::dense ) {
+ if( grb::nnz( x ) != grb::size( x ) ) {
+ std::cerr << "Error: x was sparse while the dense descriptor was given\n";
+ return grb::ILLEGAL;
+ }
+ if( grb::nnz( b ) != grb::size( b ) ) {
+ std::cerr << "Error: b was sparse while the dense descriptor was given\n";
+ return grb::ILLEGAL;
+ }
+ if( grb::nnz( r ) != grb::size( r ) ) {
+ std::cerr << "Error: r was sparse while the dense descriptor was given\n";
+ return grb::ILLEGAL;
+ }
+ if( grb::nnz( temp ) != grb::size( temp ) ) {
+ std::cerr << "Error: temp was sparse while the dense descriptor was "
+ << "given\n";
+ return grb::ILLEGAL;
+ }
+ if( preconditioned &&
+ (grb::nnz( temp_precond ) != grb::size( temp_precond ))
+ ) {
+ std::cerr << "Error: temp_precond was sparse while the dense descriptor "
+ << "was given\n";
+ return grb::ILLEGAL;
+ }
+ }
}
// set pure output fields to neutral defaults
iterations = 0;
residual = std::numeric_limits< double >::infinity();
- // make x and b structurally dense (if not already) so that the remainder
+ // declare internal scalars
+ IOType sigma, bnorm, alpha, beta;
+
+ // make x structurally dense (if not already) so that the remainder of the
// algorithm can safely use the dense descriptor for faster operations
- {
- RC rc = grb::SUCCESS;
+ grb::RC ret = grb::SUCCESS;
+ if( !(descr & grb::descriptors::dense) ) {
if( nnz( x ) != n ) {
- rc = grb::set< descriptors::invert_mask | descriptors::structural >(
+ ret = grb::set< descriptors::invert_mask | descriptors::structural >(
x, x, zero
);
+ assert( ret == grb::SUCCESS );
}
- if( rc != grb::SUCCESS ) { return rc; }
assert( nnz( x ) == n );
}
- IOType sigma, bnorm, alpha, beta;
-
// r = b - temp;
- grb::RC ret = grb::set( temp, 0 ); assert( ret == grb::SUCCESS );
+ ret = ret ? ret : grb::set< descr >( temp, 0 );
+ assert( ret == grb::SUCCESS );
ret = ret ? ret : grb::mxv< descr_dense >( temp, A, x, ring );
assert( ret == grb::SUCCESS );
- ret = ret ? ret : grb::set( r, zero ); assert( ret == grb::SUCCESS );
- // note: no dense descriptor since we actually allow sparse b
- ret = ret ? ret : grb::foldl( r, b, ring.getAdditiveMonoid() );
- // from here onwards, r, temp, x are dense and will remain so
- assert( nnz( r ) == n );
- assert( nnz( temp ) == n );
+ ret = ret ? ret : grb::set< descr >( r, zero );
+ assert( ret == grb::SUCCESS );
+ // note: no forced dense descriptor since we actually allow sparse b
+ // but if b was already dense and the dense descriptor was given, we
+ // will respect that
+ ret = ret ? ret : grb::foldl< descr >( r, b, ring.getAdditiveMonoid() );
+ // note: at this point, r and temp are guaranteed dense, so we force a dense
+ // descriptor
ret = ret ? ret : grb::foldl< descr_dense >( r, temp, minus );
assert( ret == grb::SUCCESS );
// bnorm = b' * b;
+ // Note that b can be structurally sparse (unless otherwise guaranteed by the
+ // user).
bnorm = zero;
- ret = ret ? ret : grb::dot< descr_dense >(
+ ret = ret ? ret : grb::dot< descr >(
bnorm,
b, b,
ring.getAdditiveMonoid(),
@@ -381,7 +415,9 @@ namespace grb {
// z = M^-1r
if( preconditioned ) {
- ret = ret ? ret : grb::set( z, 0 ); // also ensures z is dense, henceforth
+ ret = ret ? ret : grb::set< descr >( z, 0 );
+ // henceforth, also z is structurally dense and we can force a dense
+ // descriptor for it
if( preconditioned == 2 ) {
ret = ret ? ret : grb::wait( z, r );
}
@@ -389,10 +425,12 @@ namespace grb {
} // else, z equals r (by reference)
// u = z;
- ret = ret ? ret : grb::set( u, z );
+ ret = ret ? ret : grb::set< descr >( u, z );
assert( ret == grb::SUCCESS );
- // from here onwards, u is dense; i.e., all vectors are dense from now on,
- // and we can freely use the dense descriptor in the subsequent
+ // henceforth, u is also structurally dense -- in fact, all vectors in this
+ // algorithm except b are now guaranteed dense. We may hence freely force
+ // the dense descriptor in the main while-loop of the CG (which does not
+ // refer to b).
// sigma = r' * z;
sigma = zero;
@@ -435,7 +473,7 @@ namespace grb {
assert( ret == grb::SUCCESS );
// alpha = sigma / beta;
- ret = ret ? ret : grb::apply( alpha, sigma, beta, divide );
+ ret = ret ? ret : grb::apply< descr >( alpha, sigma, beta, divide );
assert( ret == grb::SUCCESS );
// x = x + alpha * u;
@@ -489,7 +527,7 @@ namespace grb {
}
// alpha = beta / sigma;
- ret = ret ? ret : grb::apply( alpha, beta, sigma, divide );
+ ret = ret ? ret : grb::apply< descr >( alpha, beta, sigma, divide );
assert( ret == grb::SUCCESS );
// u_next = z + beta * u_previous;
diff --git a/include/transition/solver.h b/include/transition/solver.h
index ba1afa008..09e4f2acc 100644
--- a/include/transition/solver.h
+++ b/include/transition/solver.h
@@ -225,6 +225,22 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
* @param[in] a The nonzero values of the system matrix.
* @param[in] ja The column indices of the nonzeroes of the system matrix.
* @param[in] ia The row offset array of the system matrix.
+ * @param[in] precon Whether the CG handle is expected to execute solves with
+ * preconditioning.
+ * @param[in] numa Whether the CG handle is expected to execute solves that
+ * employ more than one NUMA domain; if so, the construction
+ * of this handle may attempt NUMA-aware allocation for any
+ * memory it allocates.
+ *
+ * The Boolean flags \a precon and \a numa indicated above are \em hints to the
+ * underlying ALP implementation.
+ *
+ * \note This means that even if \a precon was set to false during
+ * handle creation, a user may still request preconditioning and expect
+ * no error. Similarly, even if \a numa was set true during
+ * handle creation, the underlying ALP implementation may still opt to
+ * \em not employ NUMA-aware allocation, for example, if it was configured
+ * without libnuma support.
*
* This variant is for single-precision floating point nonzeroes and integer
* \a ja and \a ia arrays, as also indicated by the sii postfix.
@@ -244,25 +260,119 @@ typedef int (*sparse_cg_preconditioner_dxx_t) (
* @returns #NO_ERROR If initialisation of the handle proceeded
* successfully. Only in this case shall \a handle
* henceforth be a \em valid handle.
+ *
+ * On returning #NO_ERROR, \a handle shall correspond to an initialised (P)CG
+ * solver instance. Note that this handle contains the workspace memory
+ * necessary for the auxiliary vectors required by the CG algorithm. When
+ * preconditioning is employed, four such workspace vectors of size \a n are
+ * required; otherwise, only three such vectors are required. This memory will
+ * be freed on a call to #sparse_cg_destroy_sii.
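+ *
+ * An illustrative call sequence follows (with hypothetical CRS arrays \a a,
+ * \a ja, \a ia and system size \a n; error handling elided):
+ *
+ * \code
+ * sparse_cg_handle_t handle;
+ * sparse_err_t rc = sparse_cg_init_opt_sii(
+ *     &handle, n, a, ja, ia,
+ *     false, // hint: no preconditioned solves expected
+ *     true   // hint: prefer NUMA-aware workspace allocation
+ * );
+ * \endcode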
+ */
+sparse_err_t sparse_cg_init_opt_sii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const int * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_sii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_sii for full details.
*/
sparse_err_t sparse_cg_init_sii(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const int * const ja, const int * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_sii that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * This variant of initialisation guarantees no additional dynamic memory
+ * allocation will be performed. A call to #sparse_cg_destroy_sii will likewise
+ * \em not free up the given \a workspace memory.
+ *
+ * @param[in,out] workspace The user-supplied memory that will be used as the
+ * work space for the returned PCG solver \a handle.
+ *
+ * Previous contents of the given \a workspace are ignored and, once passed to
+ * this function, should be considered lost. On a successful call, the given
+ * memory is under the sole custody of the PCG solver \a handle until the
+ * handle is destroyed; at that point, ownership of the memory returns to the
+ * caller. If the call to this function is not successful, ownership returns
+ * to the caller immediately. For as long as ownership rests with the handle,
+ * the contents of the memory region are undefined and code not controlled by
+ * ALP must not write into it.
+ *
+ * @param[in] workspace_size The size of the memory pointed to by \a workspace.
+ *
+ * The given workspace must have byte size greater than or equal to the value
+ * returned by a call to #sparse_cg_workspace_size_s.
+ *
+ * \note For variants that use double-precision values, the workspace must have
+ * byte size greater than or equal to the value returned by a call to
+ * #sparse_cg_workspace_size_d.
+ *
+ * \note The hints that can be supplied to #sparse_cg_init_opt_sii affect its
+ * internal workspace allocation mechanisms; those hints therefore do
+ * not apply to this manual PCG solver initialisation variant.
+ *
+ * In addition to the possible return codes defined by #sparse_cg_init_opt_sii,
+ * a call to this variant may furthermore return:
+ *
+ * @returns #NULL_ARGUMENT If \a workspace equals NULL.
+ * @returns #ILLEGAL_ARGUMENT If \a workspace_size is smaller than required.
+ *
+ * Different from the specification of #sparse_cg_init_opt_sii, this variant
+ * shall never return #OUT_OF_MEMORY.
+ *
+ * For further details, see #sparse_cg_init_opt_sii.
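+ *
+ * A minimal sketch of intended use (assuming a plain \c malloc suffices for
+ * the user workspace; error handling elided):
+ *
+ * \code
+ * const size_t ws_size = sparse_cg_workspace_size_s( n, true );
+ * void * const ws = malloc( ws_size );
+ * sparse_cg_handle_t handle;
+ * sparse_err_t rc = sparse_cg_manual_init_sii(
+ *     &handle, n, a, ja, ia, ws, ws_size );
+ * // ... solver use elided ...
+ * (void) sparse_cg_destroy_sii( handle ); // does not free ws
+ * free( ws );
+ * \endcode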
+ */
+sparse_err_t sparse_cg_manual_init_sii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const int * const ia,
+ void * workspace, const size_t workspace_size
+);
+
/**
* Initialises a #sparse_cg_handle_t object.
*
* This variant is for double-precision floating point nonzeroes and integer
* \a ja and \a ia arrays, as also indicated by the dii postfix.
*
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_dii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const int * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_dii that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_dii for full details.
*/
sparse_err_t sparse_cg_init_dii(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const int * const ja, const int * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_dii that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const int * const ia,
+ void * workspace, const size_t workspace_size
+);
+
/**
* Initialises a #sparse_cg_handle_t object.
*
@@ -270,13 +380,37 @@ sparse_err_t sparse_cg_init_dii(
* size_t-valued \a ja, and integer-valued \a ia, as also indicated by
* the siz postfix.
*
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_siz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const size_t * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_siz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_siz for full details.
*/
sparse_err_t sparse_cg_init_siz(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const int * const ja, const size_t * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_siz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_siz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+);
+
/**
* Initialises a #sparse_cg_handle_t object.
*
@@ -284,13 +418,37 @@ sparse_err_t sparse_cg_init_siz(
* size_t-valued \a ja, and integer-valued \a ia, as also indicated by
* the diz postfix.
*
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_diz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const size_t * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_diz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_diz for full details.
*/
sparse_err_t sparse_cg_init_diz(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const int * const ja, const size_t * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_diz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_diz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+);
+
/**
* Initialises a #sparse_cg_handle_t object.
*
@@ -298,13 +456,37 @@ sparse_err_t sparse_cg_init_diz(
* size_t-valued \a ja and \a ia, as also indicated by the szz
* postfix.
*
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_szz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const size_t * const ja, const size_t * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_szz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_szz for full details.
*/
sparse_err_t sparse_cg_init_szz(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const size_t * const ja, const size_t * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_szz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_szz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const size_t * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+);
+
/**
* Initialises a #sparse_cg_handle_t object.
*
@@ -312,13 +494,89 @@ sparse_err_t sparse_cg_init_szz(
* size_t-valued \a ja and \a ia, as also indicated by the dzz
* postfix.
*
- * @see #sparse_cg_init_sii for full documentation.
+ * @see #sparse_cg_init_opt_sii for full documentation.
+ */
+sparse_err_t sparse_cg_init_opt_dzz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const size_t * const ja, const size_t * const ia,
+ const bool precon, const bool numa
+);
+
+/**
+ * Variant of #sparse_cg_init_opt_dzz that results in a #sparse_cg_handle_t with
+ * default hints (with preconditioning and with NUMA).
+ *
+ * @see sparse_cg_init_opt_dzz for full details.
*/
sparse_err_t sparse_cg_init_dzz(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const size_t * const ja, const size_t * const ia
);
+/**
+ * Variant of #sparse_cg_init_opt_dzz that does not allocate workspace, but
+ * instead employs given user memory as its workspace memory.
+ *
+ * @see #sparse_cg_manual_init_sii for full details.
+ */
+sparse_err_t sparse_cg_manual_init_dzz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const size_t * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+);
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace during PCG solver initialisation.
+ *
+ * The manual initialisation functions concerned are:
+ * - #sparse_cg_manual_init_sii,
+ * - #sparse_cg_manual_init_siz, and
+ * - #sparse_cg_manual_init_szz.
+ *
+ * This is the variant for PCG solvers that operate on single-precision values.
+ *
+ * @param[in] n The maximum system size the solver will be initialised
+ * with.
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ * space. Note, however, that even if the supplied workspace is too
+ * small for a preconditioned solve, requesting one will still execute:
+ * the solver will then dynamically allocate the missing memory on the
+ * fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The required workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon );
+
+/**
+ * Returns the minimum workspace size required when manually supplying a
+ * workspace during PCG solver initialisation.
+ *
+ * The manual initialisation functions concerned are:
+ * - #sparse_cg_manual_init_dii,
+ * - #sparse_cg_manual_init_diz, and
+ * - #sparse_cg_manual_init_dzz.
+ *
+ * This is the variant for PCG solvers that operate on double-precision values.
+ *
+ * @param[in] n The maximum system size the solver will be initialised
+ * with.
+ * @param[in] precon Whether the solver is expected to use a preconditioner.
+ *
+ * \note If preconditioning is required, the solver requires a larger work
+ * space. Note, however, that even if the supplied workspace is too
+ * small for a preconditioned solve, requesting one will still execute:
+ * the solver will then dynamically allocate the missing memory on the
+ * fly.
+ *
+ * A call to this function never fails.
+ *
+ * @returns The required workspace size in bytes.
+ */
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon );
+
// Note that szi and dzi are skipped on purpose. Such variants would not seem
// sensible, though could easily be provided if they do turn out to be needed
diff --git a/src/transition/kml_iss.cpp b/src/transition/kml_iss.cpp
index 2722f4450..5c0fa2d0e 100644
--- a/src/transition/kml_iss.cpp
+++ b/src/transition/kml_iss.cpp
@@ -140,7 +140,7 @@ int KML_CG_PREFIXED( InitSI )(
KmlSolverTask **handle, int n, const float *a,
const int *ja, const int *ia
) {
- // negative numbers become positive when casted to size_t
+ // negative numbers become positive when cast to size_t
if( n <= 0 ) { return KMLSS_BAD_N; }
sparse_err_t err = sparse_cg_init_sii( handle, n, a, ja, ia );
return sparse_err_t_2_int( err );
diff --git a/src/transition/solver.cpp b/src/transition/solver.cpp
index b52baca3d..b0a93da36 100644
--- a/src/transition/solver.cpp
+++ b/src/transition/solver.cpp
@@ -16,12 +16,15 @@
*/
-#include <algorithm>
#include <cassert>
+#include <cstdint>
#include <iostream>
#include <limits>
+#include <stdexcept>
+#include <utility>
+
#include "solver.h"
/**
@@ -34,6 +37,87 @@
*/
/**
+ * Implements the various ways the CG workspace data may (or may not) be freed.
+ *
+ * This deleter at present distinguishes three cases:
+ * -# the solver does not own the workspace data (and hence may not free it);
+ * -# the solver has allocated the workspace data using standard C++ array
+ * allocation mechanisms, for which this class provides the matching
+ * standard C++ array deleter; and
+ * -# the solver has allocated the workspace data using libNUMA, for which this
+ * class provides a call to the matching libNUMA free.
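+ *
+ * For example, a deleter for user-owned workspace memory is constructed as
+ * \c CGWorkspaceDeleter( false, 0 ), while one matching a libNUMA allocation
+ * of \a size bytes reads \c CGWorkspaceDeleter( true, size ).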
+ */
+class CGWorkspaceDeleter {
+
+ private:
+
+ /** Whether we own the workspace data. */
+ const bool owning;
+
+ /**
+ * Whether the workspace data was allocated NUMA-aware, and if so, what the
+ * allocation size was.
+ *
+ * \note The allocation size is required for a call to the libNUMA free
+ * function.
+ */
+ const size_t numa;
+
+
+ public:
+
+ /** Default construction is not allowed. */
+ CGWorkspaceDeleter() = delete;
+
+ /**
+ * Explicit constructor that requires all information necessary to determine
+ * how the workspace should be deleted.
+ *
+ * @param[in] owning_in Whether the solver owns the workspace buffer.
+ * @param[in] numa_in Zero if libNUMA was not used for allocating the
+ * workspace buffer, and the allocation size otherwise.
+ *
+ * If \a owning_in is false, the input to \a numa_in is ignored.
+ *
+ * \note In that case, the user is responsible for freeing the memory, which
+ * indeed may have been allocated in any number of ways besides those
+ * this deleter knows about.
+ *
+ * \internal In case ALP is compiled without libNUMA support, the standard C++
+ * array deletion mechanism is always assumed.
+ */
+ CGWorkspaceDeleter( const bool &owning_in, const size_t &numa_in ) noexcept :
+ owning( owning_in ), numa( numa_in )
+ {}
+
+ /**
+ * Implements the correct action for freeing the workspace data.
+ *
+ * @param[in] ptr Pointer to the workspace memory to be deleted.
+ */
+ void operator()( void * ptr ) {
+ assert( !( !owning && numa ) );
+ if( owning ) {
+#ifndef _GRB_NO_LIBNUMA
+ if( numa ) {
+ numa_free( ptr, numa );
+ } else {
+#endif
+ delete [] static_cast< char * >(ptr);
+#ifndef _GRB_NO_LIBNUMA
+ }
+#endif
+ }
+ }
+
+};
+
+/**
+ * The class behind the PCG solver handle.
+ *
+ * More precisely, a valid PCG solver handle is a C void pointer to an instance
+ * of this class.
+ *
* @tparam T The nonzero value type.
* @tparam NZI The nonzero index type.
* @tparam RSI The row and column index type.
@@ -106,6 +190,14 @@ class CG_Data {
/** Any required data for the \a preconditioner. */
void * preconditioner_data;
+ // destructor data
+
+ /** The buffer given during construction. */
+ void * _buffer;
+
+ /** How to handle \a _buffer on destruction. */
+ CGWorkspaceDeleter _buffer_deleter;
+
protected:
@@ -136,6 +228,36 @@ class CG_Data {
public:
+ /**
+ * The size, in bytes, required as a work space for the PCG algorithm.
+ *
+ * @param[in] n The linear system size.
+ * @param[in] preconditioned Whether the solver is expected to be called with
+ * a preconditioner.
+ *
+ * @returns The required size, in bytes.
+ *
+ * \internal Note that the space for two additional integers is required as
+ * per both the C and C++ specifications. However, we here also
+ * prevent false sharing between different workspace vectors, and so
+ * actually align on the cache line size boundary.
+ *
+ * \warning The current implementation assumes that the cache line size is a
+ * multiple of sizeof(int). If this assumption is ever violated,
+ * a compile-time error will be raised (via static_assert).
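+ *
+ * For example, assuming 64-byte cache lines and double-precision values
+ * (eight bytes each), a non-preconditioned solver with \a n equal to 1000
+ * requires 3 * 1000 * 8 + 2 * 64 = 24128 bytes, whereas a preconditioned one
+ * requires 4 * 1000 * 8 + 3 * 64 = 32192 bytes.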
+ */
+ static size_t workspaceSize(
+ const size_t n, const bool preconditioned
+ ) noexcept {
+ static_assert( grb::config::CACHE_LINE_SIZE::value() % sizeof(int) == 0,
+ "Unhandled padding case; please submit a bug report" );
+ if( preconditioned ) {
+ return 4 * n * sizeof( T ) + 3 * grb::config::CACHE_LINE_SIZE::value();
+ } else {
+ return 3 * n * sizeof( T ) + 2 * grb::config::CACHE_LINE_SIZE::value();
+ }
+ }
+
/** Disable default constructor. */
CG_Data() = delete;
@@ -149,25 +271,93 @@ class CG_Data {
*
* The matrix defined by \a a, \a ja, \a ia must be symmetric positive
* definite.
+ *
+ * @param[in] buffer The workspace required by this algorithm. This must
+ * point to a valid memory region that can be used
+ * exclusively by ALP. The size of the memory region (in
+ * bytes) must be greater than or equal to that returned by
+ * #workspaceSize with \a n equal to the value passed
+ * to this constructor.
+ *
+ * @param[in] buffer_size The size of the given \a buffer. Used for sanity
+ * checking the intended use of the buffer.
+ *
+ * @param[in] buffer_deleter How the given \a buffer should be deleted on
+ * destruction of this instance.
+ *
+ * \note If the buffer is not to be owned by this instance, then a non-owning
+ * \a buffer_deleter should be given; see #CGWorkspaceDeleter for
+ * details.
+ *
+ * \internal The sanity check is weaker when not compiled in debug mode, and
+ * should therefore be performed explicitly \em before entering this
+ * constructor.
+ *
+ * \internal This constructor may throw exceptions.
*/
CG_Data(
const size_t n,
- const T * const a, const RSI * const ja, const NZI * const ia
+ const T * const a, const RSI * const ja, const NZI * const ia,
+ void * const buffer, const size_t buffer_size,
+ const CGWorkspaceDeleter &buffer_deleter
) :
size( n ), tolerance( 1e-5 ), max_iter( 1000 ), matrix( 0, 0 ),
residual( std::numeric_limits< T >::infinity() ), iters( 0 ),
- workspace( {
- grb::Vector< T >( n ), grb::Vector< T >( n ), grb::Vector< T >( n )
- } ),
precond_workspace( grb::Vector< T >( 0 ) ),
- preconditioner( nullptr ), preconditioner_data( nullptr )
+ preconditioner( nullptr ), preconditioner_data( nullptr ),
+ _buffer( buffer ), _buffer_deleter( buffer_deleter )
{
assert( n > 0 );
assert( a != nullptr );
assert( ja != nullptr );
assert( ia != nullptr );
+ constexpr size_t L = grb::config::CACHE_LINE_SIZE::value();
+ constexpr size_t align = (L % sizeof(int) == 0)
+ ? L
+ : (L + (sizeof(int) - (L % sizeof(int))));
+ if( buffer_size < workspaceSize( n, false ) ) {
+ throw std::invalid_argument( "The given buffer size is too small" );
+ }
Matrix A = grb::internal::wrapCRSMatrix( a, ja, ia, n, n );
std::swap( A, matrix );
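+ // carve the workspace vectors out of the given buffer: each vector occupies
+ // n * sizeof( T ) bytes and is followed by padding up to the next cache
+ // line boundary, which prevents false sharing between workspace vectors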
+ char * workspace_ptr = static_cast< char * >(buffer);
+ assert( static_cast< char * >(buffer) + buffer_size >=
+ workspace_ptr + n * sizeof( T ) );
+ {
+ T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+ grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+ std::swap( workspace[ 0 ], tmp );
+ }
+ workspace_ptr += n * sizeof( T );
+ workspace_ptr +=
+ (align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+ assert( static_cast< char * >(buffer) + buffer_size >=
+ workspace_ptr + n * sizeof( T ) );
+ {
+ T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+ grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+ std::swap( workspace[ 1 ], tmp );
+ }
+ workspace_ptr += n * sizeof( T );
+ workspace_ptr +=
+ (align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+ assert( static_cast< char * >(buffer) + buffer_size >=
+ workspace_ptr + n * sizeof( T ) );
+ {
+ T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+ grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+ std::swap( workspace[ 2 ], tmp );
+ }
+ if( buffer_size >= workspaceSize( n, true ) ) {
+ workspace_ptr += n * sizeof( T );
+ workspace_ptr +=
+ (align - (reinterpret_cast< uintptr_t >(workspace_ptr) % align));
+ assert( static_cast< char * >(buffer) + buffer_size >=
+ workspace_ptr + n * sizeof( T ) );
+ T * const workspace_vector = reinterpret_cast< T * >(workspace_ptr);
+ grb::Vector< T > tmp = grb::internal::wrapRawVector( n, workspace_vector );
+ std::swap( precond_workspace, tmp );
+ }
+ }
+
+ /** Destroys this CG solve handle. */
+ ~CG_Data() {
+ // call the deleter on the buffer pointer (which may be a no-op)
+ _buffer_deleter( _buffer );
}
/** @returns The system size. */
@@ -231,8 +421,15 @@ class CG_Data {
preconditioner_data = data;
assert( !( !preconditioner && preconditioner_data ) );
if( grb::size( precond_workspace ) == 0 ) {
+ // note: using the default ALP vector allocation here is the only convenient
+ // option -- the alternative would be to implement buffer management for
+ // this one vector. If it is important that this vector be allocated without
+ // a SPA, the user should set the precond hint to true at handle creation.
grb::Vector< T > replace( size );
std::swap( replace, precond_workspace );
+ std::cerr << "Warning: allocating additional workspace to handle CG solves "
+ << "with preconditioning. To prevent this on-demand allocation, set the "
+ << "precond option during CG solver handle creation to true.\n";
}
assert( grb::size( precond_workspace ) == size );
}
@@ -277,15 +474,20 @@ class CG_Data {
template< typename T, typename NZI, typename RSI >
static sparse_err_t sparse_cg_init_impl(
sparse_cg_handle_t * const handle, const size_t n,
- const T * const a, const RSI * const ja, const NZI * const ia
+ const T * const a, const RSI * const ja, const NZI * const ia,
+ void * const buffer, const size_t bufferSize, const CGWorkspaceDeleter &deleter
) {
if( n == 0 ) { return ILLEGAL_ARGUMENT; }
if( handle == nullptr || a == nullptr || ja == nullptr || ia == nullptr ) {
return NULL_ARGUMENT;
}
+ if( buffer == nullptr ) { return NULL_ARGUMENT; }
+ if( bufferSize < CG_Data< T, NZI, RSI >::workspaceSize( n, false ) ) {
+ return ILLEGAL_ARGUMENT;
+ }
try {
- *handle = static_cast< void * >(
- new CG_Data< T, NZI, RSI >( n, a, ja, ia ) );
+ *handle = static_cast< void * >( new CG_Data< T, NZI, RSI >(
+ n, a, ja, ia, buffer, bufferSize, deleter ) );
} catch( std::exception &e ) {
// the grb::Matrix constructor may only throw on out of memory errors
std::cerr << "Error: " << e.what() << "\n";
@@ -295,46 +497,215 @@ static sparse_err_t sparse_cg_init_impl(
return NO_ERROR;
}
+size_t sparse_cg_workspace_size_s( const size_t n, const bool precon ) {
+ return CG_Data< float, size_t, size_t >::workspaceSize( n, precon );
+}
+
+size_t sparse_cg_workspace_size_d( const size_t n, const bool precon ) {
+ return CG_Data< double, size_t, size_t >::workspaceSize( n, precon );
+}
+
+template< typename T, typename NZI, typename RSI >
+static sparse_err_t sparse_cg_init_impl_no_buffer(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const T * const a, const RSI * const ja, const NZI * const ia,
+ const bool support_preconditioning, const bool numa
+) {
+ const size_t allocSize = CG_Data< T, NZI, RSI >::
+ workspaceSize( n, support_preconditioning );
+ void * buffer = nullptr;
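+ // allocate the workspace either via libNUMA (interleaved across all NUMA
+ // domains) or via standard C++ array allocation, depending on the numa hint
+ // and on whether ALP was compiled with libnuma support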
+#ifdef _GRB_NO_LIBNUMA
+ if( numa ) {
+ std::cerr << "Warning: solver transition path was requested to perform "
+ << "NUMA-aware allocation, but ALP was compiled without libnuma.\n";
+ }
+#else
+ if( numa ) {
+ buffer = numa_alloc_interleaved( allocSize );
+ if( buffer == NULL ) { return OUT_OF_MEMORY; }
+ } else {
+#endif
+ try {
+ buffer = static_cast< void * >(new char[ allocSize ]);
+ } catch( ... ) {
+ std::cerr << "Error allocating workspace buffer\n";
+ return OUT_OF_MEMORY;
+ }
+#ifndef _GRB_NO_LIBNUMA
+ }
+#endif
+ const sparse_err_t rc = sparse_cg_init_impl(
+ handle, n, a, ja, ia, buffer, allocSize,
+ CGWorkspaceDeleter( true, numa ? allocSize : 0 )
+ );
+ if( rc != NO_ERROR ) {
+#ifndef _GRB_NO_LIBNUMA
+ if( numa ) {
+ numa_free( buffer, allocSize );
+ } else {
+#endif
+ delete [] static_cast< char * >(buffer);
+#ifndef _GRB_NO_LIBNUMA
+ }
+#endif
+ }
+ return rc;
+}
+
+sparse_err_t sparse_cg_manual_init_sii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const int * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const int * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_siz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_diz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_szz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const size_t * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_manual_init_dzz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const size_t * const ja, const size_t * const ia,
+ void * workspace, const size_t workspace_size
+) {
+ return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia,
+ workspace, workspace_size, CGWorkspaceDeleter( false, 0 ) );
+}
+
+sparse_err_t sparse_cg_init_opt_sii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const int * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+ precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_dii(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const int * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+ precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_siz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const int * const ja, const size_t * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+ ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_diz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const int * const ja, const size_t * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+ ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_szz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const float * const a, const size_t * const ja, const size_t * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+ ia, precond, numa );
+}
+
+sparse_err_t sparse_cg_init_opt_dzz(
+ sparse_cg_handle_t * const handle, const size_t n,
+ const double * const a, const size_t * const ja, const size_t * const ia,
+ const bool precond, const bool numa
+) {
+ return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+ ia, precond, numa );
+}
+
sparse_err_t sparse_cg_init_sii(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const int * const ja, const int * const ia
) {
- return sparse_cg_init_impl< float, int, int >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< float, int, int >( handle, n, a, ja, ia,
+ true, true );
}
sparse_err_t sparse_cg_init_dii(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const int * const ja, const int * const ia
) {
- return sparse_cg_init_impl< double, int, int >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< double, int, int >( handle, n, a, ja, ia,
+ true, true );
}
sparse_err_t sparse_cg_init_siz(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const int * const ja, const size_t * const ia
) {
- return sparse_cg_init_impl< float, size_t, int >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< float, size_t, int >( handle, n, a, ja,
+ ia, true, true );
}
sparse_err_t sparse_cg_init_diz(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const int * const ja, const size_t * const ia
) {
- return sparse_cg_init_impl< double, size_t, int >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< double, size_t, int >( handle, n, a, ja,
+ ia, true, true );
}
sparse_err_t sparse_cg_init_szz(
sparse_cg_handle_t * const handle, const size_t n,
const float * const a, const size_t * const ja, const size_t * const ia
) {
- return sparse_cg_init_impl< float, size_t, size_t >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< float, size_t, size_t >( handle, n, a, ja,
+ ia, true, true );
}
sparse_err_t sparse_cg_init_dzz(
sparse_cg_handle_t * const handle, const size_t n,
const double * const a, const size_t * const ja, const size_t * const ia
) {
- return sparse_cg_init_impl< double, size_t, size_t >( handle, n, a, ja, ia );
+ return sparse_cg_init_impl_no_buffer< double, size_t, size_t >( handle, n, a, ja,
+ ia, true, true );
}
template< typename T, typename NZI, typename RSI >
@@ -525,7 +896,8 @@ static sparse_err_t sparse_cg_get_iter_count_impl(
const sparse_cg_handle_t handle, size_t * const iters
) {
if( handle == nullptr || iters == nullptr ) { return NULL_ARGUMENT; }
- *iters = static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
+ *iters =
+ static_cast< CG_Data< T, NZI, RSI > * >( handle )->getIters();
return NO_ERROR;
}
@@ -646,8 +1018,10 @@ static sparse_err_t sparse_cg_set_preconditioner_impl(
try {
static_cast< CG_Data< T, NZI, RSI > * >( handle )->
setPreconditioner( c_precond_p, c_precond_data_p );
- } catch(...) {
- // spec says ALP vector allocation can only throw due to out-of-memory
+ } catch( ... ) {
+ // by virtue of the ALP interface guarantees, the only possible exception is
+ // due to out-of-memory conditions (when allocating an on-demand vector
+ // workspace for preconditioned solves).
return OUT_OF_MEMORY;
}
return NO_ERROR;
@@ -712,8 +1086,7 @@ static sparse_err_t sparse_cg_set_max_iter_count_impl(
sparse_cg_handle_t handle, const size_t max_iters
) {
if( handle == nullptr ) { return NULL_ARGUMENT; }
- static_cast< CG_Data< T, NZI, RSI > * >( handle )->
- setMaxIters( max_iters );
+ static_cast< CG_Data< T, NZI, RSI > * >( handle )->setMaxIters( max_iters );
return NO_ERROR;
}