1#ifndef IRLBA_MATRIX_SPARSE_HPP
2#define IRLBA_MATRIX_SPARSE_HPP
14#include "sanisizer/sanisizer.hpp"
16#ifndef IRLBA_CUSTOM_PARALLEL
17#include "subpar/subpar.hpp"
30template<
class ValueArray_,
class IndexArray_,
class Po
interArray_ >
31class ParallelSparseMatrixCore {
33 typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
36 ParallelSparseMatrixCore(
45 my_primary_dim(column_major ? ncol : nrow),
46 my_secondary_dim(column_major ? nrow : ncol),
47 my_num_threads(num_threads),
48 my_values(std::move(x)),
49 my_indices(std::move(i)),
50 my_ptrs(std::move(p)),
51 my_column_major(column_major)
53 assert(num_threads > 0);
54 if (num_threads > 1) {
55 const auto total_nzeros = my_ptrs[my_primary_dim];
56 const PointerType per_thread_floor = total_nzeros / my_num_threads;
57 const int per_thread_extra = total_nzeros % my_num_threads;
61 const auto nthreads_p1 = sanisizer::sum<std::size_t>(my_num_threads, 1);
64 sanisizer::resize(my_primary_boundaries, nthreads_p1);
66 Eigen::Index primary_counter = 0;
67 PointerType sofar = 0;
68 for (
int t = 0; t < my_num_threads; ++t) {
69 sofar += per_thread_floor + (t < per_thread_extra);
70 while (primary_counter < my_primary_dim && my_ptrs[primary_counter + 1] <= sofar) {
73 my_primary_boundaries[t + 1] = primary_counter;
79 Eigen::Index my_primary_dim, my_secondary_dim;
82 ValueArray_ my_values;
83 IndexArray_ my_indices;
84 PointerArray_ my_ptrs;
87 std::vector<Eigen::Index> my_primary_boundaries;
90 Eigen::Index rows()
const {
91 if (my_column_major) {
92 return my_secondary_dim;
94 return my_primary_dim;
98 Eigen::Index cols()
const {
99 if (my_column_major) {
100 return my_primary_dim;
102 return my_secondary_dim;
106 const ValueArray_& get_values()
const {
110 const IndexArray_& get_indices()
const {
114 const PointerArray_& get_pointers()
const {
118 int get_num_threads()
const {
119 return my_num_threads;
122 bool get_column_major()
const {
123 return my_column_major;
126 const std::vector<Eigen::Index>& get_primary_boundaries()
const {
127 return my_primary_boundaries;
131 template<
typename EigenVector_>
132 void indirect_multiply(
const EigenVector_& rhs, std::vector<std::vector<typename EigenVector_::Scalar> >& thread_buffers, EigenVector_& output)
const {
133 if (my_num_threads == 1) {
135 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
136 const auto start = my_ptrs[c];
137 const auto end = my_ptrs[c + 1];
138 const auto val = rhs.coeff(c);
139 for (PointerType s = start; s < end; ++s) {
140 output.coeffRef(my_indices[s]) += my_values[s] * val;
150 typename EigenVector_::Scalar* optr;
152 auto& curbuffer = thread_buffers[t - 1];
153 sanisizer::resize(curbuffer, my_secondary_dim);
154 optr = curbuffer.data();
156 optr = output.data();
158 std::fill_n(optr, my_secondary_dim,
static_cast<typename EigenVector_::Scalar
>(0));
162 const auto curstart = my_primary_boundaries[t];
163 const auto curend = my_primary_boundaries[t + 1];
164 for (Eigen::Index p = curstart; p < curend; ++p) {
165 const auto start = my_ptrs[p];
166 const auto end = my_ptrs[p + 1];
167 const auto val = rhs.coeff(p);
168 for (PointerType s = start; s < end; ++s) {
169 optr[my_indices[s]] += my_values[s] * val;
174 assert(sanisizer::is_equal(thread_buffers.size(), my_num_threads - 1));
175 for (
const auto& buffer : thread_buffers) {
176 for (Eigen::Index x = 0; x < my_secondary_dim; ++x) {
177 output.coeffRef(x) += buffer[x];
183 template<
typename EigenVector_>
184 void direct_multiply(
const EigenVector_& rhs, EigenVector_& output)
const {
185 if (my_num_threads == 1) {
186 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
187 output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
193 const auto curstart = my_primary_boundaries[t];
194 const auto curend = my_primary_boundaries[t + 1];
195 for (
auto c = curstart; c < curend; ++c) {
196 output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
202 template<
typename Scalar_,
class EigenVector_>
203 Scalar_ column_dot_product(Eigen::Index p,
const EigenVector_& rhs)
const {
204 const PointerType primary_start = my_ptrs[p], primary_end = my_ptrs[p + 1];
205 if (primary_start == primary_end) {
210 Scalar_ dot1 = 0, dot2 = 0;
212 PointerType s = primary_start;
213 const PointerType primary_end_m1 = primary_end - 1;
214 for (; s < primary_end_m1; s += 2) {
215 dot1 += my_values[s] * rhs.coeff(my_indices[s]);
216 dot2 += my_values[s + 1] * rhs.coeff(my_indices[s + 1]);
219 if (s < primary_end) {
220 dot1 += my_values[s] * rhs.coeff(my_indices[s]);
227template<
class EigenVector_,
class ValueArray_,
class IndexArray_,
class Po
interArray_ >
228class ParallelSparseWorkspace final :
public Workspace<EigenVector_> {
230 ParallelSparseWorkspace(
const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
233 if (my_core.get_num_threads() > 1 && my_core.get_column_major()) {
234 sanisizer::resize(my_thread_buffers, my_core.get_num_threads() - 1);
239 const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
240 std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;
243 void multiply(
const EigenVector_& right, EigenVector_& output) {
244 if (my_core.get_column_major()) {
245 my_core.indirect_multiply(right, my_thread_buffers, output);
247 my_core.direct_multiply(right, output);
252template<
class EigenVector_,
class ValueArray_,
class IndexArray_,
class Po
interArray_ >
253class ParallelSparseAdjointWorkspace final :
public AdjointWorkspace<EigenVector_> {
255 ParallelSparseAdjointWorkspace(
const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
258 if (my_core.get_num_threads() > 1 && !my_core.get_column_major()) {
259 sanisizer::resize(my_thread_buffers, my_core.get_num_threads() - 1);
264 const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
265 std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;
268 void multiply(
const EigenVector_& right, EigenVector_& output) {
269 if (my_core.get_column_major()) {
270 my_core.direct_multiply(right, output);
272 my_core.indirect_multiply(right, my_thread_buffers, output);
277template<
class EigenMatrix_,
class ValueArray_,
class IndexArray_,
class Po
interArray_ >
278class ParallelSparseRealizeWorkspace final :
public RealizeWorkspace<EigenMatrix_> {
280 ParallelSparseRealizeWorkspace(
const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
285 const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
288 const EigenMatrix_& realize(EigenMatrix_& buffer) {
289 const auto nr = my_core.rows(), nc = my_core.cols();
290 buffer.resize(nr, nc);
293 const auto& ptrs = my_core.get_pointers();
294 const auto& indices = my_core.get_indices();
295 const auto& values = my_core.get_values();
297 typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
298 if (my_core.get_column_major()) {
299 for (Eigen::Index c = 0; c < nc; ++c) {
300 PointerType col_start = ptrs[c], col_end = ptrs[c + 1];
301 for (PointerType s = col_start; s < col_end; ++s) {
302 buffer.coeffRef(indices[s], c) = values[s];
306 for (Eigen::Index r = 0; r < nr; ++r) {
307 PointerType row_start = ptrs[r], row_end = ptrs[r + 1];
308 for (PointerType s = row_start; s < row_end; ++s) {
309 buffer.coeffRef(r, indices[s]) = values[s];
384 ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p,
bool column_major,
int num_threads) :
385 my_core(nrow, ncol, std::move(x), std::move(i), std::move(p), column_major, num_threads)
389 ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_> my_core;
396 return my_core.rows();
403 return my_core.cols();
411 return my_core.get_values();
419 return my_core.get_indices();
426 return my_core.get_pointers();
432 typedef I<decltype(std::declval<PointerArray_>()[0])>
PointerType;
442 return my_core.get_primary_boundaries();
463 return std::make_unique<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
470 return std::make_unique<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
477 return std::make_unique<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
Interface for a matrix to use in compute().
Definition interface.hpp:142
Sparse matrix with customizable parallelization.
Definition sparse.hpp:359
ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads)
Definition sparse.hpp:384
const std::vector< Eigen::Index > & get_primary_boundaries() const
Definition sparse.hpp:441
Eigen::Index cols() const
Definition sparse.hpp:402
const ValueArray_ & get_values() const
Definition sparse.hpp:410
std::unique_ptr< AdjointWorkspace< EigenVector_ > > new_adjoint_workspace() const
Definition sparse.hpp:450
Eigen::Index rows() const
Definition sparse.hpp:395
I< decltype(std::declval< PointerArray_ >()[0])> PointerType
Definition sparse.hpp:432
ParallelSparseMatrix()
Definition sparse.hpp:365
std::unique_ptr< RealizeWorkspace< EigenMatrix_ > > new_realize_workspace() const
Definition sparse.hpp:454
auto new_known_adjoint_workspace() const
Definition sparse.hpp:469
const PointerArray_ & get_pointers() const
Definition sparse.hpp:425
const IndexArray_ & get_indices() const
Definition sparse.hpp:418
std::unique_ptr< Workspace< EigenVector_ > > new_workspace() const
Definition sparse.hpp:446
auto new_known_realize_workspace() const
Definition sparse.hpp:476
auto new_known_workspace() const
Definition sparse.hpp:462
Interfaces for matrix inputs.
Implements IRLBA for approximate SVD.
Definition compute.hpp:23
void parallelize(Task_ num_tasks, Run_ run_task)
Definition parallel.hpp:33
Classes for parallelized multiplication.