scran_pca
Principal component analysis for single-cell data
blocked_pca.hpp
#ifndef SCRAN_PCA_BLOCKED_PCA_HPP
#define SCRAN_PCA_BLOCKED_PCA_HPP

#include "tatami/tatami.hpp"
#include "irlba/irlba.hpp"
#include "irlba/parallel.hpp"
#include "Eigen/Dense"

#include <vector>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <type_traits>

#include "scran_blocks/scran_blocks.hpp"
#include "tatami_stats/tatami_stats.hpp"
#include "utils.hpp"

namespace scran_pca {

/**
 * @brief Options for `blocked_pca()`.
 */
struct BlockedPcaOptions {
    int number = 25;
    bool scale = false;
    bool transpose = true;
    scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
    scran_blocks::VariableWeightParameters variable_block_weight_parameters;
    bool components_from_residuals = true;
    bool realize_matrix = true;
    int num_threads = 1;
    irlba::Options irlba_options;
};

namespace internal {

/*****************************************************
 ************* Blocking data structures **************
 *****************************************************/

template<typename Index_, class EigenVector_>
struct BlockingDetails {
    std::vector<Index_> block_size;

    bool weighted = false;
    typedef typename EigenVector_::Scalar Weight;

    // The below should only be used if weighted = true.
    std::vector<Weight> per_element_weight;
    Weight total_block_weight = 0;
    EigenVector_ expanded_weights;
};

template<class EigenVector_, typename Index_, typename Block_>
BlockingDetails<Index_, EigenVector_> compute_blocking_details(
    Index_ ncells,
    const Block_* block,
    scran_blocks::WeightPolicy block_weight_policy,
    const scran_blocks::VariableWeightParameters& variable_block_weight_parameters)
{
    BlockingDetails<Index_, EigenVector_> output;
    output.block_size = tatami_stats::tabulate_groups(block, ncells);
    if (block_weight_policy == scran_blocks::WeightPolicy::NONE) {
        return output;
    }

    const auto& block_size = output.block_size;
    size_t nblocks = block_size.size();
    output.weighted = true;
    auto& total_weight = output.total_block_weight;
    auto& element_weight = output.per_element_weight;
    element_weight.resize(nblocks);

    for (size_t i = 0; i < nblocks; ++i) {
        auto bsize = block_size[i];

        // Computing effective block weights that also incorporate division by the
        // block size. This avoids having to do the division by block size in the
        // 'compute_blockwise_mean_and_variance*()' functions.
        if (bsize) {
            typename EigenVector_::Scalar block_weight = 1;
            if (block_weight_policy == scran_blocks::WeightPolicy::VARIABLE) {
                block_weight = scran_blocks::compute_variable_weight(bsize, variable_block_weight_parameters);
            }

            element_weight[i] = block_weight / bsize;
            total_weight += block_weight;
        } else {
            element_weight[i] = 0;
        }
    }

    // Setting a placeholder value to avoid problems with division by zero.
    if (total_weight == 0) {
        total_weight = 1;
    }

    // Expanding them for multiplication in the IRLBA wrappers.
    auto sqrt_weights = element_weight;
    for (auto& s : sqrt_weights) {
        s = std::sqrt(s);
    }

    auto& expanded = output.expanded_weights;
    expanded.resize(ncells);
    for (Index_ i = 0; i < ncells; ++i) {
        expanded.coeffRef(i) = sqrt_weights[block[i]];
    }

    return output;
}
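
// For instance (illustrative numbers only): if block A has 2000 cells with a
// block weight of 1 and block B has 500 cells with a block weight of 0.5,
// then per_element_weight is 1/2000 for A's cells and 0.5/500 = 1/1000 for
// B's cells. Each of B's cells counts for more, but block B as a whole
// contributes half as much as block A to any weighted sum.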

/*****************************************************************
 ************ Computing the blockwise mean and variance **********
 *****************************************************************/

template<typename Num_, typename Value_, typename Index_, typename Block_, typename EigenVector_, typename Float_>
void compute_sparse_mean_and_variance_blocked(
    Num_ num_nonzero,
    const Value_* values,
    const Index_* indices,
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    Float_* centers,
    Float_& variance,
    std::vector<Index_>& block_copy,
    Num_ num_all)
{
    const auto& block_size = block_details.block_size;
    size_t nblocks = block_size.size();

    std::fill_n(centers, nblocks, 0);
    for (Num_ i = 0; i < num_nonzero; ++i) {
        centers[block[indices[i]]] += values[i];
    }
    for (size_t b = 0; b < nblocks; ++b) {
        auto bsize = block_size[b];
        if (bsize) {
            centers[b] /= bsize;
        }
    }

    // Computing the variance from the sum of squared differences.
    // This is technically not the correct variance estimate if we
    // were to consider the loss of residual d.f. from estimating
    // the block means, but it's what the PCA sees, so whatever.
    variance = 0;
    std::copy(block_size.begin(), block_size.end(), block_copy.begin());

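    // At this point, block_copy[b] holds the size of block 'b'; it is
    // decremented once per structural non-zero below, so after the first loop
    // it counts the implicit zeros of each block. Each such zero contributes
    // (0 - centers[b])^2 = centers[b]^2 to the sum of squared differences,
    // which is exactly what the second loop adds.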
    if (block_details.weighted) {
        for (Num_ i = 0; i < num_nonzero; ++i) {
            Block_ curb = block[indices[i]];
            auto diff = values[i] - centers[curb];
            variance += diff * diff * block_details.per_element_weight[curb];
            --block_copy[curb];
        }
        for (size_t b = 0; b < nblocks; ++b) {
            auto val = centers[b];
            variance += val * val * block_copy[b] * block_details.per_element_weight[b];
        }
    } else {
        for (Num_ i = 0; i < num_nonzero; ++i) {
            Block_ curb = block[indices[i]];
            auto diff = values[i] - centers[curb];
            variance += diff * diff;
            --block_copy[curb];
        }
        for (size_t b = 0; b < nblocks; ++b) {
            auto val = centers[b];
            variance += val * val * block_copy[b];
        }
    }

    // COMMENT ON DENOMINATOR:
    // If we're not dealing with weights, we compute the actual sample
    // variance for easy interpretation (and to match up with the
    // per-PC calculations in internal::clean_up).
    //
    // If we're dealing with weights, the concept of the sample variance
    // becomes somewhat weird, but we just use the same denominator for
    // consistency in clean_up_projected. Magnitude doesn't matter when
    // scaling for internal::process_scale_vector anyway.
    variance /= num_all - 1;
}

template<class IrlbaSparseMatrix_, typename Block_, class Index_, class EigenVector_, class EigenMatrix_>
void compute_blockwise_mean_and_variance_realized_sparse(
    const IrlbaSparseMatrix_& emat, // this should be column-major with genes in the columns.
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    int nthreads)
{
    tatami::parallelize([&](size_t, Eigen::Index start, Eigen::Index length) -> void {
        size_t ncells = emat.rows();
        const auto& values = emat.get_values();
        const auto& indices = emat.get_indices();
        const auto& ptrs = emat.get_pointers();

        size_t nblocks = block_details.block_size.size();
        static_assert(!EigenMatrix_::IsRowMajor);
        auto mptr = centers.data() + static_cast<size_t>(start) * nblocks; // cast to avoid overflow.
        std::vector<Index_> block_copy(nblocks);

        for (size_t c = start, end = start + length; c < end; ++c, mptr += nblocks) {
            auto offset = ptrs[c];
            compute_sparse_mean_and_variance_blocked(
                ptrs[c + 1] - offset,
                values.data() + offset,
                indices.data() + offset,
                block,
                block_details,
                mptr,
                variances[c],
                block_copy,
                ncells
            );
        }
    }, emat.cols(), nthreads);
}

template<typename Num_, typename Value_, typename Block_, typename Index_, typename EigenVector_, typename Float_>
void compute_dense_mean_and_variance_blocked(
    Num_ number,
    const Value_* values,
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    Float_* centers,
    Float_& variance)
{
    const auto& block_size = block_details.block_size;
    size_t nblocks = block_size.size();
    std::fill_n(centers, nblocks, 0);
    for (Num_ r = 0; r < number; ++r) {
        centers[block[r]] += values[r];
    }
    for (size_t b = 0; b < nblocks; ++b) {
        const auto& bsize = block_size[b];
        if (bsize) {
            centers[b] /= bsize;
        }
    }

    variance = 0;

    if (block_details.weighted) {
        for (Num_ r = 0; r < number; ++r) {
            auto curb = block[r];
            auto delta = values[r] - centers[curb];
            variance += delta * delta * block_details.per_element_weight[curb];
        }
    } else {
        for (Num_ r = 0; r < number; ++r) {
            auto curb = block[r];
            auto delta = values[r] - centers[curb];
            variance += delta * delta;
        }
    }

    variance /= number - 1; // See COMMENT ON DENOMINATOR above.
}

template<class EigenMatrix_, typename Block_, class Index_, class EigenVector_>
void compute_blockwise_mean_and_variance_realized_dense(
    const EigenMatrix_& emat, // this should be column-major with genes in the columns.
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    int nthreads)
{
    tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
        size_t ncells = emat.rows();
        static_assert(!EigenMatrix_::IsRowMajor);
        auto ptr = emat.data() + static_cast<size_t>(start) * ncells;

        const auto& block_size = block_details.block_size;
        size_t nblocks = block_size.size();
        auto mptr = centers.data() + static_cast<size_t>(start) * nblocks; // cast to avoid overflow.

        for (size_t c = start, end = start + length; c < end; ++c, ptr += ncells, mptr += nblocks) {
            compute_dense_mean_and_variance_blocked(ncells, ptr, block, block_details, mptr, variances[c]);
        }
    }, emat.cols(), nthreads);
}

template<typename Value_, typename Index_, typename Block_, class EigenMatrix_, class EigenVector_>
void compute_blockwise_mean_and_variance_tatami(
    const tatami::Matrix<Value_, Index_>& mat, // this should have genes in the rows!
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    EigenMatrix_& centers,
    EigenVector_& variances,
    int nthreads)
{
    const auto& block_size = block_details.block_size;
    size_t nblocks = block_size.size();
    Index_ ngenes = mat.nrow();
    Index_ ncells = mat.ncol();

    if (mat.prefer_rows()) {
        tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
            static_assert(!EigenMatrix_::IsRowMajor);
            auto mptr = centers.data() + static_cast<size_t>(start) * nblocks; // cast to avoid overflow.
            std::vector<Index_> block_copy(nblocks);

            std::vector<Value_> vbuffer(ncells);

            if (mat.is_sparse()) {
                std::vector<Index_> ibuffer(ncells);
                auto ext = tatami::consecutive_extractor<true>(&mat, true, start, length);
                for (Index_ r = start, end = start + length; r < end; ++r, mptr += nblocks) {
                    auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    compute_sparse_mean_and_variance_blocked(range.number, range.value, range.index, block, block_details, mptr, variances[r], block_copy, ncells);
                }
            } else {
                auto ext = tatami::consecutive_extractor<false>(&mat, true, start, length);
                for (Index_ r = start, end = start + length; r < end; ++r, mptr += nblocks) {
                    auto ptr = ext->fetch(vbuffer.data());
                    compute_dense_mean_and_variance_blocked(ncells, ptr, block, block_details, mptr, variances[r]);
                }
            }
        }, ngenes, nthreads);

    } else {
        typedef typename EigenVector_::Scalar Scalar;

        std::vector<std::pair<size_t, Scalar> > block_multipliers;
        block_multipliers.reserve(nblocks);
        for (size_t b = 0; b < nblocks; ++b) {
            auto bsize = block_size[b];
            if (bsize > 1) { // skipping blocks with NaN variances.
                Scalar mult = bsize - 1; // need to convert variances back into sum of squared differences.
                if (block_details.weighted) {
                    mult *= block_details.per_element_weight[b];
                }
                block_multipliers.emplace_back(b, mult);
            }
        }

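        // Restating the arithmetic above: each block's running variance is
        // converted back to a sum of squared differences via var_b * (n_b - 1),
        // optionally multiplied by the per-element weight w_b. The per-gene
        // variance reported below is then sum_b var_b * (n_b - 1) * w_b,
        // divided by (ncells - 1), matching the single-pass calculation in
        // compute_sparse_mean_and_variance_blocked().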
        tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
            std::vector<std::vector<Scalar> > re_centers, re_variances;
            re_centers.reserve(nblocks);
            re_variances.reserve(nblocks);
            for (size_t b = 0; b < nblocks; ++b) {
                re_centers.emplace_back(length);
                re_variances.emplace_back(length);
            }

            std::vector<Value_> vbuffer(length);

            if (mat.is_sparse()) {
                std::vector<tatami_stats::variances::RunningSparse<Scalar, Value_, Index_> > running;
                running.reserve(nblocks);
                for (size_t b = 0; b < nblocks; ++b) {
                    running.emplace_back(length, re_centers[b].data(), re_variances[b].data(), /* skip_nan = */ false, /* subtract = */ start);
                }

                std::vector<Index_> ibuffer(length);
                auto ext = tatami::consecutive_extractor<true>(&mat, false, static_cast<Index_>(0), ncells, start, length);
                for (Index_ c = 0; c < ncells; ++c) {
                    auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    running[block[c]].add(range.value, range.index, range.number);
                }

                for (size_t b = 0; b < nblocks; ++b) {
                    running[b].finish();
                }

            } else {
                std::vector<tatami_stats::variances::RunningDense<Scalar, Value_, Index_> > running;
                running.reserve(nblocks);
                for (size_t b = 0; b < nblocks; ++b) {
                    running.emplace_back(length, re_centers[b].data(), re_variances[b].data(), /* skip_nan = */ false);
                }

                auto ext = tatami::consecutive_extractor<false>(&mat, false, static_cast<Index_>(0), ncells, start, length);
                for (Index_ c = 0; c < ncells; ++c) {
                    auto ptr = ext->fetch(vbuffer.data());
                    running[block[c]].add(ptr);
                }

                for (size_t b = 0; b < nblocks; ++b) {
                    running[b].finish();
                }
            }

            static_assert(!EigenMatrix_::IsRowMajor);
            auto mptr = centers.data() + static_cast<size_t>(start) * nblocks; // cast to avoid overflow.
            for (Index_ r = 0; r < length; ++r, mptr += nblocks) {
                for (size_t b = 0; b < nblocks; ++b) {
                    mptr[b] = re_centers[b][r];
                }

                auto& my_var = variances[start + r];
                my_var = 0;
                for (const auto& bm : block_multipliers) {
                    my_var += re_variances[bm.first][r] * bm.second;
                }
                my_var /= ncells - 1; // See COMMENT ON DENOMINATOR above.
            }
        }, ngenes, nthreads);
    }
}

/******************************************************************
 ************ Project matrices on their rotation vectors **********
 ******************************************************************/

template<class EigenMatrix_, class EigenVector_>
const EigenMatrix_& scale_rotation_matrix(const EigenMatrix_& rotation, bool scale, const EigenVector_& scale_v, EigenMatrix_& tmp) {
    if (scale) {
        tmp = (rotation.array().colwise() / scale_v.array()).matrix();
        return tmp;
    } else {
        return rotation;
    }
}
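
// Note on the above: if the PCA was performed on the scaled matrix with
// entries x[g] / scale_v[g], the projection of a cell onto a rotation vector
// is sum_g (x[g] / scale_v[g]) * w[g] = sum_g x[g] * (w[g] / scale_v[g]).
// Dividing the rotation by the scaling factors thus allows the *unscaled*
// matrix to be projected directly.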

template<class IrlbaSparseMatrix_, class EigenMatrix_>
inline void project_matrix_realized_sparse(
    const IrlbaSparseMatrix_& emat, // cells in rows, genes in columns, CSC.
    EigenMatrix_& components, // dims in rows, cells in columns
    const EigenMatrix_& scaled_rotation, // genes in rows, dims in columns
    int nthreads)
{
    size_t rank = scaled_rotation.cols();
    size_t ncells = emat.rows();
    size_t ngenes = emat.cols();

    components.resize(rank, ncells); // use a transposed version for better cache efficiency.
    components.setZero();

    const auto& x = emat.get_values();
    const auto& i = emat.get_indices();
    const auto& p = emat.get_pointers();

    if (nthreads == 1) {
        Eigen::VectorXd multipliers(rank);
        for (size_t g = 0; g < ngenes; ++g) {
            multipliers.noalias() = scaled_rotation.row(g);
            auto start = p[g], end = p[g + 1];
            for (size_t s = start; s < end; ++s) {
                components.col(i[s]).noalias() += x[s] * multipliers;
            }
        }

    } else {
        const auto& row_nonzero_starts = emat.get_secondary_nonzero_starts();

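        // Each entry of 'row_nonzero_starts' partitions the non-zeros of each
        // gene's column by cell (row) index, so that each thread handles a
        // disjoint range of cells. Every thread thus writes to a disjoint set
        // of columns of 'components' and no synchronization is needed.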
        irlba::parallelize(nthreads, [&](size_t t) -> void {
            const auto& starts = row_nonzero_starts[t];
            const auto& ends = row_nonzero_starts[t + 1];
            Eigen::VectorXd multipliers(rank);

            for (size_t g = 0; g < ngenes; ++g) {
                multipliers.noalias() = scaled_rotation.row(g);
                auto start = starts[g], end = ends[g];
                for (size_t s = start; s < end; ++s) {
                    components.col(i[s]).noalias() += x[s] * multipliers;
                }
            }
        });
    }
}

template<typename Value_, typename Index_, class EigenMatrix_>
void project_matrix_transposed_tatami(
    const tatami::Matrix<Value_, Index_>& mat, // genes in rows, cells in columns
    EigenMatrix_& components,
    const EigenMatrix_& scaled_rotation, // genes in rows, dims in columns
    int nthreads)
{
    size_t rank = scaled_rotation.cols();
    auto ngenes = mat.nrow(), ncells = mat.ncol();
    typedef typename EigenMatrix_::Scalar Scalar;

    components.resize(rank, ncells); // use a transposed version for better cache efficiency.

    if (mat.prefer_rows()) {
        tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
            static_assert(!EigenMatrix_::IsRowMajor);
            auto vptr = scaled_rotation.data();
            std::vector<Value_> vbuffer(length);

            std::vector<std::vector<Scalar> > local_buffers; // create separate buffers to avoid false sharing.
            local_buffers.reserve(rank);
            for (size_t r = 0; r < rank; ++r) {
                local_buffers.emplace_back(length);
            }

            if (mat.is_sparse()) {
                std::vector<Index_> ibuffer(length);
                auto ext = tatami::consecutive_extractor<true>(&mat, true, static_cast<Index_>(0), ngenes, start, length);
                for (Index_ g = 0; g < ngenes; ++g) {
                    auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    for (size_t r = 0; r < rank; ++r) {
                        auto mult = vptr[r * static_cast<size_t>(ngenes) + static_cast<size_t>(g)]; // cast to avoid overflow.
                        auto& local_buffer = local_buffers[r];
                        for (Index_ i = 0; i < range.number; ++i) {
                            local_buffer[range.index[i] - start] += range.value[i] * mult;
                        }
                    }
                }

            } else {
                auto ext = tatami::consecutive_extractor<false>(&mat, true, static_cast<Index_>(0), ngenes, start, length);
                for (Index_ g = 0; g < ngenes; ++g) {
                    auto ptr = ext->fetch(vbuffer.data());
                    for (size_t r = 0; r < rank; ++r) {
                        auto mult = vptr[r * static_cast<size_t>(ngenes) + static_cast<size_t>(g)]; // cast to avoid overflow.
                        auto& local_buffer = local_buffers[r];
                        for (Index_ c = 0; c < length; ++c) {
                            local_buffer[c] += ptr[c] * mult;
                        }
                    }
                }
            }

            for (size_t r = 0; r < rank; ++r) {
                for (Index_ c = 0; c < length; ++c) {
                    components.coeffRef(r, c + start) = local_buffers[r][c];
                }
            }

        }, ncells, nthreads);

    } else {
        tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
            std::vector<Value_> vbuffer(ngenes);
            static_assert(!EigenMatrix_::IsRowMajor);
            auto optr = components.data() + static_cast<size_t>(start) * rank; // cast to avoid overflow.

            if (mat.is_sparse()) {
                std::vector<Index_> ibuffer(ngenes);
                auto ext = tatami::consecutive_extractor<true>(&mat, false, start, length);

                for (Index_ c = start, end = start + length; c < end; ++c, optr += rank) {
                    auto range = ext->fetch(vbuffer.data(), ibuffer.data());
                    static_assert(!EigenMatrix_::IsRowMajor);
                    auto vptr = scaled_rotation.data();
                    for (size_t v = 0; v < rank; ++v, vptr += ngenes) {
                        auto& output = optr[v];
                        output = 0;
                        for (Index_ i = 0; i < range.number; ++i) {
                            output += vptr[range.index[i]] * range.value[i];
                        }
                    }
                }

            } else {
                auto ext = tatami::consecutive_extractor<false>(&mat, false, start, length);
                for (Index_ c = start, end = start + length; c < end; ++c, optr += rank) {
                    auto ptr = ext->fetch(vbuffer.data());
                    static_assert(!EigenMatrix_::IsRowMajor);
                    auto vptr = scaled_rotation.data();
                    for (size_t v = 0; v < rank; ++v, vptr += ngenes) {
                        optr[v] = std::inner_product(vptr, vptr + ngenes, ptr, static_cast<Scalar>(0));
                    }
                }
            }
        }, ncells, nthreads);
    }
}

template<class EigenMatrix_, class EigenVector_>
void clean_up_projected(EigenMatrix_& projected, EigenVector_& D) {
    // Empirically center each PC, as we cannot guarantee that the
    // projection is already centered.
    for (size_t i = 0, iend = projected.rows(); i < iend; ++i) {
        projected.row(i).array() -= projected.row(i).sum() / projected.cols();
    }

    // Convert singular values into variances explained, just dividing by the
    // number of observations minus 1, regardless of weighting.
    typename EigenMatrix_::Scalar denom = projected.cols() - 1;
    for (auto& d : D) {
        d = d * d / denom;
    }
}

/*******************************
 ***** Residual wrapper ********
 *******************************/

// This wrapper class mimics multiplication with the residuals,
// i.e., after subtracting the per-block mean from each cell.
template<class Matrix_, typename Block_, class EigenMatrix_, class EigenVector_>
class ResidualWrapper {
public:
    ResidualWrapper(const Matrix_& mat, const Block_* block, const EigenMatrix_& means) : my_mat(mat), my_block(block), my_means(means) {}

public:
    Eigen::Index rows() const { return my_mat.rows(); }
    Eigen::Index cols() const { return my_mat.cols(); }

public:
    struct Workspace {
        Workspace(size_t nblocks, irlba::WrappedWorkspace<Matrix_> c) : sub(nblocks), child(std::move(c)) {}
        EigenVector_ sub;
        EigenVector_ holding;
        irlba::WrappedWorkspace<Matrix_> child;
    };

    Workspace workspace() const {
        return Workspace(my_means.rows(), irlba::wrapped_workspace(my_mat));
    }

    template<class Right_>
    void multiply(const Right_& rhs, Workspace& work, EigenVector_& output) const {
        const auto& realized_rhs = [&]() {
            if constexpr(std::is_same<Right_, EigenVector_>::value) {
                return rhs;
            } else {
                work.holding.noalias() = rhs;
                return work.holding;
            }
        }();

        irlba::wrapped_multiply(my_mat, realized_rhs, work.child, output);

        work.sub.noalias() = my_means * realized_rhs;
        for (Eigen::Index i = 0, end = output.size(); i < end; ++i) {
            auto& val = output.coeffRef(i);
            val -= work.sub.coeff(my_block[i]);
        }
    }

public:
    struct AdjointWorkspace {
        AdjointWorkspace(size_t nblocks, irlba::WrappedAdjointWorkspace<Matrix_> c) : aggr(nblocks), child(std::move(c)) {}
        EigenVector_ aggr;
        EigenVector_ holding;
        irlba::WrappedAdjointWorkspace<Matrix_> child;
    };

    AdjointWorkspace adjoint_workspace() const {
        return AdjointWorkspace(my_means.rows(), irlba::wrapped_adjoint_workspace(my_mat));
    }

    template<class Right_>
    void adjoint_multiply(const Right_& rhs, AdjointWorkspace& work, EigenVector_& output) const {
        const auto& realized_rhs = [&]() {
            if constexpr(std::is_same<Right_, EigenVector_>::value) {
                return rhs;
            } else {
                work.holding.noalias() = rhs;
                return work.holding;
            }
        }();

        irlba::wrapped_adjoint_multiply(my_mat, realized_rhs, work.child, output);

        work.aggr.setZero();
        for (Eigen::Index i = 0, end = realized_rhs.size(); i < end; ++i) {
            work.aggr.coeffRef(my_block[i]) += realized_rhs.coeff(i);
        }

        output.noalias() -= my_means.adjoint() * work.aggr;
    }

public:
    template<class EigenMatrix2_>
    EigenMatrix2_ realize() const {
        EigenMatrix2_ output = irlba::wrapped_realize<EigenMatrix2_>(my_mat);
        for (Eigen::Index r = 0, rend = output.rows(); r < rend; ++r) {
            output.row(r) -= my_means.row(my_block[r]);
        }
        return output;
    }

private:
    const Matrix_& my_mat;
    const Block_* my_block;
    const EigenMatrix_& my_means;
};
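
// In matrix terms: let X be the cells-by-genes input, M the blocks-by-genes
// matrix of block means, and B the cells-by-blocks one-hot matrix of block
// assignments. The wrapper represents the residuals R = X - B * M without
// forming them: multiply() computes R * v = X * v - B * (M * v), while
// adjoint_multiply() computes R^T * v = X^T * v - M^T * (B^T * v), where
// B^T * v is just the per-block sum of v's entries.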

/**************************
 ***** Dispatchers ********
 **************************/

template<bool realize_matrix_, bool sparse_, typename Value_, typename Index_, typename Block_, class EigenMatrix_, class EigenVector_>
void run_blocked(
    const tatami::Matrix<Value_, Index_>& mat,
    const Block_* block,
    const BlockingDetails<Index_, EigenVector_>& block_details,
    const BlockedPcaOptions& options,
    EigenMatrix_& components,
    EigenMatrix_& rotation,
    EigenVector_& variance_explained,
    EigenMatrix_& center_m,
    EigenVector_& scale_v,
    typename EigenVector_::Scalar& total_var,
    bool& converged)
{
    Index_ ngenes = mat.nrow(), ncells = mat.ncol();

    auto emat = [&]{
        if constexpr(!realize_matrix_) {
            return internal::TransposedTatamiWrapper<EigenVector_, Value_, Index_>(mat, options.num_threads);

        } else if constexpr(sparse_) {
            // 'extracted' contains row-major contents... but we implicitly transpose it to CSC with genes in columns.
            auto extracted = tatami::retrieve_compressed_sparse_contents<Value_, Index_>(&mat, /* row = */ true, /* two_pass = */ false, /* threads = */ options.num_threads);
            return irlba::ParallelSparseMatrix(ncells, ngenes, std::move(extracted.value), std::move(extracted.index), std::move(extracted.pointers), true, options.num_threads);

        } else {
            // Perform an implicit transposition by performing a row-major extraction into a column-major transposed matrix.
            EigenMatrix_ emat(ncells, ngenes);
            static_assert(!EigenMatrix_::IsRowMajor);
            tatami::convert_to_dense(&mat, /* row_major = */ true, emat.data(), options.num_threads);
            return emat;
        }
    }();

    auto nblocks = block_details.block_size.size();
    center_m.resize(nblocks, ngenes);
    scale_v.resize(ngenes);
    if constexpr(!realize_matrix_) {
        compute_blockwise_mean_and_variance_tatami(mat, block, block_details, center_m, scale_v, options.num_threads);
    } else if constexpr(sparse_) {
        compute_blockwise_mean_and_variance_realized_sparse(emat, block, block_details, center_m, scale_v, options.num_threads);
    } else {
        compute_blockwise_mean_and_variance_realized_dense(emat, block, block_details, center_m, scale_v, options.num_threads);
    }
    total_var = internal::process_scale_vector(options.scale, scale_v);

    ResidualWrapper<decltype(emat), Block_, EigenMatrix_, EigenVector_> centered(emat, block, center_m);

    if (block_details.weighted) {
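        // 'expanded_weights' holds the square root of each cell's per-element
        // weight, so scaling each row (cell) of the residual matrix by it
        // means that each cell contributes its full weight to the
        // cross-product underlying the SVD.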
        if (options.scale) {
            irlba::Scaled<true, decltype(centered), EigenVector_> scaled(centered, scale_v, /* divide = */ true);
            irlba::Scaled<false, decltype(scaled), EigenVector_> weighted(scaled, block_details.expanded_weights, /* divide = */ false);
            auto out = irlba::compute(weighted, options.number, components, rotation, variance_explained, options.irlba_options);
            converged = out.first;
        } else {
            irlba::Scaled<false, decltype(centered), EigenVector_> weighted(centered, block_details.expanded_weights, /* divide = */ false);
            auto out = irlba::compute(weighted, options.number, components, rotation, variance_explained, options.irlba_options);
            converged = out.first;
        }

        EigenMatrix_ tmp;
        const auto& scaled_rotation = scale_rotation_matrix(rotation, options.scale, scale_v, tmp);

        // This transposes 'components' to be a NDIM * NCELLS matrix.
        if constexpr(!realize_matrix_) {
            project_matrix_transposed_tatami(mat, components, scaled_rotation, options.num_threads);
        } else if constexpr(sparse_) {
            project_matrix_realized_sparse(emat, components, scaled_rotation, options.num_threads);
        } else {
            components.noalias() = (emat * scaled_rotation).adjoint();
        }

        // Subtracting each block's mean from the PCs.
        if (options.components_from_residuals) {
            EigenMatrix_ centering = (center_m * scaled_rotation).adjoint();
            for (size_t i = 0, iend = components.cols(); i < iend; ++i) {
                components.col(i) -= centering.col(block[i]);
            }
        }

        clean_up_projected(components, variance_explained);
        if (!options.transpose) {
            components.adjointInPlace();
        }

    } else {
        if (options.scale) {
            irlba::Scaled<true, decltype(centered), EigenVector_> scaled(centered, scale_v, /* divide = */ true);
            auto out = irlba::compute(scaled, options.number, components, rotation, variance_explained, options.irlba_options);
            converged = out.first;
        } else {
            auto out = irlba::compute(centered, options.number, components, rotation, variance_explained, options.irlba_options);
            converged = out.first;
        }

        if (options.components_from_residuals) {
            internal::clean_up(mat.ncol(), components, variance_explained);
            if (options.transpose) {
                components.adjointInPlace();
            }
        } else {
            EigenMatrix_ tmp;
            const auto& scaled_rotation = scale_rotation_matrix(rotation, options.scale, scale_v, tmp);

            // This transposes 'components' to be a NDIM * NCELLS matrix.
            if constexpr(!realize_matrix_) {
                project_matrix_transposed_tatami(mat, components, scaled_rotation, options.num_threads);
            } else if constexpr(sparse_) {
                project_matrix_realized_sparse(emat, components, scaled_rotation, options.num_threads);
            } else {
                components.noalias() = (emat * scaled_rotation).adjoint();
            }

            clean_up_projected(components, variance_explained);
            if (!options.transpose) {
                components.adjointInPlace();
            }
        }
    }
}

}

/**
 * @brief Results of `blocked_pca()`.
 */
template<typename EigenMatrix_, typename EigenVector_>
struct BlockedPcaResults {
    EigenMatrix_ components;

    EigenVector_ variance_explained;

    typename EigenVector_::Scalar total_variance = 0;

    EigenMatrix_ rotation;

    EigenMatrix_ center;

    EigenVector_ scale;

    bool converged = false;
};
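
// Shape conventions, as inferred from the code above: 'components' has one
// column per cell if BlockedPcaOptions::transpose is set and one row per cell
// otherwise; 'rotation' has genes in rows and dimensions in columns; 'center'
// has one row per block and one column per gene; 'scale' is only filled when
// scaling was requested.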

template<typename Value_, typename Index_, typename Block_, typename EigenMatrix_, class EigenVector_>
void blocked_pca(const tatami::Matrix<Value_, Index_>& mat, const Block_* block, const BlockedPcaOptions& options, BlockedPcaResults<EigenMatrix_, EigenVector_>& output) {
    auto bdetails = internal::compute_blocking_details<EigenVector_>(mat.ncol(), block, options.block_weight_policy, options.variable_block_weight_parameters);

    EigenMatrix_& components = output.components;
    EigenMatrix_& rotation = output.rotation;
    EigenVector_& variance_explained = output.variance_explained;
    EigenMatrix_& center_m = output.center;
    EigenVector_& scale_v = output.scale;
    auto& total_var = output.total_variance;
    bool& converged = output.converged;

    if (mat.is_sparse()) {
        if (options.realize_matrix) {
            internal::run_blocked<true, true>(mat, block, bdetails, options, components, rotation, variance_explained, center_m, scale_v, total_var, converged);
        } else {
            internal::run_blocked<false, true>(mat, block, bdetails, options, components, rotation, variance_explained, center_m, scale_v, total_var, converged);
        }
    } else {
        if (options.realize_matrix) {
            internal::run_blocked<true, false>(mat, block, bdetails, options, components, rotation, variance_explained, center_m, scale_v, total_var, converged);
        } else {
            internal::run_blocked<false, false>(mat, block, bdetails, options, components, rotation, variance_explained, center_m, scale_v, total_var, converged);
        }
    }

    if (!options.scale) {
        output.scale = EigenVector_();
    }
}

template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, typename Block_>
BlockedPcaResults<EigenMatrix_, EigenVector_> blocked_pca(const tatami::Matrix<Value_, Index_>& mat, const Block_* block, const BlockedPcaOptions& options) {
    BlockedPcaResults<EigenMatrix_, EigenVector_> output;
    blocked_pca(mat, block, options, output);
    return output;
}

}

#endif
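
Example usage (a minimal sketch, not taken from the library's own documentation): assuming "mat" is a tatami::Matrix with genes in rows and cells in columns, and each cell has an integer block (e.g., batch) assignment:

    #include "scran_pca/blocked_pca.hpp"

    std::vector<int> block(mat.ncol()); // fill with one block assignment per cell.
    scran_pca::BlockedPcaOptions opt;
    opt.number = 20; // number of PCs to retain.
    auto res = scran_pca::blocked_pca(mat, block.data(), opt);
    // res.components: principal components for each cell.
    // res.variance_explained: variance explained by each PC.
    // res.rotation: gene loadings for each PC.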