scran_pca
Principal component analysis for single-cell data
Loading...
Searching...
No Matches
subset_pca.hpp
Go to the documentation of this file.
1#ifndef SCRAN_PCA_SUBSET_HPP
2#define SCRAN_PCA_SUBSET_HPP
3
4#include <vector>
5#include <optional>
6#include <cstddef>
7
8#include "simple_pca.hpp"
9#include "blocked_pca.hpp"
10#include "utils.hpp"
11
12#include "sanisizer/sanisizer.hpp"
13#include "tatami_mult/tatami_mult.hpp"
14#include "tatami/tatami.hpp"
15#include "Eigen/Dense"
16
22namespace scran_pca {
23
27template<typename Index_, class SubsetVector_>
28std::vector<Index_> invert_subset(const Index_ total, const SubsetVector_& subset) {
29 std::vector<Index_> output;
30 output.reserve(total - subset.size());
31 const auto end = subset.size();
32 I<decltype(end)> pos = 0;
33 for (Index_ i = 0; i < total; ++i) {
34 if (pos != end && sanisizer::is_equal(subset[pos], i)) {
35 ++pos;
36 continue;
37 }
38 output.push_back(i);
39 }
40 return output;
41}
42
43template<typename Value_, typename Index_, typename EigenMatrix_, typename Scalar_>
44void multiply_by_right_singular_vectors(
46 const EigenMatrix_& rhs_vectors,
47 std::vector<Scalar_>& output,
48 std::vector<Scalar_*>& out_ptrs,
49 int num_threads
50) {
51 const auto num_features = mat.nrow();
52 const auto num_cells = mat.ncol();
53 const auto rank = rhs_vectors.cols();
54 static_assert(!EigenMatrix_::IsRowMajor);
55
56 output.resize(sanisizer::product<I<decltype(output.size())> >(num_features, rank));
57 sanisizer::resize(out_ptrs, rank);
58 auto rhs_ptrs = sanisizer::create<std::vector<const typename EigenMatrix_::Scalar*> >(rank);
59 for (I<decltype(rank)> r = 0; r < rank; ++r) {
60 rhs_ptrs[r] = rhs_vectors.data() + sanisizer::product_unsafe<std::size_t>(r, num_cells);
61 out_ptrs[r] = output.data() + sanisizer::product_unsafe<std::size_t>(r, num_features);
62 }
63
64 tatami_mult::Options opt;
65 opt.num_threads = num_threads;
66 tatami_mult::multiply(mat, rhs_ptrs, out_ptrs, opt);
67}
68
69template<class SubsetVector_, class EigenVector_>
70void expand_into_vector(const SubsetVector_& subset, const EigenVector_& source, EigenVector_& dest) {
71 const auto nsub = subset.size();
72 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
73 dest.coeffRef(subset[s]) = source.coeff(s);
74 }
75}
76
77template<class SubsetVector_, class EigenMatrix_>
78void expand_into_matrix_rows(const SubsetVector_& subset, const EigenMatrix_& source, EigenMatrix_& dest) {
79 const auto nsub = subset.size();
80
81 // This access pattern should be a little more cache-friendly for the
82 // default column-major storage of Eigen::MatrixXd's.
83 const auto cols = dest.cols();
84 for (I<decltype(cols)> c = 0; c < cols; ++c) {
85 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
86 dest.coeffRef(subset[s], c) = source.coeff(s, c);
87 }
88 }
89}
90
91template<class SubsetVector_, class EigenMatrix_>
92void expand_into_matrix_columns(const SubsetVector_& subset, const EigenMatrix_& source, EigenMatrix_& dest) {
93 const auto nsub = subset.size();
94 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
95 dest.col(subset[s]) = source.col(s);
96 }
97}
108template<typename EigenVector_ = Eigen::VectorXd>
110
121template<typename EigenMatrix_, class EigenVector_>
123
149template<typename Value_, typename Index_, typename SubsetVector_, typename EigenMatrix_, class EigenVector_>
152 const SubsetVector_& subset,
153 const SubsetPcaOptions<EigenVector_>& options,
155) {
156 const auto full_size = mat.nrow();
157 auto final_center = sanisizer::create<EigenVector_>(full_size);
158 auto final_scale = sanisizer::create<EigenVector_>(full_size);
159 EigenMatrix_ final_rotation;
160
161 // Don't move subset into the constructor, we'll need it later.
163
164 simple_pca_internal(
165 sub_mat,
166 options,
167 output,
168 [&](const EigenMatrix_& rhs_vectors, const EigenVector_& sing_vals) -> void {
169 const auto inv_subset = invert_subset(mat.nrow(), subset);
170 // Don't move inv_subset into the constructor, we'll need it later.
171 tatami::DelayedSubsetSortedUnique<Value_, Index_, I<decltype(inv_subset)> > inv_mat(tatami::wrap_shared_ptr(&mat), inv_subset, true);
172
173 const auto num_inv = inv_mat.nrow();
174 auto inv_center = sanisizer::create<EigenVector_>(num_inv);
175 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
176 if (inv_mat.sparse()) {
177 compute_row_means_and_variances<true>(inv_mat, options.num_threads, inv_center, inv_scale);
178 } else {
179 compute_row_means_and_variances<false>(inv_mat, options.num_threads, inv_center, inv_scale);
180 }
181 process_scale_vector(options.scale, inv_scale);
182
183 std::vector<typename EigenVector_::Scalar> product;
184 std::vector<typename EigenVector_::Scalar*> product_ptrs;
185 multiply_by_right_singular_vectors(
186 inv_mat,
187 rhs_vectors,
188 product,
189 product_ptrs,
190 options.num_threads
191 );
192
193 const auto rank = rhs_vectors.cols();
194 final_rotation.resize(sanisizer::cast<Eigen::Index>(full_size), rank);
195 for (I<decltype(rank)> r = 0; r < rank; ++r) {
196 const auto curshift = rhs_vectors.col(r).sum();
197 const auto varexp = sing_vals.coeff(r);
198 const auto optr = product_ptrs[r];
199 const auto compute = [&](I<decltype(num_inv)> i) -> typename EigenVector_::Scalar {
200 return (optr[i] - curshift * inv_center.coeff(i)) / varexp;
201 };
202
203 if (!options.scale) {
204 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
205 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
206 }
207 } else {
208 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
209 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
210 }
211 }
212 }
213
214 expand_into_vector(inv_subset, inv_center, final_center);
215 if (options.scale) {
216 expand_into_vector(inv_subset, inv_scale, final_scale);
217 }
218 }
219 );
220
221 expand_into_vector(subset, output.center, final_center);
222 output.center.swap(final_center);
223
224 if (options.scale) {
225 expand_into_vector(subset, output.scale, final_scale);
226 output.scale.swap(final_scale);
227 }
228
229 expand_into_matrix_rows(subset, output.rotation, final_rotation);
230 output.rotation.swap(final_rotation);
231}
232
252template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, class SubsetVector_>
255 const SubsetVector_& subset,
256 const SubsetPcaOptions<EigenVector_>& options
257) {
259 subset_pca(mat, subset, options, output);
260 return output;
261}
262
269template<typename EigenVector_ = Eigen::VectorXd>
271
282template<typename EigenMatrix_, class EigenVector_>
284
313template<typename Value_, typename Index_, class SubsetVector_, typename Block_, typename EigenMatrix_, class EigenVector_>
316 const SubsetVector_& subset,
317 const Block_* block,
320) {
321 const auto full_size = mat.nrow();
322 EigenMatrix_ final_center;
323 auto final_scale = sanisizer::create<EigenVector_>(full_size);
324 EigenMatrix_ final_rotation;
325
326 // Don't move subset into the constructor, we'll need it later.
328
329 blocked_pca_internal<Value_, Index_, Block_, EigenMatrix_, EigenVector_>(
330 sub_mat,
331 block,
332 options,
333 output,
334 [&](
335 const BlockingDetails<Index_, EigenVector_>& block_details,
336 const EigenMatrix_& rhs_vectors,
337 const EigenVector_& sing_vals
338 ) -> void {
339 final_center.resize(
340 sanisizer::cast<I<decltype(final_center.rows())> >(block_details.block_size.size()),
341 sanisizer::cast<I<decltype(final_center.cols())> >(full_size)
342 );
343 final_rotation.resize(
344 sanisizer::cast<I<decltype(final_rotation.rows())> >(full_size),
345 rhs_vectors.cols()
346 );
347
348 auto inv_subset = invert_subset(mat.nrow(), subset);
349 // Don't move inv_subset into the constructor, we'll need it later.
350 tatami::DelayedSubsetSortedUnique<Value_, Index_, I<decltype(inv_subset)> > inv_mat(tatami::wrap_shared_ptr(&mat), inv_subset, true);
351
352 const auto num_cells = inv_mat.ncol();
353 const auto num_inv = inv_mat.nrow();
354 const auto num_blocks = block_details.block_size.size();
355 EigenMatrix_ inv_center(
356 sanisizer::cast<Eigen::Index>(num_blocks),
357 sanisizer::cast<Eigen::Index>(num_inv)
358 );
359 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
360 compute_blockwise_mean_and_variance_tatami(inv_mat, block, block_details, inv_center, inv_scale, options.num_threads);
361 process_scale_vector(options.scale, inv_scale);
362
363 // Need to adjust the RHS singular vector matrix to mimic weighting of the input matrix.
364 const EigenMatrix_* rhs_ptr = NULL;
365 std::optional<EigenMatrix_> weighted_rhs;
366 if (block_details.weighted) {
367 weighted_rhs = rhs_vectors;
368 weighted_rhs->array().colwise() *= block_details.expanded_weights.array();
369 rhs_ptr = &(*weighted_rhs);
370 } else {
371 rhs_ptr = &rhs_vectors;
372 }
373
374 std::vector<typename EigenVector_::Scalar> product;
375 std::vector<typename EigenVector_::Scalar*> out_ptrs;
376 multiply_by_right_singular_vectors(
377 inv_mat,
378 *rhs_ptr,
379 product,
380 out_ptrs,
381 options.num_threads
382 );
383
384 const auto rank = rhs_vectors.cols();
385 auto shift_buffer = sanisizer::create<EigenVector_>(num_blocks);
386 for (I<decltype(rank)> r = 0; r < rank; ++r) {
387 std::fill(shift_buffer.begin(), shift_buffer.end(), 0);
388 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
389 shift_buffer.coeffRef(block[i]) += rhs_vectors.coeff(i, r);
390 }
391
392 const auto varexp = sing_vals.coeff(r);
393 const auto optr = out_ptrs[r];
394 const auto compute = [&](I<decltype(num_inv)> i) -> typename EigenVector_::Scalar {
395 typename EigenVector_::Scalar curshift = 0;
396 for (I<decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
397 curshift += shift_buffer.coeff(b) * inv_center.coeff(b, i);
398 }
399 return (optr[i] - curshift) / varexp;
400 };
401
402 if (options.scale) {
403 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
404 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
405 }
406 } else {
407 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
408 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
409 }
410 }
411 }
412
413 expand_into_matrix_columns(inv_subset, inv_center, final_center);
414 if (options.scale) {
415 expand_into_vector(inv_subset, inv_scale, final_scale);
416 }
417 }
418 );
419
420 expand_into_matrix_columns(subset, output.center, final_center);
421 output.center.swap(final_center);
422
423 if (options.scale) {
424 expand_into_vector(subset, output.scale, final_scale);
425 output.scale.swap(final_scale);
426 }
427
428 expand_into_matrix_rows(subset, output.rotation, final_rotation);
429 output.rotation.swap(final_rotation);
430}
431
455template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, class SubsetVector_, typename Block_>
458 const SubsetVector_& subset,
459 const Block_* block,
461) {
463 subset_pca_blocked(mat, subset, block, options, output);
464 return output;
465}
466
467}
468
469#endif
PCA on residuals after regressing out a blocking factor.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
Principal component analysis on single-cell data.
void subset_pca(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const SubsetPcaOptions< EigenVector_ > &options, SubsetPcaResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:150
void subset_pca_blocked(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const Block_ *block, const SubsetPcaBlockedOptions< EigenVector_ > &options, SubsetPcaBlockedResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:314
std::shared_ptr< const Matrix< Value_, Index_ > > wrap_shared_ptr(const Matrix< Value_, Index_ > *const ptr)
PCA on a gene-by-cell matrix.
Options for blocked_pca().
Definition blocked_pca.hpp:34
bool scale
Definition blocked_pca.hpp:59
int num_threads
Definition blocked_pca.hpp:104
Results of blocked_pca().
Definition blocked_pca.hpp:854
EigenMatrix_ rotation
Definition blocked_pca.hpp:883
EigenVector_ scale
Definition blocked_pca.hpp:896
EigenMatrix_ center
Definition blocked_pca.hpp:890
Options for simple_pca().
Definition simple_pca.hpp:32
bool scale
Definition simple_pca.hpp:56
int num_threads
Definition simple_pca.hpp:76
Results of simple_pca().
Definition simple_pca.hpp:328
EigenMatrix_ rotation
Definition simple_pca.hpp:357
EigenVector_ center
Definition simple_pca.hpp:363
EigenVector_ scale
Definition simple_pca.hpp:369