scran_pca
Principal component analysis for single-cell data
Loading...
Searching...
No Matches
subset_pca.hpp
Go to the documentation of this file.
1#ifndef SCRAN_PCA_SUBSET_HPP
2#define SCRAN_PCA_SUBSET_HPP
3
4#include <vector>
5#include <optional>
6#include <cstddef>
7
8#include "simple_pca.hpp"
9#include "blocked_pca.hpp"
10#include "utils.hpp"
11
12#include "sanisizer/sanisizer.hpp"
13#include "tatami_mult/tatami_mult.hpp"
14#include "tatami/tatami.hpp"
15#include "Eigen/Dense"
16
22namespace scran_pca {
23
27template<typename Index_, class SubsetVector_>
28std::vector<Index_> invert_subset(const Index_ total, const SubsetVector_& subset) {
29 std::vector<Index_> output;
30 output.reserve(total - subset.size());
31 const auto end = subset.size();
32 I<decltype(end)> pos = 0;
33 for (Index_ i = 0; i < total; ++i) {
34 if (pos != end && sanisizer::is_equal(subset[pos], i)) {
35 ++pos;
36 continue;
37 }
38 output.push_back(i);
39 }
40 return output;
41}
42
43template<typename Value_, typename Index_, typename EigenMatrix_, typename Scalar_>
44void multiply_by_right_singular_vectors(
46 const EigenMatrix_& rhs_vectors,
47 std::vector<Scalar_>& output,
48 std::vector<Scalar_*>& out_ptrs,
49 int num_threads
50) {
51 const auto num_features = mat.nrow();
52 const auto num_cells = mat.ncol();
53 const auto rank = rhs_vectors.cols();
54 static_assert(!EigenMatrix_::IsRowMajor);
55
56 output.resize(sanisizer::product<I<decltype(output.size())> >(num_features, rank));
57 sanisizer::resize(out_ptrs, rank);
58 auto rhs_ptrs = sanisizer::create<std::vector<const typename EigenMatrix_::Scalar*> >(rank);
59 for (I<decltype(rank)> r = 0; r < rank; ++r) {
60 rhs_ptrs[r] = rhs_vectors.data() + sanisizer::product_unsafe<std::size_t>(r, num_cells);
61 out_ptrs[r] = output.data() + sanisizer::product_unsafe<std::size_t>(r, num_features);
62 }
63
64 tatami_mult::Options opt;
65 opt.num_threads = num_threads;
66 tatami_mult::multiply(mat, rhs_ptrs, out_ptrs, opt);
67}
68
69template<class SubsetVector_, class EigenVector_>
70void expand_into_vector(const SubsetVector_& subset, const EigenVector_& source, EigenVector_& dest) {
71 const auto nsub = subset.size();
72 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
73 dest.coeffRef(subset[s]) = source.coeff(s);
74 }
75}
76
77template<class SubsetVector_, class EigenMatrix_>
78void expand_into_matrix_rows(const SubsetVector_& subset, const EigenMatrix_& source, EigenMatrix_& dest) {
79 const auto nsub = subset.size();
80
81 // This access pattern should be a little more cache-friendly for the
82 // default column-major storage of Eigen::MatrixXd's.
83 const auto cols = dest.cols();
84 for (I<decltype(cols)> c = 0; c < cols; ++c) {
85 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
86 dest.coeffRef(subset[s], c) = source.coeff(s, c);
87 }
88 }
89}
90
91template<class SubsetVector_, class EigenMatrix_>
92void expand_into_matrix_columns(const SubsetVector_& subset, const EigenMatrix_& source, EigenMatrix_& dest) {
93 const auto nsub = subset.size();
94 for (I<decltype(nsub)> s = 0; s < nsub; ++s) {
95 dest.col(subset[s]) = source.col(s);
96 }
97}
107
118template<typename EigenMatrix_, class EigenVector_>
120
146template<typename Value_, typename Index_, typename SubsetVector_, typename EigenMatrix_, class EigenVector_>
149 const SubsetVector_& subset,
150 const SubsetPcaOptions& options,
152) {
153 const auto full_size = mat.nrow();
154 auto final_center = sanisizer::create<EigenVector_>(full_size);
155 auto final_scale = sanisizer::create<EigenVector_>(full_size);
156 EigenMatrix_ final_rotation;
157
158 // Don't move subset into the constructor, we'll need it later.
160
161 simple_pca_internal(
162 sub_mat,
163 options,
164 output,
165 [&](const EigenMatrix_& rhs_vectors, const EigenVector_& sing_vals) -> void {
166 const auto inv_subset = invert_subset(mat.nrow(), subset);
167 // Don't move inv_subset into the constructor, we'll need it later.
168 tatami::DelayedSubsetSortedUnique<Value_, Index_, I<decltype(inv_subset)> > inv_mat(tatami::wrap_shared_ptr(&mat), inv_subset, true);
169
170 const auto num_inv = inv_mat.nrow();
171 auto inv_center = sanisizer::create<EigenVector_>(num_inv);
172 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
173 if (inv_mat.sparse()) {
174 compute_row_means_and_variances<true>(inv_mat, options.num_threads, inv_center, inv_scale);
175 } else {
176 compute_row_means_and_variances<false>(inv_mat, options.num_threads, inv_center, inv_scale);
177 }
178 process_scale_vector(options.scale, inv_scale);
179
180 std::vector<typename EigenVector_::Scalar> product;
181 std::vector<typename EigenVector_::Scalar*> product_ptrs;
182 multiply_by_right_singular_vectors(
183 inv_mat,
184 rhs_vectors,
185 product,
186 product_ptrs,
187 options.num_threads
188 );
189
190 const auto rank = rhs_vectors.cols();
191 final_rotation.resize(sanisizer::cast<Eigen::Index>(full_size), rank);
192 for (I<decltype(rank)> r = 0; r < rank; ++r) {
193 const auto curshift = rhs_vectors.col(r).sum();
194 const auto varexp = sing_vals.coeff(r);
195 const auto optr = product_ptrs[r];
196 const auto compute = [&](I<decltype(num_inv)> i) -> typename EigenVector_::Scalar {
197 return (optr[i] - curshift * inv_center.coeff(i)) / varexp;
198 };
199
200 if (!options.scale) {
201 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
202 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
203 }
204 } else {
205 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
206 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
207 }
208 }
209 }
210
211 expand_into_vector(inv_subset, inv_center, final_center);
212 if (options.scale) {
213 expand_into_vector(inv_subset, inv_scale, final_scale);
214 }
215 }
216 );
217
218 expand_into_vector(subset, output.center, final_center);
219 output.center.swap(final_center);
220
221 if (options.scale) {
222 expand_into_vector(subset, output.scale, final_scale);
223 output.scale.swap(final_scale);
224 }
225
226 expand_into_matrix_rows(subset, output.rotation, final_rotation);
227 output.rotation.swap(final_rotation);
228}
229
249template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, class SubsetVector_>
252 const SubsetVector_& subset,
253 const SubsetPcaOptions& options
254) {
256 subset_pca(mat, subset, options, output);
257 return output;
258}
259
265
276template<typename EigenMatrix_, class EigenVector_>
278
307template<typename Value_, typename Index_, class SubsetVector_, typename Block_, typename EigenMatrix_, class EigenVector_>
310 const SubsetVector_& subset,
311 const Block_* block,
312 const SubsetPcaBlockedOptions& options,
314) {
315 const auto full_size = mat.nrow();
316 EigenMatrix_ final_center;
317 auto final_scale = sanisizer::create<EigenVector_>(full_size);
318 EigenMatrix_ final_rotation;
319
320 // Don't move subset into the constructor, we'll need it later.
322
323 blocked_pca_internal<Value_, Index_, Block_, EigenMatrix_, EigenVector_>(
324 sub_mat,
325 block,
326 options,
327 output,
328 [&](
329 const BlockingDetails<Index_, EigenVector_>& block_details,
330 const EigenMatrix_& rhs_vectors,
331 const EigenVector_& sing_vals
332 ) -> void {
333 final_center.resize(
334 sanisizer::cast<I<decltype(final_center.rows())> >(block_details.block_size.size()),
335 sanisizer::cast<I<decltype(final_center.cols())> >(full_size)
336 );
337 final_rotation.resize(
338 sanisizer::cast<I<decltype(final_rotation.rows())> >(full_size),
339 rhs_vectors.cols()
340 );
341
342 auto inv_subset = invert_subset(mat.nrow(), subset);
343 // Don't move inv_subset into the constructor, we'll need it later.
344 tatami::DelayedSubsetSortedUnique<Value_, Index_, I<decltype(inv_subset)> > inv_mat(tatami::wrap_shared_ptr(&mat), inv_subset, true);
345
346 const auto num_cells = inv_mat.ncol();
347 const auto num_inv = inv_mat.nrow();
348 const auto num_blocks = block_details.block_size.size();
349 EigenMatrix_ inv_center(
350 sanisizer::cast<Eigen::Index>(num_blocks),
351 sanisizer::cast<Eigen::Index>(num_inv)
352 );
353 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
354 compute_blockwise_mean_and_variance_tatami(inv_mat, block, block_details, inv_center, inv_scale, options.num_threads);
355 process_scale_vector(options.scale, inv_scale);
356
357 // Need to adjust the RHS singular vector matrix to mimic weighting of the input matrix.
358 const EigenMatrix_* rhs_ptr = NULL;
359 std::optional<EigenMatrix_> weighted_rhs;
360 if (block_details.weighted) {
361 weighted_rhs = rhs_vectors;
362 weighted_rhs->array().colwise() *= block_details.expanded_weights.array();
363 rhs_ptr = &(*weighted_rhs);
364 } else {
365 rhs_ptr = &rhs_vectors;
366 }
367
368 std::vector<typename EigenVector_::Scalar> product;
369 std::vector<typename EigenVector_::Scalar*> out_ptrs;
370 multiply_by_right_singular_vectors(
371 inv_mat,
372 *rhs_ptr,
373 product,
374 out_ptrs,
375 options.num_threads
376 );
377
378 const auto rank = rhs_vectors.cols();
379 auto shift_buffer = sanisizer::create<EigenVector_>(num_blocks);
380 for (I<decltype(rank)> r = 0; r < rank; ++r) {
381 std::fill(shift_buffer.begin(), shift_buffer.end(), 0);
382 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
383 shift_buffer.coeffRef(block[i]) += rhs_vectors.coeff(i, r);
384 }
385
386 const auto varexp = sing_vals.coeff(r);
387 const auto optr = out_ptrs[r];
388 const auto compute = [&](I<decltype(num_inv)> i) -> typename EigenVector_::Scalar {
389 typename EigenVector_::Scalar curshift = 0;
390 for (I<decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
391 curshift += shift_buffer.coeff(b) * inv_center.coeff(b, i);
392 }
393 return (optr[i] - curshift) / varexp;
394 };
395
396 if (options.scale) {
397 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
398 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
399 }
400 } else {
401 for (I<decltype(num_inv)> i = 0; i < num_inv; ++i) {
402 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
403 }
404 }
405 }
406
407 expand_into_matrix_columns(inv_subset, inv_center, final_center);
408 if (options.scale) {
409 expand_into_vector(inv_subset, inv_scale, final_scale);
410 }
411 }
412 );
413
414 expand_into_matrix_columns(subset, output.center, final_center);
415 output.center.swap(final_center);
416
417 if (options.scale) {
418 expand_into_vector(subset, output.scale, final_scale);
419 output.scale.swap(final_scale);
420 }
421
422 expand_into_matrix_rows(subset, output.rotation, final_rotation);
423 output.rotation.swap(final_rotation);
424}
425
449template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_, class SubsetVector_, typename Block_>
452 const SubsetVector_& subset,
453 const Block_* block,
454 const SubsetPcaBlockedOptions& options
455) {
457 subset_pca_blocked(mat, subset, block, options, output);
458 return output;
459}
460
461}
462
463#endif
PCA on residuals after regressing out a blocking factor.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
Principal component analysis on single-cell data.
BlockedPcaOptions SubsetPcaBlockedOptions
Definition subset_pca.hpp:264
void subset_pca_blocked(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const Block_ *block, const SubsetPcaBlockedOptions &options, SubsetPcaBlockedResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:308
SimplePcaOptions SubsetPcaOptions
Definition subset_pca.hpp:106
void subset_pca(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const SubsetPcaOptions &options, SubsetPcaResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:147
std::shared_ptr< const Matrix< Value_, Index_ > > wrap_shared_ptr(const Matrix< Value_, Index_ > *const ptr)
PCA on a gene-by-cell matrix.
Options for blocked_pca().
Definition blocked_pca.hpp:30
int num_threads
Definition blocked_pca.hpp:97
bool scale
Definition blocked_pca.hpp:55
Results of blocked_pca().
Definition blocked_pca.hpp:827
EigenMatrix_ rotation
Definition blocked_pca.hpp:856
EigenVector_ scale
Definition blocked_pca.hpp:869
EigenMatrix_ center
Definition blocked_pca.hpp:863
Options for simple_pca().
Definition simple_pca.hpp:28
int num_threads
Definition simple_pca.hpp:70
bool scale
Definition simple_pca.hpp:52
Results of simple_pca().
Definition simple_pca.hpp:320
EigenMatrix_ rotation
Definition simple_pca.hpp:349
EigenVector_ center
Definition simple_pca.hpp:355
EigenVector_ scale
Definition simple_pca.hpp:361