1#ifndef SCRAN_PCA_SUBSET_HPP
2#define SCRAN_PCA_SUBSET_HPP
12#include "sanisizer/sanisizer.hpp"
13#include "tatami_mult/tatami_mult.hpp"
27template<
typename Index_,
class SubsetVector_>
28std::vector<Index_> invert_subset(
const Index_ total,
const SubsetVector_& subset) {
29 std::vector<Index_> output;
30 output.reserve(total - subset.size());
31 const auto end = subset.size();
32 I<
decltype(end)> pos = 0;
33 for (Index_ i = 0; i < total; ++i) {
34 if (pos != end && sanisizer::is_equal(subset[pos], i)) {
43template<
typename Value_,
typename Index_,
typename EigenMatrix_,
typename Scalar_>
44void multiply_by_right_singular_vectors(
46 const EigenMatrix_& rhs_vectors,
47 std::vector<Scalar_>& output,
48 std::vector<Scalar_*>& out_ptrs,
51 const auto num_features = mat.
nrow();
52 const auto num_cells = mat.
ncol();
53 const auto rank = rhs_vectors.cols();
54 static_assert(!EigenMatrix_::IsRowMajor);
56 output.resize(sanisizer::product<I<
decltype(output.size())> >(num_features, rank));
57 sanisizer::resize(out_ptrs, rank);
58 auto rhs_ptrs = sanisizer::create<std::vector<const typename EigenMatrix_::Scalar*> >(rank);
59 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
60 rhs_ptrs[r] = rhs_vectors.data() + sanisizer::product_unsafe<std::size_t>(r, num_cells);
61 out_ptrs[r] = output.data() + sanisizer::product_unsafe<std::size_t>(r, num_features);
64 tatami_mult::Options opt;
65 opt.num_threads = num_threads;
66 tatami_mult::multiply(mat, rhs_ptrs, out_ptrs, opt);
69template<
class SubsetVector_,
class EigenVector_>
70void expand_into_vector(
const SubsetVector_& subset,
const EigenVector_& source, EigenVector_& dest) {
71 const auto nsub = subset.size();
72 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
73 dest.coeffRef(subset[s]) = source.coeff(s);
77template<
class SubsetVector_,
class EigenMatrix_>
78void expand_into_matrix_rows(
const SubsetVector_& subset,
const EigenMatrix_& source, EigenMatrix_& dest) {
79 const auto nsub = subset.size();
83 const auto cols = dest.cols();
84 for (I<
decltype(cols)> c = 0; c < cols; ++c) {
85 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
86 dest.coeffRef(subset[s], c) = source.coeff(s, c);
91template<
class SubsetVector_,
class EigenMatrix_>
92void expand_into_matrix_columns(
const SubsetVector_& subset,
const EigenMatrix_& source, EigenMatrix_& dest) {
93 const auto nsub = subset.size();
94 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
95 dest.col(subset[s]) = source.col(s);
118template<
typename EigenMatrix_,
class EigenVector_>
146template<
typename Value_,
typename Index_,
typename SubsetVector_,
typename EigenMatrix_,
class EigenVector_>
149 const SubsetVector_& subset,
153 const auto full_size = mat.
nrow();
154 auto final_center = sanisizer::create<EigenVector_>(full_size);
155 auto final_scale = sanisizer::create<EigenVector_>(full_size);
156 EigenMatrix_ final_rotation;
165 [&](
const EigenMatrix_& rhs_vectors,
const EigenVector_& sing_vals) ->
void {
166 const auto inv_subset = invert_subset(mat.
nrow(), subset);
170 const auto num_inv = inv_mat.nrow();
171 auto inv_center = sanisizer::create<EigenVector_>(num_inv);
172 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
173 if (inv_mat.sparse()) {
174 compute_row_means_and_variances<true>(inv_mat, options.
num_threads, inv_center, inv_scale);
176 compute_row_means_and_variances<false>(inv_mat, options.
num_threads, inv_center, inv_scale);
178 process_scale_vector(options.
scale, inv_scale);
180 std::vector<typename EigenVector_::Scalar> product;
181 std::vector<typename EigenVector_::Scalar*> product_ptrs;
182 multiply_by_right_singular_vectors(
190 const auto rank = rhs_vectors.cols();
191 final_rotation.resize(sanisizer::cast<Eigen::Index>(full_size), rank);
192 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
193 const auto curshift = rhs_vectors.col(r).sum();
194 const auto varexp = sing_vals.coeff(r);
195 const auto optr = product_ptrs[r];
196 const auto compute = [&](I<
decltype(num_inv)> i) ->
typename EigenVector_::Scalar {
197 return (optr[i] - curshift * inv_center.coeff(i)) / varexp;
200 if (!options.
scale) {
201 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
202 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
205 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
206 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
211 expand_into_vector(inv_subset, inv_center, final_center);
213 expand_into_vector(inv_subset, inv_scale, final_scale);
218 expand_into_vector(subset, output.
center, final_center);
219 output.
center.swap(final_center);
222 expand_into_vector(subset, output.
scale, final_scale);
223 output.
scale.swap(final_scale);
226 expand_into_matrix_rows(subset, output.
rotation, final_rotation);
227 output.
rotation.swap(final_rotation);
249template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_,
class SubsetVector_>
252 const SubsetVector_& subset,
276template<
typename EigenMatrix_,
class EigenVector_>
307template<
typename Value_,
typename Index_,
class SubsetVector_,
typename Block_,
typename EigenMatrix_,
class EigenVector_>
310 const SubsetVector_& subset,
315 const auto full_size = mat.
nrow();
316 EigenMatrix_ final_center;
317 auto final_scale = sanisizer::create<EigenVector_>(full_size);
318 EigenMatrix_ final_rotation;
323 blocked_pca_internal<Value_, Index_, Block_, EigenMatrix_, EigenVector_>(
329 const BlockingDetails<Index_, EigenVector_>& block_details,
330 const EigenMatrix_& rhs_vectors,
331 const EigenVector_& sing_vals
334 sanisizer::cast<I<
decltype(final_center.rows())> >(block_details.block_size.size()),
335 sanisizer::cast<I<
decltype(final_center.cols())> >(full_size)
337 final_rotation.resize(
338 sanisizer::cast<I<
decltype(final_rotation.rows())> >(full_size),
342 auto inv_subset = invert_subset(mat.
nrow(), subset);
346 const auto num_cells = inv_mat.ncol();
347 const auto num_inv = inv_mat.nrow();
348 const auto num_blocks = block_details.block_size.size();
349 EigenMatrix_ inv_center(
350 sanisizer::cast<Eigen::Index>(num_blocks),
351 sanisizer::cast<Eigen::Index>(num_inv)
353 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
354 compute_blockwise_mean_and_variance_tatami(inv_mat, block, block_details, inv_center, inv_scale, options.
num_threads);
355 process_scale_vector(options.
scale, inv_scale);
358 const EigenMatrix_* rhs_ptr = NULL;
359 std::optional<EigenMatrix_> weighted_rhs;
360 if (block_details.weighted) {
361 weighted_rhs = rhs_vectors;
362 weighted_rhs->array().colwise() *= block_details.expanded_weights.array();
363 rhs_ptr = &(*weighted_rhs);
365 rhs_ptr = &rhs_vectors;
368 std::vector<typename EigenVector_::Scalar> product;
369 std::vector<typename EigenVector_::Scalar*> out_ptrs;
370 multiply_by_right_singular_vectors(
378 const auto rank = rhs_vectors.cols();
379 auto shift_buffer = sanisizer::create<EigenVector_>(num_blocks);
380 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
381 std::fill(shift_buffer.begin(), shift_buffer.end(), 0);
382 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
383 shift_buffer.coeffRef(block[i]) += rhs_vectors.coeff(i, r);
386 const auto varexp = sing_vals.coeff(r);
387 const auto optr = out_ptrs[r];
388 const auto compute = [&](I<
decltype(num_inv)> i) ->
typename EigenVector_::Scalar {
389 typename EigenVector_::Scalar curshift = 0;
390 for (I<
decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
391 curshift += shift_buffer.coeff(b) * inv_center.coeff(b, i);
393 return (optr[i] - curshift) / varexp;
397 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
398 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
401 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
402 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
407 expand_into_matrix_columns(inv_subset, inv_center, final_center);
409 expand_into_vector(inv_subset, inv_scale, final_scale);
414 expand_into_matrix_columns(subset, output.
center, final_center);
415 output.
center.swap(final_center);
418 expand_into_vector(subset, output.
scale, final_scale);
419 output.
scale.swap(final_scale);
422 expand_into_matrix_rows(subset, output.
rotation, final_rotation);
423 output.
rotation.swap(final_rotation);
449template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_,
class SubsetVector_,
typename Block_>
452 const SubsetVector_& subset,
PCA on residuals after regressing out a blocking factor.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
Principal component analysis on single-cell data.
BlockedPcaOptions SubsetPcaBlockedOptions
Definition subset_pca.hpp:264
void subset_pca_blocked(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const Block_ *block, const SubsetPcaBlockedOptions &options, SubsetPcaBlockedResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:308
SimplePcaOptions SubsetPcaOptions
Definition subset_pca.hpp:106
void subset_pca(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const SubsetPcaOptions &options, SubsetPcaResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:147
std::shared_ptr< const Matrix< Value_, Index_ > > wrap_shared_ptr(const Matrix< Value_, Index_ > *const ptr)
PCA on a gene-by-cell matrix.
Options for blocked_pca().
Definition blocked_pca.hpp:30
int num_threads
Definition blocked_pca.hpp:97
bool scale
Definition blocked_pca.hpp:55
Results of blocked_pca().
Definition blocked_pca.hpp:827
EigenMatrix_ rotation
Definition blocked_pca.hpp:856
EigenVector_ scale
Definition blocked_pca.hpp:869
EigenMatrix_ center
Definition blocked_pca.hpp:863
Options for simple_pca().
Definition simple_pca.hpp:28
int num_threads
Definition simple_pca.hpp:70
bool scale
Definition simple_pca.hpp:52
Results of simple_pca().
Definition simple_pca.hpp:320
EigenMatrix_ rotation
Definition simple_pca.hpp:349
EigenVector_ center
Definition simple_pca.hpp:355
EigenVector_ scale
Definition simple_pca.hpp:361