1#ifndef SCRAN_PCA_SUBSET_HPP
2#define SCRAN_PCA_SUBSET_HPP
12#include "sanisizer/sanisizer.hpp"
13#include "tatami_mult/tatami_mult.hpp"
27template<
typename Index_,
class SubsetVector_>
28std::vector<Index_> invert_subset(
const Index_ total,
const SubsetVector_& subset) {
29 std::vector<Index_> output;
30 output.reserve(total - subset.size());
31 const auto end = subset.size();
32 I<
decltype(end)> pos = 0;
33 for (Index_ i = 0; i < total; ++i) {
34 if (pos != end && sanisizer::is_equal(subset[pos], i)) {
43template<
typename Value_,
typename Index_,
typename EigenMatrix_,
typename Scalar_>
44void multiply_by_right_singular_vectors(
46 const EigenMatrix_& rhs_vectors,
47 std::vector<Scalar_>& output,
48 std::vector<Scalar_*>& out_ptrs,
51 const auto num_features = mat.
nrow();
52 const auto num_cells = mat.
ncol();
53 const auto rank = rhs_vectors.cols();
54 static_assert(!EigenMatrix_::IsRowMajor);
56 output.resize(sanisizer::product<I<
decltype(output.size())> >(num_features, rank));
57 sanisizer::resize(out_ptrs, rank);
58 auto rhs_ptrs = sanisizer::create<std::vector<const typename EigenMatrix_::Scalar*> >(rank);
59 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
60 rhs_ptrs[r] = rhs_vectors.data() + sanisizer::product_unsafe<std::size_t>(r, num_cells);
61 out_ptrs[r] = output.data() + sanisizer::product_unsafe<std::size_t>(r, num_features);
64 tatami_mult::Options opt;
65 opt.num_threads = num_threads;
66 tatami_mult::multiply(mat, rhs_ptrs, out_ptrs, opt);
69template<
class SubsetVector_,
class EigenVector_>
70void expand_into_vector(
const SubsetVector_& subset,
const EigenVector_& source, EigenVector_& dest) {
71 const auto nsub = subset.size();
72 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
73 dest.coeffRef(subset[s]) = source.coeff(s);
77template<
class SubsetVector_,
class EigenMatrix_>
78void expand_into_matrix_rows(
const SubsetVector_& subset,
const EigenMatrix_& source, EigenMatrix_& dest) {
79 const auto nsub = subset.size();
83 const auto cols = dest.cols();
84 for (I<
decltype(cols)> c = 0; c < cols; ++c) {
85 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
86 dest.coeffRef(subset[s], c) = source.coeff(s, c);
91template<
class SubsetVector_,
class EigenMatrix_>
92void expand_into_matrix_columns(
const SubsetVector_& subset,
const EigenMatrix_& source, EigenMatrix_& dest) {
93 const auto nsub = subset.size();
94 for (I<
decltype(nsub)> s = 0; s < nsub; ++s) {
95 dest.col(subset[s]) = source.col(s);
108template<
typename EigenVector_ = Eigen::VectorXd>
121template<
typename EigenMatrix_,
class EigenVector_>
149template<
typename Value_,
typename Index_,
typename SubsetVector_,
typename EigenMatrix_,
class EigenVector_>
152 const SubsetVector_& subset,
156 const auto full_size = mat.
nrow();
157 auto final_center = sanisizer::create<EigenVector_>(full_size);
158 auto final_scale = sanisizer::create<EigenVector_>(full_size);
159 EigenMatrix_ final_rotation;
168 [&](
const EigenMatrix_& rhs_vectors,
const EigenVector_& sing_vals) ->
void {
169 const auto inv_subset = invert_subset(mat.
nrow(), subset);
173 const auto num_inv = inv_mat.nrow();
174 auto inv_center = sanisizer::create<EigenVector_>(num_inv);
175 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
176 if (inv_mat.sparse()) {
177 compute_row_means_and_variances<true>(inv_mat, options.
num_threads, inv_center, inv_scale);
179 compute_row_means_and_variances<false>(inv_mat, options.
num_threads, inv_center, inv_scale);
181 process_scale_vector(options.
scale, inv_scale);
183 std::vector<typename EigenVector_::Scalar> product;
184 std::vector<typename EigenVector_::Scalar*> product_ptrs;
185 multiply_by_right_singular_vectors(
193 const auto rank = rhs_vectors.cols();
194 final_rotation.resize(sanisizer::cast<Eigen::Index>(full_size), rank);
195 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
196 const auto curshift = rhs_vectors.col(r).sum();
197 const auto varexp = sing_vals.coeff(r);
198 const auto optr = product_ptrs[r];
199 const auto compute = [&](I<
decltype(num_inv)> i) ->
typename EigenVector_::Scalar {
200 return (optr[i] - curshift * inv_center.coeff(i)) / varexp;
203 if (!options.
scale) {
204 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
205 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
208 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
209 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
214 expand_into_vector(inv_subset, inv_center, final_center);
216 expand_into_vector(inv_subset, inv_scale, final_scale);
221 expand_into_vector(subset, output.
center, final_center);
222 output.
center.swap(final_center);
225 expand_into_vector(subset, output.
scale, final_scale);
226 output.
scale.swap(final_scale);
229 expand_into_matrix_rows(subset, output.
rotation, final_rotation);
230 output.
rotation.swap(final_rotation);
252template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_,
class SubsetVector_>
255 const SubsetVector_& subset,
269template<
typename EigenVector_ = Eigen::VectorXd>
282template<
typename EigenMatrix_,
class EigenVector_>
313template<
typename Value_,
typename Index_,
class SubsetVector_,
typename Block_,
typename EigenMatrix_,
class EigenVector_>
316 const SubsetVector_& subset,
321 const auto full_size = mat.
nrow();
322 EigenMatrix_ final_center;
323 auto final_scale = sanisizer::create<EigenVector_>(full_size);
324 EigenMatrix_ final_rotation;
329 blocked_pca_internal<Value_, Index_, Block_, EigenMatrix_, EigenVector_>(
335 const BlockingDetails<Index_, EigenVector_>& block_details,
336 const EigenMatrix_& rhs_vectors,
337 const EigenVector_& sing_vals
340 sanisizer::cast<I<
decltype(final_center.rows())> >(block_details.block_size.size()),
341 sanisizer::cast<I<
decltype(final_center.cols())> >(full_size)
343 final_rotation.resize(
344 sanisizer::cast<I<
decltype(final_rotation.rows())> >(full_size),
348 auto inv_subset = invert_subset(mat.
nrow(), subset);
352 const auto num_cells = inv_mat.ncol();
353 const auto num_inv = inv_mat.nrow();
354 const auto num_blocks = block_details.block_size.size();
355 EigenMatrix_ inv_center(
356 sanisizer::cast<Eigen::Index>(num_blocks),
357 sanisizer::cast<Eigen::Index>(num_inv)
359 auto inv_scale = sanisizer::create<EigenVector_>(num_inv);
360 compute_blockwise_mean_and_variance_tatami(inv_mat, block, block_details, inv_center, inv_scale, options.
num_threads);
361 process_scale_vector(options.
scale, inv_scale);
364 const EigenMatrix_* rhs_ptr = NULL;
365 std::optional<EigenMatrix_> weighted_rhs;
366 if (block_details.weighted) {
367 weighted_rhs = rhs_vectors;
368 weighted_rhs->array().colwise() *= block_details.expanded_weights.array();
369 rhs_ptr = &(*weighted_rhs);
371 rhs_ptr = &rhs_vectors;
374 std::vector<typename EigenVector_::Scalar> product;
375 std::vector<typename EigenVector_::Scalar*> out_ptrs;
376 multiply_by_right_singular_vectors(
384 const auto rank = rhs_vectors.cols();
385 auto shift_buffer = sanisizer::create<EigenVector_>(num_blocks);
386 for (I<
decltype(rank)> r = 0; r < rank; ++r) {
387 std::fill(shift_buffer.begin(), shift_buffer.end(), 0);
388 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
389 shift_buffer.coeffRef(block[i]) += rhs_vectors.coeff(i, r);
392 const auto varexp = sing_vals.coeff(r);
393 const auto optr = out_ptrs[r];
394 const auto compute = [&](I<
decltype(num_inv)> i) ->
typename EigenVector_::Scalar {
395 typename EigenVector_::Scalar curshift = 0;
396 for (I<
decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
397 curshift += shift_buffer.coeff(b) * inv_center.coeff(b, i);
399 return (optr[i] - curshift) / varexp;
403 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
404 final_rotation.coeffRef(inv_subset[i], r) = compute(i) / inv_scale.coeff(i);
407 for (I<
decltype(num_inv)> i = 0; i < num_inv; ++i) {
408 final_rotation.coeffRef(inv_subset[i], r) = compute(i);
413 expand_into_matrix_columns(inv_subset, inv_center, final_center);
415 expand_into_vector(inv_subset, inv_scale, final_scale);
420 expand_into_matrix_columns(subset, output.
center, final_center);
421 output.
center.swap(final_center);
424 expand_into_vector(subset, output.
scale, final_scale);
425 output.
scale.swap(final_scale);
428 expand_into_matrix_rows(subset, output.
rotation, final_rotation);
429 output.
rotation.swap(final_rotation);
455template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_,
class SubsetVector_,
typename Block_>
458 const SubsetVector_& subset,
PCA on residuals after regressing out a blocking factor.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
Principal component analysis on single-cell data.
void subset_pca(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const SubsetPcaOptions< EigenVector_ > &options, SubsetPcaResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:150
void subset_pca_blocked(const tatami::Matrix< Value_, Index_ > &mat, const SubsetVector_ &subset, const Block_ *block, const SubsetPcaBlockedOptions< EigenVector_ > &options, SubsetPcaBlockedResults< EigenMatrix_, EigenVector_ > &output)
Definition subset_pca.hpp:314
std::shared_ptr< const Matrix< Value_, Index_ > > wrap_shared_ptr(const Matrix< Value_, Index_ > *const ptr)
PCA on a gene-by-cell matrix.
Options for blocked_pca().
Definition blocked_pca.hpp:34
bool scale
Definition blocked_pca.hpp:59
int num_threads
Definition blocked_pca.hpp:104
Results of blocked_pca().
Definition blocked_pca.hpp:854
EigenMatrix_ rotation
Definition blocked_pca.hpp:883
EigenVector_ scale
Definition blocked_pca.hpp:896
EigenMatrix_ center
Definition blocked_pca.hpp:890
Options for simple_pca().
Definition simple_pca.hpp:32
bool scale
Definition simple_pca.hpp:56
int num_threads
Definition simple_pca.hpp:76
Results of simple_pca().
Definition simple_pca.hpp:328
EigenMatrix_ rotation
Definition simple_pca.hpp:357
EigenVector_ center
Definition simple_pca.hpp:363
EigenVector_ scale
Definition simple_pca.hpp:369