1#ifndef SCRAN_PCA_SIMPLE_PCA_HPP
2#define SCRAN_PCA_SIMPLE_PCA_HPP
10#include "tatami_stats/tatami_stats.hpp"
14#include "sanisizer/sanisizer.hpp"
81template<
bool sparse_,
typename Value_,
typename Index_,
class EigenVector_>
82void compute_row_means_and_variances(
const tatami::Matrix<Value_, Index_>& mat,
const int num_threads, EigenVector_& center_v, EigenVector_& scale_v) {
83 const auto ngenes = mat.
nrow();
92 const auto ncells = mat.
ncol();
95 for (Index_ g = start, end = start + length; g < end; ++g) {
96 const auto results = [&]{
97 if constexpr(sparse_) {
98 auto range = ext->fetch(vbuffer.data(), NULL);
99 return tatami_stats::variances::direct(range.value, range.number, ncells,
false);
101 auto ptr = ext->fetch(vbuffer.data());
102 return tatami_stats::variances::direct(ptr, ncells,
false);
105 center_v.coeffRef(g) = results.first;
106 scale_v.coeffRef(g) = results.second;
108 }, ngenes, num_threads);
112 const auto ncells = mat.
ncol();
115 typedef typename EigenVector_::Scalar Scalar;
116 tatami_stats::LocalOutputBuffer<Scalar> cbuffer(t, start, length, center_v.data());
117 tatami_stats::LocalOutputBuffer<Scalar> sbuffer(t, start, length, scale_v.data());
120 if constexpr(sparse_) {
121 return tatami_stats::variances::RunningSparse<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(),
false, start);
123 return tatami_stats::variances::RunningDense<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(),
false);
129 if constexpr(sparse_) {
136 for (Index_ c = 0; c < ncells; ++c) {
137 if constexpr(sparse_) {
138 const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
139 running.add(range.value, range.index, range.number);
141 const auto ptr = ext->fetch(vbuffer.data());
149 }, ngenes, num_threads);
153template<
class EigenVector_,
class EigenMatrix_>
154std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > prepare_deferred_matrix_for_irlba(
156 const SimplePcaOptions& options,
157 const EigenVector_& center_v,
158 const EigenVector_& scale_v
160 std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > alt;
161 alt.reset(
new irlba::CenteredMatrix<EigenVector_, EigenMatrix_, I<
decltype(ptr)>, I<
decltype(¢er_v)> >(std::move(ptr), ¢er_v));
165 alt.reset(
new irlba::ScaledMatrix<EigenVector_, EigenMatrix_, I<
decltype(ptr)>, I<
decltype(&scale_v)> >(std::move(ptr), &scale_v,
true,
true));
172template<
class EigenMatrix_,
typename Value_,
typename Index_,
class EigenVector_>
173std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > prepare_sparse_matrix_for_irlba(
175 const SimplePcaOptions& options,
176 EigenVector_& center_v,
177 EigenVector_& scale_v,
178 typename EigenVector_::Scalar& total_var
180 const auto ngenes = mat.
nrow();
181 sanisizer::resize(center_v, ngenes);
182 sanisizer::resize(scale_v, ngenes);
183 std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > output;
185 if (options.realize_matrix) {
199 const Index_ ncells = mat.
ncol();
203 I<
decltype(extracted.value)>,
204 I<
decltype(extracted.index)>,
205 I<
decltype(extracted.pointers)>
209 std::move(extracted.value),
210 std::move(extracted.index),
211 std::move(extracted.pointers),
215 output.reset(sparse_ptr);
218 const auto& pointers = sparse_ptr->get_pointers();
219 const auto& values = sparse_ptr->get_values();
220 for (Index_ g = start, end = start + length; g < end; ++g) {
221 const auto offset = pointers[g];
222 const auto next_offset = pointers[g + 1];
223 const Index_ num_nonzero = next_offset - offset;
224 const auto results = tatami_stats::variances::direct(values.data() + offset, num_nonzero, ncells,
false);
225 center_v.coeffRef(g) = results.first;
226 scale_v.coeffRef(g) = results.second;
228 }, ngenes, options.num_threads);
230 total_var = process_scale_vector(options.scale, scale_v);
233 compute_row_means_and_variances<true>(mat, options.num_threads, center_v, scale_v);
234 total_var = process_scale_vector(options.scale, scale_v);
236 output.reset(
new TransposedTatamiWrapperMatrix<EigenVector_, EigenMatrix_, Value_, Index_>(mat, options.num_threads));
239 return prepare_deferred_matrix_for_irlba(std::move(output), options, center_v, scale_v);
242template<
class EigenMatrix_,
typename Value_,
typename Index_,
class EigenVector_>
243std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > prepare_dense_matrix_for_irlba(
245 const SimplePcaOptions& options,
246 EigenVector_& center_v,
247 EigenVector_& scale_v,
248 typename EigenVector_::Scalar& total_var
250 const Index_ ngenes = mat.
nrow();
251 sanisizer::resize(center_v, ngenes);
252 sanisizer::resize(scale_v, ngenes);
254 if (options.realize_matrix) {
256 const Index_ ncells = mat.
ncol();
257 auto emat = std::make_unique<EigenMatrix_>(
258 sanisizer::cast<I<
decltype(std::declval<EigenMatrix_>().rows())> >(ncells),
259 sanisizer::cast<I<
decltype(std::declval<EigenMatrix_>().cols())> >(ngenes)
264 static_assert(!EigenMatrix_::IsRowMajor);
270 tatami::ConvertToDenseOptions opt;
271 opt.num_threads = options.num_threads;
276 center_v.array() = emat->array().colwise().sum();
280 std::fill(center_v.begin(), center_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
282 emat->array().rowwise() -= center_v.adjoint().array();
284 scale_v.array() = emat->array().colwise().squaredNorm();
286 scale_v /= ncells - 1;
288 std::fill(scale_v.begin(), scale_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
291 total_var = process_scale_vector(options.scale, scale_v);
293 emat->array().rowwise() /= scale_v.adjoint().array();
296 return std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> >(
301 compute_row_means_and_variances<false>(mat, options.num_threads, center_v, scale_v);
302 total_var = process_scale_vector(options.scale, scale_v);
304 std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > output(
305 new TransposedTatamiWrapperMatrix<EigenVector_, EigenMatrix_, Value_, Index_>(mat, options.num_threads)
307 return prepare_deferred_matrix_for_irlba(std::move(output), options, center_v, scale_v);
319template<
typename EigenMatrix_,
typename EigenVector_>
372template<
typename Value_,
typename Index_,
typename EigenMatrix_,
class EigenVector_,
class SubsetFunction_>
373void simple_pca_internal(
377 SubsetFunction_ subset_fun
381 std::unique_ptr<irlba::Matrix<EigenVector_, EigenMatrix_> > ptr;
398 if (!options.
scale) {
399 output.
scale = EigenVector_();
429template<
typename Value_,
typename Index_,
typename EigenMatrix_,
class EigenVector_>
435 [](
const EigenMatrix_&,
const EigenVector_&) ->
void {}
454template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::pair< bool, int > compute(const Matrix_ &matrix, const Eigen::Index number, EigenMatrix_ &outU, EigenMatrix_ &outV, EigenVector_ &outD, const Options< EigenVector_ > &options)
Principal component analysis on single-cell data.
void simple_pca(const tatami::Matrix< Value_, Index_ > &mat, const SimplePcaOptions &options, SimplePcaResults< EigenMatrix_, EigenVector_ > &output)
Definition simple_pca.hpp:430
void parallelize(Function_ fun, const Index_ tasks, const int threads)
CompressedSparseContents< StoredValue_, StoredIndex_, StoredPointer_ > retrieve_compressed_sparse_contents(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row, const RetrieveCompressedSparseContentsOptions &options)
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row_major, StoredValue_ *const store, const ConvertToDenseOptions &options)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Options for simple_pca().
Definition simple_pca.hpp:28
bool realize_matrix
Definition simple_pca.hpp:64
irlba::Options< Eigen::VectorXd > irlba_options
Definition simple_pca.hpp:75
bool transpose
Definition simple_pca.hpp:58
int num_threads
Definition simple_pca.hpp:70
bool scale
Definition simple_pca.hpp:52
int number
Definition simple_pca.hpp:45
Results of simple_pca().
Definition simple_pca.hpp:320
EigenMatrix_ components
Definition simple_pca.hpp:329
EigenMatrix_ rotation
Definition simple_pca.hpp:349
bool converged
Definition simple_pca.hpp:366
EigenVector_ center
Definition simple_pca.hpp:355
EigenVector_ variance_explained
Definition simple_pca.hpp:336
EigenVector_ scale
Definition simple_pca.hpp:361
EigenVector_::Scalar total_variance
Definition simple_pca.hpp:342
bool sparse_extract_index