1#ifndef SCRAN_PCA_SIMPLE_PCA_HPP
2#define SCRAN_PCA_SIMPLE_PCA_HPP
5#include "tatami_stats/tatami_stats.hpp"
79template<
bool sparse_,
typename Value_,
typename Index_,
class EigenVector_>
86 auto ncells = mat.
ncol();
87 std::vector<Value_> vbuffer(ncells);
89 for (Index_ r = start, end = start + length; r < end; ++r) {
90 auto results = [&]() {
91 if constexpr(sparse_) {
92 auto range = ext->fetch(vbuffer.data(), NULL);
93 return tatami_stats::variances::direct(range.value, range.number, ncells,
false);
95 auto ptr = ext->fetch(vbuffer.data());
96 return tatami_stats::variances::direct(ptr, ncells,
false);
99 center_v.coeffRef(r) = results.first;
100 scale_v.coeffRef(r) = results.second;
102 }, mat.
nrow(), num_threads);
107 auto ncells = mat.
ncol();
110 typedef typename EigenVector_::Scalar Scalar;
111 tatami_stats::LocalOutputBuffer<Scalar> cbuffer(t, start, length, center_v.data());
112 tatami_stats::LocalOutputBuffer<Scalar> sbuffer(t, start, length, scale_v.data());
114 auto running = [&]() {
115 if constexpr(sparse_) {
116 return tatami_stats::variances::RunningSparse<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(),
false, start);
118 return tatami_stats::variances::RunningDense<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(),
false);
122 std::vector<Value_> vbuffer(length);
123 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(length);
124 for (Index_ r = 0; r < ncells; ++r) {
125 if constexpr(sparse_) {
126 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
127 running.add(range.value, range.index, range.number);
129 auto ptr = ext->fetch(vbuffer.data());
137 }, mat.
nrow(), num_threads);
141template<
class IrlbaMatrix_,
class EigenMatrix_,
class EigenVector_>
142auto run_irlba_deferred(
143 const IrlbaMatrix_& mat,
144 const SimplePcaOptions& options,
145 EigenMatrix_& components,
146 EigenMatrix_& rotation,
147 EigenVector_& variance_explained,
148 EigenVector_& center_v,
149 EigenVector_& scale_v)
153 irlba::Scaled<
true,
decltype(centered), EigenVector_> scaled(centered, scale_v,
true);
154 return irlba::compute(scaled, options.number, components, rotation, variance_explained, options.irlba_options);
156 return irlba::compute(centered, options.number, components, rotation, variance_explained, options.irlba_options);
160template<
typename Value_,
typename Index_,
class EigenMatrix_,
class EigenVector_>
163 const SimplePcaOptions& options,
164 EigenMatrix_& components,
165 EigenMatrix_& rotation,
166 EigenVector_& variance_explained,
167 EigenVector_& center_v,
168 EigenVector_& scale_v,
169 typename EigenVector_::Scalar& total_var,
172 Index_ ngenes = mat.
nrow();
173 center_v.resize(ngenes);
174 scale_v.resize(ngenes);
176 if (options.realize_matrix) {
186 Index_ ncells = mat.
ncol();
190 std::move(extracted.value),
191 std::move(extracted.index),
192 std::move(extracted.pointers),
198 const auto& ptrs = emat.get_pointers();
199 const auto& values = emat.get_values();
200 for (
size_t r = start, end = start + length; r < end; ++r) {
201 auto offset = ptrs[r];
202 Index_ num_nonzero = ptrs[r + 1] - offset;
203 auto results = tatami_stats::variances::direct(values.data() + offset, num_nonzero, ncells,
false);
204 center_v.coeffRef(r) = results.first;
205 scale_v.coeffRef(r) = results.second;
207 }, ngenes, options.num_threads);
209 total_var = internal::process_scale_vector(options.scale, scale_v);
210 auto out = run_irlba_deferred(emat, options, components, rotation, variance_explained, center_v, scale_v);
211 converged = out.first;
214 compute_row_means_and_variances<true>(mat, options.num_threads, center_v, scale_v);
215 total_var = internal::process_scale_vector(options.scale, scale_v);
216 auto out = run_irlba_deferred(
217 internal::TransposedTatamiWrapper<EigenVector_, Value_, Index_>(mat, options.num_threads),
225 converged = out.first;
229template<
typename Value_,
typename Index_,
class EigenMatrix_,
class EigenVector_>
232 const SimplePcaOptions& options,
233 EigenMatrix_& components,
234 EigenMatrix_& rotation,
235 EigenVector_& variance_explained,
236 EigenVector_& center_v,
237 EigenVector_& scale_v,
238 typename EigenVector_::Scalar& total_var,
241 Index_ ngenes = mat.
nrow();
242 center_v.resize(ngenes);
243 scale_v.resize(ngenes);
245 if (options.realize_matrix) {
247 Index_ ncells = mat.
ncol();
248 EigenMatrix_ emat(ncells, ngenes);
254 center_v.array() = emat.array().colwise().sum();
258 std::fill(center_v.begin(), center_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
260 emat.array().rowwise() -= center_v.adjoint().array();
262 scale_v.array() = emat.array().colwise().squaredNorm();
264 scale_v /= ncells - 1;
266 std::fill(scale_v.begin(), scale_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
269 total_var = internal::process_scale_vector(options.scale, scale_v);
271 emat.array().rowwise() /= scale_v.adjoint().array();
274 auto out =
irlba::compute(emat, options.number, components, rotation, variance_explained, options.irlba_options);
275 converged = out.first;
278 compute_row_means_and_variances<false>(mat, options.num_threads, center_v, scale_v);
279 total_var = internal::process_scale_vector(options.scale, scale_v);
280 auto out = run_irlba_deferred(
281 internal::TransposedTatamiWrapper<EigenVector_, Value_, Index_>(mat, options.num_threads),
289 converged = out.first;
303template<
typename EigenMatrix_,
typename EigenVector_>
373template<
typename Value_,
typename Index_,
typename EigenMatrix_,
class EigenVector_>
388 if (!options.
scale) {
389 output.
scale = EigenVector_();
408template<
typename EigenMatrix_ = Eigen::MatrixXd,
class EigenVector_ = Eigen::VectorXd,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::pair< bool, int > compute(const Matrix_ &matrix, Eigen::Index number, EigenMatrix_ &outU, EigenMatrix_ &outV, EigenVector_ &outD, const Options &options)
Principal component analysis on single-cell data.
void simple_pca(const tatami::Matrix< Value_, Index_ > &mat, const SimplePcaOptions &options, SimplePcaResults< EigenMatrix_, EigenVector_ > &output)
Definition simple_pca.hpp:374
void parallelize(Function_ fun, Index_ tasks, int threads)
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > *matrix, bool row_major, StoredValue_ *store, int threads=1)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Options for simple_pca().
Definition simple_pca.hpp:26
bool realize_matrix
Definition simple_pca.hpp:66
bool transpose
Definition simple_pca.hpp:54
int num_threads
Definition simple_pca.hpp:60
irlba::Options irlba_options
Definition simple_pca.hpp:71
bool scale
Definition simple_pca.hpp:48
int number
Definition simple_pca.hpp:42
Results of simple_pca().
Definition simple_pca.hpp:304
EigenMatrix_ components
Definition simple_pca.hpp:311
EigenMatrix_ rotation
Definition simple_pca.hpp:330
bool converged
Definition simple_pca.hpp:347
EigenVector_ center
Definition simple_pca.hpp:336
EigenVector_ variance_explained
Definition simple_pca.hpp:317
EigenVector_ scale
Definition simple_pca.hpp:342
EigenVector_::Scalar total_variance
Definition simple_pca.hpp:323
bool sparse_extract_index