scran_pca
Principal component analysis for single-cell data
Loading...
Searching...
No Matches
simple_pca.hpp
Go to the documentation of this file.
1#ifndef SCRAN_PCA_SIMPLE_PCA_HPP
2#define SCRAN_PCA_SIMPLE_PCA_HPP
3
4#include "tatami/tatami.hpp"
5#include "tatami_stats/tatami_stats.hpp"
6#include "irlba/irlba.hpp"
7#include "irlba/parallel.hpp"
8#include "Eigen/Dense"
9
10#include <vector>
11#include <type_traits>
12#include <algorithm>
13
14#include "utils.hpp"
15
21namespace scran_pca {
22
32 }
42 int number = 25;
43
48 bool scale = false;
49
54 bool transpose = true;
55
60 int num_threads = 1;
61
66 bool realize_matrix = true;
67
72};
73
77namespace internal {
78
79template<bool sparse_, typename Value_, typename Index_, class EigenVector_>
80void compute_row_means_and_variances(const tatami::Matrix<Value_, Index_>& mat, int num_threads, EigenVector_& center_v, EigenVector_& scale_v) {
81 if (mat.prefer_rows()) {
82 tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
84 opt.sparse_extract_index = false;
85 auto ext = tatami::consecutive_extractor<sparse_>(&mat, true, start, length, opt);
86 auto ncells = mat.ncol();
87 std::vector<Value_> vbuffer(ncells);
88
89 for (Index_ r = start, end = start + length; r < end; ++r) {
90 auto results = [&]() {
91 if constexpr(sparse_) {
92 auto range = ext->fetch(vbuffer.data(), NULL);
93 return tatami_stats::variances::direct(range.value, range.number, ncells, /* skip_nan = */ false);
94 } else {
95 auto ptr = ext->fetch(vbuffer.data());
96 return tatami_stats::variances::direct(ptr, ncells, /* skip_nan = */ false);
97 }
98 }();
99 center_v.coeffRef(r) = results.first;
100 scale_v.coeffRef(r) = results.second;
101 }
102 }, mat.nrow(), num_threads);
103
104 } else {
105 tatami::parallelize([&](size_t t, Index_ start, Index_ length) -> void {
106 tatami::Options opt;
107 auto ncells = mat.ncol();
108 auto ext = tatami::consecutive_extractor<sparse_>(&mat, false, static_cast<Index_>(0), ncells, start, length, opt);
109
110 typedef typename EigenVector_::Scalar Scalar;
111 tatami_stats::LocalOutputBuffer<Scalar> cbuffer(t, start, length, center_v.data());
112 tatami_stats::LocalOutputBuffer<Scalar> sbuffer(t, start, length, scale_v.data());
113
114 auto running = [&]() {
115 if constexpr(sparse_) {
116 return tatami_stats::variances::RunningSparse<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(), /* skip_nan = */ false, /* subtract = */ start);
117 } else {
118 return tatami_stats::variances::RunningDense<Scalar, Value_, Index_>(length, cbuffer.data(), sbuffer.data(), /* skip_nan = */ false);
119 }
120 }();
121
122 std::vector<Value_> vbuffer(length);
123 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(length);
124 for (Index_ r = 0; r < ncells; ++r) {
125 if constexpr(sparse_) {
126 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
127 running.add(range.value, range.index, range.number);
128 } else {
129 auto ptr = ext->fetch(vbuffer.data());
130 running.add(ptr);
131 }
132 }
133
134 running.finish();
135 cbuffer.transfer();
136 sbuffer.transfer();
137 }, mat.nrow(), num_threads);
138 }
139}
140
141template<class IrlbaMatrix_, class EigenMatrix_, class EigenVector_>
142auto run_irlba_deferred(
143 const IrlbaMatrix_& mat,
144 const SimplePcaOptions& options,
145 EigenMatrix_& components,
146 EigenMatrix_& rotation,
147 EigenVector_& variance_explained,
148 EigenVector_& center_v,
149 EigenVector_& scale_v)
150{
151 irlba::Centered<IrlbaMatrix_, EigenVector_> centered(mat, center_v);
152 if (options.scale) {
153 irlba::Scaled<true, decltype(centered), EigenVector_> scaled(centered, scale_v, true);
154 return irlba::compute(scaled, options.number, components, rotation, variance_explained, options.irlba_options);
155 } else {
156 return irlba::compute(centered, options.number, components, rotation, variance_explained, options.irlba_options);
157 }
158}
159
160template<typename Value_, typename Index_, class EigenMatrix_, class EigenVector_>
161void run_sparse(
163 const SimplePcaOptions& options,
164 EigenMatrix_& components,
165 EigenMatrix_& rotation,
166 EigenVector_& variance_explained,
167 EigenVector_& center_v,
168 EigenVector_& scale_v,
169 typename EigenVector_::Scalar& total_var,
170 bool& converged)
171{
172 Index_ ngenes = mat.nrow();
173 center_v.resize(ngenes);
174 scale_v.resize(ngenes);
175
176 if (options.realize_matrix) {
177 // 'extracted' contains row-major contents...
179 &mat,
180 /* row = */ true,
181 /* two_pass = */ false,
182 /* threads = */ options.num_threads
183 );
184
185 // But we effectively transpose it to CSC with genes in columns.
186 Index_ ncells = mat.ncol();
188 ncells,
189 ngenes,
190 std::move(extracted.value),
191 std::move(extracted.index),
192 std::move(extracted.pointers),
193 true,
194 options.num_threads
195 );
196
197 tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
198 const auto& ptrs = emat.get_pointers();
199 const auto& values = emat.get_values();
200 for (size_t r = start, end = start + length; r < end; ++r) {
201 auto offset = ptrs[r];
202 Index_ num_nonzero = ptrs[r + 1] - offset;
203 auto results = tatami_stats::variances::direct(values.data() + offset, num_nonzero, ncells, /* skip_nan = */ false);
204 center_v.coeffRef(r) = results.first;
205 scale_v.coeffRef(r) = results.second;
206 }
207 }, ngenes, options.num_threads);
208
209 total_var = internal::process_scale_vector(options.scale, scale_v);
210 auto out = run_irlba_deferred(emat, options, components, rotation, variance_explained, center_v, scale_v);
211 converged = out.first;
212
213 } else {
214 compute_row_means_and_variances<true>(mat, options.num_threads, center_v, scale_v);
215 total_var = internal::process_scale_vector(options.scale, scale_v);
216 auto out = run_irlba_deferred(
217 internal::TransposedTatamiWrapper<EigenVector_, Value_, Index_>(mat, options.num_threads),
218 options,
219 components,
220 rotation,
221 variance_explained,
222 center_v,
223 scale_v
224 );
225 converged = out.first;
226 }
227}
228
229template<typename Value_, typename Index_, class EigenMatrix_, class EigenVector_>
230void run_dense(
232 const SimplePcaOptions& options,
233 EigenMatrix_& components,
234 EigenMatrix_& rotation,
235 EigenVector_& variance_explained,
236 EigenVector_& center_v,
237 EigenVector_& scale_v,
238 typename EigenVector_::Scalar& total_var,
239 bool& converged)
240{
241 Index_ ngenes = mat.nrow();
242 center_v.resize(ngenes);
243 scale_v.resize(ngenes);
244
245 if (options.realize_matrix) {
246 // Create a matrix with genes in columns.
247 Index_ ncells = mat.ncol();
248 EigenMatrix_ emat(ncells, ngenes);
249
250 // If emat is row-major, we want to fill it with columns of 'mat', so row_major = false.
251 // If emat is column-major, we want to fill it with rows of 'mat', so row_major = true.
252 tatami::convert_to_dense(&mat, /* row_major = */ !emat.IsRowMajor, emat.data(), options.num_threads);
253
254 center_v.array() = emat.array().colwise().sum();
255 if (ncells) {
256 center_v /= ncells;
257 } else {
258 std::fill(center_v.begin(), center_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
259 }
260 emat.array().rowwise() -= center_v.adjoint().array(); // applying it to avoid wasting time with deferred operations inside IRLBA.
261
262 scale_v.array() = emat.array().colwise().squaredNorm();
263 if (ncells > 1) {
264 scale_v /= ncells - 1;
265 } else {
266 std::fill(scale_v.begin(), scale_v.end(), std::numeric_limits<typename EigenVector_::Scalar>::quiet_NaN());
267 }
268
269 total_var = internal::process_scale_vector(options.scale, scale_v);
270 if (options.scale) {
271 emat.array().rowwise() /= scale_v.adjoint().array();
272 }
273
274 auto out = irlba::compute(emat, options.number, components, rotation, variance_explained, options.irlba_options);
275 converged = out.first;
276
277 } else {
278 compute_row_means_and_variances<false>(mat, options.num_threads, center_v, scale_v);
279 total_var = internal::process_scale_vector(options.scale, scale_v);
280 auto out = run_irlba_deferred(
281 internal::TransposedTatamiWrapper<EigenVector_, Value_, Index_>(mat, options.num_threads),
282 options,
283 components,
284 rotation,
285 variance_explained,
286 center_v,
287 scale_v
288 );
289 converged = out.first;
290 }
291}
292
293}
303template<typename EigenMatrix_, typename EigenVector_>
311 EigenMatrix_ components;
312
317 EigenVector_ variance_explained;
318
323 typename EigenVector_::Scalar total_variance = 0;
324
330 EigenMatrix_ rotation;
331
336 EigenVector_ center;
337
342 EigenVector_ scale;
343
347 bool converged = false;
348};
349
373template<typename Value_, typename Index_, typename EigenMatrix_, class EigenVector_>
376
377 if (mat.sparse()) {
378 internal::run_sparse(mat, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance, output.converged);
379 } else {
380 internal::run_dense(mat, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance, output.converged);
381 }
382
383 internal::clean_up(mat.ncol(), output.components, output.variance_explained);
384 if (options.transpose) {
385 output.components.adjointInPlace();
386 }
387
388 if (!options.scale) {
389 output.scale = EigenVector_();
390 }
391}
392
408template<typename EigenMatrix_ = Eigen::MatrixXd, class EigenVector_ = Eigen::VectorXd, typename Value_, typename Index_>
414
415}
416
417#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::pair< bool, int > compute(const Matrix_ &matrix, Eigen::Index number, EigenMatrix_ &outU, EigenMatrix_ &outV, EigenVector_ &outD, const Options &options)
Principal component analysis on single-cell data.
void simple_pca(const tatami::Matrix< Value_, Index_ > &mat, const SimplePcaOptions &options, SimplePcaResults< EigenMatrix_, EigenVector_ > &output)
Definition simple_pca.hpp:374
void parallelize(Function_ fun, Index_ tasks, int threads)
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > *matrix, bool row_major, StoredValue_ *store, int threads=1)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Options for simple_pca().
Definition simple_pca.hpp:26
bool realize_matrix
Definition simple_pca.hpp:66
bool transpose
Definition simple_pca.hpp:54
int num_threads
Definition simple_pca.hpp:60
irlba::Options irlba_options
Definition simple_pca.hpp:71
bool scale
Definition simple_pca.hpp:48
int number
Definition simple_pca.hpp:42
Results of simple_pca().
Definition simple_pca.hpp:304
EigenMatrix_ components
Definition simple_pca.hpp:311
EigenMatrix_ rotation
Definition simple_pca.hpp:330
bool converged
Definition simple_pca.hpp:347
EigenVector_ center
Definition simple_pca.hpp:336
EigenVector_ variance_explained
Definition simple_pca.hpp:317
EigenVector_ scale
Definition simple_pca.hpp:342
EigenVector_::Scalar total_variance
Definition simple_pca.hpp:323
bool sparse_extract_index