1#ifndef SCRAN_MODEL_GENE_VARIANCES_H
2#define SCRAN_MODEL_GENE_VARIANCES_H
10#include "tatami_stats/tatami_stats.hpp"
12#include "sanisizer/sanisizer.hpp"
73template<
typename Stat_>
100template<
typename Stat_>
107 template<
typename Ngenes_>
109 means(sanisizer::cast<
decltype(I(
means.size()))>(ngenes)
110#ifdef SCRAN_VARIANCES_TEST_INIT
111 , SCRAN_VARIANCES_TEST_INIT
115#ifdef SCRAN_VARIANCES_TEST_INIT
116 , SCRAN_VARIANCES_TEST_INIT
119 fitted(sanisizer::cast<
decltype(I(
fitted.size()))>(ngenes)
120#ifdef SCRAN_VARIANCES_TEST_INIT
121 , SCRAN_VARIANCES_TEST_INIT
125#ifdef SCRAN_VARIANCES_TEST_INIT
126 , SCRAN_VARIANCES_TEST_INIT
159template<
typename Stat_>
165 std::vector<ModelGeneVariancesBuffers<Stat_> >
per_block;
178template<
typename Stat_>
185 template<
typename Ngenes_,
typename Nblocks_>
188 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
199 std::vector<ModelGeneVariancesResults<Stat_> >
per_block;
213template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
214void compute_variances_dense_row(
217 const Block_*
const block,
218 const std::vector<Index_>& block_size,
219 const int num_threads)
221 const bool blocked = (block != NULL);
222 const auto nblocks = block_size.size();
223 const auto NR = mat.
nrow(), NC = mat.
ncol();
226 auto tmp_means = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
227 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
231 for (Index_ r = start, end = start + length; r < end; ++r) {
232 auto ptr = ext->fetch(buffer.data());
235 tatami_stats::grouped_variances::direct(
244 static_cast<Index_*
>(NULL)
246 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
247 buffers[b].means[r] = tmp_means[b];
248 buffers[b].variances[r] = tmp_vars[b];
251 const auto stat = tatami_stats::variances::direct(ptr, NC,
false);
252 buffers[0].means[r] = stat.first;
253 buffers[0].variances[r] = stat.second;
259template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
260void compute_variances_sparse_row(
262 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
263 const Block_*
const block,
264 const std::vector<Index_>& block_size,
265 const int num_threads)
267 const bool blocked = (block != NULL);
268 const auto nblocks = block_size.size();
269 const auto NR = mat.
nrow(), NC = mat.
ncol();
272 auto tmp_means = sanisizer::create<std::vector<Stat_> >(nblocks);
273 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(nblocks);
274 auto tmp_nzero = sanisizer::create<std::vector<Index_> >(nblocks);
284 for (Index_ r = start, end = start + length; r < end; ++r) {
285 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
288 tatami_stats::grouped_variances::direct(
299 static_cast<Index_*
>(NULL)
301 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
302 buffers[b].means[r] = tmp_means[b];
303 buffers[b].variances[r] = tmp_vars[b];
306 const auto stat = tatami_stats::variances::direct(range.value, range.number, NC,
false);
307 buffers[0].means[r] = stat.first;
308 buffers[0].variances[r] = stat.second;
314template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
315void compute_variances_dense_column(
317 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
318 const Block_*
const block,
319 const std::vector<Index_>& block_size,
320 const int num_threads)
322 const bool blocked = (block != NULL);
323 const auto nblocks = block_size.size();
324 const auto NR = mat.
nrow(), NC = mat.
ncol();
326 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
330 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
331 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
332 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
333 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
335 std::vector<tatami_stats::variances::RunningDense<Stat_, Value_, Index_> > runners;
336 runners.reserve(nblocks);
337 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
338 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false);
342 for (
decltype(I(NC)) c = 0; c < NC; ++c) {
343 auto ptr = ext->fetch(buffer.data());
344 runners[block[c]].add(ptr);
347 for (
decltype(I(NC)) c = 0; c < NC; ++c) {
348 auto ptr = ext->fetch(buffer.data());
353 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
356 local_vars.transfer();
357 local_means.transfer();
361template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
362void compute_variances_sparse_column(
364 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
365 const Block_*
const block,
366 const std::vector<Index_>& block_size,
367 const int num_threads)
369 const bool blocked = (block != NULL);
370 const auto nblocks = block_size.size();
371 const auto NR = mat.
nrow(), NC = mat.
ncol();
372 auto nonzeros = sanisizer::create<std::vector<std::vector<Index_> > >(
377 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
386 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
387 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
388 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
389 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
391 std::vector<tatami_stats::variances::RunningSparse<Stat_, Value_, Index_> > runners;
392 runners.reserve(nblocks);
393 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
394 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false, start);
398 for (
decltype(I(NC)) c = 0; c < NC; ++c) {
399 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
400 runners[block[c]].add(range.value, range.index, range.number);
403 for (
decltype(I(NC)) c = 0; c < NC; ++c) {
404 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
405 runners[0].add(range.value, range.index, range.number);
409 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
412 local_vars.transfer();
413 local_means.transfer();
417template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
418void compute_variances(
420 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
421 const Block_*
const block,
422 const std::vector<Index_>& block_size,
423 const int num_threads)
427 compute_variances_sparse_row(mat, buffers, block, block_size, num_threads);
429 compute_variances_dense_row(mat, buffers, block, block_size, num_threads);
433 compute_variances_sparse_column(mat, buffers, block, block_size, num_threads);
435 compute_variances_dense_column(mat, buffers, block, block_size, num_threads);
440template<
typename Index_,
typename Stat_,
class Function_>
443 const std::vector<ModelGeneVariancesBuffers<Stat_> >& per_block,
444 const std::vector<Index_>& block_size,
445 const std::vector<Stat_>& block_weights,
446 const Index_ min_size,
448 std::vector<Stat_*>& tmp_pointers,
449 std::vector<Stat_>& tmp_weights,
456 tmp_pointers.clear();
459 const auto nblocks = per_block.size();
460 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
461 if (block_size[b] < min_size) {
464 tmp_weights.push_back(block_weights[b]);
465 tmp_pointers.push_back(fun(per_block[b]));
499template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
502 const Block_*
const block,
506 const Index_ NR = mat.
nrow(), NC = mat.
ncol();
507 std::vector<Index_> block_size;
510 block_size = tatami_stats::tabulate_groups(block, NC);
513 block_size.push_back(NC);
516 const auto nblocks = block_size.size();
521 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
522 const auto& current = buffers.
per_block[b];
523 if (block_size[b] >= 2) {
524 fit_variance_trend(NR, current.means, current.variances, current.fitted, current.residuals, work, fopt);
526 std::fill_n(current.fitted, NR, std::numeric_limits<double>::quiet_NaN());
527 std::fill_n(current.residuals, NR, std::numeric_limits<double>::quiet_NaN());
531 const auto ave_means = buffers.
average.means;
532 const auto ave_variances = buffers.
average.variances;
533 const auto ave_fitted = buffers.
average.fitted;
534 const auto ave_residuals = buffers.
average.residuals;
536 if (ave_means || ave_variances || ave_fitted || ave_residuals) {
539 std::vector<Stat_*> tmp_pointers;
540 std::vector<Stat_> tmp_weights;
541 tmp_pointers.reserve(nblocks);
542 tmp_weights.reserve(nblocks);
544 internal::compute_average(
549 static_cast<Index_
>(1),
550 [](
const auto& x) -> Stat_* { return x.means; },
556 internal::compute_average(
561 static_cast<Index_
>(2),
562 [](
const auto& x) -> Stat_* { return x.variances; },
568 internal::compute_average(
573 static_cast<Index_
>(2),
574 [](
const auto& x) -> Stat_* { return x.fitted; },
580 internal::compute_average(
585 static_cast<Index_
>(2),
586 [](
const auto& x) -> Stat_* { return x.residuals; },
611template<
typename Value_,
typename Index_,
typename Stat_>
618 bbuffers.
per_block.emplace_back(std::move(buffers));
621 bbuffers.
average.variances = NULL;
622 bbuffers.
average.fitted = NULL;
623 bbuffers.
average.residuals = NULL;
641template<
typename Stat_ =
double,
typename Value_,
typename Index_>
672template<
typename Stat_ =
double,
typename Value_,
typename Index_,
typename Block_>
674 const auto nblocks = (block ? tatami_stats::total_groups(block, mat.
ncol()) : 1);
678 sanisizer::resize(buffers.
per_block, nblocks);
679 for (
decltype(I(nblocks)) b = 0; b < nblocks; ++b) {
681 current.means = output.
per_block[b].means.data();
682 current.variances = output.
per_block[b].variances.data();
683 current.fitted = output.
per_block[b].fitted.data();
684 current.residuals = output.
per_block[b].residuals.data();
689 buffers.
average.variances = NULL;
691 buffers.
average.residuals = NULL;
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Fit a mean-variance trend to log-count data.
void average_vectors_weighted(size_t n, std::vector< Stat_ * > in, const Weight_ *w, Output_ *out, bool skip_nan)
void compute_weights(std::size_t num_blocks, const Size_ *sizes, WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *weights)
Variance modelling for single-cell expression data.
Definition choose_highly_variable_genes.hpp:15
void model_gene_variances_blocked(const tatami::Matrix< Value_, Index_ > &mat, const Block_ *const block, const ModelGeneVariancesBlockedBuffers< Stat_ > &buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:500
void fit_variance_trend(const std::size_t n, const Float_ *const mean, const Float_ *const variance, Float_ *const fitted, Float_ *const residuals, FitVarianceTrendWorkspace< Float_ > &workspace, const FitVarianceTrendOptions &options)
Definition fit_variance_trend.hpp:131
void model_gene_variances(const tatami::Matrix< Value_, Index_ > &mat, ModelGeneVariancesBuffers< Stat_ > buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:612
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Options for fit_variance_trend().
Definition fit_variance_trend.hpp:24
int num_threads
Definition fit_variance_trend.hpp:82
Workspace for fit_variance_trend().
Definition fit_variance_trend.hpp:91
Buffers for model_gene_variances_blocked().
Definition model_gene_variances.hpp:160
ModelGeneVariancesBuffers< Stat_ > average
Definition model_gene_variances.hpp:171
std::vector< ModelGeneVariancesBuffers< Stat_ > > per_block
Definition model_gene_variances.hpp:165
Results of model_gene_variances_blocked().
Definition model_gene_variances.hpp:179
std::vector< ModelGeneVariancesResults< Stat_ > > per_block
Definition model_gene_variances.hpp:199
ModelGeneVariancesResults< Stat_ > average
Definition model_gene_variances.hpp:205
Buffers for model_gene_variances() and friends.
Definition model_gene_variances.hpp:74
Stat_ * means
Definition model_gene_variances.hpp:78
Stat_ * residuals
Definition model_gene_variances.hpp:93
Stat_ * variances
Definition model_gene_variances.hpp:83
Stat_ * fitted
Definition model_gene_variances.hpp:88
Options for model_gene_variances() and friends.
Definition model_gene_variances.hpp:27
FitVarianceTrendOptions fit_variance_trend_options
Definition model_gene_variances.hpp:31
bool compute_average
Definition model_gene_variances.hpp:56
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition model_gene_variances.hpp:50
int num_threads
Definition model_gene_variances.hpp:62
scran_blocks::WeightPolicy block_weight_policy
Definition model_gene_variances.hpp:43
Results of model_gene_variances().
Definition model_gene_variances.hpp:101
std::vector< Stat_ > fitted
Definition model_gene_variances.hpp:147
std::vector< Stat_ > means
Definition model_gene_variances.hpp:137
std::vector< Stat_ > residuals
Definition model_gene_variances.hpp:152
std::vector< Stat_ > variances
Definition model_gene_variances.hpp:142
bool sparse_ordered_index