1#ifndef SCRAN_MODEL_GENE_VARIANCES_H
2#define SCRAN_MODEL_GENE_VARIANCES_H
10#include "tatami_stats/tatami_stats.hpp"
12#include "sanisizer/sanisizer.hpp"
72 bool compute_average =
true;
99template<
typename Stat_>
126template<
typename Stat_>
134 means(sanisizer::cast<I<
decltype(
means.size())> >(ngenes)
135#ifdef SCRAN_VARIANCES_TEST_INIT
136 , SCRAN_VARIANCES_TEST_INIT
140#ifdef SCRAN_VARIANCES_TEST_INIT
141 , SCRAN_VARIANCES_TEST_INIT
144 fitted(sanisizer::cast<I<
decltype(
fitted.size())> >(ngenes)
145#ifdef SCRAN_VARIANCES_TEST_INIT
146 , SCRAN_VARIANCES_TEST_INIT
150#ifdef SCRAN_VARIANCES_TEST_INIT
151 , SCRAN_VARIANCES_TEST_INIT
184template<
typename Stat_>
190 std::vector<ModelGeneVariancesBuffers<Stat_> >
per_block;
203template<
typename Stat_>
211 average(do_average ? ngenes : 0)
214 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
225 std::vector<ModelGeneVariancesResults<Stat_> >
per_block;
239template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
240void compute_variances_dense_row(
243 const Block_*
const block,
244 const std::vector<Index_>& block_size,
245 const int num_threads)
247 const bool blocked = (block != NULL);
248 const auto nblocks = block_size.size();
249 const auto NR = mat.
nrow(), NC = mat.
ncol();
252 auto tmp_means = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
253 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
257 for (Index_ r = start, end = start + length; r < end; ++r) {
258 auto ptr = ext->fetch(buffer.data());
261 tatami_stats::grouped_variances::direct(
270 static_cast<Index_*
>(NULL)
272 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
273 buffers[b].means[r] = tmp_means[b];
274 buffers[b].variances[r] = tmp_vars[b];
277 const auto stat = tatami_stats::variances::direct(ptr, NC,
false);
278 buffers[0].means[r] = stat.first;
279 buffers[0].variances[r] = stat.second;
285template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
286void compute_variances_sparse_row(
288 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
289 const Block_*
const block,
290 const std::vector<Index_>& block_size,
291 const int num_threads)
293 const bool blocked = (block != NULL);
294 const auto nblocks = block_size.size();
295 const auto NR = mat.
nrow(), NC = mat.
ncol();
298 auto tmp_means = sanisizer::create<std::vector<Stat_> >(nblocks);
299 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(nblocks);
300 auto tmp_nzero = sanisizer::create<std::vector<Index_> >(nblocks);
310 for (Index_ r = start, end = start + length; r < end; ++r) {
311 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
314 tatami_stats::grouped_variances::direct(
325 static_cast<Index_*
>(NULL)
327 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
328 buffers[b].means[r] = tmp_means[b];
329 buffers[b].variances[r] = tmp_vars[b];
332 const auto stat = tatami_stats::variances::direct(range.value, range.number, NC,
false);
333 buffers[0].means[r] = stat.first;
334 buffers[0].variances[r] = stat.second;
340template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
341void compute_variances_dense_column(
343 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
344 const Block_*
const block,
345 const std::vector<Index_>& block_size,
346 const int num_threads)
348 const bool blocked = (block != NULL);
349 const auto nblocks = block_size.size();
350 const auto NR = mat.
nrow(), NC = mat.
ncol();
352 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
356 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
357 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
358 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
359 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
361 std::vector<tatami_stats::variances::RunningDense<Stat_, Value_, Index_> > runners;
362 runners.reserve(nblocks);
363 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
364 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false);
368 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
369 auto ptr = ext->fetch(buffer.data());
370 runners[block[c]].add(ptr);
373 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
374 auto ptr = ext->fetch(buffer.data());
379 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
382 local_vars.transfer();
383 local_means.transfer();
387template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
388void compute_variances_sparse_column(
390 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
391 const Block_*
const block,
392 const std::vector<Index_>& block_size,
393 const int num_threads)
395 const bool blocked = (block != NULL);
396 const auto nblocks = block_size.size();
397 const auto NR = mat.
nrow(), NC = mat.
ncol();
398 auto nonzeros = sanisizer::create<std::vector<std::vector<Index_> > >(
403 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
412 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
413 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
414 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
415 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
417 std::vector<tatami_stats::variances::RunningSparse<Stat_, Value_, Index_> > runners;
418 runners.reserve(nblocks);
419 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
420 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false, start);
424 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
425 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
426 runners[block[c]].add(range.value, range.index, range.number);
429 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
430 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
431 runners[0].add(range.value, range.index, range.number);
435 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
438 local_vars.transfer();
439 local_means.transfer();
443template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
444void compute_variances(
446 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
447 const Block_*
const block,
448 const std::vector<Index_>& block_size,
449 const int num_threads)
453 compute_variances_sparse_row(mat, buffers, block, block_size, num_threads);
455 compute_variances_dense_row(mat, buffers, block, block_size, num_threads);
459 compute_variances_sparse_column(mat, buffers, block, block_size, num_threads);
461 compute_variances_dense_column(mat, buffers, block, block_size, num_threads);
466template<
typename Stat_,
typename Index_>
468 const std::vector<Stat_>& block_weights,
469 const std::vector<Index_>& block_size,
470 const Index_ min_size,
471 std::vector<Stat_>& tmp_weights
473 const auto nblocks = block_weights.size();
475 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
476 if (block_size[b] < min_size) {
479 tmp_weights.push_back(block_weights[b]);
483template<
typename Stat_,
typename Index_,
class Function_>
484void extract_pointers(
485 const std::vector<ModelGeneVariancesBuffers<Stat_> >& per_block,
486 const std::vector<Index_>& block_size,
487 const Index_ min_size,
489 std::vector<Stat_*>& tmp_pointers
491 const auto nblocks = per_block.size();
492 tmp_pointers.clear();
493 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
494 if (block_size[b] < min_size) {
497 tmp_pointers.push_back(fun(per_block[b]));
531template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
534 const Block_*
const block,
538 const Index_ NR = mat.
nrow(), NC = mat.
ncol();
539 std::vector<Index_> block_size;
542 block_size = tatami_stats::tabulate_groups(block, NC);
545 block_size.push_back(NC);
548 const auto nblocks = block_size.size();
553 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
554 const auto& current = buffers.
per_block[b];
555 if (block_size[b] >= 2) {
556 fit_variance_trend(NR, current.means, current.variances, current.fitted, current.residuals, work, fopt);
558 std::fill_n(current.fitted, NR, std::numeric_limits<double>::quiet_NaN());
559 std::fill_n(current.residuals, NR, std::numeric_limits<double>::quiet_NaN());
563 const auto ave_means = buffers.
average.means;
564 const auto ave_variances = buffers.
average.variances;
565 const auto ave_fitted = buffers.
average.fitted;
566 const auto ave_residuals = buffers.
average.residuals;
568 std::vector<Stat_*> tmp_pointers;
569 tmp_pointers.reserve(nblocks);
573 std::vector<Stat_> tmp_weights;
574 tmp_weights.reserve(nblocks);
577 internal::extract_weights(block_weight, block_size,
static_cast<Index_
>(1), tmp_weights);
578 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(1), [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers);
583 internal::extract_weights(block_weight, block_size,
static_cast<Index_
>(2), tmp_weights);
586 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers);
591 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers);
596 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers);
602 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(1), [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers);
609 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers);
614 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers);
619 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers);
642template<
typename Value_,
typename Index_,
typename Stat_>
649 bbuffers.
per_block.emplace_back(std::move(buffers));
652 bbuffers.
average.variances = NULL;
653 bbuffers.
average.fitted = NULL;
654 bbuffers.
average.residuals = NULL;
672template<
typename Stat_ =
double,
typename Value_,
typename Index_>
703template<
typename Stat_ =
double,
typename Value_,
typename Index_,
typename Block_>
705 const auto nblocks = (block ? tatami_stats::total_groups(block, mat.
ncol()) : 1);
707 const bool do_average = options.compute_average && options.
block_average_policy != BlockAveragePolicy::NONE;
711 sanisizer::resize(buffers.
per_block, nblocks);
712 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
714 current.means = output.
per_block[b].means.data();
715 current.variances = output.
per_block[b].variances.data();
716 current.fitted = output.
per_block[b].fitted.data();
717 current.residuals = output.
per_block[b].residuals.data();
722 buffers.
average.variances = NULL;
724 buffers.
average.residuals = NULL;
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Fit a mean-variance trend to log-count data.
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
void parallel_weighted_means(const std::size_t n, std::vector< Stat_ * > in, const Weight_ *const w, Output_ *const out, const bool skip_nan)
void parallel_quantiles(const std::size_t n, const std::vector< Stat_ * > &in, const double quantile, Output_ *const out, const bool skip_nan)
Variance modelling for single-cell expression data.
Definition choose_highly_variable_genes.hpp:15
void model_gene_variances_blocked(const tatami::Matrix< Value_, Index_ > &mat, const Block_ *const block, const ModelGeneVariancesBlockedBuffers< Stat_ > &buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:532
void fit_variance_trend(const std::size_t n, const Float_ *const mean, const Float_ *const variance, Float_ *const fitted, Float_ *const residuals, FitVarianceTrendWorkspace< Float_ > &workspace, const FitVarianceTrendOptions &options)
Definition fit_variance_trend.hpp:131
void model_gene_variances(const tatami::Matrix< Value_, Index_ > &mat, ModelGeneVariancesBuffers< Stat_ > buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:643
BlockAveragePolicy
Definition model_gene_variances.hpp:31
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Options for fit_variance_trend().
Definition fit_variance_trend.hpp:24
int num_threads
Definition fit_variance_trend.hpp:82
Workspace for fit_variance_trend().
Definition fit_variance_trend.hpp:91
Buffers for model_gene_variances_blocked().
Definition model_gene_variances.hpp:185
ModelGeneVariancesBuffers< Stat_ > average
Definition model_gene_variances.hpp:196
std::vector< ModelGeneVariancesBuffers< Stat_ > > per_block
Definition model_gene_variances.hpp:190
Results of model_gene_variances_blocked().
Definition model_gene_variances.hpp:204
std::vector< ModelGeneVariancesResults< Stat_ > > per_block
Definition model_gene_variances.hpp:225
ModelGeneVariancesResults< Stat_ > average
Definition model_gene_variances.hpp:231
Buffers for model_gene_variances() and friends.
Definition model_gene_variances.hpp:100
Stat_ * means
Definition model_gene_variances.hpp:104
Stat_ * residuals
Definition model_gene_variances.hpp:119
Stat_ * variances
Definition model_gene_variances.hpp:109
Stat_ * fitted
Definition model_gene_variances.hpp:114
Options for model_gene_variances() and friends.
Definition model_gene_variances.hpp:36
FitVarianceTrendOptions fit_variance_trend_options
Definition model_gene_variances.hpp:40
double block_quantile
Definition model_gene_variances.hpp:82
BlockAveragePolicy block_average_policy
Definition model_gene_variances.hpp:47
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition model_gene_variances.hpp:66
int num_threads
Definition model_gene_variances.hpp:88
scran_blocks::WeightPolicy block_weight_policy
Definition model_gene_variances.hpp:59
Results of model_gene_variances().
Definition model_gene_variances.hpp:127
std::vector< Stat_ > fitted
Definition model_gene_variances.hpp:172
std::vector< Stat_ > means
Definition model_gene_variances.hpp:162
std::vector< Stat_ > residuals
Definition model_gene_variances.hpp:177
std::vector< Stat_ > variances
Definition model_gene_variances.hpp:167
bool sparse_ordered_index