1#ifndef SCRAN_MODEL_GENE_VARIANCES_H
2#define SCRAN_MODEL_GENE_VARIANCES_H
10#include "tatami_stats/tatami_stats.hpp"
12#include "sanisizer/sanisizer.hpp"
80 bool compute_average =
true;
107template<
typename Stat_>
138template<
typename Stat_>
146 means(sanisizer::cast<I<
decltype(
means.size())> >(ngenes)
147#ifdef SCRAN_VARIANCES_TEST_INIT
148 , SCRAN_VARIANCES_TEST_INIT
152#ifdef SCRAN_VARIANCES_TEST_INIT
153 , SCRAN_VARIANCES_TEST_INIT
156 fitted(sanisizer::cast<I<
decltype(
fitted.size())> >(trend ? ngenes : 0)
157#ifdef SCRAN_VARIANCES_TEST_INIT
158 , SCRAN_VARIANCES_TEST_INIT
162#ifdef SCRAN_VARIANCES_TEST_INIT
163 , SCRAN_VARIANCES_TEST_INIT
200template<
typename Stat_>
206 std::vector<ModelGeneVariancesBuffers<Stat_> >
per_block;
224template<
typename Stat_>
232 average(do_average ? ngenes : 0, do_trend)
235 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
236 per_block.emplace_back(ngenes, do_trend);
246 std::vector<ModelGeneVariancesResults<Stat_> >
per_block;
260template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
261void compute_variances_dense_row(
264 const Block_*
const block,
265 const std::vector<Index_>& block_size,
266 const int num_threads)
268 const bool blocked = (block != NULL);
269 const auto nblocks = block_size.size();
270 const auto NR = mat.
nrow(), NC = mat.
ncol();
273 auto tmp_means = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
274 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
278 for (Index_ r = start, end = start + length; r < end; ++r) {
279 auto ptr = ext->fetch(buffer.data());
282 tatami_stats::grouped_variances::direct(
291 static_cast<Index_*
>(NULL)
293 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
294 buffers[b].means[r] = tmp_means[b];
295 buffers[b].variances[r] = tmp_vars[b];
298 const auto stat = tatami_stats::variances::direct(ptr, NC,
false);
299 buffers[0].means[r] = stat.first;
300 buffers[0].variances[r] = stat.second;
306template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
307void compute_variances_sparse_row(
309 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
310 const Block_*
const block,
311 const std::vector<Index_>& block_size,
312 const int num_threads)
314 const bool blocked = (block != NULL);
315 const auto nblocks = block_size.size();
316 const auto NR = mat.
nrow(), NC = mat.
ncol();
319 auto tmp_means = sanisizer::create<std::vector<Stat_> >(nblocks);
320 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(nblocks);
321 auto tmp_nzero = sanisizer::create<std::vector<Index_> >(nblocks);
331 for (Index_ r = start, end = start + length; r < end; ++r) {
332 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
335 tatami_stats::grouped_variances::direct(
346 static_cast<Index_*
>(NULL)
348 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
349 buffers[b].means[r] = tmp_means[b];
350 buffers[b].variances[r] = tmp_vars[b];
353 const auto stat = tatami_stats::variances::direct(range.value, range.number, NC,
false);
354 buffers[0].means[r] = stat.first;
355 buffers[0].variances[r] = stat.second;
361template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
362void compute_variances_dense_column(
364 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
365 const Block_*
const block,
366 const std::vector<Index_>& block_size,
367 const int num_threads)
369 const bool blocked = (block != NULL);
370 const auto nblocks = block_size.size();
371 const auto NR = mat.
nrow(), NC = mat.
ncol();
373 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
377 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
378 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
379 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
380 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
382 std::vector<tatami_stats::variances::RunningDense<Stat_, Value_, Index_> > runners;
383 runners.reserve(nblocks);
384 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
385 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false);
389 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
390 auto ptr = ext->fetch(buffer.data());
391 runners[block[c]].add(ptr);
394 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
395 auto ptr = ext->fetch(buffer.data());
400 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
403 local_vars.transfer();
404 local_means.transfer();
408template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
409void compute_variances_sparse_column(
411 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
412 const Block_*
const block,
413 const std::vector<Index_>& block_size,
414 const int num_threads)
416 const bool blocked = (block != NULL);
417 const auto nblocks = block_size.size();
418 const auto NR = mat.
nrow(), NC = mat.
ncol();
419 auto nonzeros = sanisizer::create<std::vector<std::vector<Index_> > >(
424 tatami::parallelize([&](
const int thread,
const Index_ start,
const Index_ length) ->
void {
433 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
434 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
435 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
436 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
438 std::vector<tatami_stats::variances::RunningSparse<Stat_, Value_, Index_> > runners;
439 runners.reserve(nblocks);
440 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
441 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false, start);
445 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
446 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
447 runners[block[c]].add(range.value, range.index, range.number);
450 for (I<
decltype(NC)> c = 0; c < NC; ++c) {
451 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
452 runners[0].add(range.value, range.index, range.number);
456 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
459 local_vars.transfer();
460 local_means.transfer();
464template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
465void compute_variances(
467 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
468 const Block_*
const block,
469 const std::vector<Index_>& block_size,
470 const int num_threads)
474 compute_variances_sparse_row(mat, buffers, block, block_size, num_threads);
476 compute_variances_dense_row(mat, buffers, block, block_size, num_threads);
480 compute_variances_sparse_column(mat, buffers, block, block_size, num_threads);
482 compute_variances_dense_column(mat, buffers, block, block_size, num_threads);
487template<
typename Stat_,
typename Index_>
489 const std::vector<Stat_>& block_weights,
490 const std::vector<Index_>& block_size,
491 const Index_ min_size,
492 std::vector<Stat_>& tmp_weights
494 const auto nblocks = block_weights.size();
496 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
497 if (block_size[b] < min_size) {
500 tmp_weights.push_back(block_weights[b]);
504template<
typename Stat_,
typename Index_,
class Function_>
505void extract_pointers(
506 const std::vector<ModelGeneVariancesBuffers<Stat_> >& per_block,
507 const std::vector<Index_>& block_size,
508 const Index_ min_size,
510 std::vector<Stat_*>& tmp_pointers
512 const auto nblocks = per_block.size();
513 tmp_pointers.clear();
514 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
515 if (block_size[b] < min_size) {
518 tmp_pointers.push_back(fun(per_block[b]));
552template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
555 const Block_*
const block,
559 const Index_ NR = mat.
nrow(), NC = mat.
ncol();
560 std::vector<Index_> block_size;
563 block_size = tatami_stats::tabulate_groups(block, NC);
566 block_size.push_back(NC);
569 const auto nblocks = block_size.size();
574 bool all_trends_fitted =
true;
576 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
577 const auto& current = buffers.
per_block[b];
578 if (current.fitted == NULL || current.residuals == NULL) {
579 all_trends_fitted =
false;
582 if (block_size[b] >= 2) {
583 fit_variance_trend(NR, current.means, current.variances, current.fitted, current.residuals, work, fopt);
585 std::fill_n(current.fitted, NR, std::numeric_limits<double>::quiet_NaN());
586 std::fill_n(current.residuals, NR, std::numeric_limits<double>::quiet_NaN());
590 const auto ave_means = buffers.
average.means;
591 const auto ave_variances = buffers.
average.variances;
592 const auto ave_fitted = buffers.
average.fitted;
593 const auto ave_residuals = buffers.
average.residuals;
595 if ((ave_fitted || ave_residuals) && !all_trends_fitted) {
596 throw std::runtime_error(
"cannot compute average fitted values/residuals without per-block trend fits");
599 std::vector<Stat_*> tmp_pointers;
600 tmp_pointers.reserve(nblocks);
604 std::vector<Stat_> tmp_weights;
605 tmp_weights.reserve(nblocks);
608 internal::extract_weights(block_weight, block_size,
static_cast<Index_
>(1), tmp_weights);
609 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(1), [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers);
614 internal::extract_weights(block_weight, block_size,
static_cast<Index_
>(2), tmp_weights);
617 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers);
622 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers);
627 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers);
633 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(1), [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers);
640 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers);
645 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers);
650 internal::extract_pointers(buffers.
per_block, block_size,
static_cast<Index_
>(2), [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers);
673template<
typename Value_,
typename Index_,
typename Stat_>
680 bbuffers.
per_block.emplace_back(std::move(buffers));
683 bbuffers.
average.variances = NULL;
684 bbuffers.
average.fitted = NULL;
685 bbuffers.
average.residuals = NULL;
703template<
typename Stat_ =
double,
typename Value_,
typename Index_>
740template<
typename Stat_ =
double,
typename Value_,
typename Index_,
typename Block_>
742 const auto nblocks = (block ? tatami_stats::total_groups(block, mat.
ncol()) : 1);
744 const bool do_average = options.compute_average && options.
block_average_policy != BlockAveragePolicy::NONE;
753 sanisizer::resize(buffers.
per_block, nblocks);
754 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
756 current.means = output.
per_block[b].means.data();
757 current.variances = output.
per_block[b].variances.data();
760 current.fitted = output.
per_block[b].fitted.data();
761 current.residuals = output.
per_block[b].residuals.data();
763 current.fitted = NULL;
764 current.residuals = NULL;
770 buffers.
average.variances = NULL;
772 buffers.
average.residuals = NULL;
782 buffers.
average.residuals = NULL;
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Fit a mean-variance trend to log-count data.
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
void parallel_weighted_means(const std::size_t n, std::vector< Stat_ * > in, const Weight_ *const w, Output_ *const out, const bool skip_nan)
void parallel_quantiles(const std::size_t n, const std::vector< Stat_ * > &in, const double quantile, Output_ *const out, const bool skip_nan)
Variance modelling for single-cell expression data.
Definition choose_highly_variable_genes.hpp:15
void model_gene_variances_blocked(const tatami::Matrix< Value_, Index_ > &mat, const Block_ *const block, const ModelGeneVariancesBlockedBuffers< Stat_ > &buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:553
void fit_variance_trend(const std::size_t n, const Float_ *const mean, const Float_ *const variance, Float_ *const fitted, Float_ *const residuals, FitVarianceTrendWorkspace< Float_ > &workspace, const FitVarianceTrendOptions &options)
Definition fit_variance_trend.hpp:131
void model_gene_variances(const tatami::Matrix< Value_, Index_ > &mat, ModelGeneVariancesBuffers< Stat_ > buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:674
BlockAveragePolicy
Definition model_gene_variances.hpp:31
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Options for fit_variance_trend().
Definition fit_variance_trend.hpp:24
int num_threads
Definition fit_variance_trend.hpp:82
Workspace for fit_variance_trend().
Definition fit_variance_trend.hpp:91
Buffers for model_gene_variances_blocked().
Definition model_gene_variances.hpp:201
ModelGeneVariancesBuffers< Stat_ > average
Definition model_gene_variances.hpp:217
std::vector< ModelGeneVariancesBuffers< Stat_ > > per_block
Definition model_gene_variances.hpp:206
Results of model_gene_variances_blocked().
Definition model_gene_variances.hpp:225
std::vector< ModelGeneVariancesResults< Stat_ > > per_block
Definition model_gene_variances.hpp:246
ModelGeneVariancesResults< Stat_ > average
Definition model_gene_variances.hpp:252
Buffers for model_gene_variances() and friends.
Definition model_gene_variances.hpp:108
Stat_ * means
Definition model_gene_variances.hpp:112
Stat_ * residuals
Definition model_gene_variances.hpp:131
Stat_ * variances
Definition model_gene_variances.hpp:117
Stat_ * fitted
Definition model_gene_variances.hpp:124
Options for model_gene_variances() and friends.
Definition model_gene_variances.hpp:36
FitVarianceTrendOptions fit_variance_trend_options
Definition model_gene_variances.hpp:48
double block_quantile
Definition model_gene_variances.hpp:90
bool trend
Definition model_gene_variances.hpp:42
BlockAveragePolicy block_average_policy
Definition model_gene_variances.hpp:55
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition model_gene_variances.hpp:74
int num_threads
Definition model_gene_variances.hpp:96
scran_blocks::WeightPolicy block_weight_policy
Definition model_gene_variances.hpp:67
Results of model_gene_variances().
Definition model_gene_variances.hpp:139
std::vector< Stat_ > fitted
Definition model_gene_variances.hpp:186
std::vector< Stat_ > means
Definition model_gene_variances.hpp:174
std::vector< Stat_ > residuals
Definition model_gene_variances.hpp:193
std::vector< Stat_ > variances
Definition model_gene_variances.hpp:179
bool sparse_ordered_index