1#ifndef SCRAN_MODEL_GENE_VARIANCES_H
2#define SCRAN_MODEL_GENE_VARIANCES_H
5#include "tatami_stats/tatami_stats.hpp"
64template<
typename Stat_>
91template<
typename Stat_>
100#ifdef SCRAN_VARIANCES_TEST_INIT
101 , SCRAN_VARIANCES_TEST_INIT
105#ifdef SCRAN_VARIANCES_TEST_INIT
106 , SCRAN_VARIANCES_TEST_INIT
110#ifdef SCRAN_VARIANCES_TEST_INIT
111 , SCRAN_VARIANCES_TEST_INIT
115#ifdef SCRAN_VARIANCES_TEST_INIT
116 , SCRAN_VARIANCES_TEST_INIT
149template<
typename Stat_>
155 std::vector<ModelGeneVariancesBuffers<Stat_> >
per_block;
168template<
typename Stat_>
177 for (
size_t b = 0; b < nblocks; ++b) {
188 std::vector<ModelGeneVariancesResults<Stat_> >
per_block;
202template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
203void compute_variances_dense_row(
207 const std::vector<Index_>& block_size,
210 bool blocked = (block != NULL);
211 auto nblocks = block_size.size();
212 auto NR = mat.
nrow(), NC = mat.
ncol();
215 std::vector<Stat_> tmp_means(blocked ? nblocks : 0);
216 std::vector<Stat_> tmp_vars(blocked ? nblocks : 0);
218 std::vector<Value_> buffer(NC);
220 for (Index_ r = start, end = start + length; r < end; ++r) {
221 auto ptr = ext->fetch(buffer.data());
224 tatami_stats::grouped_variances::direct(
233 static_cast<Index_*
>(NULL)
235 for (
size_t b = 0; b < nblocks; ++b) {
236 buffers[b].means[r] = tmp_means[b];
237 buffers[b].variances[r] = tmp_vars[b];
240 auto stat = tatami_stats::variances::direct(ptr, NC,
false);
241 buffers[0].means[r] = stat.first;
242 buffers[0].variances[r] = stat.second;
248template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
249void compute_variances_sparse_row(
251 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
253 const std::vector<Index_>& block_size,
256 bool blocked = (block != NULL);
257 auto nblocks = block_size.size();
258 auto NR = mat.
nrow(), NC = mat.
ncol();
261 std::vector<Stat_> tmp_means(nblocks);
262 std::vector<Stat_> tmp_vars(nblocks);
263 std::vector<Index_> tmp_nzero(nblocks);
265 std::vector<Value_> vbuffer(NC);
266 std::vector<Index_> ibuffer(NC);
271 for (Index_ r = start, end = start + length; r < end; ++r) {
272 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
275 tatami_stats::grouped_variances::direct(
286 static_cast<Index_*
>(NULL)
288 for (
size_t b = 0; b < nblocks; ++b) {
289 buffers[b].means[r] = tmp_means[b];
290 buffers[b].variances[r] = tmp_vars[b];
293 auto stat = tatami_stats::variances::direct(range.value, range.number, NC,
false);
294 buffers[0].means[r] = stat.first;
295 buffers[0].variances[r] = stat.second;
301template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
302void compute_variances_dense_column(
304 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
306 const std::vector<Index_>& block_size,
309 bool blocked = (block != NULL);
310 auto nblocks = block_size.size();
311 auto NR = mat.
nrow(), NC = mat.
ncol();
314 std::vector<Value_> buffer(length);
317 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
318 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
319 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
320 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
322 std::vector<tatami_stats::variances::RunningDense<Stat_, Value_, Index_> > runners;
323 runners.reserve(nblocks);
324 for (
size_t b = 0; b < nblocks; ++b) {
325 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false);
329 for (Index_ c = 0; c < NC; ++c) {
330 auto ptr = ext->fetch(buffer.data());
331 runners[block[c]].add(ptr);
334 for (Index_ c = 0; c < NC; ++c) {
335 auto ptr = ext->fetch(buffer.data());
340 for (
size_t b = 0; b < nblocks; ++b) {
343 local_vars.transfer();
344 local_means.transfer();
348template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
349void compute_variances_sparse_column(
351 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
353 const std::vector<Index_>& block_size,
356 bool blocked = (block != NULL);
357 auto nblocks = block_size.size();
358 auto NR = mat.
nrow(), NC = mat.
ncol();
359 std::vector<std::vector<Index_> > nonzeros(nblocks, std::vector<Index_>(NR));
362 std::vector<Value_> vbuffer(length);
363 std::vector<Index_> ibuffer(length);
368 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
369 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
370 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
371 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
373 std::vector<tatami_stats::variances::RunningSparse<Stat_, Value_, Index_> > runners;
374 runners.reserve(nblocks);
375 for (
size_t b = 0; b < nblocks; ++b) {
376 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false, start);
380 for (Index_ c = 0; c < NC; ++c) {
381 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
382 runners[block[c]].add(range.value, range.index, range.number);
385 for (Index_ c = 0; c < NC; ++c) {
386 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
387 runners[0].add(range.value, range.index, range.number);
391 for (
size_t b = 0; b < nblocks; ++b) {
394 local_vars.transfer();
395 local_means.transfer();
399template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
400void compute_variances(
402 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
404 const std::vector<Index_>& block_size,
409 compute_variances_sparse_row(mat, buffers, block, block_size, num_threads);
411 compute_variances_dense_row(mat, buffers, block, block_size, num_threads);
415 compute_variances_sparse_column(mat, buffers, block, block_size, num_threads);
417 compute_variances_dense_column(mat, buffers, block, block_size, num_threads);
422template<
typename Index_,
typename Stat_,
class Function_>
425 const std::vector<ModelGeneVariancesBuffers<Stat_> >& per_block,
426 const std::vector<Index_>& block_size,
427 const std::vector<Stat_>& block_weights,
430 std::vector<Stat_*>& tmp_pointers,
431 std::vector<Stat_>& tmp_weights,
438 tmp_pointers.clear();
440 for (
size_t b = 0, nblocks = per_block.size(); b < nblocks; ++b) {
441 if (block_size[b] < min_size) {
444 tmp_weights.push_back(block_weights[b]);
445 tmp_pointers.push_back(fun(per_block[b]));
479template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
486 Index_ NR = mat.
nrow(), NC = mat.
ncol();
487 std::vector<Index_> block_size;
490 block_size = tatami_stats::tabulate_groups(block, NC);
493 block_size.push_back(NC);
496 size_t nblocks = block_size.size();
501 for (
size_t b = 0; b < nblocks; ++b) {
502 const auto& current = buffers.
per_block[b];
503 if (block_size[b] >= 2) {
504 fit_variance_trend(NR, current.means, current.variances, current.fitted, current.residuals, work, fopt);
506 std::fill_n(current.fitted, NR, std::numeric_limits<double>::quiet_NaN());
507 std::fill_n(current.residuals, NR, std::numeric_limits<double>::quiet_NaN());
511 auto ave_means = buffers.
average.means;
512 auto ave_variances = buffers.
average.variances;
513 auto ave_fitted = buffers.
average.fitted;
514 auto ave_residuals = buffers.
average.residuals;
516 if (ave_means || ave_variances || ave_fitted || ave_residuals) {
519 std::vector<Stat_*> tmp_pointers;
520 std::vector<Stat_> tmp_weights;
521 tmp_pointers.reserve(nblocks);
522 tmp_weights.reserve(nblocks);
524 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
525 static_cast<Index_
>(1),
526 [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers, tmp_weights, ave_means);
528 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
529 static_cast<Index_
>(2),
530 [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers, tmp_weights, ave_variances);
532 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
533 static_cast<Index_
>(2),
534 [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers, tmp_weights, ave_fitted);
536 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
537 static_cast<Index_
>(2),
538 [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers, tmp_weights, ave_residuals);
558template<
typename Value_,
typename Index_,
typename Stat_>
564 bbuffers.
per_block.emplace_back(std::move(buffers));
567 bbuffers.
average.variances = NULL;
568 bbuffers.
average.fitted = NULL;
569 bbuffers.
average.residuals = NULL;
587template<
typename Stat_ =
double,
typename Value_,
typename Index_>
618template<
typename Stat_ =
double,
typename Value_,
typename Index_,
typename Block_>
620 size_t nblocks = (block ? tatami_stats::total_groups(block, mat.
ncol()) : 1);
625 for (
size_t b = 0; b < nblocks; ++b) {
627 current.means = output.
per_block[b].means.data();
628 current.variances = output.
per_block[b].variances.data();
629 current.fitted = output.
per_block[b].fitted.data();
630 current.residuals = output.
per_block[b].residuals.data();
635 buffers.
average.variances = NULL;
637 buffers.
average.residuals = NULL;
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Fit a mean-variance trend to log-count data.
void average_vectors_weighted(size_t n, std::vector< Stat_ * > in, const Weight_ *w, Output_ *out, bool skip_nan)
void compute_weights(size_t num_blocks, const Size_ *sizes, WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *weights)
Variance modelling for single-cell expression data.
Definition choose_highly_variable_genes.hpp:14
void fit_variance_trend(size_t n, const Float_ *mean, const Float_ *variance, Float_ *fitted, Float_ *residuals, FitVarianceTrendWorkspace< Float_ > &workspace, const FitVarianceTrendOptions &options)
Definition fit_variance_trend.hpp:120
void model_gene_variances_blocked(const tatami::Matrix< Value_, Index_ > &mat, const Block_ *block, const ModelGeneVariancesBlockedBuffers< Stat_ > &buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:480
void model_gene_variances(const tatami::Matrix< Value_, Index_ > &mat, ModelGeneVariancesBuffers< Stat_ > buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:559
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Options for fit_variance_trend().
Definition fit_variance_trend.hpp:19
int num_threads
Definition fit_variance_trend.hpp:71
Workspace for fit_variance_trend().
Definition fit_variance_trend.hpp:80
Buffers for model_gene_variances_blocked().
Definition model_gene_variances.hpp:150
ModelGeneVariancesBuffers< Stat_ > average
Definition model_gene_variances.hpp:161
std::vector< ModelGeneVariancesBuffers< Stat_ > > per_block
Definition model_gene_variances.hpp:155
Results of model_gene_variances_blocked().
Definition model_gene_variances.hpp:169
std::vector< ModelGeneVariancesResults< Stat_ > > per_block
Definition model_gene_variances.hpp:188
ModelGeneVariancesResults< Stat_ > average
Definition model_gene_variances.hpp:194
Buffers for model_gene_variances() and friends.
Definition model_gene_variances.hpp:65
Stat_ * means
Definition model_gene_variances.hpp:69
Stat_ * residuals
Definition model_gene_variances.hpp:84
Stat_ * variances
Definition model_gene_variances.hpp:74
Stat_ * fitted
Definition model_gene_variances.hpp:79
Options for model_gene_variances() and friends.
Definition model_gene_variances.hpp:24
FitVarianceTrendOptions fit_variance_trend_options
Definition model_gene_variances.hpp:28
bool compute_average
Definition model_gene_variances.hpp:47
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition model_gene_variances.hpp:41
int num_threads
Definition model_gene_variances.hpp:53
scran_blocks::WeightPolicy block_weight_policy
Definition model_gene_variances.hpp:34
Results of model_gene_variances().
Definition model_gene_variances.hpp:92
std::vector< Stat_ > fitted
Definition model_gene_variances.hpp:137
std::vector< Stat_ > means
Definition model_gene_variances.hpp:127
std::vector< Stat_ > residuals
Definition model_gene_variances.hpp:142
std::vector< Stat_ > variances
Definition model_gene_variances.hpp:132
bool sparse_ordered_index