1#ifndef SCRAN_MODEL_GENE_VARIANCES_H
2#define SCRAN_MODEL_GENE_VARIANCES_H
12#include "tatami_stats/tatami_stats.hpp"
14#include "sanisizer/sanisizer.hpp"
66template<
typename Stat_>
93template<
typename Stat_>
100 template<
typename Ngenes_>
102 means(sanisizer::cast<
decltype(
means.size())>(ngenes)
103#ifdef SCRAN_VARIANCES_TEST_INIT
104 , SCRAN_VARIANCES_TEST_INIT
108#ifdef SCRAN_VARIANCES_TEST_INIT
109 , SCRAN_VARIANCES_TEST_INIT
113#ifdef SCRAN_VARIANCES_TEST_INIT
114 , SCRAN_VARIANCES_TEST_INIT
118#ifdef SCRAN_VARIANCES_TEST_INIT
119 , SCRAN_VARIANCES_TEST_INIT
152template<
typename Stat_>
158 std::vector<ModelGeneVariancesBuffers<Stat_> >
per_block;
171template<
typename Stat_>
178 template<
typename Ngenes_,
typename Nblocks_>
181 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
192 std::vector<ModelGeneVariancesResults<Stat_> >
per_block;
206template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
207void compute_variances_dense_row(
211 const std::vector<Index_>& block_size,
214 bool blocked = (block != NULL);
215 auto nblocks = block_size.size();
216 auto NR = mat.
nrow(), NC = mat.
ncol();
219 auto tmp_means = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
220 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(blocked ? nblocks : 0);
224 for (Index_ r = start, end = start + length; r < end; ++r) {
225 auto ptr = ext->fetch(buffer.data());
228 tatami_stats::grouped_variances::direct(
237 static_cast<Index_*
>(NULL)
239 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
240 buffers[b].means[r] = tmp_means[b];
241 buffers[b].variances[r] = tmp_vars[b];
244 auto stat = tatami_stats::variances::direct(ptr, NC,
false);
245 buffers[0].means[r] = stat.first;
246 buffers[0].variances[r] = stat.second;
252template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
253void compute_variances_sparse_row(
255 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
257 const std::vector<Index_>& block_size,
260 bool blocked = (block != NULL);
261 auto nblocks = block_size.size();
262 auto NR = mat.
nrow(), NC = mat.
ncol();
265 auto tmp_means = sanisizer::create<std::vector<Stat_> >(nblocks);
266 auto tmp_vars = sanisizer::create<std::vector<Stat_> >(nblocks);
267 auto tmp_nzero = sanisizer::create<std::vector<Index_> >(nblocks);
275 for (Index_ r = start, end = start + length; r < end; ++r) {
276 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
279 tatami_stats::grouped_variances::direct(
290 static_cast<Index_*
>(NULL)
292 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
293 buffers[b].means[r] = tmp_means[b];
294 buffers[b].variances[r] = tmp_vars[b];
297 auto stat = tatami_stats::variances::direct(range.value, range.number, NC,
false);
298 buffers[0].means[r] = stat.first;
299 buffers[0].variances[r] = stat.second;
305template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
306void compute_variances_dense_column(
308 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
310 const std::vector<Index_>& block_size,
313 bool blocked = (block != NULL);
314 auto nblocks = block_size.size();
315 auto NR = mat.
nrow(), NC = mat.
ncol();
321 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
322 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
323 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
324 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
326 std::vector<tatami_stats::variances::RunningDense<Stat_, Value_, Index_> > runners;
327 runners.reserve(nblocks);
328 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
329 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false);
333 for (
decltype(NC) c = 0; c < NC; ++c) {
334 auto ptr = ext->fetch(buffer.data());
335 runners[block[c]].add(ptr);
338 for (
decltype(NC) c = 0; c < NC; ++c) {
339 auto ptr = ext->fetch(buffer.data());
344 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
347 local_vars.transfer();
348 local_means.transfer();
352template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
353void compute_variances_sparse_column(
355 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
357 const std::vector<Index_>& block_size,
360 bool blocked = (block != NULL);
361 auto nblocks = block_size.size();
362 auto NR = mat.
nrow(), NC = mat.
ncol();
363 auto nonzeros = sanisizer::create<std::vector<std::vector<Index_> > >(
375 auto get_var = [&](Index_ b) -> Stat_* {
return buffers[b].variances; };
376 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_var)> local_vars(thread, nblocks, start, length, std::move(get_var));
377 auto get_mean = [&](Index_ b) -> Stat_* {
return buffers[b].means; };
378 tatami_stats::LocalOutputBuffers<Stat_,
decltype(get_mean)> local_means(thread, nblocks, start, length, std::move(get_mean));
380 std::vector<tatami_stats::variances::RunningSparse<Stat_, Value_, Index_> > runners;
381 runners.reserve(nblocks);
382 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
383 runners.emplace_back(length, local_means.data(b), local_vars.data(b),
false, start);
387 for (
decltype(NC) c = 0; c < NC; ++c) {
388 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
389 runners[block[c]].add(range.value, range.index, range.number);
392 for (
decltype(NC) c = 0; c < NC; ++c) {
393 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
394 runners[0].add(range.value, range.index, range.number);
398 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
401 local_vars.transfer();
402 local_means.transfer();
406template<
typename Value_,
typename Index_,
typename Stat_,
typename Block_>
407void compute_variances(
409 const std::vector<ModelGeneVariancesBuffers<Stat_> >& buffers,
411 const std::vector<Index_>& block_size,
416 compute_variances_sparse_row(mat, buffers, block, block_size, num_threads);
418 compute_variances_dense_row(mat, buffers, block, block_size, num_threads);
422 compute_variances_sparse_column(mat, buffers, block, block_size, num_threads);
424 compute_variances_dense_column(mat, buffers, block, block_size, num_threads);
429template<
typename Index_,
typename Stat_,
class Function_>
432 const std::vector<ModelGeneVariancesBuffers<Stat_> >& per_block,
433 const std::vector<Index_>& block_size,
434 const std::vector<Stat_>& block_weights,
437 std::vector<Stat_*>& tmp_pointers,
438 std::vector<Stat_>& tmp_weights,
445 tmp_pointers.clear();
447 for (
decltype(per_block.size()) b = 0, nblocks = per_block.size(); b < nblocks; ++b) {
448 if (block_size[b] < min_size) {
451 tmp_weights.push_back(block_weights[b]);
452 tmp_pointers.push_back(fun(per_block[b]));
486template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
493 Index_ NR = mat.
nrow(), NC = mat.
ncol();
494 std::vector<Index_> block_size;
497 block_size = tatami_stats::tabulate_groups(block, NC);
500 block_size.push_back(NC);
503 auto nblocks = block_size.size();
508 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
509 const auto& current = buffers.
per_block[b];
510 if (block_size[b] >= 2) {
511 fit_variance_trend(NR, current.means, current.variances, current.fitted, current.residuals, work, fopt);
513 std::fill_n(current.fitted, NR, std::numeric_limits<double>::quiet_NaN());
514 std::fill_n(current.residuals, NR, std::numeric_limits<double>::quiet_NaN());
518 auto ave_means = buffers.
average.means;
519 auto ave_variances = buffers.
average.variances;
520 auto ave_fitted = buffers.
average.fitted;
521 auto ave_residuals = buffers.
average.residuals;
523 if (ave_means || ave_variances || ave_fitted || ave_residuals) {
526 std::vector<Stat_*> tmp_pointers;
527 std::vector<Stat_> tmp_weights;
528 tmp_pointers.reserve(nblocks);
529 tmp_weights.reserve(nblocks);
531 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
532 static_cast<Index_
>(1),
533 [](
const auto& x) -> Stat_* { return x.means; }, tmp_pointers, tmp_weights, ave_means);
535 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
536 static_cast<Index_
>(2),
537 [](
const auto& x) -> Stat_* { return x.variances; }, tmp_pointers, tmp_weights, ave_variances);
539 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
540 static_cast<Index_
>(2),
541 [](
const auto& x) -> Stat_* { return x.fitted; }, tmp_pointers, tmp_weights, ave_fitted);
543 internal::compute_average(NR, buffers.
per_block, block_size, block_weight,
544 static_cast<Index_
>(2),
545 [](
const auto& x) -> Stat_* { return x.residuals; }, tmp_pointers, tmp_weights, ave_residuals);
565template<
typename Value_,
typename Index_,
typename Stat_>
571 bbuffers.
per_block.emplace_back(std::move(buffers));
574 bbuffers.
average.variances = NULL;
575 bbuffers.
average.fitted = NULL;
576 bbuffers.
average.residuals = NULL;
594template<
typename Stat_ =
double,
typename Value_,
typename Index_>
625template<
typename Stat_ =
double,
typename Value_,
typename Index_,
typename Block_>
627 auto nblocks = (block ? tatami_stats::total_groups(block, mat.
ncol()) : 1);
632 for (
decltype(nblocks) b = 0; b < nblocks; ++b) {
634 current.means = output.
per_block[b].means.data();
635 current.variances = output.
per_block[b].variances.data();
636 current.fitted = output.
per_block[b].fitted.data();
637 current.residuals = output.
per_block[b].residuals.data();
642 buffers.
average.variances = NULL;
644 buffers.
average.residuals = NULL;
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Fit a mean-variance trend to log-count data.
void average_vectors_weighted(size_t n, std::vector< Stat_ * > in, const Weight_ *w, Output_ *out, bool skip_nan)
void compute_weights(size_t num_blocks, const Size_ *sizes, WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *weights)
Variance modelling for single-cell expression data.
Definition choose_highly_variable_genes.hpp:16
void fit_variance_trend(std::size_t n, const Float_ *mean, const Float_ *variance, Float_ *fitted, Float_ *residuals, FitVarianceTrendWorkspace< Float_ > &workspace, const FitVarianceTrendOptions &options)
Definition fit_variance_trend.hpp:123
void model_gene_variances_blocked(const tatami::Matrix< Value_, Index_ > &mat, const Block_ *block, const ModelGeneVariancesBlockedBuffers< Stat_ > &buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:487
void model_gene_variances(const tatami::Matrix< Value_, Index_ > &mat, ModelGeneVariancesBuffers< Stat_ > buffers, const ModelGeneVariancesOptions &options)
Definition model_gene_variances.hpp:566
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Options for fit_variance_trend().
Definition fit_variance_trend.hpp:22
int num_threads
Definition fit_variance_trend.hpp:74
Workspace for fit_variance_trend().
Definition fit_variance_trend.hpp:83
Buffers for model_gene_variances_blocked().
Definition model_gene_variances.hpp:153
ModelGeneVariancesBuffers< Stat_ > average
Definition model_gene_variances.hpp:164
std::vector< ModelGeneVariancesBuffers< Stat_ > > per_block
Definition model_gene_variances.hpp:158
Results of model_gene_variances_blocked().
Definition model_gene_variances.hpp:172
std::vector< ModelGeneVariancesResults< Stat_ > > per_block
Definition model_gene_variances.hpp:192
ModelGeneVariancesResults< Stat_ > average
Definition model_gene_variances.hpp:198
Buffers for model_gene_variances() and friends.
Definition model_gene_variances.hpp:67
Stat_ * means
Definition model_gene_variances.hpp:71
Stat_ * residuals
Definition model_gene_variances.hpp:86
Stat_ * variances
Definition model_gene_variances.hpp:76
Stat_ * fitted
Definition model_gene_variances.hpp:81
Options for model_gene_variances() and friends.
Definition model_gene_variances.hpp:26
FitVarianceTrendOptions fit_variance_trend_options
Definition model_gene_variances.hpp:30
bool compute_average
Definition model_gene_variances.hpp:49
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition model_gene_variances.hpp:43
int num_threads
Definition model_gene_variances.hpp:55
scran_blocks::WeightPolicy block_weight_policy
Definition model_gene_variances.hpp:36
Results of model_gene_variances().
Definition model_gene_variances.hpp:94
std::vector< Stat_ > fitted
Definition model_gene_variances.hpp:140
std::vector< Stat_ > means
Definition model_gene_variances.hpp:130
std::vector< Stat_ > residuals
Definition model_gene_variances.hpp:145
std::vector< Stat_ > variances
Definition model_gene_variances.hpp:135
bool sparse_ordered_index