1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
4#include "scan_matrix.hpp"
6#include "simple_diff.hpp"
8#include "average_group_stats.hpp"
9#include "create_combinations.hpp"
13#include "sanisizer/sanisizer.hpp"
119template<
typename Stat_,
typename Rank_>
142 std::vector<SummaryBuffers<Stat_, Rank_> >
cohens_d;
151 std::vector<SummaryBuffers<Stat_, Rank_> >
auc;
177template<
typename Stat_,
typename Rank_>
183 std::vector<std::vector<Stat_> >
mean;
198 std::vector<SummaryResults<Stat_, Rank_> >
cohens_d;
207 std::vector<SummaryResults<Stat_, Rank_> >
auc;
233enum class CacheAction :
unsigned char { SKIP, COMPUTE, CACHE };
236inline std::size_t cap_cache_size(std::size_t cache_size, std::size_t ngroups) {
239 }
else if (ngroups % 2 == 0) {
240 auto denom = ngroups / 2;
241 auto ratio = cache_size / denom;
242 if (ratio <= ngroups - 1) {
245 return denom * (ngroups - 1);
248 auto denom = (ngroups - 1) / 2;
249 auto ratio = cache_size / denom;
250 if (ratio <= ngroups) {
253 return denom * ngroups;
279template<
typename Index_,
typename Stat_>
282 EffectsCacher(Index_ ngenes, std::size_t ngroups, std::size_t cache_size) :
285 my_cache_size(cap_cache_size(cache_size, ngroups)),
286 my_actions(sanisizer::cast<decltype(my_actions.size())>(ngroups)),
287 my_common_cache(sanisizer::product<decltype(my_common_cache.size())>(ngenes, my_cache_size)),
288 my_staging_cache(sanisizer::cast<decltype(my_staging_cache.size())>(ngroups))
290 my_unused_pool.reserve(my_cache_size);
291 auto ptr = my_common_cache.data();
292 for (
decltype(my_cache_size) c = 0; c < my_cache_size; ++c, ptr += ngenes) {
293 my_unused_pool.push_back(ptr);
299 std::size_t my_ngroups;
300 std::size_t my_cache_size;
302 std::vector<CacheAction> my_actions;
306 std::vector<Stat_> my_common_cache;
313 std::vector<Stat_*> my_staging_cache;
317 std::vector<Stat_*> my_unused_pool;
321 std::map<std::pair<std::size_t, std::size_t>, Stat_*> my_cached;
329 void fill_effects_from_cache(std::size_t group, std::vector<double>& full_effects) {
334 for (
decltype(my_ngroups) other = 0; other < my_ngroups; ++other) {
335 if (other == group) {
336 my_actions[other] = CacheAction::SKIP;
340 if (my_cache_size == 0) {
341 my_actions[other] = CacheAction::COMPUTE;
349 my_actions[other] = CacheAction::CACHE;
357 if (my_cached.empty()) {
358 my_actions[other] = CacheAction::COMPUTE;
362 const auto& front = my_cached.begin()->first;
363 if (front.first > group || front.second > other) {
367 my_actions[other] = CacheAction::COMPUTE;
374 my_actions[other] = CacheAction::SKIP;
375 auto curcache = my_cached.begin()->second;
376 for (
decltype(my_ngenes) i = 0; i < my_ngenes; ++i) {
377 full_effects[sanisizer::nd_offset<std::size_t>(other, my_ngroups, i)] = curcache[i];
380 my_unused_pool.push_back(curcache);
381 my_cached.erase(my_cached.begin());
387 for (
decltype(my_ngroups) other = 0; other < my_ngroups; ++other) {
388 if (my_actions[other] != CacheAction::CACHE) {
392 std::pair<std::size_t, std::size_t> key(other, group);
393 if (my_cached.size() < my_cache_size) {
394 auto ptr = my_unused_pool.back();
395 my_cached[key] = ptr;
396 my_staging_cache[other] = ptr;
397 my_unused_pool.pop_back();
404 auto it = my_cached.end();
406 if ((it->first).first > other) {
407 auto ptr = it->second;
408 my_cached[key] = ptr;
409 my_staging_cache[other] = ptr;
415 my_actions[other] = CacheAction::COMPUTE;
421 CacheAction get_action(std::size_t other)
const {
422 return my_actions[other];
425 Stat_* get_cache_location(std::size_t other)
const {
426 return my_staging_cache[other];
430template<
typename Index_,
typename Stat_,
typename Rank_>
431void process_simple_summary_effects(
436 const std::vector<Stat_>& combo_means,
437 const std::vector<Stat_>& combo_vars,
438 const std::vector<Stat_>& combo_detected,
439 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
440 const std::vector<Stat_>& combo_weights,
442 std::size_t cache_size,
447 std::vector<Stat_> total_weights_per_group;
448 const Stat_* total_weights_ptr = combo_weights.data();
450 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
451 total_weights_ptr = total_weights_per_group.data();
455 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
456 auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
457 const auto* tmp_means = combo_means.data() + in_offset;
458 const auto* tmp_detected = combo_detected.data() + in_offset;
459 average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);
461 }, ngenes, num_threads);
464 PrecomputedPairwiseWeights<Stat_> preweights(ngroups, nblocks, combo_weights.data());
465 EffectsCacher<Index_, Stat_> cache(ngenes, ngroups, cache_size);
466 std::vector<Stat_> full_effects(sanisizer::product<
typename std::vector<Stat_>::size_type>(ngroups, ngenes));
467 auto effect_buffers = sanisizer::create<std::vector<std::vector<Stat_> > >(num_threads);
468 for (
auto& ef : effect_buffers) {
472 if (output.cohens_d.size()) {
474 for (
decltype(ngroups) group = 0; group < ngroups; ++group) {
475 cache.fill_effects_from_cache(group, full_effects);
478 auto& effect_buffer = effect_buffers[t];
480 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
481 auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
482 auto my_means = combo_means.data() + in_offset;
483 auto my_variances = combo_vars.data() + in_offset;
484 auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
486 for (
decltype(ngroups) other = 0; other < ngroups; ++other) {
487 auto cache_action = cache.get_action(other);
488 if (cache_action == internal::CacheAction::COMPUTE) {
489 store_ptr[other] = compute_pairwise_cohens_d_one_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
490 }
else if (cache_action == internal::CacheAction::CACHE) {
491 auto tmp = compute_pairwise_cohens_d_two_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
492 store_ptr[other] = tmp.first;
493 cache.get_cache_location(other)[gene] = tmp.second;
496 summarize_comparisons(ngroups, store_ptr, group, gene, output.cohens_d[group], effect_buffer);
498 }, ngenes, num_threads);
500 auto mr = output.cohens_d[group].min_rank;
502 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
507 if (output.delta_mean.size()) {
509 for (
decltype(ngroups) group = 0; group < ngroups; ++group) {
510 cache.fill_effects_from_cache(group, full_effects);
513 auto& effect_buffer = effect_buffers[t];
515 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
516 auto my_means = combo_means.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
517 auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
519 for (
decltype(ngroups) other = 0; other < ngroups; ++other) {
520 auto cache_action = cache.get_action(other);
521 if (cache_action != internal::CacheAction::SKIP) {
522 auto val = compute_pairwise_simple_diff(group, other, my_means, ngroups, nblocks, preweights);
523 store_ptr[other] = val;
524 if (cache_action == CacheAction::CACHE) {
525 cache.get_cache_location(other)[gene] = -val;
529 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_mean[group], effect_buffer);
531 }, ngenes, num_threads);
533 auto mr = output.delta_mean[group].min_rank;
535 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
540 if (output.delta_detected.size()) {
542 for (
decltype(ngroups) group = 0; group < ngroups; ++group) {
543 cache.fill_effects_from_cache(group, full_effects);
546 auto& effect_buffer = effect_buffers[t];
548 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
549 auto my_detected = combo_detected.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
550 auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
552 for (
decltype(ngroups) other = 0; other < ngroups; ++other) {
553 auto cache_action = cache.get_action(other);
554 if (cache_action != CacheAction::SKIP) {
555 auto val = compute_pairwise_simple_diff(group, other, my_detected, ngroups, nblocks, preweights);
556 store_ptr[other] = val;
557 if (cache_action == CacheAction::CACHE) {
558 cache.get_cache_location(other)[gene] = -val;
562 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_detected[group], effect_buffer);
564 }, ngenes, num_threads);
566 auto mr = output.delta_detected[group].min_rank;
568 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
574template<
typename Index_,
typename Stat_,
typename Rank_>
575ScoreMarkersSummaryBuffers<Stat_, Rank_> fill_summary_results(Index_ ngenes, std::size_t ngroups, ScoreMarkersSummaryResults<Stat_, Rank_>& store,
const ScoreMarkersSummaryOptions& options) {
576 ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
578 internal::fill_average_results(ngenes, ngroups, store.mean, store.detected, output.mean, output.detected);
580 if (options.compute_cohens_d) {
581 output.cohens_d = internal::fill_summary_results(
586 options.compute_mean,
587 options.compute_median,
589 options.compute_min_rank
593 if (options.compute_auc) {
594 output.auc = internal::fill_summary_results(
599 options.compute_mean,
600 options.compute_median,
602 options.compute_min_rank
606 if (options.compute_delta_mean) {
607 output.delta_mean = internal::fill_summary_results(
612 options.compute_mean,
613 options.compute_median,
615 options.compute_min_rank
619 if (options.compute_delta_detected) {
620 output.delta_detected = internal::fill_summary_results(
623 store.delta_detected,
625 options.compute_mean,
626 options.compute_median,
628 options.compute_min_rank
672template<
typename Value_,
typename Index_,
typename Group_,
typename Stat_,
typename Rank_>
679 auto NC = matrix.
ncol();
680 auto group_sizes = tatami_stats::tabulate_groups(group, NC);
681 auto ngenes = matrix.
nrow();
682 auto ngroups = group_sizes.size();
688 auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ngroups);
689 std::vector<Stat_> group_means(payload_size), group_vars(payload_size), group_detected(payload_size);
691 bool do_auc = !output.
auc.empty();
692 std::vector<Stat_> tmp_auc;
693 Stat_* auc_ptr = NULL;
695 tmp_auc.resize(sanisizer::product<
decltype(tmp_auc.size())>(ngroups, ngroups, ngenes));
696 auc_ptr = tmp_auc.data();
700 internal::scan_matrix_by_row<true>(
705 static_cast<int*
>(NULL),
719 internal::scan_matrix_by_column(
731 internal::process_simple_summary_effects(
747 internal::summarize_comparisons(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
748 internal::compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
779template<
typename Value_,
typename Index_,
typename Group_,
typename Block_,
typename Stat_,
typename Rank_>
787 auto NC = matrix.
ncol();
788 auto ngenes = matrix.
nrow();
789 auto ngroups = output.
mean.size();
790 auto nblocks = tatami_stats::total_groups(block, NC);
792 auto combinations = internal::create_combinations(ngroups, group, block, NC);
793 auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
794 auto ncombos = combo_sizes.size();
797 auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
798 std::vector<Stat_> combo_means(payload_size), combo_vars(payload_size), combo_detected(payload_size);
800 bool do_auc = !output.
auc.empty();
801 std::vector<Stat_> tmp_auc;
802 Stat_* auc_ptr = NULL;
804 tmp_auc.resize(sanisizer::product<
decltype(tmp_auc.size())>(ngroups, ngroups, ngenes));
805 auc_ptr = tmp_auc.data();
809 internal::scan_matrix_by_row<false>(
828 internal::scan_matrix_by_column(
840 internal::process_simple_summary_effects(
856 internal::summarize_comparisons(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
857 internal::compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
877template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_>
883 auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
885 auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
909template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_,
typename Block_>
916 auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
918 auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(std::size_t num_blocks, const Size_ *sizes, WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:673
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *group, const Block_ *block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:780
void parallelize(Function_ fun, Index_ tasks, int threads)
Utilities for effect summarization.