1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
6#include "sanisizer/sanisizer.hpp"
12#include "scan_matrix.hpp"
13#include "cohens_d.hpp"
14#include "simple_diff.hpp"
16#include "average_group_stats.hpp"
17#include "create_combinations.hpp"
128template<
typename Stat_,
typename Rank_>
151 std::vector<SummaryBuffers<Stat_, Rank_> >
cohens_d;
160 std::vector<SummaryBuffers<Stat_, Rank_> >
auc;
186template<
typename Stat_,
typename Rank_>
192 std::vector<std::vector<Stat_> >
mean;
207 std::vector<SummaryResults<Stat_, Rank_> >
cohens_d;
216 std::vector<SummaryResults<Stat_, Rank_> >
auc;
242enum class CacheAction :
unsigned char { SKIP, COMPUTE, CACHE };
245inline std::size_t cap_cache_size(
const std::size_t cache_size,
const std::size_t ngroups) {
248 }
else if (ngroups % 2 == 0) {
249 const auto denom = ngroups / 2;
250 const auto ratio = cache_size / denom;
251 if (ratio <= ngroups - 1) {
254 return denom * (ngroups - 1);
257 const auto denom = (ngroups - 1) / 2;
258 const auto ratio = cache_size / denom;
259 if (ratio <= ngroups) {
262 return denom * ngroups;
288template<
typename Index_,
typename Stat_>
291 EffectsCacher(
const Index_ ngenes,
const std::size_t ngroups,
const std::size_t cache_size) :
294 my_cache_size(cap_cache_size(cache_size, ngroups)),
295 my_actions(sanisizer::cast<decltype(I(my_actions.size()))>(ngroups)),
296 my_common_cache(sanisizer::product<decltype(I(my_common_cache.size()))>(ngenes, my_cache_size)),
297 my_staging_cache(sanisizer::cast<decltype(I(my_staging_cache.size()))>(ngroups))
299 my_unused_pool.reserve(my_cache_size);
300 for (
decltype(I(my_cache_size)) c = 0; c < my_cache_size; ++c) {
301 my_unused_pool.push_back(my_common_cache.data() + sanisizer::product_unsafe<std::size_t>(c, ngenes));
307 std::size_t my_ngroups;
308 std::size_t my_cache_size;
310 std::vector<CacheAction> my_actions;
314 std::vector<Stat_> my_common_cache;
321 std::vector<Stat_*> my_staging_cache;
325 std::vector<Stat_*> my_unused_pool;
329 std::map<std::pair<std::size_t, std::size_t>, Stat_*> my_cached;
337 void fill_effects_from_cache(
const std::size_t group, std::vector<double>& full_effects) {
342 for (
decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
343 if (other == group) {
344 my_actions[other] = CacheAction::SKIP;
348 if (my_cache_size == 0) {
349 my_actions[other] = CacheAction::COMPUTE;
357 my_actions[other] = CacheAction::CACHE;
365 if (my_cached.empty()) {
366 my_actions[other] = CacheAction::COMPUTE;
370 const auto& front = my_cached.begin()->first;
371 if (front.first > group || front.second > other) {
375 my_actions[other] = CacheAction::COMPUTE;
382 my_actions[other] = CacheAction::SKIP;
383 const auto curcache = my_cached.begin()->second;
384 for (
decltype(I(my_ngenes)) i = 0; i < my_ngenes; ++i) {
385 full_effects[sanisizer::nd_offset<std::size_t>(other, my_ngroups, i)] = curcache[i];
388 my_unused_pool.push_back(curcache);
389 my_cached.erase(my_cached.begin());
395 for (
decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
396 if (my_actions[other] != CacheAction::CACHE) {
400 const std::pair<std::size_t, std::size_t> key(other, group);
401 if (my_cached.size() < my_cache_size) {
402 const auto ptr = my_unused_pool.back();
403 my_cached[key] = ptr;
404 my_staging_cache[other] = ptr;
405 my_unused_pool.pop_back();
412 auto it = my_cached.end();
414 if ((it->first).first > other) {
415 const auto ptr = it->second;
416 my_cached[key] = ptr;
417 my_staging_cache[other] = ptr;
423 my_actions[other] = CacheAction::COMPUTE;
429 CacheAction get_action(std::size_t other)
const {
430 return my_actions[other];
433 Stat_* get_cache_location(std::size_t other)
const {
434 return my_staging_cache[other];
438template<
typename Index_,
typename Stat_,
typename Rank_>
439void process_simple_summary_effects(
441 const std::size_t ngroups,
442 const std::size_t nblocks,
443 const std::size_t ncombos,
444 const std::vector<Stat_>& combo_means,
445 const std::vector<Stat_>& combo_vars,
446 const std::vector<Stat_>& combo_detected,
447 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
448 const std::vector<Stat_>& combo_weights,
449 const double threshold,
450 const std::size_t cache_size,
451 const int num_threads)
455 std::vector<Stat_> total_weights_per_group;
456 auto total_weights_ptr = combo_weights.data();
458 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
459 total_weights_ptr = total_weights_per_group.data();
463 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
464 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
465 const auto tmp_means = combo_means.data() + in_offset;
466 const auto tmp_detected = combo_detected.data() + in_offset;
467 average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);
469 }, ngenes, num_threads);
472 PrecomputedPairwiseWeights<Stat_> preweights(ngroups, nblocks, combo_weights.data());
473 EffectsCacher<Index_, Stat_> cache(ngenes, ngroups, cache_size);
474 std::vector<Stat_> full_effects(sanisizer::product<
typename std::vector<Stat_>::size_type>(ngroups, ngenes));
475 auto effect_buffers = sanisizer::create<std::vector<std::vector<Stat_> > >(num_threads);
476 for (
auto& ef : effect_buffers) {
477 sanisizer::resize(ef, ngroups);
480 if (output.cohens_d.size()) {
482 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
483 cache.fill_effects_from_cache(group, full_effects);
486 auto& effect_buffer = effect_buffers[t];
488 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
489 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
490 const auto my_means = combo_means.data() + in_offset;
491 const auto my_variances = combo_vars.data() + in_offset;
492 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
494 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
495 auto cache_action = cache.get_action(other);
496 if (cache_action == CacheAction::COMPUTE) {
497 store_ptr[other] = compute_pairwise_cohens_d_one_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
498 }
else if (cache_action == CacheAction::CACHE) {
499 auto tmp = compute_pairwise_cohens_d_two_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
500 store_ptr[other] = tmp.first;
501 cache.get_cache_location(other)[gene] = tmp.second;
504 summarize_comparisons(ngroups, store_ptr, group, gene, output.cohens_d[group], effect_buffer);
506 }, ngenes, num_threads);
508 const auto mr = output.cohens_d[group].min_rank;
510 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
515 if (output.delta_mean.size()) {
517 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
518 cache.fill_effects_from_cache(group, full_effects);
521 auto& effect_buffer = effect_buffers[t];
523 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
524 const auto my_means = combo_means.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
525 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
527 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
528 const auto cache_action = cache.get_action(other);
529 if (cache_action != CacheAction::SKIP) {
530 const auto val = compute_pairwise_simple_diff(group, other, my_means, ngroups, nblocks, preweights);
531 store_ptr[other] = val;
532 if (cache_action == CacheAction::CACHE) {
533 cache.get_cache_location(other)[gene] = -val;
537 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_mean[group], effect_buffer);
539 }, ngenes, num_threads);
541 const auto mr = output.delta_mean[group].min_rank;
543 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
548 if (output.delta_detected.size()) {
550 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
551 cache.fill_effects_from_cache(group, full_effects);
554 auto& effect_buffer = effect_buffers[t];
556 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
557 const auto my_detected = combo_detected.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
558 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
560 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
561 const auto cache_action = cache.get_action(other);
562 if (cache_action != CacheAction::SKIP) {
563 const auto val = compute_pairwise_simple_diff(group, other, my_detected, ngroups, nblocks, preweights);
564 store_ptr[other] = val;
565 if (cache_action == CacheAction::CACHE) {
566 cache.get_cache_location(other)[gene] = -val;
570 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_detected[group], effect_buffer);
572 }, ngenes, num_threads);
574 const auto mr = output.delta_detected[group].min_rank;
576 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
582template<
typename Index_,
typename Stat_,
typename Rank_>
583ScoreMarkersSummaryBuffers<Stat_, Rank_> fill_summary_results(
585 const std::size_t ngroups,
586 ScoreMarkersSummaryResults<Stat_, Rank_>& store,
587 const ScoreMarkersSummaryOptions& options)
589 ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
591 fill_average_results(ngenes, ngroups, store.mean, store.detected, output.mean, output.detected);
593 if (options.compute_cohens_d) {
594 output.cohens_d = fill_summary_results(
599 options.compute_mean,
600 options.compute_median,
602 options.compute_min_rank
606 if (options.compute_auc) {
607 output.auc = fill_summary_results(
612 options.compute_mean,
613 options.compute_median,
615 options.compute_min_rank
619 if (options.compute_delta_mean) {
620 output.delta_mean = fill_summary_results(
625 options.compute_mean,
626 options.compute_median,
628 options.compute_min_rank
632 if (options.compute_delta_detected) {
633 output.delta_detected = fill_summary_results(
636 store.delta_detected,
638 options.compute_mean,
639 options.compute_median,
641 options.compute_min_rank
648template<
typename Stat_,
typename Rank_>
649bool auc_needs_minrank(
const std::vector<SummaryBuffers<Stat_, Rank_> >& auc_summaries) {
650 for (
const auto& outs : auc_summaries) {
669 const std::size_t ngroups,
670 const Group_*
const group,
671 const std::size_t nblocks,
672 const Block_*
const block,
673 const std::size_t ncombos,
674 const std::size_t*
const combo,
675 const std::vector<Index_>& combo_sizes,
676 const ScoreMarkersSummaryOptions& options,
677 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output
679 const auto ngenes = matrix.
nrow();
680 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
681 std::vector<Stat_> combo_means(payload_size), combo_vars(payload_size), combo_detected(payload_size);
687 options.block_weight_policy,
688 options.variable_block_weight_parameters
691 const bool do_auc = !output.auc.empty();
694 if (do_auc && !auc_needs_minrank(output.auc)) {
697 struct AucResultWorkspace {
698 AucResultWorkspace() =
default;
699 AucResultWorkspace(
const std::size_t ngroups) :
700 pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
701 summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups))
703 std::vector<Stat_> pairwise_buffer;
704 std::vector<Stat_> summary_buffer;
707 scan_matrix_by_row_custom_auc<single_block_>(
719 [&](
int) -> AucResultWorkspace {
720 return AucResultWorkspace(ngroups);
724 AucScanWorkspace<Value_, Group_, Index_, Stat_>& auc_work,
725 AucResultWorkspace& res_work
727 process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
728 for (
decltype(I(ngroups)) l = 0; l < ngroups; ++l) {
729 const auto current_effects = res_work.pairwise_buffer.data() + sanisizer::product_unsafe<std::size_t>(l, ngroups);
730 summarize_comparisons(ngroups, current_effects, l, gene, output.auc[l], res_work.summary_buffer);
739 std::vector<Stat_> tmp_auc;
740 Stat_* auc_ptr = NULL;
742 tmp_auc.resize(sanisizer::product<
decltype(I(tmp_auc.size()))>(ngroups, ngroups, ngenes));
743 auc_ptr = tmp_auc.data();
746 scan_matrix_by_row_full_auc<single_block_>(
765 summarize_comparisons(ngenes, ngroups, auc_ptr, output.auc, options.num_threads);
766 compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.auc, options.num_threads);
771 scan_matrix_by_column(
774 if constexpr(single_block_) {
781 if constexpr(single_block_) {
795 process_simple_summary_effects(
850template<
typename Value_,
typename Index_,
typename Group_,
typename Stat_,
typename Rank_>
853 const Group_*
const group,
857 const auto NC = matrix.
ncol();
858 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
859 const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
861 internal::score_markers_summary<true>(
866 static_cast<int*
>(NULL),
868 static_cast<std::size_t*
>(NULL),
903template<
typename Value_,
typename Index_,
typename Group_,
typename Block_,
typename Stat_,
typename Rank_>
906 const Group_*
const group,
907 const Block_*
const block,
911 const auto NC = matrix.
ncol();
912 const auto ngroups = output.
mean.size();
913 const auto nblocks = tatami_stats::total_groups(block, NC);
915 const auto combinations = internal::create_combinations(ngroups, group, block, NC);
916 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
917 const auto ncombos = combo_sizes.size();
919 internal::score_markers_summary<false>(
921 sanisizer::cast<std::size_t>(ngroups),
923 sanisizer::cast<std::size_t>(nblocks),
925 sanisizer::cast<std::size_t>(ncombos),
951template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_>
954 const Group_*
const group,
957 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
959 const auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
984template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_,
typename Block_>
987 const Group_*
const group,
988 const Block_*
const block,
991 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
993 const auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:851
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:904
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Utilities for effect summarization.