1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
6#include "sanisizer/sanisizer.hpp"
12#include "scan_matrix.hpp"
13#include "cohens_d.hpp"
14#include "simple_diff.hpp"
16#include "average_group_stats.hpp"
17#include "create_combinations.hpp"
128template<
typename Stat_,
typename Rank_>
151 std::vector<SummaryBuffers<Stat_, Rank_> >
cohens_d;
160 std::vector<SummaryBuffers<Stat_, Rank_> >
auc;
186template<
typename Stat_,
typename Rank_>
192 std::vector<std::vector<Stat_> >
mean;
207 std::vector<SummaryResults<Stat_, Rank_> >
cohens_d;
216 std::vector<SummaryResults<Stat_, Rank_> >
auc;
242enum class CacheAction :
unsigned char { SKIP, COMPUTE, CACHE };
245inline std::size_t cap_cache_size(
const std::size_t cache_size,
const std::size_t ngroups) {
248 }
else if (ngroups % 2 == 0) {
249 const auto denom = ngroups / 2;
250 const auto ratio = cache_size / denom;
251 if (ratio <= ngroups - 1) {
254 return denom * (ngroups - 1);
257 const auto denom = (ngroups - 1) / 2;
258 const auto ratio = cache_size / denom;
259 if (ratio <= ngroups) {
262 return denom * ngroups;
288template<
typename Index_,
typename Stat_>
291 EffectsCacher(
const Index_ ngenes,
const std::size_t ngroups,
const std::size_t cache_size) :
294 my_cache_size(cap_cache_size(cache_size, ngroups)),
295 my_actions(sanisizer::cast<decltype(I(my_actions.size()))>(ngroups)),
296 my_common_cache(sanisizer::product<decltype(I(my_common_cache.size()))>(ngenes, my_cache_size)),
297 my_staging_cache(sanisizer::cast<decltype(I(my_staging_cache.size()))>(ngroups))
299 my_unused_pool.reserve(my_cache_size);
300 for (
decltype(I(my_cache_size)) c = 0; c < my_cache_size; ++c) {
301 my_unused_pool.push_back(my_common_cache.data() + sanisizer::product_unsafe<std::size_t>(c, ngenes));
307 std::size_t my_ngroups;
308 std::size_t my_cache_size;
310 std::vector<CacheAction> my_actions;
314 std::vector<Stat_> my_common_cache;
321 std::vector<Stat_*> my_staging_cache;
325 std::vector<Stat_*> my_unused_pool;
329 std::map<std::pair<std::size_t, std::size_t>, Stat_*> my_cached;
337 void fill_effects_from_cache(
const std::size_t group, std::vector<double>& full_effects) {
342 for (
decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
343 if (other == group) {
344 my_actions[other] = CacheAction::SKIP;
348 if (my_cache_size == 0) {
349 my_actions[other] = CacheAction::COMPUTE;
357 my_actions[other] = CacheAction::CACHE;
365 if (my_cached.empty()) {
366 my_actions[other] = CacheAction::COMPUTE;
370 const auto& front = my_cached.begin()->first;
371 if (front.first > group || front.second > other) {
375 my_actions[other] = CacheAction::COMPUTE;
382 my_actions[other] = CacheAction::SKIP;
383 const auto curcache = my_cached.begin()->second;
384 for (
decltype(I(my_ngenes)) i = 0; i < my_ngenes; ++i) {
385 full_effects[sanisizer::nd_offset<std::size_t>(other, my_ngroups, i)] = curcache[i];
388 my_unused_pool.push_back(curcache);
389 my_cached.erase(my_cached.begin());
395 for (
decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
396 if (my_actions[other] != CacheAction::CACHE) {
400 const std::pair<std::size_t, std::size_t> key(other, group);
401 if (my_cached.size() < my_cache_size) {
402 const auto ptr = my_unused_pool.back();
403 my_cached[key] = ptr;
404 my_staging_cache[other] = ptr;
405 my_unused_pool.pop_back();
412 auto it = my_cached.end();
414 if ((it->first).first > other) {
415 const auto ptr = it->second;
416 my_cached[key] = ptr;
417 my_staging_cache[other] = ptr;
423 my_actions[other] = CacheAction::COMPUTE;
429 CacheAction get_action(std::size_t other)
const {
430 return my_actions[other];
433 Stat_* get_cache_location(std::size_t other)
const {
434 return my_staging_cache[other];
438template<
typename Index_,
typename Stat_,
typename Rank_>
439void process_simple_summary_effects(
441 const std::size_t ngroups,
442 const std::size_t nblocks,
443 const std::size_t ncombos,
444 const std::vector<Stat_>& combo_means,
445 const std::vector<Stat_>& combo_vars,
446 const std::vector<Stat_>& combo_detected,
447 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
448 const std::vector<Stat_>& combo_weights,
449 const double threshold,
450 const std::size_t cache_size,
451 const int num_threads)
455 std::vector<Stat_> total_weights_per_group;
456 auto total_weights_ptr = combo_weights.data();
458 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
459 total_weights_ptr = total_weights_per_group.data();
463 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
464 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
465 const auto tmp_means = combo_means.data() + in_offset;
466 const auto tmp_detected = combo_detected.data() + in_offset;
467 average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);
469 }, ngenes, num_threads);
472 PrecomputedPairwiseWeights<Stat_> preweights(ngroups, nblocks, combo_weights.data());
473 EffectsCacher<Index_, Stat_> cache(ngenes, ngroups, cache_size);
474 std::vector<Stat_> full_effects(sanisizer::product<
typename std::vector<Stat_>::size_type>(ngroups, ngenes));
475 auto effect_buffers = sanisizer::create<std::vector<std::vector<Stat_> > >(num_threads);
476 for (
auto& ef : effect_buffers) {
477 sanisizer::resize(ef, ngroups);
480 if (output.cohens_d.size()) {
482 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
483 cache.fill_effects_from_cache(group, full_effects);
486 auto& effect_buffer = effect_buffers[t];
488 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
489 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
490 const auto my_means = combo_means.data() + in_offset;
491 const auto my_variances = combo_vars.data() + in_offset;
492 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
494 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
495 auto cache_action = cache.get_action(other);
496 if (cache_action == internal::CacheAction::COMPUTE) {
497 store_ptr[other] = compute_pairwise_cohens_d_one_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
498 }
else if (cache_action == internal::CacheAction::CACHE) {
499 auto tmp = compute_pairwise_cohens_d_two_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
500 store_ptr[other] = tmp.first;
501 cache.get_cache_location(other)[gene] = tmp.second;
504 summarize_comparisons(ngroups, store_ptr, group, gene, output.cohens_d[group], effect_buffer);
506 }, ngenes, num_threads);
508 const auto mr = output.cohens_d[group].min_rank;
510 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
515 if (output.delta_mean.size()) {
517 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
518 cache.fill_effects_from_cache(group, full_effects);
521 auto& effect_buffer = effect_buffers[t];
523 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
524 const auto my_means = combo_means.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
525 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
527 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
528 const auto cache_action = cache.get_action(other);
529 if (cache_action != internal::CacheAction::SKIP) {
530 const auto val = compute_pairwise_simple_diff(group, other, my_means, ngroups, nblocks, preweights);
531 store_ptr[other] = val;
532 if (cache_action == CacheAction::CACHE) {
533 cache.get_cache_location(other)[gene] = -val;
537 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_mean[group], effect_buffer);
539 }, ngenes, num_threads);
541 const auto mr = output.delta_mean[group].min_rank;
543 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
548 if (output.delta_detected.size()) {
550 for (
decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
551 cache.fill_effects_from_cache(group, full_effects);
554 auto& effect_buffer = effect_buffers[t];
556 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
557 const auto my_detected = combo_detected.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
558 const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);
560 for (
decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
561 const auto cache_action = cache.get_action(other);
562 if (cache_action != CacheAction::SKIP) {
563 const auto val = compute_pairwise_simple_diff(group, other, my_detected, ngroups, nblocks, preweights);
564 store_ptr[other] = val;
565 if (cache_action == CacheAction::CACHE) {
566 cache.get_cache_location(other)[gene] = -val;
570 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_detected[group], effect_buffer);
572 }, ngenes, num_threads);
574 const auto mr = output.delta_detected[group].min_rank;
576 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
582template<
typename Index_,
typename Stat_,
typename Rank_>
583ScoreMarkersSummaryBuffers<Stat_, Rank_> fill_summary_results(
585 const std::size_t ngroups,
586 ScoreMarkersSummaryResults<Stat_, Rank_>& store,
587 const ScoreMarkersSummaryOptions& options)
589 ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
591 internal::fill_average_results(ngenes, ngroups, store.mean, store.detected, output.mean, output.detected);
593 if (options.compute_cohens_d) {
594 output.cohens_d = internal::fill_summary_results(
599 options.compute_mean,
600 options.compute_median,
602 options.compute_min_rank
606 if (options.compute_auc) {
607 output.auc = internal::fill_summary_results(
612 options.compute_mean,
613 options.compute_median,
615 options.compute_min_rank
619 if (options.compute_delta_mean) {
620 output.delta_mean = internal::fill_summary_results(
625 options.compute_mean,
626 options.compute_median,
628 options.compute_min_rank
632 if (options.compute_delta_detected) {
633 output.delta_detected = internal::fill_summary_results(
636 store.delta_detected,
638 options.compute_mean,
639 options.compute_median,
641 options.compute_min_rank
687template<
typename Value_,
typename Index_,
typename Group_,
typename Stat_,
typename Rank_>
690 const Group_*
const group,
694 const auto NC = matrix.
ncol();
695 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
696 const auto ngenes = matrix.
nrow();
697 const auto ngroups = group_sizes.size();
703 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ngroups);
704 std::vector<Stat_> group_means(payload_size), group_vars(payload_size), group_detected(payload_size);
706 const bool do_auc = !output.
auc.empty();
707 std::vector<Stat_> tmp_auc;
708 Stat_* auc_ptr = NULL;
710 tmp_auc.resize(sanisizer::product<
decltype(I(tmp_auc.size()))>(ngroups, ngroups, ngenes));
711 auc_ptr = tmp_auc.data();
715 internal::scan_matrix_by_row<true>(
720 static_cast<int*
>(NULL),
734 internal::scan_matrix_by_column(
746 internal::process_simple_summary_effects(
762 internal::summarize_comparisons(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
763 internal::compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
795template<
typename Value_,
typename Index_,
typename Group_,
typename Block_,
typename Stat_,
typename Rank_>
798 const Group_*
const group,
799 const Block_*
const block,
803 const auto NC = matrix.
ncol();
804 const auto ngenes = matrix.
nrow();
805 const auto ngroups = output.
mean.size();
806 const auto nblocks = tatami_stats::total_groups(block, NC);
808 const auto combinations = internal::create_combinations(ngroups, group, block, NC);
809 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
810 const auto ncombos = combo_sizes.size();
813 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
814 std::vector<Stat_> combo_means(payload_size), combo_vars(payload_size), combo_detected(payload_size);
816 const bool do_auc = !output.
auc.empty();
817 std::vector<Stat_> tmp_auc;
818 Stat_* auc_ptr = NULL;
820 tmp_auc.resize(sanisizer::product<
decltype(I(tmp_auc.size()))>(ngroups, ngroups, ngenes));
821 auc_ptr = tmp_auc.data();
825 internal::scan_matrix_by_row<false>(
844 internal::scan_matrix_by_column(
856 internal::process_simple_summary_effects(
872 internal::summarize_comparisons(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
873 internal::compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.
auc, options.
num_threads);
894template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_>
897 const Group_*
const group,
900 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
902 const auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
927template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_,
typename Block_>
930 const Group_*
const group,
931 const Block_*
const block,
934 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
936 const auto buffers = internal::fill_summary_results(matrix.
nrow(), ngroups, output, options);
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(std::size_t num_blocks, const Size_ *sizes, WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:688
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:796
void parallelize(Function_ fun, Index_ tasks, int threads)
Utilities for effect summarization.