scran_markers
Marker detection for single-cell data
score_markers_summary.hpp
#ifndef SCRAN_SCORE_MARKERS_HPP
#define SCRAN_SCORE_MARKERS_HPP

#include "scran_blocks/scran_blocks.hpp"
#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

#include <array>
#include <map>
#include <vector>

#include "scan_matrix.hpp"
#include "cohens_d.hpp"
#include "simple_diff.hpp"
#include "summarize_comparisons.hpp"
#include "average_group_stats.hpp"
#include "create_combinations.hpp"
#include "utils.hpp"

namespace scran_markers {

// Options for score_markers_summary() and friends.
struct ScoreMarkersSummaryOptions {
    // Threshold on the difference in means, used when computing Cohen's d and the AUC.
    double threshold = 0;

    // Number of threads to use.
    int num_threads = 1;

    // Number of reverse effect-size vectors to cache during summarization.
    int cache_size = 100;

    // Whether to compute Cohen's d.
    bool compute_cohens_d = true;

    // Whether to compute the AUC.
    bool compute_auc = true;

    // Whether to compute the difference in means.
    bool compute_delta_mean = true;

    // Whether to compute the difference in detected proportions.
    bool compute_delta_detected = true;

    // Whether to compute the minimum of each effect size across pairwise comparisons.
    bool compute_min = true;

    // Whether to compute the mean of each effect size across pairwise comparisons.
    bool compute_mean = true;

    // Whether to compute the median of each effect size across pairwise comparisons.
    bool compute_median = true;

    // Whether to compute the maximum of each effect size across pairwise comparisons.
    bool compute_max = true;

    // Whether to compute the minimum rank of each effect size across pairwise comparisons.
    bool compute_min_rank = true;

    // Policy for weighting the contribution of each block when averaging statistics across blocks.
    scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;

    // Parameters for the variable block weighting policy.
    scran_blocks::VariableWeightParameters variable_block_weight_parameters;
};
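
// A minimal configuration sketch (not part of the original header): the field
// names below are the options defined above, while the specific values are
// arbitrary choices for illustration.
//
//     ScoreMarkersSummaryOptions opt;
//     opt.compute_auc = false; // skip the AUC to save time and memory
//     opt.num_threads = 4;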

// Buffers in which to store the statistics computed by score_markers_summary() and friends.
template<typename Stat_, typename Rank_>
struct ScoreMarkersSummaryBuffers {
    // Pointers to per-group buffers for the mean expression of each gene.
    std::vector<Stat_*> mean;

    // Pointers to per-group buffers for the proportion of cells with detected expression.
    std::vector<Stat_*> detected;

    // Summary buffers for Cohen's d, one entry per group.
    std::vector<SummaryBuffers<Stat_, Rank_> > cohens_d;

    // Summary buffers for the AUC, one entry per group.
    std::vector<SummaryBuffers<Stat_, Rank_> > auc;

    // Summary buffers for the difference in means, one entry per group.
    std::vector<SummaryBuffers<Stat_, Rank_> > delta_mean;

    // Summary buffers for the difference in detected proportions, one entry per group.
    std::vector<SummaryBuffers<Stat_, Rank_> > delta_detected;
};

// Results of score_markers_summary() and friends.
template<typename Stat_, typename Rank_>
struct ScoreMarkersSummaryResults {
    // Mean expression of each gene in each group.
    std::vector<std::vector<Stat_> > mean;

    // Proportion of cells with detected expression for each gene in each group.
    std::vector<std::vector<Stat_> > detected;

    // Summaries of Cohen's d, one entry per group.
    std::vector<SummaryResults<Stat_, Rank_> > cohens_d;

    // Summaries of the AUC, one entry per group.
    std::vector<SummaryResults<Stat_, Rank_> > auc;

    // Summaries of the difference in means, one entry per group.
    std::vector<SummaryResults<Stat_, Rank_> > delta_mean;

    // Summaries of the difference in detected proportions, one entry per group.
    std::vector<SummaryResults<Stat_, Rank_> > delta_detected;
};

namespace internal {

enum class CacheAction : unsigned char { SKIP, COMPUTE, CACHE };

// Safely cap the cache size at the maximum possible number of comparisons (i.e., ngroups * (ngroups - 1) / 2).
inline std::size_t cap_cache_size(const std::size_t cache_size, const std::size_t ngroups) {
    if (ngroups < 2) {
        return 0;
    } else if (ngroups % 2 == 0) {
        const auto denom = ngroups / 2;
        const auto ratio = cache_size / denom;
        if (ratio <= ngroups - 1) {
            return cache_size;
        } else {
            return denom * (ngroups - 1);
        }
    } else {
        const auto denom = (ngroups - 1) / 2;
        const auto ratio = cache_size / denom;
        if (ratio <= ngroups) {
            return cache_size;
        } else {
            return denom * ngroups;
        }
    }
}
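
// Worked example (illustrative values, not from the original comments): with
// ngroups = 5 there are 5 * 4 / 2 = 10 pairwise comparisons, so a requested
// cache_size of 100 is capped at 10. The even/odd branches divide one factor
// by 2 before comparing, presumably to avoid overflow: the exact comparison
// count 'denom * factor' is only formed in the branch where it is already
// known to be smaller than the requested cache_size, so the product cannot
// overflow even for very large ngroups.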

/*
 * We compute effect sizes in a pairwise fashion with some nested loops, i.e.,
 * iterating over g1 in [1, G) and then over g2 in [0, g1). When we compute the
 * effect size for g1 vs g2, we sometimes get a free effect size for g2 vs g1,
 * which we call the "reverse effect". However, we can't use the reverse effect
 * size until we get around to summarizing effects for g2, hence the caching.
 *
 * This cache tries to store as many of the reverse effects as possible before
 * it starts evicting. Evictions are based on the principle that it is better
 * to store effects that will be re-used quickly, thus freeing up the cache for
 * future stores. The 'speed' of reusability of each cache entry depends on the
 * first group in the comparison corresponding to each cached effect size; the
 * smaller the first group, the sooner it will be reached when iterating across
 * groups in the ScoreMarkers function.
 *
 * So, the policy is to evict cache entries when the identity of the first
 * group in the cached entry is larger than the identity of the first group of
 * the incoming entry. Given that the cache is full and we have to throw away
 * one of these effects anyway, I'd prefer to hold onto the one that will be
 * used soonest, because at least its slot will be freed up rapidly.
 */
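
// A small worked example (not from the original comments): with G = 4 groups,
// summarizing group 1 may yield free reverse effects for the comparisons
// 2-vs-1 and 3-vs-1, with cache keys (2, 1) and (3, 1). If only one slot is
// free, it goes to (2, 1) and the reverse effect for (3, 1) is not cached at
// all, because group 2 will be summarized before group 3 and so its slot is
// returned to the pool sooner.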
template<typename Index_, typename Stat_>
class EffectsCacher {
public:
    EffectsCacher(const Index_ ngenes, const std::size_t ngroups, const std::size_t cache_size) :
        my_ngenes(ngenes),
        my_ngroups(ngroups),
        my_cache_size(cap_cache_size(cache_size, ngroups)),
        my_actions(sanisizer::cast<decltype(I(my_actions.size()))>(ngroups)),
        my_common_cache(sanisizer::product<decltype(I(my_common_cache.size()))>(ngenes, my_cache_size)),
        my_staging_cache(sanisizer::cast<decltype(I(my_staging_cache.size()))>(ngroups))
    {
        my_unused_pool.reserve(my_cache_size);
        for (decltype(I(my_cache_size)) c = 0; c < my_cache_size; ++c) {
            my_unused_pool.push_back(my_common_cache.data() + sanisizer::product_unsafe<std::size_t>(c, ngenes));
        }
    }

private:
    Index_ my_ngenes;
    std::size_t my_ngroups;
    std::size_t my_cache_size;

    std::vector<CacheAction> my_actions;

    // 'my_common_cache' holds a single allocation for all cached vectors, so that
    // we don't have to do a lot of fiddling with move constructors, swaps, etc.
    std::vector<Stat_> my_common_cache;

    // 'my_staging_cache' contains the set of cached effects in the other
    // direction, i.e., all other groups compared to the current group. This is
    // only used to avoid repeated look-ups in 'my_cached' while filling the
    // effect size vectors; they will ultimately be transferred to 'my_cached' after
    // the processing for the current group is complete.
    std::vector<Stat_*> my_staging_cache;

    // 'my_unused_pool' contains the currently-unused set of pointers to free
    // subarrays in 'my_common_cache'.
    std::vector<Stat_*> my_unused_pool;

    // 'my_cached' contains the cached effect size vectors from previous groups. Note
    // that the use of a map is deliberate, as we need the sorted order of its keys.
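    // (Added note: keys are (first group, second group) pairs compared
    // lexicographically, so my_cached.begin() is always the entry whose first
    // group is smallest, i.e., the comparison that will be consumed soonest.)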
    std::map<std::pair<std::size_t, std::size_t>, Stat_*> my_cached;

public:
    void clear() {
        my_cached.clear();
    }

public:
    void fill_effects_from_cache(const std::size_t group, std::vector<double>& full_effects) {
        // During calculation of effects, the current group (i.e., 'group') is
        // the first group in the comparison and 'other' is the second group.
        // However, remember that we want to cache the reverse effects, so in
        // the cached entry, 'group' is second and 'other' is first.
        for (decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
            if (other == group) {
                my_actions[other] = CacheAction::SKIP;
                continue;
            }

            if (my_cache_size == 0) {
                my_actions[other] = CacheAction::COMPUTE;
                continue;
            }

            // If 'other' is later than 'group', it's a candidate to be cached,
            // as it will be used when this method is called with 'group = other'.
            // Note that the ACTUAL caching decision is made in the refinement step below.
            if (other > group) {
                my_actions[other] = CacheAction::CACHE;
                continue;
            }

            // Need to recompute cache entries that were previously evicted. We
            // do so if the cache is empty or the first group of the first cached
            // entry has a higher index than the current group (and thus the
            // desired comparison's effects cannot possibly exist in the cache).
            if (my_cached.empty()) {
                my_actions[other] = CacheAction::COMPUTE;
                continue;
            }

            const auto& front = my_cached.begin()->first;
            if (front.first > group || front.second > other) {
                // Technically, the second clause should be (front.first == group && front.second > other).
                // However, less-thans should be impossible as they should have been used up during processing
                // of previous 'group' values. Thus, equality is already implied if the first condition fails.
                my_actions[other] = CacheAction::COMPUTE;
                continue;
            }

            // If we got past the previous clause, this implies that the first cache entry
            // contains the effect sizes for the desired comparison (i.e., 'group' vs 'other').
            // We thus transfer the cached values into 'full_effects'.
            my_actions[other] = CacheAction::SKIP;
            const auto curcache = my_cached.begin()->second;
            for (decltype(I(my_ngenes)) i = 0; i < my_ngenes; ++i) {
                full_effects[sanisizer::nd_offset<std::size_t>(other, my_ngroups, i)] = curcache[i];
            }

            my_unused_pool.push_back(curcache);
            my_cached.erase(my_cached.begin());
        }

        // Refining our choice of cacheable entries by doing a dummy run and
        // seeing whether eviction actually happens. If it doesn't, we won't
        // bother caching, because that would be a waste of memory accesses.
        for (decltype(I(my_ngroups)) other = 0; other < my_ngroups; ++other) {
            if (my_actions[other] != CacheAction::CACHE) {
                continue;
            }

            const std::pair<std::size_t, std::size_t> key(other, group);
            if (my_cached.size() < my_cache_size) {
                const auto ptr = my_unused_pool.back();
                my_cached[key] = ptr;
                my_staging_cache[other] = ptr;
                my_unused_pool.pop_back();
                continue;
            }

            // Looking at the last cache entry. If the first group of this
            // entry is larger than the first group of the incoming entry, we
            // evict it, as the incoming entry has faster reusability.
            auto it = my_cached.end();
            --it;
            if ((it->first).first > other) {
                const auto ptr = it->second;
                my_cached[key] = ptr;
                my_staging_cache[other] = ptr;
                my_cached.erase(it);
            } else {
                // Otherwise, if we're not going to do any evictions, we
                // indicate that we shouldn't even bother computing the
                // reverse effect, because we won't be caching the incoming entry.
                my_actions[other] = CacheAction::COMPUTE;
            }
        }
    }

public:
    CacheAction get_action(std::size_t other) const {
        return my_actions[other];
    }

    Stat_* get_cache_location(std::size_t other) const {
        return my_staging_cache[other];
    }
};

template<typename Index_, typename Stat_, typename Rank_>
void process_simple_summary_effects(
    const Index_ ngenes,
    const std::size_t ngroups,
    const std::size_t nblocks,
    const std::size_t ncombos,
    const std::vector<Stat_>& combo_means,
    const std::vector<Stat_>& combo_vars,
    const std::vector<Stat_>& combo_detected,
    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
    const std::vector<Stat_>& combo_weights,
    const double threshold,
    const std::size_t cache_size,
    const int num_threads)
{
    // First, computing the pooled averages to get that out of the way.
    {
        std::vector<Stat_> total_weights_per_group;
        auto total_weights_ptr = combo_weights.data();
        if (nblocks > 1) {
            total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
            total_weights_ptr = total_weights_per_group.data();
        }

        tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
            for (Index_ gene = start, end = start + length; gene < end; ++gene) {
                const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
                const auto tmp_means = combo_means.data() + in_offset;
                const auto tmp_detected = combo_detected.data() + in_offset;
                average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);
            }
        }, ngenes, num_threads);
    }

    PrecomputedPairwiseWeights<Stat_> preweights(ngroups, nblocks, combo_weights.data());
    EffectsCacher<Index_, Stat_> cache(ngenes, ngroups, cache_size);
    std::vector<Stat_> full_effects(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngenes));
    auto effect_buffers = sanisizer::create<std::vector<std::vector<Stat_> > >(num_threads);
    for (auto& ef : effect_buffers) {
        sanisizer::resize(ef, ngroups);
    }

    if (output.cohens_d.size()) {
        cache.clear();
        for (decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
            cache.fill_effects_from_cache(group, full_effects);

            tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void {
                auto& effect_buffer = effect_buffers[t];

                for (Index_ gene = start, end = start + length; gene < end; ++gene) {
                    const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
                    const auto my_means = combo_means.data() + in_offset;
                    const auto my_variances = combo_vars.data() + in_offset;
                    const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);

                    for (decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
                        auto cache_action = cache.get_action(other);
                        if (cache_action == CacheAction::COMPUTE) {
                            store_ptr[other] = compute_pairwise_cohens_d_one_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
                        } else if (cache_action == CacheAction::CACHE) {
                            auto tmp = compute_pairwise_cohens_d_two_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
                            store_ptr[other] = tmp.first;
                            cache.get_cache_location(other)[gene] = tmp.second;
                        }
                    }
                    summarize_comparisons(ngroups, store_ptr, group, gene, output.cohens_d[group], effect_buffer);
                }
            }, ngenes, num_threads);

            const auto mr = output.cohens_d[group].min_rank;
            if (mr) {
                compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
            }
        }
    }

    if (output.delta_mean.size()) {
        cache.clear();
        for (decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
            cache.fill_effects_from_cache(group, full_effects);

            tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void {
                auto& effect_buffer = effect_buffers[t];

                for (Index_ gene = start, end = start + length; gene < end; ++gene) {
                    const auto my_means = combo_means.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
                    const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);

                    for (decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
                        const auto cache_action = cache.get_action(other);
                        if (cache_action != CacheAction::SKIP) {
                            const auto val = compute_pairwise_simple_diff(group, other, my_means, ngroups, nblocks, preweights);
                            store_ptr[other] = val;
                            if (cache_action == CacheAction::CACHE) {
                                cache.get_cache_location(other)[gene] = -val;
                            }
                        }
                    }
                    summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_mean[group], effect_buffer);
                }
            }, ngenes, num_threads);

            const auto mr = output.delta_mean[group].min_rank;
            if (mr) {
                compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
            }
        }
    }

    if (output.delta_detected.size()) {
        cache.clear();
        for (decltype(I(ngroups)) group = 0; group < ngroups; ++group) {
            cache.fill_effects_from_cache(group, full_effects);

            tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void {
                auto& effect_buffer = effect_buffers[t];

                for (Index_ gene = start, end = start + length; gene < end; ++gene) {
                    const auto my_detected = combo_detected.data() + sanisizer::product_unsafe<std::size_t>(gene, ncombos);
                    const auto store_ptr = full_effects.data() + sanisizer::product_unsafe<std::size_t>(gene, ngroups);

                    for (decltype(I(ngroups)) other = 0; other < ngroups; ++other) {
                        const auto cache_action = cache.get_action(other);
                        if (cache_action != CacheAction::SKIP) {
                            const auto val = compute_pairwise_simple_diff(group, other, my_detected, ngroups, nblocks, preweights);
                            store_ptr[other] = val;
                            if (cache_action == CacheAction::CACHE) {
                                cache.get_cache_location(other)[gene] = -val;
                            }
                        }
                    }
                    summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_detected[group], effect_buffer);
                }
            }, ngenes, num_threads);

            const auto mr = output.delta_detected[group].min_rank;
            if (mr) {
                compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
            }
        }
    }
}

template<typename Index_, typename Stat_, typename Rank_>
ScoreMarkersSummaryBuffers<Stat_, Rank_> fill_summary_results(
    const Index_ ngenes,
    const std::size_t ngroups,
    ScoreMarkersSummaryResults<Stat_, Rank_>& store,
    const ScoreMarkersSummaryOptions& options)
{
    ScoreMarkersSummaryBuffers<Stat_, Rank_> output;

    fill_average_results(ngenes, ngroups, store.mean, store.detected, output.mean, output.detected);

    if (options.compute_cohens_d) {
        output.cohens_d = fill_summary_results(
            ngenes,
            ngroups,
            store.cohens_d,
            options.compute_min,
            options.compute_mean,
            options.compute_median,
            options.compute_max,
            options.compute_min_rank
        );
    }

    if (options.compute_auc) {
        output.auc = fill_summary_results(
            ngenes,
            ngroups,
            store.auc,
            options.compute_min,
            options.compute_mean,
            options.compute_median,
            options.compute_max,
            options.compute_min_rank
        );
    }

    if (options.compute_delta_mean) {
        output.delta_mean = fill_summary_results(
            ngenes,
            ngroups,
            store.delta_mean,
            options.compute_min,
            options.compute_mean,
            options.compute_median,
            options.compute_max,
            options.compute_min_rank
        );
    }

    if (options.compute_delta_detected) {
        output.delta_detected = fill_summary_results(
            ngenes,
            ngroups,
            store.delta_detected,
            options.compute_min,
            options.compute_mean,
            options.compute_median,
            options.compute_max,
            options.compute_min_rank
        );
    }

    return output;
}

template<typename Stat_, typename Rank_>
bool auc_needs_minrank(const std::vector<SummaryBuffers<Stat_, Rank_> >& auc_summaries) {
    for (const auto& outs : auc_summaries) {
        if (outs.min_rank) {
            return true;
        }
    }
    return false;
}

template<
    bool single_block_,
    typename Value_,
    typename Index_,
    typename Group_,
    typename Block_,
    typename Stat_,
    typename Rank_
>
void score_markers_summary(
    const tatami::Matrix<Value_, Index_>& matrix,
    const std::size_t ngroups,
    const Group_* const group,
    const std::size_t nblocks,
    const Block_* const block,
    const std::size_t ncombos,
    const std::size_t* const combo,
    const std::vector<Index_>& combo_sizes,
    const ScoreMarkersSummaryOptions& options,
    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output
) {
    const auto ngenes = matrix.nrow();
    const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
    std::vector<Stat_> combo_means(payload_size), combo_vars(payload_size), combo_detected(payload_size);

    // For a single block, this usually doesn't really matter, but we do it for consistency with the multi-block case,
    // and to account for variable weighting where non-zero block sizes get zero weight.
    const auto combo_weights = scran_blocks::compute_weights<Stat_>(
        combo_sizes,
        options.block_weight_policy,
        options.variable_block_weight_parameters
    );

    const bool do_auc = !output.auc.empty();

    if (do_auc || matrix.prefer_rows()) {
        if (do_auc && !auc_needs_minrank(output.auc)) {
            // If we don't need the min-rank, we can compute summaries for the AUCs directly.
            // This means that we don't have to store the full 3D array of AUCs across all genes.
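            // (Added note: the full 3D array in the branch below holds
            // ngroups * ngroups * ngenes values of Stat_, whereas this branch
            // only needs an ngroups * ngroups pairwise buffer plus a
            // length-ngroups summary buffer per workspace, presumably one per
            // thread.)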
            struct AucResultWorkspace {
                AucResultWorkspace() = default;
                AucResultWorkspace(const std::size_t ngroups) :
                    pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
                    summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups))
                {}
                std::vector<Stat_> pairwise_buffer;
                std::vector<Stat_> summary_buffer;
            };

            scan_matrix_by_row_custom_auc<single_block_>(
                matrix,
                ngroups,
                group,
                nblocks,
                block,
                ncombos,
                combo,
                combo_means,
                combo_vars,
                combo_detected,
                /* do_auc = */ true,
                /* auc_result_initialize = */ [&](int) -> AucResultWorkspace {
                    return AucResultWorkspace(ngroups);
                },
                /* auc_result_process = */ [&](
                    const Index_ gene,
                    AucScanWorkspace<Value_, Group_, Index_, Stat_>& auc_work,
                    AucResultWorkspace& res_work
                ) -> void {
                    process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
                    for (decltype(I(ngroups)) l = 0; l < ngroups; ++l) {
                        const auto current_effects = res_work.pairwise_buffer.data() + sanisizer::product_unsafe<std::size_t>(l, ngroups);
                        summarize_comparisons(ngroups, current_effects, l, gene, output.auc[l], res_work.summary_buffer);
                    }
                },
                combo_sizes,
                combo_weights,
                options.num_threads
            );

        } else {
            std::vector<Stat_> tmp_auc;
            Stat_* auc_ptr = NULL;
            if (do_auc) {
                tmp_auc.resize(sanisizer::product<decltype(I(tmp_auc.size()))>(ngroups, ngroups, ngenes));
                auc_ptr = tmp_auc.data();
            }

            scan_matrix_by_row_full_auc<single_block_>(
                matrix,
                ngroups,
                group,
                nblocks,
                block,
                ncombos,
                combo,
                combo_means,
                combo_vars,
                combo_detected,
                auc_ptr,
                combo_sizes,
                combo_weights,
                options.threshold,
                options.num_threads
            );

            if (do_auc) {
                summarize_comparisons(ngenes, ngroups, auc_ptr, output.auc, options.num_threads);
                compute_min_rank_pairwise(ngenes, ngroups, auc_ptr, output.auc, options.num_threads);
            }
        }

    } else {
        scan_matrix_by_column(
            matrix,
            [&]{
                if constexpr(single_block_) {
                    return ngroups;
                } else {
                    return ncombos;
                }
            }(),
            [&]{
                if constexpr(single_block_) {
                    return group;
                } else {
                    return combo;
                }
            }(),
            combo_means,
            combo_vars,
            combo_detected,
            combo_sizes,
            options.num_threads
        );
    }

    process_simple_summary_effects(
        matrix.nrow(),
        ngroups,
        nblocks,
        ncombos,
        combo_means,
        combo_vars,
        combo_detected,
        output,
        combo_weights,
        options.threshold,
        options.cache_size,
        options.num_threads
    );
}

} // namespace internal

// Score marker genes for each group of cells, storing the results in user-supplied buffers.
template<typename Value_, typename Index_, typename Group_, typename Stat_, typename Rank_>
void score_markers_summary(
    const tatami::Matrix<Value_, Index_>& matrix,
    const Group_* const group,
    const ScoreMarkersSummaryOptions& options,
    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output
) {
    const auto NC = matrix.ncol();
    const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
    const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());

    internal::score_markers_summary<true>(
        matrix,
        ngroups,
        group,
        1,
        static_cast<int*>(NULL),
        ngroups,
        static_cast<std::size_t*>(NULL),
        group_sizes,
        options,
        output
    );
}

// As above, but blocking on a second factor (e.g., batch or sample of origin).
template<typename Value_, typename Index_, typename Group_, typename Block_, typename Stat_, typename Rank_>
void score_markers_summary_blocked(
    const tatami::Matrix<Value_, Index_>& matrix,
    const Group_* const group,
    const Block_* const block,
    const ScoreMarkersSummaryOptions& options,
    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output)
{
    const auto NC = matrix.ncol();
    const auto ngroups = output.mean.size();
    const auto nblocks = tatami_stats::total_groups(block, NC);

    const auto combinations = internal::create_combinations(ngroups, group, block, NC);
    const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
    const auto ncombos = combo_sizes.size();

    internal::score_markers_summary<false>(
        matrix,
        sanisizer::cast<std::size_t>(ngroups),
        group,
        sanisizer::cast<std::size_t>(nblocks),
        block,
        sanisizer::cast<std::size_t>(ncombos),
        combinations.data(),
        combo_sizes,
        options,
        output
    );
}

// As above, but allocating and returning the results directly.
template<typename Stat_ = double, typename Rank_ = int, typename Value_, typename Index_, typename Group_>
ScoreMarkersSummaryResults<Stat_, Rank_> score_markers_summary(
    const tatami::Matrix<Value_, Index_>& matrix,
    const Group_* const group,
    const ScoreMarkersSummaryOptions& options)
{
    const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
    ScoreMarkersSummaryResults<Stat_, Rank_> output;
    const auto buffers = internal::fill_summary_results(matrix.nrow(), ngroups, output, options);
    score_markers_summary(matrix, group, options, buffers);
    return output;
}

// As above, but blocked and returning the results directly.
template<typename Stat_ = double, typename Rank_ = int, typename Value_, typename Index_, typename Group_, typename Block_>
ScoreMarkersSummaryResults<Stat_, Rank_> score_markers_summary_blocked(
    const tatami::Matrix<Value_, Index_>& matrix,
    const Group_* const group,
    const Block_* const block,
    const ScoreMarkersSummaryOptions& options)
{
    const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
    ScoreMarkersSummaryResults<Stat_, Rank_> output;
    const auto buffers = internal::fill_summary_results(matrix.nrow(), ngroups, output, options);
    score_markers_summary_blocked(matrix, group, block, options, buffers);
    return output;
}

} // namespace scran_markers

#endif
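
/*
 * Example usage (a sketch, not part of the original header): assuming an
 * existing tatami matrix of log-expression values with genes in rows and cells
 * in columns, and a group assignment for each cell, the allocating overload
 * can be called with default options. The variable names and the '...' are
 * placeholders.
 *
 *     const tatami::Matrix<double, int>& mat = ...; // genes x cells
 *     std::vector<int> group = ...;                 // one group index per cell, in [0, ngroups)
 *
 *     scran_markers::ScoreMarkersSummaryOptions opt;
 *     auto res = scran_markers::score_markers_summary(mat, group.data(), opt);
 *     // res.mean[g] holds the per-gene means for group 'g', and
 *     // res.cohens_d[g] holds the per-gene summaries of Cohen's d for group 'g'.
 */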