scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
score_markers_summary.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
3
5#include "tatami/tatami.hpp"
6#include "sanisizer/sanisizer.hpp"
7#include "topicks/topicks.hpp"
8#include "quickstats/quickstats.hpp"
9
10#include <array>
11#include <map>
12#include <vector>
13#include <optional>
14
15#include "scan_matrix.hpp"
16#include "cohens_d.hpp"
17#include "simple_diff.hpp"
18#include "block_averages.hpp"
20#include "average_group_stats.hpp"
21#include "create_combinations.hpp"
22#include "utils.hpp"
23
29namespace scran_markers {
30
40 double threshold = 0;
41
46 int num_threads = 1;
47
52 bool compute_group_mean = true;
53
59
64 bool compute_cohens_d = true;
65
70 bool compute_auc = true;
71
76 bool compute_delta_mean = true;
77
83
88 bool compute_min = true;
89
94 bool compute_mean = true;
95
100 bool compute_median = true;
101
106 bool compute_max = true;
107
113 std::optional<std::vector<double> > compute_summary_quantiles;
114
119 bool compute_min_rank = true;
120
126 std::size_t min_rank_limit = 500;
127
133 bool min_rank_preserve_ties = false;
134
140 BlockAveragePolicy block_average_policy = BlockAveragePolicy::MEAN;
141
154 scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
155
161 scran_blocks::VariableWeightParameters variable_block_weight_parameters;
162
167 double block_quantile = 0.5;
168};
169
175template<typename Stat_, typename Rank_>
184 std::vector<Stat_*> mean;
185
193 std::vector<Stat_*> detected;
194
202 std::vector<SummaryBuffers<Stat_, Rank_> > cohens_d;
203
211 std::vector<SummaryBuffers<Stat_, Rank_> > auc;
212
220 std::vector<SummaryBuffers<Stat_, Rank_> > delta_mean;
221
229 std::vector<SummaryBuffers<Stat_, Rank_> > delta_detected;
230};
231
237template<typename Stat_, typename Rank_>
243 std::vector<std::vector<Stat_> > mean;
244
249 std::vector<std::vector<Stat_> > detected;
250
258 std::vector<SummaryResults<Stat_, Rank_> > cohens_d;
259
267 std::vector<SummaryResults<Stat_, Rank_> > auc;
268
276 std::vector<SummaryResults<Stat_, Rank_> > delta_mean;
277
285 std::vector<SummaryResults<Stat_, Rank_> > delta_detected;
286};
287
291namespace internal {
292
293// Inner vector is optional as we might not need it if SummaryBuffers::min_rank=NULL for a group.
294template<typename Stat_, typename Index_>
295using MinrankTopQueues = std::vector<std::optional<std::vector<topicks::TopQueue<Stat_, Index_> > > >;
296
297template<typename Stat_, typename Index_, typename Rank_>
298void preallocate_minrank_queues(
299 const std::size_t ngroups,
300 MinrankTopQueues<Stat_, Index_>& queue,
301 const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries,
302 const Index_ limit,
303 const bool keep_ties
304) {
306 qopt.keep_ties = keep_ties;
307 qopt.check_nan = true;
308
309 sanisizer::resize(queue, ngroups);
310 for (I<decltype(ngroups)> g1 = 0; g1 < ngroups; ++g1) {
311 if (summaries[g1].min_rank == NULL) {
312 continue;
313 }
314 queue[g1].emplace();
315
316 auto& g_queue = *(queue[g1]);
317 sanisizer::reserve(g_queue, ngroups);
318 for (I<decltype(ngroups)> g2 = 0; g2 < ngroups; ++g2) {
319 g_queue.emplace_back(limit, true, qopt);
320 }
321 }
322}
323
324template<typename Stat_, typename Index_, typename Rank_>
325void compute_summary_stats_per_gene(
326 const Index_ gene,
327 const std::size_t ngroups,
328 const Stat_* const pairwise_buffer_ptr,
329 std::vector<Stat_>& summary_buffer,
330 MaybeMultipleQuantiles<Stat_>& summary_qcalcs,
331 MinrankTopQueues<Stat_, Index_>& minrank_queues,
332 const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries
333) {
334 for (I<decltype(ngroups)> gr = 0; gr < ngroups; ++gr) {
335 auto& cursummary = summaries[gr];
336 const auto in_offset = sanisizer::product_unsafe<std::size_t>(ngroups, gr);
337 summarize_comparisons(ngroups, pairwise_buffer_ptr + in_offset, gr, gene, cursummary, summary_qcalcs, summary_buffer);
338
339 if (cursummary.min_rank) {
340 auto& gr_queue = *(minrank_queues[gr]);
341 for (I<decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
342 if (gr != gr2) {
343 gr_queue[gr2].emplace(pairwise_buffer_ptr[in_offset + gr2], gene);
344 }
345 }
346 }
347 }
348}
349
350template<typename Stat_, typename Index_, typename Rank_>
351void report_minrank_from_queues(
352 const Index_ ngenes,
353 const std::size_t ngroups,
354 std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > >& all_queues,
355 const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries,
356 const int num_threads,
357 const bool keep_ties
358) {
359 if (all_queues.empty()) {
360 // If no queues were populated with ranks, this means that the matrix had no rows at all.
361 // If that's the case, there's no point iterating through the groups.
362 // We don't need to fill the min_rank array because ngenes == 0.
363 // Thus, we can just return immediately.
364 return;
365 }
366
367 tatami::parallelize([&](const int, const std::size_t start, const std::size_t length) -> void {
368 std::vector<Index_> tie_buffer;
369
370 for (I<decltype(ngroups)> gr = start, grend = start + length; gr < grend; ++gr) {
371 const auto mr_out = summaries[gr].min_rank;
372 if (mr_out == NULL) {
373 continue;
374 }
375
376 // Using the maximum possible rank (i.e., 'ngenes') as the default.
377 const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
378 std::fill_n(mr_out, ngenes, maxrank_placeholder);
379
380 auto& first_mr_queue = *(all_queues.front());
381 for (I<decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
382 if (gr == gr2) {
383 continue;
384 }
385
386 // Moving contents into a thread-local variable to minimize false sharing,
387 // at least while we repeatedly update the queue within this thread.
388 auto current_out = std::move((*(first_mr_queue[gr]))[gr2]);
389
390 const auto num_queues = all_queues.size();
391 for (I<decltype(num_queues)> q = 1; q < num_queues; ++q) {
392 auto& current_mr_queue = *(all_queues[q]);
393 auto current_in = std::move((*(current_mr_queue[gr]))[gr2]); // moving it to minimize false sharing.
394 while (!current_in.empty()) {
395 current_out.push(current_in.top());
396 current_in.pop();
397 }
398 }
399
400 // Cast to Rank_ is safe as current_out.size() <= ngenes,
401 // and we already checked that ngenes can fit into Rank_ in report_minrank_from_current_outs().
402 if (!keep_ties) {
403 while (!current_out.empty()) {
404 auto& mr = mr_out[current_out.top().second];
405 mr = std::min(mr, static_cast<Rank_>(current_out.size()));
406 current_out.pop();
407 }
408 } else {
409 while (!current_out.empty()) {
410 tie_buffer.clear();
411 const auto curtop = current_out.top();
412 current_out.pop();
413
414 while (!current_out.empty() && current_out.top().first == curtop.first) {
415 tie_buffer.push_back(current_out.top().second);
416 current_out.pop();
417 }
418
419 // Increment is safe as we already reduced the size at least once.
420 const Rank_ tied_rank = current_out.size() + 1;
421
422 mr_out[curtop.second] = std::min(mr_out[curtop.second], tied_rank);
423 for (const auto t : tie_buffer) {
424 mr_out[t] = std::min(mr_out[t], tied_rank);
425 }
426 }
427 }
428 }
429 }
430 }, ngroups, num_threads);
431}
432
433template<typename Index_, typename Stat_, typename Rank_>
434void process_simple_summary_effects(
435 const Index_ ngenes,
436 const std::size_t ngroups,
437 const std::size_t nblocks,
438 const std::size_t ncombos,
439 const std::vector<Stat_>& combo_means,
440 const std::vector<Stat_>& combo_vars,
441 const std::vector<Stat_>& combo_detected,
442 const double threshold,
443 const BlockAverageInfo<Stat_>& average_info,
444 const std::optional<std::vector<double> >& summary_quantiles,
445 const Index_ minrank_limit,
446 const bool minrank_keep_ties,
448 const int num_threads
449) {
450 std::optional<std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > > > cohens_d_minrank_all_queues, delta_mean_minrank_all_queues, delta_detected_minrank_all_queues;
451 if (output.cohens_d.size()) {
452 cohens_d_minrank_all_queues.emplace(sanisizer::cast<I<decltype(cohens_d_minrank_all_queues->size())> >(num_threads));
453 }
454 if (output.delta_mean.size()) {
455 delta_mean_minrank_all_queues.emplace(sanisizer::cast<I<decltype(delta_mean_minrank_all_queues->size())> >(num_threads));
456 }
457 if (output.delta_detected.size()) {
458 delta_detected_minrank_all_queues.emplace(sanisizer::cast<I<decltype(delta_detected_minrank_all_queues->size())> >(num_threads));
459 }
460
461 std::optional<std::vector<Stat_> > total_weights_per_group;
462 const Stat_* total_weights_ptr = NULL;
463 if (average_info.use_mean()) {
464 if (!output.mean.empty() || !output.detected.empty()) {
465 if (nblocks > 1) {
466 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, average_info.combo_weights().data());
467 total_weights_ptr = total_weights_per_group->data();
468 } else {
469 total_weights_ptr = average_info.combo_weights().data();
470 }
471 }
472 }
473
474 std::optional<PrecomputedPairwiseWeights<Stat_> > preweights;
475 if (average_info.use_mean()) {
476 if (!output.cohens_d.empty() || !output.delta_mean.empty() || !output.delta_detected.empty()) {
477 preweights = PrecomputedPairwiseWeights<Stat_>(ngroups, nblocks, average_info.combo_weights().data());
478 }
479 }
480
481 const auto ngroups2 = sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups);
482 const auto nused = tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void {
483 std::vector<Stat_> pairwise_buffer(ngroups2);
484 std::vector<Stat_> summary_buffer(ngroups);
485 auto summary_qcalcs = setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups);
486
487 std::optional<std::vector<Stat_> > qbuffer, qrevbuffer;
488 std::optional<quickstats::SingleQuantileVariableNumber<Stat_, std::size_t> > qcalc;
489 if (!average_info.use_mean()) {
490 qbuffer.emplace();
491 qrevbuffer.emplace();
492 qcalc.emplace(nblocks, average_info.quantile());
493 }
494
495 std::optional<MinrankTopQueues<Stat_, Index_> > cohens_d_minrank_queue, delta_mean_minrank_queue, delta_detected_minrank_queue;
496 if (output.cohens_d.size()) {
497 cohens_d_minrank_queue.emplace();
498 preallocate_minrank_queues(ngroups, *cohens_d_minrank_queue, output.cohens_d, minrank_limit, minrank_keep_ties);
499 }
500 if (output.delta_mean.size()) {
501 delta_mean_minrank_queue.emplace();
502 preallocate_minrank_queues(ngroups, *delta_mean_minrank_queue, output.delta_mean, minrank_limit, minrank_keep_ties);
503 }
504 if (output.delta_detected.size()) {
505 delta_detected_minrank_queue.emplace();
506 preallocate_minrank_queues(ngroups, *delta_detected_minrank_queue, output.delta_detected, minrank_limit, minrank_keep_ties);
507 }
508
509 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
510 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
511
512 if (!output.mean.empty()) {
513 const auto tmp_means = combo_means.data() + in_offset;
514 if (average_info.use_mean()) {
515 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_means, average_info.combo_weights().data(), total_weights_ptr, output.mean);
516 } else {
517 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_means, *qbuffer, *qcalc, output.mean);
518 }
519 }
520
521 if (!output.detected.empty()) {
522 const auto tmp_detected = combo_detected.data() + in_offset;
523 if (average_info.use_mean()) {
524 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_detected, average_info.combo_weights().data(), total_weights_ptr, output.detected);
525 } else {
526 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_detected, *qbuffer, *qcalc, output.detected);
527 }
528 }
529
530 if (output.cohens_d.size()) {
531 const auto tmp_means = combo_means.data() + in_offset;
532 const auto tmp_variances = combo_vars.data() + in_offset;
533 if (average_info.use_mean()) {
534 compute_pairwise_cohens_d_blockmean(tmp_means, tmp_variances, ngroups, nblocks, threshold, *preweights, pairwise_buffer.data());
535 } else {
536 compute_pairwise_cohens_d_blockquantile(tmp_means, tmp_variances, ngroups, nblocks, threshold, *qbuffer, *qrevbuffer, *qcalc, pairwise_buffer.data());
537 }
538 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *cohens_d_minrank_queue, output.cohens_d);
539 }
540
541 if (output.delta_mean.size()) {
542 const auto tmp_means = combo_means.data() + in_offset;
543 if (average_info.use_mean()) {
544 compute_pairwise_simple_diff_blockmean(tmp_means, ngroups, nblocks, *preweights, pairwise_buffer.data());
545 } else {
546 compute_pairwise_simple_diff_blockquantile(tmp_means, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
547 }
548 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *delta_mean_minrank_queue, output.delta_mean);
549 }
550
551 if (output.delta_detected.size()) {
552 const auto tmp_det = combo_detected.data() + in_offset;
553 if (average_info.use_mean()) {
554 compute_pairwise_simple_diff_blockmean(tmp_det, ngroups, nblocks, *preweights, pairwise_buffer.data());
555 } else {
556 compute_pairwise_simple_diff_blockquantile(tmp_det, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
557 }
558 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *delta_detected_minrank_queue, output.delta_detected);
559 }
560 }
561
562 // Only flushing it to the output buffer at the very end to minimize false sharing.
563 if (output.cohens_d.size()) {
564 (*cohens_d_minrank_all_queues)[t] = std::move(cohens_d_minrank_queue);
565 }
566 if (output.delta_mean.size()) {
567 (*delta_mean_minrank_all_queues)[t] = std::move(delta_mean_minrank_queue);
568 }
569 if (output.delta_detected.size()) {
570 (*delta_detected_minrank_all_queues)[t] = std::move(delta_detected_minrank_queue);
571 }
572 }, ngenes, num_threads);
573
574 if (output.cohens_d.size()) {
575 cohens_d_minrank_all_queues->resize(nused);
576 report_minrank_from_queues(ngenes, ngroups, *cohens_d_minrank_all_queues, output.cohens_d, num_threads, minrank_keep_ties);
577 }
578 if (output.delta_mean.size()) {
579 delta_mean_minrank_all_queues->resize(nused);
580 report_minrank_from_queues(ngenes, ngroups, *delta_mean_minrank_all_queues, output.delta_mean, num_threads, minrank_keep_ties);
581 }
582 if (output.delta_detected.size()) {
583 delta_detected_minrank_all_queues->resize(nused);
584 report_minrank_from_queues(ngenes, ngroups, *delta_detected_minrank_all_queues, output.delta_detected, num_threads, minrank_keep_ties);
585 }
586}
587
588template<typename Index_, typename Stat_, typename Rank_>
589ScoreMarkersSummaryBuffers<Stat_, Rank_> preallocate_summary_results(
590 const Index_ ngenes,
591 const std::size_t ngroups,
593 const ScoreMarkersSummaryOptions& options)
594{
596
597 if (options.compute_group_mean) {
598 preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
599 }
600
601 if (options.compute_group_detected) {
602 preallocate_average_results(ngenes, ngroups, store.detected, output.detected);
603 }
604
605 if (options.compute_cohens_d) {
606 output.cohens_d = fill_summary_results(
607 ngenes,
608 ngroups,
609 store.cohens_d,
610 options.compute_min,
611 options.compute_mean,
612 options.compute_median,
613 options.compute_max,
614 options.compute_summary_quantiles,
615 options.compute_min_rank
616 );
617 }
618
619 if (options.compute_auc) {
620 output.auc = fill_summary_results(
621 ngenes,
622 ngroups,
623 store.auc,
624 options.compute_min,
625 options.compute_mean,
626 options.compute_median,
627 options.compute_max,
628 options.compute_summary_quantiles,
629 options.compute_min_rank
630 );
631 }
632
633 if (options.compute_delta_mean) {
634 output.delta_mean = fill_summary_results(
635 ngenes,
636 ngroups,
637 store.delta_mean,
638 options.compute_min,
639 options.compute_mean,
640 options.compute_median,
641 options.compute_max,
642 options.compute_summary_quantiles,
643 options.compute_min_rank
644 );
645 }
646
647 if (options.compute_delta_detected) {
648 output.delta_detected = fill_summary_results(
649 ngenes,
650 ngroups,
651 store.delta_detected,
652 options.compute_min,
653 options.compute_mean,
654 options.compute_median,
655 options.compute_max,
656 options.compute_summary_quantiles,
657 options.compute_min_rank
658 );
659 }
660
661 return output;
662}
663
664template<
665 bool single_block_,
666 typename Value_,
667 typename Index_,
668 typename Group_,
669 typename Block_,
670 typename Stat_,
671 typename Rank_
672>
673void score_markers_summary(
674 const tatami::Matrix<Value_, Index_>& matrix,
675 const std::size_t ngroups,
676 const Group_* const group,
677 const std::size_t nblocks,
678 const Block_* const block,
679 const std::size_t ncombos,
680 const std::size_t* const combo,
681 const std::vector<Index_>& combo_sizes,
682 const ScoreMarkersSummaryOptions& options,
684) {
685 const auto ngenes = matrix.nrow();
686 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
687 std::vector<Stat_> combo_means, combo_vars, combo_detected;
688 if (!output.mean.empty() || !output.cohens_d.empty() || !output.delta_mean.empty()) {
689 combo_means.resize(payload_size);
690 }
691 if (!output.cohens_d.empty()) {
692 combo_vars.resize(payload_size);
693 }
694 if (!output.detected.empty() || !output.delta_detected.empty()) {
695 combo_detected.resize(payload_size);
696 }
697
698 // For a single block, this usually doesn't really matter, but we do it for consistency with the multi-block case,
699 // and to account for variable weighting where non-zero block sizes get zero weight.
700 BlockAverageInfo<Stat_> average_info;
701 if (options.block_average_policy == BlockAveragePolicy::MEAN) {
702 average_info = BlockAverageInfo<Stat_>(
704 combo_sizes,
705 options.block_weight_policy,
706 options.variable_block_weight_parameters
707 )
708 );
709 } else {
710 average_info = BlockAverageInfo<Stat_>(options.block_quantile);
711 }
712
713 const Index_ minrank_limit = sanisizer::cap<Index_>(options.min_rank_limit);
714 internal::validate_quantiles(options.compute_summary_quantiles);
715
716 if (!output.auc.empty()) {
717 auto auc_minrank_all_queues = sanisizer::create<std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > > >(options.num_threads);
718
719 struct AucResultWorkspace {
720 AucResultWorkspace(const std::size_t ngroups, const std::optional<std::vector<double> >& summary_quantiles) :
721 pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
722 summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups)),
723 summary_qcalcs(setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups))
724 {};
725
726 public:
727 std::vector<Stat_> pairwise_buffer;
728 std::vector<Stat_> summary_buffer;
729 MaybeMultipleQuantiles<Stat_> summary_qcalcs;
730 MinrankTopQueues<Stat_, Index_> queue;
731 };
732
733 const auto num_used = scan_matrix_by_row_custom_auc<single_block_>(
734 matrix,
735 ngroups,
736 group,
737 nblocks,
738 block,
739 ncombos,
740 combo,
741 combo_sizes,
742 average_info,
743 combo_means,
744 combo_vars,
745 combo_detected,
746 /* do_auc = */ true,
747 /* auc_result_initialize = */ [&](const int) -> AucResultWorkspace {
748 AucResultWorkspace res_work(ngroups, options.compute_summary_quantiles);
749 preallocate_minrank_queues(ngroups, res_work.queue, output.auc, minrank_limit, options.min_rank_preserve_ties);
750 return res_work;
751 },
752 /* auc_result_process = */ [&](const Index_ gene, AucScanWorkspace<Value_, Group_, Stat_, Index_>& auc_work, AucResultWorkspace& res_work) -> void {
753 process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
754 compute_summary_stats_per_gene(gene, ngroups, res_work.pairwise_buffer.data(), res_work.summary_buffer, res_work.summary_qcalcs, res_work.queue, output.auc);
755 },
756 /* auc_result_finalize = */ [&](const int t, AucResultWorkspace& res_work) -> void {
757 auc_minrank_all_queues[t] = std::move(res_work.queue);
758 },
759 options.num_threads
760 );
761
762 auc_minrank_all_queues.resize(num_used);
763 report_minrank_from_queues(ngenes, ngroups, auc_minrank_all_queues, output.auc, options.num_threads, options.min_rank_preserve_ties);
764
765 } else if (matrix.prefer_rows()) {
766 scan_matrix_by_row_full_auc<single_block_>(
767 matrix,
768 ngroups,
769 group,
770 nblocks,
771 block,
772 ncombos,
773 combo,
774 combo_sizes,
775 average_info,
776 combo_means,
777 combo_vars,
778 combo_detected,
779 static_cast<Stat_*>(NULL),
780 options.threshold,
781 options.num_threads
782 );
783
784 } else {
785 scan_matrix_by_column(
786 matrix,
787 [&]{
788 if constexpr(single_block_) {
789 return ngroups;
790 } else {
791 return ncombos;
792 }
793 }(),
794 [&]{
795 if constexpr(single_block_) {
796 return group;
797 } else {
798 return combo;
799 }
800 }(),
801 combo_sizes,
802 combo_means,
803 combo_vars,
804 combo_detected,
805 options.num_threads
806 );
807 }
808
809 process_simple_summary_effects(
810 matrix.nrow(),
811 ngroups,
812 nblocks,
813 ncombos,
814 combo_means,
815 combo_vars,
816 combo_detected,
817 options.threshold,
818 average_info,
819 options.compute_summary_quantiles,
820 minrank_limit,
821 options.min_rank_preserve_ties,
822 output,
823 options.num_threads
824 );
825}
826
827}
866template<typename Value_, typename Index_, typename Group_, typename Stat_, typename Rank_>
868 const tatami::Matrix<Value_, Index_>& matrix,
869 const Group_* const group,
870 const ScoreMarkersSummaryOptions& options,
872) {
873 const auto NC = matrix.ncol();
874 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
875 const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
876
877 internal::score_markers_summary<true>(
878 matrix,
879 ngroups,
880 group,
881 1,
882 static_cast<int*>(NULL),
883 ngroups,
884 static_cast<std::size_t*>(NULL),
885 group_sizes,
886 options,
887 output
888 );
889}
890
919template<typename Value_, typename Index_, typename Group_, typename Block_, typename Stat_, typename Rank_>
921 const tatami::Matrix<Value_, Index_>& matrix,
922 const Group_* const group,
923 const Block_* const block,
924 const ScoreMarkersSummaryOptions& options,
926{
927 const auto NC = matrix.ncol();
928 const auto ngroups = output.mean.size();
929 const auto nblocks = tatami_stats::total_groups(block, NC);
930
931 const auto combinations = internal::create_combinations(ngroups, group, nblocks, block, NC);
932 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
933 const auto ncombos = combo_sizes.size();
934
935 internal::score_markers_summary<false>(
936 matrix,
937 sanisizer::cast<std::size_t>(ngroups),
938 group,
939 sanisizer::cast<std::size_t>(nblocks),
940 block,
941 sanisizer::cast<std::size_t>(ncombos),
942 combinations.data(),
943 combo_sizes,
944 options,
945 output
946 );
947}
948
949
967template<typename Stat_ = double, typename Rank_ = int, typename Value_, typename Index_, typename Group_>
969 const tatami::Matrix<Value_, Index_>& matrix,
970 const Group_* const group,
971 const ScoreMarkersSummaryOptions& options)
972{
973 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
975 const auto buffers = internal::preallocate_summary_results(matrix.nrow(), ngroups, output, options);
976 score_markers_summary(matrix, group, options, buffers);
977 return output;
978}
979
1000template<typename Stat_ = double, typename Rank_ = int, typename Value_, typename Index_, typename Group_, typename Block_>
1002 const tatami::Matrix<Value_, Index_>& matrix,
1003 const Group_* const group,
1004 const Block_* const block,
1005 const ScoreMarkersSummaryOptions& options)
1006{
1007 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
1009 const auto buffers = internal::preallocate_summary_results(matrix.nrow(), ngroups, output, options);
1010 score_markers_summary_blocked(matrix, group, block, options, buffers);
1011 return output;
1012}
1013
1014}
1015
1016#endif
Averaging statistics over blocks.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:27
BlockAveragePolicy
Definition block_averages.hpp:27
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:867
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:920
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Buffers for score_markers_summary() and friends.
Definition score_markers_summary.hpp:176
std::vector< Stat_ * > detected
Definition score_markers_summary.hpp:193
std::vector< SummaryBuffers< Stat_, Rank_ > > delta_detected
Definition score_markers_summary.hpp:229
std::vector< Stat_ * > mean
Definition score_markers_summary.hpp:184
std::vector< SummaryBuffers< Stat_, Rank_ > > delta_mean
Definition score_markers_summary.hpp:220
std::vector< SummaryBuffers< Stat_, Rank_ > > cohens_d
Definition score_markers_summary.hpp:202
std::vector< SummaryBuffers< Stat_, Rank_ > > auc
Definition score_markers_summary.hpp:211
Options for score_markers_summary() and friends.
Definition score_markers_summary.hpp:34
bool compute_group_detected
Definition score_markers_summary.hpp:58
bool compute_max
Definition score_markers_summary.hpp:106
bool compute_delta_detected
Definition score_markers_summary.hpp:82
bool compute_auc
Definition score_markers_summary.hpp:70
bool compute_group_mean
Definition score_markers_summary.hpp:52
bool compute_min
Definition score_markers_summary.hpp:88
bool compute_median
Definition score_markers_summary.hpp:100
bool compute_mean
Definition score_markers_summary.hpp:94
double threshold
Definition score_markers_summary.hpp:40
bool compute_cohens_d
Definition score_markers_summary.hpp:64
int num_threads
Definition score_markers_summary.hpp:46
bool compute_delta_mean
Definition score_markers_summary.hpp:76
Results for score_markers_summary() and friends.
Definition score_markers_summary.hpp:238
std::vector< SummaryResults< Stat_, Rank_ > > cohens_d
Definition score_markers_summary.hpp:258
std::vector< std::vector< Stat_ > > mean
Definition score_markers_summary.hpp:243
std::vector< SummaryResults< Stat_, Rank_ > > auc
Definition score_markers_summary.hpp:267
std::vector< SummaryResults< Stat_, Rank_ > > delta_detected
Definition score_markers_summary.hpp:285
std::vector< std::vector< Stat_ > > detected
Definition score_markers_summary.hpp:249
std::vector< SummaryResults< Stat_, Rank_ > > delta_mean
Definition score_markers_summary.hpp:276
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:33
Utilities for effect summarization.