1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
6#include "sanisizer/sanisizer.hpp"
13#include "scan_matrix.hpp"
14#include "cohens_d.hpp"
15#include "simple_diff.hpp"
17#include "average_group_stats.hpp"
18#include "create_combinations.hpp"
149template<
typename Stat_,
typename Rank_>
176 std::vector<SummaryBuffers<Stat_, Rank_> >
cohens_d;
185 std::vector<SummaryBuffers<Stat_, Rank_> >
auc;
211template<
typename Stat_,
typename Rank_>
217 std::vector<std::vector<Stat_> >
mean;
232 std::vector<SummaryResults<Stat_, Rank_> >
cohens_d;
241 std::vector<SummaryResults<Stat_, Rank_> >
auc;
267template<
typename Stat_,
typename Index_>
268using MinrankTopQueues = std::vector<std::vector<topicks::TopQueue<Stat_, Index_> > >;
270template<
typename Stat_,
typename Index_,
typename Rank_>
271void preallocate_minrank_queues(
272 const std::size_t ngroups,
273 std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
276 const bool keep_ties,
277 const int num_threads
283 sanisizer::resize(all_queues, num_threads);
284 for (
int t = 0; t < num_threads; ++t) {
285 sanisizer::resize(all_queues[t], ngroups);
287 for (
decltype(I(ngroups)) g1 = 0; g1 < ngroups; ++g1) {
288 if (summaries[g1].min_rank == NULL) {
292 for (
decltype(I(ngroups)) g2 = 0; g2 < ngroups; ++g2) {
293 all_queues[t][g1].emplace_back(limit,
true, qopt);
299template<
typename Stat_,
typename Index_,
typename Rank_>
300void compute_summary_stats_per_gene(
302 const std::size_t ngroups,
303 const Stat_*
const pairwise_buffer_ptr,
304 std::vector<Stat_>& summary_buffer,
305 MinrankTopQueues<Stat_, Index_>& minrank_queues,
306 const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries
308 for (
decltype(I(ngroups)) gr = 0; gr < ngroups; ++gr) {
309 auto& cursummary = summaries[gr];
310 const auto in_offset = sanisizer::product_unsafe<std::size_t>(ngroups, gr);
311 summarize_comparisons(ngroups, pairwise_buffer_ptr + in_offset, gr, gene, cursummary, summary_buffer);
313 if (cursummary.min_rank) {
314 auto& cur_queues = minrank_queues[gr];
315 for (
decltype(I(ngroups)) gr2 = 0; gr2 < ngroups; ++gr2) {
317 cur_queues[gr2].emplace(pairwise_buffer_ptr[in_offset + gr2], gene);
324template<
typename Stat_,
typename Index_,
typename Rank_>
325void report_minrank_from_queues(
327 const std::size_t ngroups,
328 std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
329 const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries,
330 const int num_threads,
333 tatami::parallelize([&](
const int,
const std::size_t start,
const std::size_t length) ->
void {
334 std::vector<Index_> tie_buffer;
336 for (
decltype(I(ngroups)) gr = start, grend = start + length; gr < grend; ++gr) {
337 const auto mr_out = summaries[gr].min_rank;
338 if (mr_out == NULL) {
343 const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
344 std::fill_n(mr_out, ngenes, maxrank_placeholder);
346 for (
decltype(I(ngroups)) gr2 = 0; gr2 < ngroups; ++gr2) {
350 auto& current_out = all_queues.front()[gr][gr2];
352 const auto num_queues = all_queues.size();
353 for (
decltype(I(num_queues)) q = 1; q < num_queues; ++q) {
354 auto& current_in = all_queues[q][gr][gr2];
355 while (!current_in.empty()) {
356 current_out.push(current_in.top());
364 while (!current_out.empty()) {
365 auto& mr = mr_out[current_out.top().second];
366 mr = std::min(mr,
static_cast<Rank_
>(current_out.size()));
370 while (!current_out.empty()) {
372 const auto curtop = current_out.top();
375 while (!current_out.empty() && current_out.top().first == curtop.first) {
376 tie_buffer.push_back(current_out.top().second);
381 const Rank_ tied_rank = current_out.size() + 1;
383 mr_out[curtop.second] = std::min(mr_out[curtop.second], tied_rank);
384 for (
const auto t : tie_buffer) {
385 mr_out[t] = std::min(mr_out[t], tied_rank);
391 }, ngroups, num_threads);
394template<
typename Index_,
typename Stat_,
typename Rank_>
395void process_simple_summary_effects(
397 const std::size_t ngroups,
398 const std::size_t nblocks,
399 const std::size_t ncombos,
400 const std::vector<Stat_>& combo_means,
401 const std::vector<Stat_>& combo_vars,
402 const std::vector<Stat_>& combo_detected,
403 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
404 const std::vector<Stat_>& combo_weights,
405 const double threshold,
406 const Index_ minrank_limit,
407 const bool minrank_keep_ties,
408 const int num_threads)
410 std::vector<MinrankTopQueues<Stat_, Index_> > cohens_d_minrank_all_queues, delta_mean_minrank_all_queues, delta_detected_minrank_all_queues;
411 if (output.cohens_d.size()) {
412 preallocate_minrank_queues(ngroups, cohens_d_minrank_all_queues, output.cohens_d, minrank_limit, minrank_keep_ties, num_threads);
414 if (output.delta_mean.size()) {
415 preallocate_minrank_queues(ngroups, delta_mean_minrank_all_queues, output.delta_mean, minrank_limit, minrank_keep_ties, num_threads);
417 if (output.delta_detected.size()) {
418 preallocate_minrank_queues(ngroups, delta_detected_minrank_all_queues, output.delta_detected, minrank_limit, minrank_keep_ties, num_threads);
421 std::vector<Stat_> total_weights_per_group;
422 auto total_weights_ptr = combo_weights.data();
423 if (!output.mean.empty() || !output.detected.empty()) {
425 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
426 total_weights_ptr = total_weights_per_group.data();
430 PrecomputedPairwiseWeights<Stat_> preweights;
431 if (!output.cohens_d.empty() || !output.delta_mean.empty() || !output.delta_detected.empty()) {
432 preweights = PrecomputedPairwiseWeights<Stat_>(ngroups, nblocks, combo_weights.data());
435 const auto ngroups2 = sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups);
437 std::vector<Stat_> pairwise_buffer(ngroups2);
438 std::vector<Stat_> summary_buffer(ngroups);
440 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
441 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
443 if (!output.mean.empty()) {
444 const auto tmp_means = combo_means.data() + in_offset;
445 average_group_stats(gene, ngroups, nblocks, tmp_means, combo_weights.data(), total_weights_ptr, output.mean);
448 if (!output.detected.empty()) {
449 const auto tmp_detected = combo_detected.data() + in_offset;
450 average_group_stats(gene, ngroups, nblocks, tmp_detected, combo_weights.data(), total_weights_ptr, output.detected);
453 if (output.cohens_d.size()) {
454 const auto tmp_means = combo_means.data() + in_offset;
455 const auto tmp_variances = combo_vars.data() + in_offset;
456 compute_pairwise_cohens_d(tmp_means, tmp_variances, ngroups, nblocks, preweights, threshold, pairwise_buffer.data());
457 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, cohens_d_minrank_all_queues[t], output.cohens_d);
460 if (output.delta_mean.size()) {
461 const auto tmp_means = combo_means.data() + in_offset;
462 compute_pairwise_simple_diff(tmp_means, ngroups, nblocks, preweights, pairwise_buffer.data());
463 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, delta_mean_minrank_all_queues[t], output.delta_mean);
466 if (output.delta_detected.size()) {
467 const auto tmp_det = combo_detected.data() + in_offset;
468 compute_pairwise_simple_diff(tmp_det, ngroups, nblocks, preweights, pairwise_buffer.data());
469 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, delta_detected_minrank_all_queues[t], output.delta_detected);
472 }, ngenes, num_threads);
474 if (output.cohens_d.size()) {
475 report_minrank_from_queues(ngenes, ngroups, cohens_d_minrank_all_queues, output.cohens_d, num_threads, minrank_keep_ties);
478 if (output.delta_mean.size()) {
479 report_minrank_from_queues(ngenes, ngroups, delta_mean_minrank_all_queues, output.delta_mean, num_threads, minrank_keep_ties);
482 if (output.delta_detected.size()) {
483 report_minrank_from_queues(ngenes, ngroups, delta_detected_minrank_all_queues, output.delta_detected, num_threads, minrank_keep_ties);
487template<
typename Index_,
typename Stat_,
typename Rank_>
488ScoreMarkersSummaryBuffers<Stat_, Rank_> preallocate_summary_results(
490 const std::size_t ngroups,
491 ScoreMarkersSummaryResults<Stat_, Rank_>& store,
492 const ScoreMarkersSummaryOptions& options)
494 ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
496 if (options.compute_group_mean) {
497 preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
500 if (options.compute_group_detected) {
501 preallocate_average_results(ngenes, ngroups, store.detected, output.detected);
504 if (options.compute_cohens_d) {
505 output.cohens_d = fill_summary_results(
510 options.compute_mean,
511 options.compute_median,
513 options.compute_min_rank
517 if (options.compute_auc) {
518 output.auc = fill_summary_results(
523 options.compute_mean,
524 options.compute_median,
526 options.compute_min_rank
530 if (options.compute_delta_mean) {
531 output.delta_mean = fill_summary_results(
536 options.compute_mean,
537 options.compute_median,
539 options.compute_min_rank
543 if (options.compute_delta_detected) {
544 output.delta_detected = fill_summary_results(
547 store.delta_detected,
549 options.compute_mean,
550 options.compute_median,
552 options.compute_min_rank
570 const std::size_t ngroups,
571 const Group_*
const group,
572 const std::size_t nblocks,
573 const Block_*
const block,
574 const std::size_t ncombos,
575 const std::size_t*
const combo,
576 const std::vector<Index_>& combo_sizes,
577 const ScoreMarkersSummaryOptions& options,
578 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output
580 const auto ngenes = matrix.
nrow();
581 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
582 std::vector<Stat_> combo_means, combo_vars, combo_detected;
583 if (!output.mean.empty() || !output.cohens_d.empty() || !output.delta_mean.empty()) {
584 combo_means.resize(payload_size);
586 if (!output.cohens_d.empty()) {
587 combo_vars.resize(payload_size);
589 if (!output.detected.empty() || !output.delta_detected.empty()) {
590 combo_detected.resize(payload_size);
597 options.block_weight_policy,
598 options.variable_block_weight_parameters
601 const Index_ minrank_limit = sanisizer::cap<Index_>(options.min_rank_limit);
603 if (!output.auc.empty()) {
604 std::vector<MinrankTopQueues<Stat_, Index_> > auc_minrank_all_queues;
605 preallocate_minrank_queues(ngroups, auc_minrank_all_queues, output.auc, minrank_limit, options.min_rank_preserve_ties, options.num_threads);
607 struct AucResultWorkspace {
608 AucResultWorkspace() =
default;
609 AucResultWorkspace(
const std::size_t ngroups, MinrankTopQueues<Stat_, Index_>& queues) :
610 pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
611 summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups)),
614 std::vector<Stat_> pairwise_buffer;
615 std::vector<Stat_> summary_buffer;
616 MinrankTopQueues<Stat_, Index_>* queue_ptr;
619 scan_matrix_by_row_custom_auc<single_block_>(
631 [&](
const int t) -> AucResultWorkspace {
632 return AucResultWorkspace(ngroups, auc_minrank_all_queues[t]);
636 AucScanWorkspace<Value_, Group_, Index_, Stat_>& auc_work,
637 AucResultWorkspace& res_work
639 process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
640 compute_summary_stats_per_gene(gene, ngroups, res_work.pairwise_buffer.data(), res_work.summary_buffer, *(res_work.queue_ptr), output.auc);
647 report_minrank_from_queues(ngenes, ngroups, auc_minrank_all_queues, output.auc, options.num_threads, options.min_rank_preserve_ties);
650 scan_matrix_by_row_full_auc<single_block_>(
661 static_cast<Stat_*
>(NULL),
669 scan_matrix_by_column(
672 if constexpr(single_block_) {
679 if constexpr(single_block_) {
693 process_simple_summary_effects(
705 options.min_rank_preserve_ties,
749template<
typename Value_,
typename Index_,
typename Group_,
typename Stat_,
typename Rank_>
752 const Group_*
const group,
756 const auto NC = matrix.
ncol();
757 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
758 const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
760 internal::score_markers_summary<true>(
765 static_cast<int*
>(NULL),
767 static_cast<std::size_t*
>(NULL),
802template<
typename Value_,
typename Index_,
typename Group_,
typename Block_,
typename Stat_,
typename Rank_>
805 const Group_*
const group,
806 const Block_*
const block,
810 const auto NC = matrix.
ncol();
811 const auto ngroups = output.
mean.size();
812 const auto nblocks = tatami_stats::total_groups(block, NC);
814 const auto combinations = internal::create_combinations(ngroups, group, block, NC);
815 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
816 const auto ncombos = combo_sizes.size();
818 internal::score_markers_summary<false>(
820 sanisizer::cast<std::size_t>(ngroups),
822 sanisizer::cast<std::size_t>(nblocks),
824 sanisizer::cast<std::size_t>(ncombos),
850template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_>
853 const Group_*
const group,
856 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
858 const auto buffers = internal::preallocate_summary_results(matrix.
nrow(), ngroups, output, options);
883template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_,
typename Block_>
886 const Group_*
const group,
887 const Block_*
const block,
890 const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
892 const auto buffers = internal::preallocate_summary_results(matrix.
nrow(), ngroups, output, options);
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:750
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:803
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:29
Utilities for effect summarization.