294template<
typename Stat_,
typename Index_>
295using MinrankTopQueues = std::vector<std::optional<std::vector<topicks::TopQueue<Stat_, Index_> > > >;
297template<
typename Stat_,
typename Index_,
typename Rank_>
298void preallocate_minrank_queues(
299 const std::size_t ngroups,
300 MinrankTopQueues<Stat_, Index_>& queue,
309 sanisizer::resize(queue, ngroups);
310 for (I<
decltype(ngroups)> g1 = 0; g1 < ngroups; ++g1) {
311 if (summaries[g1].min_rank == NULL) {
316 auto& g_queue = *(queue[g1]);
317 sanisizer::reserve(g_queue, ngroups);
318 for (I<
decltype(ngroups)> g2 = 0; g2 < ngroups; ++g2) {
319 g_queue.emplace_back(limit,
true, qopt);
324template<
typename Stat_,
typename Index_,
typename Rank_>
325void compute_summary_stats_per_gene(
327 const std::size_t ngroups,
328 const Stat_*
const pairwise_buffer_ptr,
329 std::vector<Stat_>& summary_buffer,
330 MaybeMultipleQuantiles<Stat_>& summary_qcalcs,
331 MinrankTopQueues<Stat_, Index_>& minrank_queues,
334 for (I<
decltype(ngroups)> gr = 0; gr < ngroups; ++gr) {
335 auto& cursummary = summaries[gr];
336 const auto in_offset = sanisizer::product_unsafe<std::size_t>(ngroups, gr);
337 summarize_comparisons(ngroups, pairwise_buffer_ptr + in_offset, gr, gene, cursummary, summary_qcalcs, summary_buffer);
339 if (cursummary.min_rank) {
340 auto& gr_queue = *(minrank_queues[gr]);
341 for (I<
decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
343 gr_queue[gr2].emplace(pairwise_buffer_ptr[in_offset + gr2], gene);
350template<
typename Stat_,
typename Index_,
typename Rank_>
351void report_minrank_from_queues(
353 const std::size_t ngroups,
354 std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > >& all_queues,
356 const int num_threads,
359 if (all_queues.empty()) {
367 tatami::parallelize([&](
const int,
const std::size_t start,
const std::size_t length) ->
void {
368 std::vector<Index_> tie_buffer;
370 for (I<
decltype(ngroups)> gr = start, grend = start + length; gr < grend; ++gr) {
371 const auto mr_out = summaries[gr].min_rank;
372 if (mr_out == NULL) {
377 const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
378 std::fill_n(mr_out, ngenes, maxrank_placeholder);
380 auto& first_mr_queue = *(all_queues.front());
381 for (I<
decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
388 auto current_out = std::move((*(first_mr_queue[gr]))[gr2]);
390 const auto num_queues = all_queues.size();
391 for (I<
decltype(num_queues)> q = 1; q < num_queues; ++q) {
392 auto& current_mr_queue = *(all_queues[q]);
393 auto current_in = std::move((*(current_mr_queue[gr]))[gr2]);
394 while (!current_in.empty()) {
395 current_out.push(current_in.top());
403 while (!current_out.empty()) {
404 auto& mr = mr_out[current_out.top().second];
405 mr = std::min(mr,
static_cast<Rank_
>(current_out.size()));
409 while (!current_out.empty()) {
411 const auto curtop = current_out.top();
414 while (!current_out.empty() && current_out.top().first == curtop.first) {
415 tie_buffer.push_back(current_out.top().second);
420 const Rank_ tied_rank = current_out.size() + 1;
422 mr_out[curtop.second] = std::min(mr_out[curtop.second], tied_rank);
423 for (
const auto t : tie_buffer) {
424 mr_out[t] = std::min(mr_out[t], tied_rank);
430 }, ngroups, num_threads);
433template<
typename Index_,
typename Stat_,
typename Rank_>
434void process_simple_summary_effects(
436 const std::size_t ngroups,
437 const std::size_t nblocks,
438 const std::size_t ncombos,
439 const std::vector<Stat_>& combo_means,
440 const std::vector<Stat_>& combo_vars,
441 const std::vector<Stat_>& combo_detected,
442 const double threshold,
443 const BlockAverageInfo<Stat_>& average_info,
444 const std::optional<std::vector<double> >& summary_quantiles,
445 const Index_ minrank_limit,
446 const bool minrank_keep_ties,
448 const int num_threads
450 std::optional<std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > > > cohens_d_minrank_all_queues, delta_mean_minrank_all_queues, delta_detected_minrank_all_queues;
452 cohens_d_minrank_all_queues.emplace(sanisizer::cast<I<
decltype(cohens_d_minrank_all_queues->size())> >(num_threads));
455 delta_mean_minrank_all_queues.emplace(sanisizer::cast<I<
decltype(delta_mean_minrank_all_queues->size())> >(num_threads));
458 delta_detected_minrank_all_queues.emplace(sanisizer::cast<I<
decltype(delta_detected_minrank_all_queues->size())> >(num_threads));
461 std::optional<std::vector<Stat_> > total_weights_per_group;
462 const Stat_* total_weights_ptr = NULL;
463 if (average_info.use_mean()) {
466 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, average_info.combo_weights().data());
467 total_weights_ptr = total_weights_per_group->data();
469 total_weights_ptr = average_info.combo_weights().data();
474 std::optional<PrecomputedPairwiseWeights<Stat_> > preweights;
475 if (average_info.use_mean()) {
477 preweights = PrecomputedPairwiseWeights<Stat_>(ngroups, nblocks, average_info.combo_weights().data());
481 const auto ngroups2 = sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups);
482 const auto nused =
tatami::parallelize([&](
const int t,
const Index_ start,
const Index_ length) ->
void {
483 std::vector<Stat_> pairwise_buffer(ngroups2);
484 std::vector<Stat_> summary_buffer(ngroups);
485 auto summary_qcalcs = setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups);
487 std::optional<std::vector<Stat_> > qbuffer, qrevbuffer;
488 std::optional<quickstats::SingleQuantileVariableNumber<Stat_, std::size_t> > qcalc;
489 if (!average_info.use_mean()) {
491 qrevbuffer.emplace();
492 qcalc.emplace(nblocks, average_info.quantile());
495 std::optional<MinrankTopQueues<Stat_, Index_> > cohens_d_minrank_queue, delta_mean_minrank_queue, delta_detected_minrank_queue;
497 cohens_d_minrank_queue.emplace();
498 preallocate_minrank_queues(ngroups, *cohens_d_minrank_queue, output.
cohens_d, minrank_limit, minrank_keep_ties);
501 delta_mean_minrank_queue.emplace();
502 preallocate_minrank_queues(ngroups, *delta_mean_minrank_queue, output.
delta_mean, minrank_limit, minrank_keep_ties);
505 delta_detected_minrank_queue.emplace();
506 preallocate_minrank_queues(ngroups, *delta_detected_minrank_queue, output.
delta_detected, minrank_limit, minrank_keep_ties);
509 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
510 const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
512 if (!output.
mean.empty()) {
513 const auto tmp_means = combo_means.data() + in_offset;
514 if (average_info.use_mean()) {
515 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_means, average_info.combo_weights().data(), total_weights_ptr, output.
mean);
517 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_means, *qbuffer, *qcalc, output.
mean);
522 const auto tmp_detected = combo_detected.data() + in_offset;
523 if (average_info.use_mean()) {
524 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_detected, average_info.combo_weights().data(), total_weights_ptr, output.
detected);
526 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_detected, *qbuffer, *qcalc, output.
detected);
531 const auto tmp_means = combo_means.data() + in_offset;
532 const auto tmp_variances = combo_vars.data() + in_offset;
533 if (average_info.use_mean()) {
534 compute_pairwise_cohens_d_blockmean(tmp_means, tmp_variances, ngroups, nblocks, threshold, *preweights, pairwise_buffer.data());
536 compute_pairwise_cohens_d_blockquantile(tmp_means, tmp_variances, ngroups, nblocks, threshold, *qbuffer, *qrevbuffer, *qcalc, pairwise_buffer.data());
538 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *cohens_d_minrank_queue, output.
cohens_d);
542 const auto tmp_means = combo_means.data() + in_offset;
543 if (average_info.use_mean()) {
544 compute_pairwise_simple_diff_blockmean(tmp_means, ngroups, nblocks, *preweights, pairwise_buffer.data());
546 compute_pairwise_simple_diff_blockquantile(tmp_means, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
548 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *delta_mean_minrank_queue, output.
delta_mean);
552 const auto tmp_det = combo_detected.data() + in_offset;
553 if (average_info.use_mean()) {
554 compute_pairwise_simple_diff_blockmean(tmp_det, ngroups, nblocks, *preweights, pairwise_buffer.data());
556 compute_pairwise_simple_diff_blockquantile(tmp_det, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
558 compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, *delta_detected_minrank_queue, output.
delta_detected);
564 (*cohens_d_minrank_all_queues)[t] = std::move(cohens_d_minrank_queue);
567 (*delta_mean_minrank_all_queues)[t] = std::move(delta_mean_minrank_queue);
570 (*delta_detected_minrank_all_queues)[t] = std::move(delta_detected_minrank_queue);
572 }, ngenes, num_threads);
575 cohens_d_minrank_all_queues->resize(nused);
576 report_minrank_from_queues(ngenes, ngroups, *cohens_d_minrank_all_queues, output.
cohens_d, num_threads, minrank_keep_ties);
579 delta_mean_minrank_all_queues->resize(nused);
580 report_minrank_from_queues(ngenes, ngroups, *delta_mean_minrank_all_queues, output.
delta_mean, num_threads, minrank_keep_ties);
583 delta_detected_minrank_all_queues->resize(nused);
584 report_minrank_from_queues(ngenes, ngroups, *delta_detected_minrank_all_queues, output.
delta_detected, num_threads, minrank_keep_ties);
588template<
typename Index_,
typename Stat_,
typename Rank_>
591 const std::size_t ngroups,
598 preallocate_average_results(ngenes, ngroups, store.
mean, output.
mean);
602 preallocate_average_results(ngenes, ngroups, store.
detected, output.
detected);
606 output.
cohens_d = fill_summary_results(
614 options.compute_summary_quantiles,
615 options.compute_min_rank
620 output.
auc = fill_summary_results(
628 options.compute_summary_quantiles,
629 options.compute_min_rank
642 options.compute_summary_quantiles,
643 options.compute_min_rank
656 options.compute_summary_quantiles,
657 options.compute_min_rank
673void score_markers_summary(
675 const std::size_t ngroups,
676 const Group_*
const group,
677 const std::size_t nblocks,
678 const Block_*
const block,
679 const std::size_t ncombos,
680 const std::size_t*
const combo,
681 const std::vector<Index_>& combo_sizes,
685 const auto ngenes = matrix.
nrow();
686 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
687 std::vector<Stat_> combo_means, combo_vars, combo_detected;
689 combo_means.resize(payload_size);
692 combo_vars.resize(payload_size);
695 combo_detected.resize(payload_size);
700 BlockAverageInfo<Stat_> average_info;
701 if (options.block_average_policy == BlockAveragePolicy::MEAN) {
702 average_info = BlockAverageInfo<Stat_>(
705 options.block_weight_policy,
706 options.variable_block_weight_parameters
710 average_info = BlockAverageInfo<Stat_>(options.block_quantile);
713 const Index_ minrank_limit = sanisizer::cap<Index_>(options.min_rank_limit);
714 internal::validate_quantiles(options.compute_summary_quantiles);
716 if (!output.
auc.empty()) {
717 auto auc_minrank_all_queues = sanisizer::create<std::vector<std::optional<MinrankTopQueues<Stat_, Index_> > > >(options.
num_threads);
719 struct AucResultWorkspace {
720 AucResultWorkspace(
const std::size_t ngroups,
const std::optional<std::vector<double> >& summary_quantiles) :
721 pairwise_buffer(sanisizer::product<
typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
722 summary_buffer(sanisizer::cast<
typename std::vector<Stat_>::size_type>(ngroups)),
723 summary_qcalcs(setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups))
727 std::vector<Stat_> pairwise_buffer;
728 std::vector<Stat_> summary_buffer;
729 MaybeMultipleQuantiles<Stat_> summary_qcalcs;
730 MinrankTopQueues<Stat_, Index_> queue;
733 const auto num_used = scan_matrix_by_row_custom_auc<single_block_>(
747 [&](
const int) -> AucResultWorkspace {
748 AucResultWorkspace res_work(ngroups, options.compute_summary_quantiles);
749 preallocate_minrank_queues(ngroups, res_work.queue, output.
auc, minrank_limit, options.min_rank_preserve_ties);
752 [&](
const Index_ gene, AucScanWorkspace<Value_, Group_, Stat_, Index_>& auc_work, AucResultWorkspace& res_work) ->
void {
753 process_auc_for_rows(auc_work, ngroups, nblocks, options.
threshold, res_work.pairwise_buffer.data());
754 compute_summary_stats_per_gene(gene, ngroups, res_work.pairwise_buffer.data(), res_work.summary_buffer, res_work.summary_qcalcs, res_work.queue, output.
auc);
756 [&](
const int t, AucResultWorkspace& res_work) ->
void {
757 auc_minrank_all_queues[t] = std::move(res_work.queue);
762 auc_minrank_all_queues.resize(num_used);
763 report_minrank_from_queues(ngenes, ngroups, auc_minrank_all_queues, output.
auc, options.
num_threads, options.min_rank_preserve_ties);
766 scan_matrix_by_row_full_auc<single_block_>(
779 static_cast<Stat_*
>(NULL),
785 scan_matrix_by_column(
788 if constexpr(single_block_) {
795 if constexpr(single_block_) {
809 process_simple_summary_effects(
819 options.compute_summary_quantiles,
821 options.min_rank_preserve_ties,