template<typename Stat_, typename Index_>
using MinrankTopQueues = std::vector<std::vector<topicks::TopQueue<Stat_, Index_> > >;
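// The outer vector of a MinrankTopQueues is indexed by group and the inner vector by the other
// group in each pairwise comparison, so all_queues[t][g1][g2] is the bounded top-queue of effect
// sizes for thread 't' and the comparison of group 'g1' against group 'g2'. Queues are only
// populated for groups that request a min-rank output buffer.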
template<typename Stat_, typename Index_, typename Rank_>
void preallocate_minrank_queues(
    const std::size_t ngroups,
    std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
    // ... (the per-group summary buffers 'summaries' and the queue size limit 'limit' are also taken here) ...
    const bool keep_ties,
    const int num_threads
) {
    // ... (construction of the queue options 'qopt', presumably from 'keep_ties', is elided) ...
    sanisizer::resize(all_queues, num_threads);
    for (int t = 0; t < num_threads; ++t) {
        sanisizer::resize(all_queues[t], ngroups);
        for (I<decltype(ngroups)> g1 = 0; g1 < ngroups; ++g1) {
            if (summaries[g1].min_rank == NULL) {
                continue; // no min-rank output requested for this group.
            }
            for (I<decltype(ngroups)> g2 = 0; g2 < ngroups; ++g2) {
                all_queues[t][g1].emplace_back(limit, true, qopt);
            }
        }
    }
}
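// Computes the per-group summary statistics for a single gene from its ngroups-by-ngroups matrix
// of pairwise effects, and pushes each effect size (together with the gene index) into the
// corresponding min-rank queue so that the smallest rank across comparisons can be reported later.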
template<typename Stat_, typename Index_, typename Rank_>
void compute_summary_stats_per_gene(
    const Index_ gene,
    const std::size_t ngroups,
    const Stat_* const pairwise_buffer_ptr,
    std::vector<Stat_>& summary_buffer,
    MaybeMultipleQuantiles<Stat_>& summary_qcalcs,
    MinrankTopQueues<Stat_, Index_>& minrank_queues,
    // ... (the per-group summary buffers 'summaries' are also taken here) ...
) {
    for (I<decltype(ngroups)> gr = 0; gr < ngroups; ++gr) {
        auto& cursummary = summaries[gr];
        const auto in_offset = sanisizer::product_unsafe<std::size_t>(ngroups, gr);
        summarize_comparisons(ngroups, pairwise_buffer_ptr + in_offset, gr, gene, cursummary, summary_qcalcs, summary_buffer);

        if (cursummary.min_rank) {
            auto& cur_queues = minrank_queues[gr];
            for (I<decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
                cur_queues[gr2].emplace(pairwise_buffer_ptr[in_offset + gr2], gene);
            }
        }
    }
}
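// Merges the per-thread queues for each (group, comparison) pair and converts their contents into
// minimum ranks: for every gene retained in a queue, the reported min_rank is the smallest rank it
// achieves across all comparisons involving that group.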
template<typename Stat_, typename Index_, typename Rank_>
void report_minrank_from_queues(
    const Index_ ngenes,
    const std::size_t ngroups,
    std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
    // ... (the per-group summary buffers 'summaries' are also taken here) ...
    const int num_threads,
    const bool keep_ties
) {
    tatami::parallelize([&](const int, const std::size_t start, const std::size_t length) -> void {
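        // Each parallel job handles a contiguous range of groups; 'tie_buffer' collects the genes
        // that share the same effect size when ties are preserved.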
        std::vector<Index_> tie_buffer;

        for (I<decltype(ngroups)> gr = start, grend = start + length; gr < grend; ++gr) {
            const auto mr_out = summaries[gr].min_rank;
            if (mr_out == NULL) {
                continue; // no min-rank output requested for this group.
            }

            // Start from the largest possible rank; each comparison can only lower it.
            const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
            std::fill_n(mr_out, ngenes, maxrank_placeholder);

            for (I<decltype(ngroups)> gr2 = 0; gr2 < ngroups; ++gr2) {
                auto& current_out = all_queues.front()[gr][gr2];

                // Drain the other threads' queues for this comparison into the first thread's queue.
                const auto num_queues = all_queues.size();
                for (I<decltype(num_queues)> q = 1; q < num_queues; ++q) {
                    auto& current_in = all_queues[q][gr][gr2];
                    while (!current_in.empty()) {
                        current_out.push(current_in.top());
                        current_in.pop();
                    }
                }
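                // Convert the merged queue into ranks: the rank recorded for each popped element
                // is the number of elements still in the queue at that point, and a gene's
                // reported min_rank is only ever decreased across comparisons.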
                if (!keep_ties) { // branch on 'keep_ties' assumed; the original line is elided.
                    while (!current_out.empty()) {
                        auto& mr = mr_out[current_out.top().second];
                        mr = std::min(mr, static_cast<Rank_>(current_out.size()));
                        current_out.pop();
                    }
                } else {
                    while (!current_out.empty()) {
                        const auto curtop = current_out.top();
                        current_out.pop();

                        // Collect all genes tied with the current top at the same effect size.
                        tie_buffer.clear();
                        while (!current_out.empty() && current_out.top().first == curtop.first) {
                            tie_buffer.push_back(current_out.top().second);
                            current_out.pop();
                        }

                        // All tied genes share the best rank within the tied stretch.
                        const Rank_ tied_rank = current_out.size() + 1;
                        mr_out[curtop.second] = std::min(mr_out[curtop.second], tied_rank);
                        for (const auto t : tie_buffer) {
                            mr_out[t] = std::min(mr_out[t], tied_rank);
                        }
                    }
                }
            }
        }
    }, ngroups, num_threads);
}
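// Computes all of the "simple" pairwise effects (Cohen's d, delta-mean and delta-detected) for
// each gene from the per-combination means, variances and detected proportions, and then reduces
// them to per-group summaries. AUC is handled separately during the row-by-row scan in
// score_markers_summary().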
template<typename Index_, typename Stat_, typename Rank_>
void process_simple_summary_effects(
    const Index_ ngenes,
    const std::size_t ngroups,
    const std::size_t nblocks,
    const std::size_t ncombos,
    const std::vector<Stat_>& combo_means,
    const std::vector<Stat_>& combo_vars,
    const std::vector<Stat_>& combo_detected,
    const double threshold,
    const BlockAverageInfo<Stat_>& average_info,
    const std::optional<std::vector<double> >& summary_quantiles,
    const Index_ minrank_limit,
    const bool minrank_keep_ties,
    // ... (the output buffers 'output' are also taken here) ...
    const int num_threads
) {
    std::vector<MinrankTopQueues<Stat_, Index_> > cohens_d_minrank_all_queues, delta_mean_minrank_all_queues, delta_detected_minrank_all_queues;
    preallocate_minrank_queues(ngroups, cohens_d_minrank_all_queues, output.cohens_d, minrank_limit, minrank_keep_ties, num_threads);
    preallocate_minrank_queues(ngroups, delta_mean_minrank_all_queues, output.delta_mean, minrank_limit, minrank_keep_ties, num_threads);
    preallocate_minrank_queues(ngroups, delta_detected_minrank_all_queues, output.delta_detected, minrank_limit, minrank_keep_ties, num_threads);
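    // Precompute the weights used for block-mean averaging; these are not needed when the
    // across-block quantile is used instead.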
    std::optional<std::vector<Stat_> > total_weights_per_group;
    const Stat_* total_weights_ptr = NULL;
    if (average_info.use_mean()) {
        if (nblocks > 1) { // assumed condition; the original line is elided.
            total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, average_info.combo_weights().data());
            total_weights_ptr = total_weights_per_group->data();
        } else {
            total_weights_ptr = average_info.combo_weights().data();
        }
    }

    std::optional<PrecomputedPairwiseWeights<Stat_> > preweights;
    if (average_info.use_mean()) {
        preweights = PrecomputedPairwiseWeights<Stat_>(ngroups, nblocks, average_info.combo_weights().data());
    }
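    // Parallelize over genes, giving each worker its own pairwise buffer, summary buffer and
    // quantile calculators.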
    tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void { // opening of the parallel section reconstructed; signature assumed.
        const auto ngroups2 = sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups);
        std::vector<Stat_> pairwise_buffer(ngroups2);
        std::vector<Stat_> summary_buffer(ngroups);
        auto summary_qcalcs = setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups);

        std::optional<std::vector<Stat_> > qbuffer, qrevbuffer;
        std::optional<scran_blocks::SingleQuantileVariable<Stat_, typename std::vector<Stat_>::iterator> > qcalc;
        if (!average_info.use_mean()) {
            qbuffer.emplace();
            qrevbuffer.emplace();
            qcalc.emplace(nblocks, average_info.quantile());
        }
        for (Index_ gene = start, end = start + length; gene < end; ++gene) {
            const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);

            if (!output.mean.empty()) {
                const auto tmp_means = combo_means.data() + in_offset;
                if (average_info.use_mean()) {
                    average_group_stats_blockmean(gene, ngroups, nblocks, tmp_means, average_info.combo_weights().data(), total_weights_ptr, output.mean);
                } else {
                    average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_means, *qbuffer, *qcalc, output.mean);
                }
            }

            if (!output.detected.empty()) {
                const auto tmp_detected = combo_detected.data() + in_offset;
                if (average_info.use_mean()) {
                    average_group_stats_blockmean(gene, ngroups, nblocks, tmp_detected, average_info.combo_weights().data(), total_weights_ptr, output.detected);
                } else {
                    average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_detected, *qbuffer, *qcalc, output.detected);
                }
            }
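            // For each effect type, the full ngroups-by-ngroups matrix of pairwise effects is
            // computed into 'pairwise_buffer' and then reduced to per-group summaries (plus
            // min-rank queue updates) by compute_summary_stats_per_gene().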
            if (!output.cohens_d.empty()) {
                const auto tmp_means = combo_means.data() + in_offset;
                const auto tmp_variances = combo_vars.data() + in_offset;
                if (average_info.use_mean()) {
                    compute_pairwise_cohens_d_blockmean(tmp_means, tmp_variances, ngroups, nblocks, threshold, *preweights, pairwise_buffer.data());
                } else {
                    compute_pairwise_cohens_d_blockquantile(tmp_means, tmp_variances, ngroups, nblocks, threshold, *qbuffer, *qrevbuffer, *qcalc, pairwise_buffer.data());
                }
                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, cohens_d_minrank_all_queues[t], output.cohens_d);
            }

            if (!output.delta_mean.empty()) {
                const auto tmp_means = combo_means.data() + in_offset;
                if (average_info.use_mean()) {
                    compute_pairwise_simple_diff_blockmean(tmp_means, ngroups, nblocks, *preweights, pairwise_buffer.data());
                } else {
                    compute_pairwise_simple_diff_blockquantile(tmp_means, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
                }
                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, delta_mean_minrank_all_queues[t], output.delta_mean);
            }

            if (!output.delta_detected.empty()) {
                const auto tmp_det = combo_detected.data() + in_offset;
                if (average_info.use_mean()) {
                    compute_pairwise_simple_diff_blockmean(tmp_det, ngroups, nblocks, *preweights, pairwise_buffer.data());
                } else {
                    compute_pairwise_simple_diff_blockquantile(tmp_det, ngroups, nblocks, *qbuffer, *qcalc, pairwise_buffer.data());
                }
                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, summary_qcalcs, delta_detected_minrank_all_queues[t], output.delta_detected);
            }
        }
    }, ngenes, num_threads);
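    // Combine the per-thread queues into the final min-rank values for each effect type.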
    report_minrank_from_queues(ngenes, ngroups, cohens_d_minrank_all_queues, output.cohens_d, num_threads, minrank_keep_ties);
    report_minrank_from_queues(ngenes, ngroups, delta_mean_minrank_all_queues, output.delta_mean, num_threads, minrank_keep_ties);
    report_minrank_from_queues(ngenes, ngroups, delta_detected_minrank_all_queues, output.delta_detected, num_threads, minrank_keep_ties);
}
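// Helper that allocates the result vectors in the store and wires them into the corresponding
// output buffers (averages for means/detected proportions, plus the requested summaries for each
// effect type). Its actual name and most of its signature are not shown in this excerpt, so a
// hypothetical name is used below.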
template<typename Index_, typename Stat_, typename Rank_>
void allocate_summary_outputs( // hypothetical name and signature; the original declaration is elided.
    const std::size_t ngroups,
    // ... (the gene count, the result store 'store', the output buffers 'output' and the
    //      user-supplied 'options' are also taken here) ...
) {
    preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
    preallocate_average_results(ngenes, ngroups, store.detected, output.detected);

    output.cohens_d = fill_summary_results(
        // ... (leading arguments elided) ...
        options.compute_summary_quantiles,
        options.compute_min_rank
        // ...
    );

    output.auc = fill_summary_results(
        // ... (leading arguments elided) ...
        options.compute_summary_quantiles,
        options.compute_min_rank
        // ...
    );

    // Two further fill_summary_results() calls follow the same pattern (presumably for
    // delta_mean and delta_detected), again forwarding options.compute_summary_quantiles
    // and options.compute_min_rank.
}
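// Top-level driver: scans the matrix to collect the per-combination statistics (and, if requested,
// the AUC summaries), then computes the remaining summarized effect sizes for each group.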
// ... (template declaration elided; its parameters include at least 'single_block_', 'Value_', 'Index_', 'Group_', 'Block_' and 'Stat_') ...
void score_markers_summary(
    const tatami::Matrix<Value_, Index_>& matrix, // parameter assumed; the original line is elided.
    const std::size_t ngroups,
    const Group_* const group,
    const std::size_t nblocks,
    const Block_* const block,
    const std::size_t ncombos,
    const std::size_t* const combo,
    const std::vector<Index_>& combo_sizes,
    // ... (the user-supplied options and the output buffers are also taken here) ...
) {
    const auto ngenes = matrix.nrow();
    const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
    std::vector<Stat_> combo_means, combo_vars, combo_detected;
    combo_means.resize(payload_size);
    combo_vars.resize(payload_size);
    combo_detected.resize(payload_size);
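    // Decide how statistics are averaged across blocks: either a weighted mean based on the
    // chosen block weighting policy, or a quantile across blocks.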
    BlockAverageInfo<Stat_> average_info;
    if (options.block_average_policy == BlockAveragePolicy::MEAN) {
        average_info = BlockAverageInfo<Stat_>(
            // ... (leading arguments elided) ...
            options.block_weight_policy,
            options.variable_block_weight_parameters
        );
    } else {
        average_info = BlockAverageInfo<Stat_>(options.block_quantile);
    }

    const Index_ minrank_limit = sanisizer::cap<Index_>(options.min_rank_limit);
    internal::validate_quantiles(options.compute_summary_quantiles);
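    // If AUC summaries are requested, they are computed during a row-by-row scan of the matrix,
    // accumulating the summaries on the fly through per-thread workspaces.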
    if (!output.auc.empty()) {
        std::vector<MinrankTopQueues<Stat_, Index_> > auc_minrank_all_queues;
        preallocate_minrank_queues(ngroups, auc_minrank_all_queues, output.auc, minrank_limit, options.min_rank_preserve_ties, options.num_threads);

        struct AucResultWorkspace {
            AucResultWorkspace(const std::size_t ngroups, MinrankTopQueues<Stat_, Index_>& queues, const std::optional<std::vector<double> >& summary_quantiles) :
                pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
                summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups)),
                queue_ptr(&queues),
                summary_qcalcs(setup_multiple_quantiles<Stat_>(summary_quantiles, ngroups))
            {}

            std::vector<Stat_> pairwise_buffer;
            std::vector<Stat_> summary_buffer;
            MinrankTopQueues<Stat_, Index_>* queue_ptr;
            MaybeMultipleQuantiles<Stat_> summary_qcalcs;
        };
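        // Each worker constructs its own AucResultWorkspace, pointing at its share of the
        // preallocated min-rank queues; per-row AUCs are then reduced to summaries immediately.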
        scan_matrix_by_row_custom_auc<single_block_>(
            // ... (matrix, grouping/blocking information and the statistic buffers are passed here) ...
            [&](const int t) -> AucResultWorkspace {
                return AucResultWorkspace(ngroups, auc_minrank_all_queues[t], options.compute_summary_quantiles);
            },
            [&]( // per-row callback; its leading parameters are elided.
                AucScanWorkspace<Value_, Group_, Stat_, Index_>& auc_work,
                AucResultWorkspace& res_work
            ) -> void {
                process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
                compute_summary_stats_per_gene(
                    // ... (the gene index and ngroups are passed here) ...
                    res_work.pairwise_buffer.data(),
                    res_work.summary_buffer,
                    res_work.summary_qcalcs,
                    *(res_work.queue_ptr),
                    output.auc // assumed; the original argument is elided.
                );
            },
            // ... (remaining arguments elided) ...
        );

        report_minrank_from_queues(ngenes, ngroups, auc_minrank_all_queues, output.auc, options.num_threads, options.min_rank_preserve_ties);
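    // ... (the remainder of the AUC branch and the choice among the other scanning strategies are
    //      elided; the alternatives below collect the per-combination statistics without the
    //      custom per-row AUC workspaces) ...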
    scan_matrix_by_row_full_auc<single_block_>(
        // ... (arguments elided) ...
        static_cast<Stat_*>(NULL),
        // ...
    );
    scan_matrix_by_column(/* ... arguments elided ... */);
    if constexpr(single_block_) {
        // ...
    }
    if constexpr(single_block_) {
        // ...
    }
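    // With the per-combination means, variances and detected proportions collected by the scans
    // above, all of the non-AUC effects and their summaries are computed in a single pass.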
    process_simple_summary_effects(
        // ... (gene count, dimensions, per-combination statistics, threshold and averaging info) ...
        options.compute_summary_quantiles,
        // ...
        options.min_rank_preserve_ties,
        // ...