1#ifndef SCRAN_SCORE_MARKERS_HPP 
    2#define SCRAN_SCORE_MARKERS_HPP 
    6#include "sanisizer/sanisizer.hpp" 
   13#include "scan_matrix.hpp" 
   14#include "cohens_d.hpp" 
   15#include "simple_diff.hpp" 
   17#include "average_group_stats.hpp" 
   18#include "create_combinations.hpp" 
  149template<
typename Stat_, 
typename Rank_>
 
  176    std::vector<SummaryBuffers<Stat_, Rank_> > 
cohens_d;
 
  185    std::vector<SummaryBuffers<Stat_, Rank_> > 
auc;
 
 
  211template<
typename Stat_, 
typename Rank_>
 
  217    std::vector<std::vector<Stat_> > 
mean;
 
  232    std::vector<SummaryResults<Stat_, Rank_> > 
cohens_d;
 
  241    std::vector<SummaryResults<Stat_, Rank_> > 
auc;
 
 
  267template<
typename Stat_, 
typename Index_>
 
  268using MinrankTopQueues = std::vector<std::vector<topicks::TopQueue<Stat_, Index_> > >;
 
  270template<
typename Stat_, 
typename Index_, 
typename Rank_>
 
  271void preallocate_minrank_queues( 
 
  272    const std::size_t ngroups,
 
  273    std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
 
  276    const bool keep_ties,
 
  277    const int num_threads
 
  283    sanisizer::resize(all_queues, num_threads);
 
  284    for (
int t = 0; t < num_threads; ++t) {
 
  285        sanisizer::resize(all_queues[t], ngroups);
 
  287        for (
decltype(I(ngroups)) g1 = 0; g1 < ngroups; ++g1) {
 
  288            if (summaries[g1].min_rank == NULL) {
 
  292            for (
decltype(I(ngroups)) g2 = 0; g2 < ngroups; ++g2) {
 
  293                all_queues[t][g1].emplace_back(limit, 
true, qopt);
 
  299template<
typename Stat_, 
typename Index_, 
typename Rank_>
 
  300void compute_summary_stats_per_gene(
 
  302    const std::size_t ngroups,
 
  303    const Stat_* 
const pairwise_buffer_ptr,
 
  304    std::vector<Stat_>& summary_buffer,
 
  305    MinrankTopQueues<Stat_, Index_>& minrank_queues,
 
  306    const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries
 
  308    for (
decltype(I(ngroups)) gr = 0; gr < ngroups; ++gr) {
 
  309        auto& cursummary = summaries[gr];
 
  310        const auto in_offset = sanisizer::product_unsafe<std::size_t>(ngroups, gr);
 
  311        summarize_comparisons(ngroups, pairwise_buffer_ptr + in_offset, gr, gene, cursummary, summary_buffer);
 
  313        if (cursummary.min_rank) {
 
  314            auto& cur_queues = minrank_queues[gr];
 
  315            for (
decltype(I(ngroups)) gr2 = 0; gr2 < ngroups; ++gr2) {
 
  317                    cur_queues[gr2].emplace(pairwise_buffer_ptr[in_offset + gr2], gene);
 
  324template<
typename Stat_, 
typename Index_, 
typename Rank_>
 
  325void report_minrank_from_queues(
 
  327    const std::size_t ngroups,
 
  328    std::vector<MinrankTopQueues<Stat_, Index_> >& all_queues,
 
  329    const std::vector<SummaryBuffers<Stat_, Rank_> >& summaries,
 
  330    const int num_threads,
 
  333    tatami::parallelize([&](
const int, 
const std::size_t start, 
const std::size_t length) -> 
void {
 
  334        std::vector<Index_> tie_buffer;
 
  336        for (
decltype(I(ngroups)) gr = start, grend = start + length; gr < grend; ++gr) {
 
  337            const auto mr_out = summaries[gr].min_rank;
 
  338            if (mr_out == NULL) {
 
  343            const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
 
  344            std::fill_n(mr_out, ngenes, maxrank_placeholder);
 
  346            for (
decltype(I(ngroups)) gr2 = 0; gr2 < ngroups; ++gr2) {
 
  350                auto& current_out = all_queues.front()[gr][gr2];
 
  352                const auto num_queues = all_queues.size();
 
  353                for (
decltype(I(num_queues)) q = 1; q < num_queues; ++q) {
 
  354                    auto& current_in = all_queues[q][gr][gr2];
 
  355                    while (!current_in.empty()) {
 
  356                        current_out.push(current_in.top());
 
  364                    while (!current_out.empty()) {
 
  365                        auto& mr = mr_out[current_out.top().second];
 
  366                        mr = std::min(mr, 
static_cast<Rank_
>(current_out.size()));
 
  370                    while (!current_out.empty()) {
 
  372                        const auto curtop = current_out.top();
 
  375                        while (!current_out.empty() && current_out.top().first == curtop.first) {
 
  376                            tie_buffer.push_back(current_out.top().second);
 
  381                        const Rank_ tied_rank = current_out.size() + 1;
 
  383                        mr_out[curtop.second] = std::min(mr_out[curtop.second], tied_rank);
 
  384                        for (
const auto t : tie_buffer) {
 
  385                            mr_out[t] = std::min(mr_out[t], tied_rank);
 
  391    }, ngroups, num_threads);
 
  394template<
typename Index_, 
typename Stat_, 
typename Rank_>
 
  395void process_simple_summary_effects(
 
  397    const std::size_t ngroups,
 
  398    const std::size_t nblocks,
 
  399    const std::size_t ncombos,
 
  400    const std::vector<Stat_>& combo_means,
 
  401    const std::vector<Stat_>& combo_vars,
 
  402    const std::vector<Stat_>& combo_detected,
 
  403    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
 
  404    const std::vector<Stat_>& combo_weights,
 
  405    const double threshold,
 
  406    const Index_ minrank_limit,
 
  407    const bool minrank_keep_ties,
 
  408    const int num_threads)
 
  410    std::vector<MinrankTopQueues<Stat_, Index_> > cohens_d_minrank_all_queues, delta_mean_minrank_all_queues, delta_detected_minrank_all_queues;
 
  411    if (output.cohens_d.size()) {
 
  412        preallocate_minrank_queues(ngroups, cohens_d_minrank_all_queues, output.cohens_d, minrank_limit, minrank_keep_ties, num_threads);
 
  414    if (output.delta_mean.size()) {
 
  415        preallocate_minrank_queues(ngroups, delta_mean_minrank_all_queues, output.delta_mean, minrank_limit, minrank_keep_ties, num_threads);
 
  417    if (output.delta_detected.size()) {
 
  418        preallocate_minrank_queues(ngroups, delta_detected_minrank_all_queues, output.delta_detected, minrank_limit, minrank_keep_ties, num_threads);
 
  421    std::vector<Stat_> total_weights_per_group;
 
  422    auto total_weights_ptr = combo_weights.data();
 
  423    if (!output.mean.empty() || !output.detected.empty()) {
 
  425            total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
 
  426            total_weights_ptr = total_weights_per_group.data();
 
  430    PrecomputedPairwiseWeights<Stat_> preweights;
 
  431    if (!output.cohens_d.empty() || !output.delta_mean.empty() || !output.delta_detected.empty()) {
 
  432        preweights =  PrecomputedPairwiseWeights<Stat_>(ngroups, nblocks, combo_weights.data());
 
  435    const auto ngroups2 = sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups);
 
  437        std::vector<Stat_> pairwise_buffer(ngroups2);
 
  438        std::vector<Stat_> summary_buffer(ngroups);
 
  440        for (Index_ gene = start, end = start + length; gene < end; ++gene) {
 
  441            const auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
 
  443            if (!output.mean.empty()) {
 
  444                const auto tmp_means = combo_means.data() + in_offset;
 
  445                average_group_stats(gene, ngroups, nblocks, tmp_means, combo_weights.data(), total_weights_ptr, output.mean);
 
  448            if (!output.detected.empty()) {
 
  449                const auto tmp_detected = combo_detected.data() + in_offset;
 
  450                average_group_stats(gene, ngroups, nblocks, tmp_detected, combo_weights.data(), total_weights_ptr, output.detected);
 
  453            if (output.cohens_d.size()) {
 
  454                const auto tmp_means = combo_means.data() + in_offset;
 
  455                const auto tmp_variances = combo_vars.data() + in_offset;
 
  456                compute_pairwise_cohens_d(tmp_means, tmp_variances, ngroups, nblocks, preweights, threshold, pairwise_buffer.data());
 
  457                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, cohens_d_minrank_all_queues[t], output.cohens_d);
 
  460            if (output.delta_mean.size()) {
 
  461                const auto tmp_means = combo_means.data() + in_offset;
 
  462                compute_pairwise_simple_diff(tmp_means, ngroups, nblocks, preweights, pairwise_buffer.data());
 
  463                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, delta_mean_minrank_all_queues[t], output.delta_mean);
 
  466            if (output.delta_detected.size()) {
 
  467                const auto tmp_det = combo_detected.data() + in_offset;
 
  468                compute_pairwise_simple_diff(tmp_det, ngroups, nblocks, preweights, pairwise_buffer.data());
 
  469                compute_summary_stats_per_gene(gene, ngroups, pairwise_buffer.data(), summary_buffer, delta_detected_minrank_all_queues[t], output.delta_detected);
 
  472    }, ngenes, num_threads);
 
  474    if (output.cohens_d.size()) {
 
  475        report_minrank_from_queues(ngenes, ngroups, cohens_d_minrank_all_queues, output.cohens_d, num_threads, minrank_keep_ties);
 
  478    if (output.delta_mean.size()) {
 
  479        report_minrank_from_queues(ngenes, ngroups, delta_mean_minrank_all_queues, output.delta_mean, num_threads, minrank_keep_ties);
 
  482    if (output.delta_detected.size()) {
 
  483        report_minrank_from_queues(ngenes, ngroups, delta_detected_minrank_all_queues, output.delta_detected, num_threads, minrank_keep_ties);
 
  487template<
typename Index_, 
typename Stat_, 
typename Rank_>
 
  488ScoreMarkersSummaryBuffers<Stat_, Rank_> preallocate_summary_results(
 
  490    const std::size_t ngroups,
 
  491    ScoreMarkersSummaryResults<Stat_, Rank_>& store,
 
  492    const ScoreMarkersSummaryOptions& options)
 
  494    ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
 
  496    if (options.compute_group_mean) { 
 
  497        preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
 
  500    if (options.compute_group_detected) { 
 
  501        preallocate_average_results(ngenes, ngroups, store.detected, output.detected);
 
  504    if (options.compute_cohens_d) {
 
  505        output.cohens_d = fill_summary_results(
 
  510            options.compute_mean,
 
  511            options.compute_median,
 
  513            options.compute_min_rank
 
  517    if (options.compute_auc) {
 
  518        output.auc = fill_summary_results(
 
  523            options.compute_mean,
 
  524            options.compute_median,
 
  526            options.compute_min_rank
 
  530    if (options.compute_delta_mean) {
 
  531        output.delta_mean = fill_summary_results(
 
  536            options.compute_mean,
 
  537            options.compute_median,
 
  539            options.compute_min_rank
 
  543    if (options.compute_delta_detected) {
 
  544        output.delta_detected = fill_summary_results(
 
  547            store.delta_detected,
 
  549            options.compute_mean,
 
  550            options.compute_median,
 
  552            options.compute_min_rank
 
  570    const std::size_t ngroups,
 
  571    const Group_* 
const group, 
 
  572    const std::size_t nblocks,
 
  573    const Block_* 
const block,
 
  574    const std::size_t ncombos,
 
  575    const std::size_t* 
const combo,
 
  576    const std::vector<Index_>& combo_sizes,
 
  577    const ScoreMarkersSummaryOptions& options,
 
  578    const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output
 
  580    const auto ngenes = matrix.
nrow();
 
  581    const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
 
  582    std::vector<Stat_> combo_means, combo_vars, combo_detected;
 
  583    if (!output.mean.empty() || !output.cohens_d.empty() || !output.delta_mean.empty()) {
 
  584        combo_means.resize(payload_size);
 
  586    if (!output.cohens_d.empty()) {
 
  587        combo_vars.resize(payload_size);
 
  589    if (!output.detected.empty() || !output.delta_detected.empty()) {
 
  590        combo_detected.resize(payload_size);
 
  597        options.block_weight_policy,
 
  598        options.variable_block_weight_parameters
 
  601    const Index_ minrank_limit = sanisizer::cap<Index_>(options.min_rank_limit);
 
  603    if (!output.auc.empty()) {
 
  604        std::vector<MinrankTopQueues<Stat_, Index_> > auc_minrank_all_queues;
 
  605        preallocate_minrank_queues(ngroups, auc_minrank_all_queues, output.auc, minrank_limit, options.min_rank_preserve_ties, options.num_threads);
 
  607        struct AucResultWorkspace {
 
  608            AucResultWorkspace() = 
default;
 
  609            AucResultWorkspace(
const std::size_t ngroups, MinrankTopQueues<Stat_, Index_>& queues) :
 
  610                pairwise_buffer(sanisizer::product<typename std::vector<Stat_>::size_type>(ngroups, ngroups)),
 
  611                summary_buffer(sanisizer::cast<typename std::vector<Stat_>::size_type>(ngroups)),
 
  614            std::vector<Stat_> pairwise_buffer;
 
  615            std::vector<Stat_> summary_buffer;
 
  616            MinrankTopQueues<Stat_, Index_>* queue_ptr;
 
  619        scan_matrix_by_row_custom_auc<single_block_>(
 
  631             [&](
const int t) -> AucResultWorkspace {
 
  632                return AucResultWorkspace(ngroups, auc_minrank_all_queues[t]);
 
  636                AucScanWorkspace<Value_, Group_, Index_, Stat_>& auc_work,
 
  637                AucResultWorkspace& res_work
 
  639                process_auc_for_rows(auc_work, ngroups, nblocks, options.threshold, res_work.pairwise_buffer.data());
 
  640                compute_summary_stats_per_gene(gene, ngroups, res_work.pairwise_buffer.data(), res_work.summary_buffer, *(res_work.queue_ptr), output.auc);
 
  647        report_minrank_from_queues(ngenes, ngroups, auc_minrank_all_queues, output.auc, options.num_threads, options.min_rank_preserve_ties);
 
  650        scan_matrix_by_row_full_auc<single_block_>(
 
  661            static_cast<Stat_*
>(NULL),
 
  669        scan_matrix_by_column(
 
  672                if constexpr(single_block_) {
 
  679                if constexpr(single_block_) {
 
  693    process_simple_summary_effects(
 
  705        options.min_rank_preserve_ties,
 
  749template<
typename Value_, 
typename Index_, 
typename Group_, 
typename Stat_, 
typename Rank_>
 
  752    const Group_* 
const group, 
 
  756    const auto NC = matrix.
ncol();
 
  757    const auto group_sizes = tatami_stats::tabulate_groups(group, NC); 
 
  758    const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
 
  760    internal::score_markers_summary<true>(
 
  765        static_cast<int*
>(NULL),
 
  767        static_cast<std::size_t*
>(NULL),
 
 
  802template<
typename Value_, 
typename Index_, 
typename Group_, 
typename Block_, 
typename Stat_, 
typename Rank_>
 
  805    const Group_* 
const group, 
 
  806    const Block_* 
const block,
 
  810    const auto NC = matrix.
ncol();
 
  811    const auto ngroups = output.
mean.size();
 
  812    const auto nblocks = tatami_stats::total_groups(block, NC); 
 
  814    const auto combinations = internal::create_combinations(ngroups, group, block, NC);
 
  815    const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
 
  816    const auto ncombos = combo_sizes.size();
 
  818    internal::score_markers_summary<false>(
 
  820        sanisizer::cast<std::size_t>(ngroups),
 
  822        sanisizer::cast<std::size_t>(nblocks),
 
  824        sanisizer::cast<std::size_t>(ncombos),
 
 
  850template<
typename Stat_ = 
double, 
typename Rank_ = 
int, 
typename Value_, 
typename Index_, 
typename Group_>
 
  853    const Group_* 
const group,
 
  856    const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
 
  858    const auto buffers = internal::preallocate_summary_results(matrix.
nrow(), ngroups, output, options);
 
 
  883template<
typename Stat_ = 
double, 
typename Rank_ = 
int, 
typename Value_, 
typename Index_, 
typename Group_, 
typename Block_>
 
  886    const Group_* 
const group,
 
  887    const Block_* 
const block,
 
  890    const auto ngroups = tatami_stats::total_groups(group, matrix.
ncol());
 
  892    const auto buffers = internal::preallocate_summary_results(matrix.
nrow(), ngroups, output, options);
 
 
virtual Index_ ncol() const=0
 
virtual Index_ nrow() const=0
 
virtual bool prefer_rows() const=0
 
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
 
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
 
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:750
 
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:803
 
void parallelize(Function_ fun, const Index_ tasks, const int threads)
 
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:29
 
Utilities for effect summarization.