1#ifndef SCRAN_SCORE_MARKERS_HPP
2#define SCRAN_SCORE_MARKERS_HPP
4#include "scan_matrix.hpp"
6#include "simple_diff.hpp"
8#include "average_group_stats.hpp"
9#include "create_combinations.hpp"
118template<
typename Stat_,
typename Rank_>
141 std::vector<SummaryBuffers<Stat_, Rank_> >
cohens_d;
150 std::vector<SummaryBuffers<Stat_, Rank_> >
auc;
176template<
typename Stat_,
typename Rank_>
182 std::vector<std::vector<Stat_> >
mean;
197 std::vector<SummaryResults<Stat_, Rank_> >
cohens_d;
206 std::vector<SummaryResults<Stat_, Rank_> >
auc;
255template<
typename Stat_>
258 EffectsCacher(
size_t ngenes,
size_t ngroups,
size_t cache_size) :
261 my_cache_size(std::min(cache_size, ngroups * (ngroups - 1) / 2)),
263 my_common_cache(ngenes * my_cache_size),
264 my_staging_cache(ngroups)
266 my_unused_pool.reserve(my_cache_size);
267 auto ptr = my_common_cache.data();
268 for (
size_t c = 0; c < my_cache_size; ++c, ptr += ngenes) {
269 my_unused_pool.push_back(ptr);
276 size_t my_cache_size;
278 std::vector<CacheAction> my_actions;
282 std::vector<Stat_> my_common_cache;
289 std::vector<Stat_*> my_staging_cache;
293 std::vector<Stat_*> my_unused_pool;
297 std::map<std::pair<size_t, size_t>, Stat_*> my_cached;
305 void fill_effects_from_cache(
size_t group, std::vector<double>& full_effects) {
310 for (
size_t other = 0; other < my_ngroups; ++other) {
311 if (other == group) {
312 my_actions[other] = CacheAction::SKIP;
316 if (my_cache_size == 0) {
317 my_actions[other] = CacheAction::COMPUTE;
325 my_actions[other] = CacheAction::CACHE;
333 if (my_cached.empty()) {
334 my_actions[other] = CacheAction::COMPUTE;
338 const auto& front = my_cached.begin()->first;
339 if (front.first > group || front.second > other) {
343 my_actions[other] = CacheAction::COMPUTE;
350 my_actions[other] = CacheAction::SKIP;
351 auto curcache = my_cached.begin()->second;
352 size_t offset = other;
353 for (
size_t i = 0; i < my_ngenes; ++i, offset += my_ngroups) {
354 full_effects[offset ] = curcache[i];
357 my_unused_pool.push_back(curcache);
358 my_cached.erase(my_cached.begin());
364 for (
size_t other = 0; other < my_ngroups; ++other) {
365 if (my_actions[other] != CacheAction::CACHE) {
369 std::pair<size_t, size_t> key(other, group);
370 if (my_cached.size() < my_cache_size) {
371 auto ptr = my_unused_pool.back();
372 my_cached[key] = ptr;
373 my_staging_cache[other] = ptr;
374 my_unused_pool.pop_back();
381 auto it = my_cached.end();
383 if ((it->first).first > other) {
384 auto ptr = it->second;
385 my_cached[key] = ptr;
386 my_staging_cache[other] = ptr;
392 my_actions[other] = CacheAction::COMPUTE;
398 CacheAction get_action(
size_t other)
const {
399 return my_actions[other];
402 Stat_* get_cache_location(
size_t other)
const {
403 return my_staging_cache[other];
407template<
typename Stat_,
typename Rank_>
408void process_simple_summary_effects(
413 const std::vector<Stat_>& combo_means,
414 const std::vector<Stat_>& combo_vars,
415 const std::vector<Stat_>& combo_detected,
416 const ScoreMarkersSummaryBuffers<Stat_, Rank_>& output,
417 const std::vector<Stat_>& combo_weights,
424 std::vector<Stat_> total_weights_per_group;
425 const Stat_* total_weights_ptr = combo_weights.data();
427 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, combo_weights.data());
428 total_weights_ptr = total_weights_per_group.data();
432 size_t in_offset = ncombos *
static_cast<size_t>(start);
433 const auto* tmp_means = combo_means.data() + in_offset;
434 const auto* tmp_detected = combo_detected.data() + in_offset;
435 for (
size_t gene = start, end = start + length; gene < end; ++gene, tmp_means += ncombos, tmp_detected += ncombos) {
436 average_group_stats(gene, ngroups, nblocks, tmp_means, tmp_detected, combo_weights.data(), total_weights_ptr, output.mean, output.detected);
438 }, ngenes, num_threads);
441 PrecomputedPairwiseWeights<Stat_> preweights(ngroups, nblocks, combo_weights.data());
442 EffectsCacher<Stat_> cache(ngenes, ngroups, cache_size);
443 std::vector<Stat_> full_effects(ngroups * ngenes);
444 std::vector<std::vector<Stat_> > effect_buffers(num_threads);
445 for (
auto& ef : effect_buffers) {
449 if (output.cohens_d.size()) {
451 for (
size_t group = 0; group < ngroups; ++group) {
452 cache.fill_effects_from_cache(group, full_effects);
455 size_t in_offset = ncombos *
static_cast<size_t>(start);
456 auto my_means = combo_means.data() + in_offset;
457 auto my_variances = combo_vars.data() + in_offset;
458 auto store_ptr = full_effects.data() +
static_cast<size_t>(start) * ngroups;
459 auto& effect_buffer = effect_buffers[t];
461 for (
size_t gene = start, end = start + length; gene < end; ++gene, my_means += ncombos, my_variances += ncombos, store_ptr += ngroups) {
462 for (
size_t other = 0; other < ngroups; ++other) {
463 auto cache_action = cache.get_action(other);
464 if (cache_action == internal::CacheAction::COMPUTE) {
465 store_ptr[other] = compute_pairwise_cohens_d_one_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
466 }
else if (cache_action == internal::CacheAction::CACHE) {
467 auto tmp = compute_pairwise_cohens_d_two_sided(group, other, my_means, my_variances, ngroups, nblocks, preweights, threshold);
468 store_ptr[other] = tmp.first;
469 cache.get_cache_location(other)[gene] = tmp.second;
472 summarize_comparisons(ngroups, store_ptr, group, gene, output.cohens_d[group], effect_buffer);
474 }, ngenes, num_threads);
476 auto mr = output.cohens_d[group].min_rank;
478 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
483 if (output.delta_mean.size()) {
485 for (
size_t group = 0; group < ngroups; ++group) {
486 cache.fill_effects_from_cache(group, full_effects);
489 auto my_means = combo_means.data() + ncombos *
static_cast<size_t>(start);
490 auto store_ptr = full_effects.data() +
static_cast<size_t>(start) * ngroups;
491 auto& effect_buffer = effect_buffers[t];
493 for (
size_t gene = start, end = start + length; gene < end; ++gene, my_means += ncombos, store_ptr += ngroups) {
494 for (
size_t other = 0; other < ngroups; ++other) {
495 auto cache_action = cache.get_action(other);
496 if (cache_action != internal::CacheAction::SKIP) {
497 auto val = compute_pairwise_simple_diff(group, other, my_means, ngroups, nblocks, preweights);
498 store_ptr[other] = val;
499 if (cache_action == CacheAction::CACHE) {
500 cache.get_cache_location(other)[gene] = -val;
504 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_mean[group], effect_buffer);
506 }, ngenes, num_threads);
508 auto mr = output.delta_mean[group].min_rank;
510 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
515 if (output.delta_detected.size()) {
517 for (
size_t group = 0; group < ngroups; ++group) {
518 cache.fill_effects_from_cache(group, full_effects);
521 auto my_detected = combo_detected.data() + ncombos *
static_cast<size_t>(start);
522 auto store_ptr = full_effects.data() +
static_cast<size_t>(start) * ngroups;
523 auto& effect_buffer = effect_buffers[t];
525 for (
size_t gene = start, end = start + length; gene < end; ++gene, my_detected += ncombos, store_ptr += ngroups) {
526 for (
size_t other = 0; other < ngroups; ++other) {
527 auto cache_action = cache.get_action(other);
528 if (cache_action != CacheAction::SKIP) {
529 auto val = compute_pairwise_simple_diff(group, other, my_detected, ngroups, nblocks, preweights);
530 store_ptr[other] = val;
531 if (cache_action == CacheAction::CACHE) {
532 cache.get_cache_location(other)[gene] = -val;
536 summarize_comparisons(ngroups, store_ptr, group, gene, output.delta_detected[group], effect_buffer);
538 }, ngenes, num_threads);
540 auto mr = output.delta_detected[group].min_rank;
542 compute_min_rank_for_group(ngenes, ngroups, group, full_effects.data(), mr, num_threads);
548template<
typename Stat_,
typename Rank_>
549ScoreMarkersSummaryBuffers<Stat_, Rank_> fill_summary_results(
size_t ngenes,
size_t ngroups, ScoreMarkersSummaryResults<Stat_, Rank_>& store,
const ScoreMarkersSummaryOptions& options) {
550 ScoreMarkersSummaryBuffers<Stat_, Rank_> output;
552 internal::fill_average_results(ngenes, ngroups, store.mean, store.detected, output.mean, output.detected);
554 if (options.compute_cohens_d) {
555 output.cohens_d = internal::fill_summary_results(
560 options.compute_mean,
561 options.compute_median,
563 options.compute_min_rank
567 if (options.compute_auc) {
568 output.auc = internal::fill_summary_results(
573 options.compute_mean,
574 options.compute_median,
576 options.compute_min_rank
580 if (options.compute_delta_mean) {
581 output.delta_mean = internal::fill_summary_results(
586 options.compute_mean,
587 options.compute_median,
589 options.compute_min_rank
593 if (options.compute_delta_detected) {
594 output.delta_detected = internal::fill_summary_results(
597 store.delta_detected,
599 options.compute_mean,
600 options.compute_median,
602 options.compute_min_rank
646template<
typename Value_,
typename Index_,
typename Group_,
typename Stat_,
typename Rank_>
674 internal::scan_matrix_by_row<true>(
679 static_cast<int*
>(
NULL),
693 internal::scan_matrix_by_column(
705 internal::process_simple_summary_effects(
753template<
typename Value_,
typename Index_,
typename Group_,
typename Block_,
typename Stat_,
typename Rank_>
783 internal::scan_matrix_by_row<false>(
802 internal::scan_matrix_by_column(
814 internal::process_simple_summary_effects(
851template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_>
883template<
typename Stat_ =
double,
typename Rank_ =
int,
typename Value_,
typename Index_,
typename Group_,
typename Block_>
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:23
void score_markers_summary(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *group, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:647
void score_markers_summary_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *group, const Block_ *block, const ScoreMarkersSummaryOptions &options, const ScoreMarkersSummaryBuffers< Stat_, Rank_ > &output)
Definition score_markers_summary.hpp:754
void parallelize(Function_ fun, Index_ tasks, int threads)
Utilities for effect summarization.