scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
score_markers_pairwise.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SCORE_MARKERS_PAIRWISE_HPP
2#define SCRAN_MARKERS_SCORE_MARKERS_PAIRWISE_HPP
3
4#include <vector>
5#include <cstddef>
6#include <optional>
7
9#include "tatami/tatami.hpp"
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "scan_matrix.hpp"
14#include "average_group_stats.hpp"
15#include "block_averages.hpp"
16#include "create_combinations.hpp"
17#include "cohens_d.hpp"
18#include "simple_diff.hpp"
19#include "utils.hpp"
20
26namespace scran_markers {
27
37 double threshold = 0;
38
43 int num_threads = 1;
44
49 bool compute_group_mean = true;
50
56
61 bool compute_cohens_d = true;
62
67 bool compute_auc = true;
68
73 bool compute_delta_mean = true;
74
80
86 BlockAveragePolicy block_average_policy = BlockAveragePolicy::MEAN;
87
100 scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
101
108
113 double block_quantile = 0.5;
114};
115
120template<typename Stat_>
129 std::vector<Stat_*> mean;
130
138 std::vector<Stat_*> detected;
139
155 Stat_* cohens_d = NULL;
156
169 Stat_* auc = NULL;
170
178 Stat_* delta_mean = NULL;
179
187 Stat_* delta_detected = NULL;
188};
189
194template<typename Stat_>
202 std::vector<std::vector<Stat_> > mean;
203
210 std::vector<std::vector<Stat_> > detected;
211
219 std::vector<Stat_> cohens_d;
220
228 std::vector<Stat_> auc;
229
237 std::vector<Stat_> delta_mean;
238
246 std::vector<Stat_> delta_detected;
247};
248
252namespace internal {
253
254template<typename Index_, typename Stat_>
255void process_simple_pairwise_effects(
256 const Index_ ngenes,
257 const std::size_t ngroups,
258 const std::size_t nblocks,
259 const std::size_t ncombos,
260 const std::vector<Stat_>& combo_means,
261 const std::vector<Stat_>& combo_vars,
262 const std::vector<Stat_>& combo_detected,
263 const double threshold,
264 const BlockAverageInfo<Stat_>& average_info,
266 const int num_threads
267) {
268 const Stat_* total_weights_ptr = NULL;
269 std::optional<std::vector<Stat_> > total_weights_per_group;
270 if (average_info.use_mean()) {
271 if (!output.mean.empty() || !output.detected.empty()) {
272 if (nblocks > 1) {
273 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, average_info.combo_weights().data());
274 total_weights_ptr = total_weights_per_group->data();
275 } else {
276 total_weights_ptr = average_info.combo_weights().data();
277 }
278 }
279 }
280
281 std::optional<PrecomputedPairwiseWeights<Stat_> > preweights;
282 if (average_info.use_mean()) {
283 if (output.cohens_d != NULL || output.delta_mean != NULL || output.delta_detected != NULL) {
284 preweights.emplace(ngroups, nblocks, average_info.combo_weights().data());
285 }
286 }
287
288 tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
289 std::optional<std::vector<Stat_> > qbuffer, qrevbuffer;
290 std::optional<scran_blocks::SingleQuantileVariable<Stat_, typename std::vector<Stat_>::iterator> > qcalc;
291 if (!average_info.use_mean()) {
292 qbuffer.emplace();
293 qrevbuffer.emplace();
294 qcalc.emplace(nblocks, average_info.quantile());
295 }
296
297 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
298 auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
299
300 if (!output.mean.empty()) {
301 const auto tmp_means = combo_means.data() + in_offset;
302 if (average_info.use_mean()) {
303 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_means, average_info.combo_weights().data(), total_weights_ptr, output.mean);
304 } else {
305 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_means, *qbuffer, *qcalc, output.mean);
306 }
307 }
308
309 if (!output.detected.empty()) {
310 const auto tmp_detected = combo_detected.data() + in_offset;
311 if (average_info.use_mean()) {
312 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_detected, average_info.combo_weights().data(), total_weights_ptr, output.detected);
313 } else {
314 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_detected, *qbuffer, *qcalc, output.detected);
315 }
316 }
317
318 // Computing the effect sizes.
319 const auto out_offset = sanisizer::product_unsafe<std::size_t>(gene, ngroups, ngroups);
320
321 if (output.cohens_d != NULL) {
322 const auto tmp_means = combo_means.data() + in_offset;
323 const auto tmp_variances = combo_vars.data() + in_offset;
324 const auto outptr = output.cohens_d + out_offset;
325 if (average_info.use_mean()) {
326 compute_pairwise_cohens_d_blockmean(tmp_means, tmp_variances, ngroups, nblocks, threshold, *preweights, outptr);
327 } else {
328 compute_pairwise_cohens_d_blockquantile(tmp_means, tmp_variances, ngroups, nblocks, threshold, *qbuffer, *qrevbuffer, *qcalc, outptr);
329 }
330 }
331
332 if (output.delta_detected != NULL) {
333 const auto tmp_detected = combo_detected.data() + in_offset;
334 const auto outptr = output.delta_detected + out_offset;
335 if (average_info.use_mean()) {
336 compute_pairwise_simple_diff_blockmean(tmp_detected, ngroups, nblocks, *preweights, outptr);
337 } else {
338 compute_pairwise_simple_diff_blockquantile(tmp_detected, ngroups, nblocks, *qbuffer, *qcalc, outptr);
339 }
340 }
341
342 if (output.delta_mean != NULL) {
343 const auto tmp_means = combo_means.data() + in_offset;
344 const auto outptr = output.delta_mean + out_offset;
345 if (average_info.use_mean()) {
346 compute_pairwise_simple_diff_blockmean(tmp_means, ngroups, nblocks, *preweights, outptr);
347 } else {
348 compute_pairwise_simple_diff_blockquantile(tmp_means, ngroups, nblocks, *qbuffer, *qcalc, outptr);
349 }
350 }
351 }
352 }, ngenes, num_threads);
353}
354
355template<typename Index_, typename Stat_>
356ScoreMarkersPairwiseBuffers<Stat_> preallocate_pairwise_results(
357 const Index_ ngenes,
358 const std::size_t ngroups,
359 ScoreMarkersPairwiseResults<Stat_>& store,
360 const ScoreMarkersPairwiseOptions& opt
361) {
362 ScoreMarkersPairwiseBuffers<Stat_> output;
363
364 if (opt.compute_group_mean) {
365 preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
366 }
367 if (opt.compute_group_detected) {
368 preallocate_average_results(ngenes, ngroups, store.detected, output.detected);
369 }
370
371 const auto num_effect_sizes = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ngroups, ngroups);
372
373 if (opt.compute_cohens_d) {
374 store.cohens_d.resize(num_effect_sizes
375#ifdef SCRAN_MARKERS_TEST_INIT
376 , SCRAN_MARKERS_TEST_INIT
377#endif
378 );
379 output.cohens_d = store.cohens_d.data();
380 }
381 if (opt.compute_auc) {
382 store.auc.resize(num_effect_sizes
383#ifdef SCRAN_MARKERS_TEST_INIT
384 , SCRAN_MARKERS_TEST_INIT
385#endif
386 );
387 output.auc = store.auc.data();
388 }
389 if (opt.compute_delta_mean) {
390 store.delta_mean.resize(num_effect_sizes
391#ifdef SCRAN_MARKERS_TEST_INIT
392 , SCRAN_MARKERS_TEST_INIT
393#endif
394 );
395 output.delta_mean = store.delta_mean.data();
396 }
397 if (opt.compute_delta_detected) {
398 store.delta_detected.resize(num_effect_sizes
399#ifdef SCRAN_MARKERS_TEST_INIT
400 , SCRAN_MARKERS_TEST_INIT
401#endif
402 );
403 output.delta_detected = store.delta_detected.data();
404 }
405
406 return output;
407}
408
409template<
410 bool single_block_,
411 typename Value_,
412 typename Index_,
413 typename Group_,
414 typename Block_,
415 typename Stat_
416>
418 const tatami::Matrix<Value_, Index_>& matrix,
419 const std::size_t ngroups,
420 const Group_* const group,
421 const std::size_t nblocks,
422 const Block_* const block,
423 const std::size_t ncombos,
424 const std::size_t* const combo,
425 const std::vector<Index_>& combo_sizes,
426 const ScoreMarkersPairwiseOptions& options,
427 const ScoreMarkersPairwiseBuffers<Stat_>& output
428) {
429 const auto ngenes = matrix.nrow();
430 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
431 std::vector<Stat_> combo_means, combo_vars, combo_detected;
432 if (!output.mean.empty() || output.cohens_d != NULL || output.delta_mean != NULL) {
433 combo_means.resize(payload_size);
434 }
435 if (output.cohens_d != NULL) {
436 combo_vars.resize(payload_size);
437 }
438 if (!output.detected.empty() || output.delta_detected != NULL) {
439 combo_detected.resize(payload_size);
440 }
441
442 // For a single block, this usually doesn't really matter, but we do it for consistency with the multi-block case,
443 // and to account for variable weighting where non-zero block sizes get zero weight.
444 BlockAverageInfo<Stat_> average_info;
445 if (options.block_average_policy == BlockAveragePolicy::MEAN) {
446 average_info = BlockAverageInfo<Stat_>(
448 combo_sizes,
449 options.block_weight_policy,
450 options.variable_block_weight_parameters
451 )
452 );
453 } else {
454 average_info = BlockAverageInfo<Stat_>(options.block_quantile);
455 }
456
457 if (output.auc != NULL || matrix.prefer_rows()) {
458 scan_matrix_by_row_full_auc<single_block_>(
459 matrix,
460 ngroups,
461 group,
462 nblocks,
463 block,
464 ncombos,
465 combo,
466 combo_sizes,
467 average_info,
468 combo_means,
469 combo_vars,
470 combo_detected,
471 output.auc,
472 options.threshold,
473 options.num_threads
474 );
475
476 } else {
477 scan_matrix_by_column(
478 matrix,
479 [&]{
480 if constexpr(single_block_) {
481 return ngroups;
482 } else {
483 return ncombos;
484 }
485 }(),
486 [&]{
487 if constexpr(single_block_) {
488 return group;
489 } else {
490 return combo;
491 }
492 }(),
493 combo_sizes,
494 combo_means,
495 combo_vars,
496 combo_detected,
497 options.num_threads
498 );
499 }
500
501 process_simple_pairwise_effects(
502 matrix.nrow(),
503 ngroups,
504 nblocks,
505 ncombos,
506 combo_means,
507 combo_vars,
508 combo_detected,
509 options.threshold,
510 average_info,
511 output,
512 options.num_threads
513 );
514}
515
516}
591template<typename Value_, typename Index_, typename Group_, typename Stat_>
593 const tatami::Matrix<Value_, Index_>& matrix,
594 const Group_* const group,
595 const ScoreMarkersPairwiseOptions& options,
597) {
598 const Index_ NC = matrix.ncol();
599 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
600 const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
601
602 internal::score_markers_pairwise<true>(
603 matrix,
604 ngroups,
605 group,
606 1,
607 static_cast<int*>(NULL),
608 ngroups,
609 static_cast<std::size_t*>(NULL),
610 group_sizes,
611 options,
612 output
613 );
614}
615
655template<typename Value_, typename Index_, typename Group_, typename Block_, typename Stat_>
657 const tatami::Matrix<Value_, Index_>& matrix,
658 const Group_* const group,
659 const Block_* const block,
660 const ScoreMarkersPairwiseOptions& options,
662) {
663 const Index_ NC = matrix.ncol();
664 const auto ngroups = output.mean.size();
665 const auto nblocks = tatami_stats::total_groups(block, NC);
666
667 const auto combinations = internal::create_combinations(ngroups, group, block, NC);
668 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
669 const auto ncombos = combo_sizes.size();
670
671 internal::score_markers_pairwise<false>(
672 matrix,
673 sanisizer::cast<std::size_t>(ngroups),
674 group,
675 sanisizer::cast<std::size_t>(nblocks),
676 block,
677 sanisizer::cast<std::size_t>(ncombos),
678 combinations.data(),
679 combo_sizes,
680 options,
681 output
682 );
683}
684
701template<typename Stat_ = double, typename Value_, typename Index_, typename Group_>
703 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
705 auto buffers = internal::preallocate_pairwise_results(matrix.nrow(), ngroups, res, options);
706 score_markers_pairwise(matrix, group, options, buffers);
707 return res;
708}
709
729template<typename Stat_ = double, typename Value_, typename Index_, typename Group_, typename Block_>
731 const tatami::Matrix<Value_, Index_>& matrix,
732 const Group_* const group,
733 const Block_* const block,
734 const ScoreMarkersPairwiseOptions& options)
735{
736 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
738 const auto buffers = internal::preallocate_pairwise_results(matrix.nrow(), ngroups, res, options);
739 score_markers_pairwise_blocked(matrix, group, block, options, buffers);
740 return res;
741}
742
743}
744
745#endif
Averaging statistics over blocks.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:26
void score_markers_pairwise(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersPairwiseOptions &options, const ScoreMarkersPairwiseBuffers< Stat_ > &output)
Definition score_markers_pairwise.hpp:592
BlockAveragePolicy
Definition block_averages.hpp:27
void score_markers_pairwise_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersPairwiseOptions &options, const ScoreMarkersPairwiseBuffers< Stat_ > &output)
Definition score_markers_pairwise.hpp:656
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Buffers for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:121
Stat_ * delta_mean
Definition score_markers_pairwise.hpp:178
Stat_ * cohens_d
Definition score_markers_pairwise.hpp:155
std::vector< Stat_ * > detected
Definition score_markers_pairwise.hpp:138
Stat_ * delta_detected
Definition score_markers_pairwise.hpp:187
Stat_ * auc
Definition score_markers_pairwise.hpp:169
std::vector< Stat_ * > mean
Definition score_markers_pairwise.hpp:129
Options for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:31
bool compute_group_mean
Definition score_markers_pairwise.hpp:49
scran_blocks::WeightPolicy block_weight_policy
Definition score_markers_pairwise.hpp:100
int num_threads
Definition score_markers_pairwise.hpp:43
bool compute_delta_mean
Definition score_markers_pairwise.hpp:73
double block_quantile
Definition score_markers_pairwise.hpp:113
double threshold
Definition score_markers_pairwise.hpp:37
bool compute_group_detected
Definition score_markers_pairwise.hpp:55
BlockAveragePolicy block_average_policy
Definition score_markers_pairwise.hpp:86
bool compute_cohens_d
Definition score_markers_pairwise.hpp:61
bool compute_auc
Definition score_markers_pairwise.hpp:67
bool compute_delta_detected
Definition score_markers_pairwise.hpp:79
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition score_markers_pairwise.hpp:107
Results for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:195
std::vector< Stat_ > delta_mean
Definition score_markers_pairwise.hpp:237
std::vector< std::vector< Stat_ > > detected
Definition score_markers_pairwise.hpp:210
std::vector< Stat_ > cohens_d
Definition score_markers_pairwise.hpp:219
std::vector< Stat_ > auc
Definition score_markers_pairwise.hpp:228
std::vector< Stat_ > delta_detected
Definition score_markers_pairwise.hpp:246
std::vector< std::vector< Stat_ > > mean
Definition score_markers_pairwise.hpp:202