scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
score_markers_pairwise.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SCORE_MARKERS_PAIRWISE_HPP
2#define SCRAN_MARKERS_SCORE_MARKERS_PAIRWISE_HPP
3
4#include <vector>
5#include <cstddef>
6#include <optional>
7
9#include "tatami/tatami.hpp"
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12#include "quickstats/quickstats.hpp"
13
14#include "scan_matrix.hpp"
15#include "average_group_stats.hpp"
16#include "block_averages.hpp"
17#include "create_combinations.hpp"
18#include "cohens_d.hpp"
19#include "simple_diff.hpp"
20#include "utils.hpp"
21
27namespace scran_markers {
28
38 double threshold = 0;
39
44 int num_threads = 1;
45
50 bool compute_group_mean = true;
51
57
62 bool compute_cohens_d = true;
63
68 bool compute_auc = true;
69
74 bool compute_delta_mean = true;
75
81
87 BlockAveragePolicy block_average_policy = BlockAveragePolicy::MEAN;
88
101 scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
102
109
114 double block_quantile = 0.5;
115};
116
121template<typename Stat_>
130 std::vector<Stat_*> mean;
131
139 std::vector<Stat_*> detected;
140
156 Stat_* cohens_d = NULL;
157
170 Stat_* auc = NULL;
171
179 Stat_* delta_mean = NULL;
180
188 Stat_* delta_detected = NULL;
189};
190
195template<typename Stat_>
203 std::vector<std::vector<Stat_> > mean;
204
211 std::vector<std::vector<Stat_> > detected;
212
220 std::vector<Stat_> cohens_d;
221
229 std::vector<Stat_> auc;
230
238 std::vector<Stat_> delta_mean;
239
247 std::vector<Stat_> delta_detected;
248};
249
253namespace internal {
254
255template<typename Index_, typename Stat_>
256void process_simple_pairwise_effects(
257 const Index_ ngenes,
258 const std::size_t ngroups,
259 const std::size_t nblocks,
260 const std::size_t ncombos,
261 const std::vector<Stat_>& combo_means,
262 const std::vector<Stat_>& combo_vars,
263 const std::vector<Stat_>& combo_detected,
264 const double threshold,
265 const BlockAverageInfo<Stat_>& average_info,
267 const int num_threads
268) {
269 const Stat_* total_weights_ptr = NULL;
270 std::optional<std::vector<Stat_> > total_weights_per_group;
271 if (average_info.use_mean()) {
272 if (!output.mean.empty() || !output.detected.empty()) {
273 if (nblocks > 1) {
274 total_weights_per_group = compute_total_weight_per_group(ngroups, nblocks, average_info.combo_weights().data());
275 total_weights_ptr = total_weights_per_group->data();
276 } else {
277 total_weights_ptr = average_info.combo_weights().data();
278 }
279 }
280 }
281
282 std::optional<PrecomputedPairwiseWeights<Stat_> > preweights;
283 if (average_info.use_mean()) {
284 if (output.cohens_d != NULL || output.delta_mean != NULL || output.delta_detected != NULL) {
285 preweights.emplace(ngroups, nblocks, average_info.combo_weights().data());
286 }
287 }
288
289 tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
290 std::optional<std::vector<Stat_> > qbuffer, qrevbuffer;
291 std::optional<quickstats::SingleQuantileVariableNumber<Stat_, std::size_t> > qcalc;
292 if (!average_info.use_mean()) {
293 qbuffer.emplace();
294 qrevbuffer.emplace();
295 qcalc.emplace(nblocks, average_info.quantile());
296 }
297
298 for (Index_ gene = start, end = start + length; gene < end; ++gene) {
299 auto in_offset = sanisizer::product_unsafe<std::size_t>(gene, ncombos);
300
301 if (!output.mean.empty()) {
302 const auto tmp_means = combo_means.data() + in_offset;
303 if (average_info.use_mean()) {
304 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_means, average_info.combo_weights().data(), total_weights_ptr, output.mean);
305 } else {
306 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_means, *qbuffer, *qcalc, output.mean);
307 }
308 }
309
310 if (!output.detected.empty()) {
311 const auto tmp_detected = combo_detected.data() + in_offset;
312 if (average_info.use_mean()) {
313 average_group_stats_blockmean(gene, ngroups, nblocks, tmp_detected, average_info.combo_weights().data(), total_weights_ptr, output.detected);
314 } else {
315 average_group_stats_blockquantile(gene, ngroups, nblocks, tmp_detected, *qbuffer, *qcalc, output.detected);
316 }
317 }
318
319 // Computing the effect sizes.
320 const auto out_offset = sanisizer::product_unsafe<std::size_t>(gene, ngroups, ngroups);
321
322 if (output.cohens_d != NULL) {
323 const auto tmp_means = combo_means.data() + in_offset;
324 const auto tmp_variances = combo_vars.data() + in_offset;
325 const auto outptr = output.cohens_d + out_offset;
326 if (average_info.use_mean()) {
327 compute_pairwise_cohens_d_blockmean(tmp_means, tmp_variances, ngroups, nblocks, threshold, *preweights, outptr);
328 } else {
329 compute_pairwise_cohens_d_blockquantile(tmp_means, tmp_variances, ngroups, nblocks, threshold, *qbuffer, *qrevbuffer, *qcalc, outptr);
330 }
331 }
332
333 if (output.delta_detected != NULL) {
334 const auto tmp_detected = combo_detected.data() + in_offset;
335 const auto outptr = output.delta_detected + out_offset;
336 if (average_info.use_mean()) {
337 compute_pairwise_simple_diff_blockmean(tmp_detected, ngroups, nblocks, *preweights, outptr);
338 } else {
339 compute_pairwise_simple_diff_blockquantile(tmp_detected, ngroups, nblocks, *qbuffer, *qcalc, outptr);
340 }
341 }
342
343 if (output.delta_mean != NULL) {
344 const auto tmp_means = combo_means.data() + in_offset;
345 const auto outptr = output.delta_mean + out_offset;
346 if (average_info.use_mean()) {
347 compute_pairwise_simple_diff_blockmean(tmp_means, ngroups, nblocks, *preweights, outptr);
348 } else {
349 compute_pairwise_simple_diff_blockquantile(tmp_means, ngroups, nblocks, *qbuffer, *qcalc, outptr);
350 }
351 }
352 }
353 }, ngenes, num_threads);
354}
355
356template<typename Index_, typename Stat_>
357ScoreMarkersPairwiseBuffers<Stat_> preallocate_pairwise_results(
358 const Index_ ngenes,
359 const std::size_t ngroups,
360 ScoreMarkersPairwiseResults<Stat_>& store,
361 const ScoreMarkersPairwiseOptions& opt
362) {
363 ScoreMarkersPairwiseBuffers<Stat_> output;
364
365 if (opt.compute_group_mean) {
366 preallocate_average_results(ngenes, ngroups, store.mean, output.mean);
367 }
368 if (opt.compute_group_detected) {
369 preallocate_average_results(ngenes, ngroups, store.detected, output.detected);
370 }
371
372 const auto num_effect_sizes = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ngroups, ngroups);
373
374 if (opt.compute_cohens_d) {
375 store.cohens_d.resize(num_effect_sizes
376#ifdef SCRAN_MARKERS_TEST_INIT
377 , SCRAN_MARKERS_TEST_INIT
378#endif
379 );
380 output.cohens_d = store.cohens_d.data();
381 }
382 if (opt.compute_auc) {
383 store.auc.resize(num_effect_sizes
384#ifdef SCRAN_MARKERS_TEST_INIT
385 , SCRAN_MARKERS_TEST_INIT
386#endif
387 );
388 output.auc = store.auc.data();
389 }
390 if (opt.compute_delta_mean) {
391 store.delta_mean.resize(num_effect_sizes
392#ifdef SCRAN_MARKERS_TEST_INIT
393 , SCRAN_MARKERS_TEST_INIT
394#endif
395 );
396 output.delta_mean = store.delta_mean.data();
397 }
398 if (opt.compute_delta_detected) {
399 store.delta_detected.resize(num_effect_sizes
400#ifdef SCRAN_MARKERS_TEST_INIT
401 , SCRAN_MARKERS_TEST_INIT
402#endif
403 );
404 output.delta_detected = store.delta_detected.data();
405 }
406
407 return output;
408}
409
410template<
411 bool single_block_,
412 typename Value_,
413 typename Index_,
414 typename Group_,
415 typename Block_,
416 typename Stat_
417>
419 const tatami::Matrix<Value_, Index_>& matrix,
420 const std::size_t ngroups,
421 const Group_* const group,
422 const std::size_t nblocks,
423 const Block_* const block,
424 const std::size_t ncombos,
425 const std::size_t* const combo,
426 const std::vector<Index_>& combo_sizes,
427 const ScoreMarkersPairwiseOptions& options,
428 const ScoreMarkersPairwiseBuffers<Stat_>& output
429) {
430 const auto ngenes = matrix.nrow();
431 const auto payload_size = sanisizer::product<typename std::vector<Stat_>::size_type>(ngenes, ncombos);
432 std::vector<Stat_> combo_means, combo_vars, combo_detected;
433 if (!output.mean.empty() || output.cohens_d != NULL || output.delta_mean != NULL) {
434 combo_means.resize(payload_size);
435 }
436 if (output.cohens_d != NULL) {
437 combo_vars.resize(payload_size);
438 }
439 if (!output.detected.empty() || output.delta_detected != NULL) {
440 combo_detected.resize(payload_size);
441 }
442
443 // For a single block, this usually doesn't really matter, but we do it for consistency with the multi-block case,
444 // and to account for variable weighting where non-zero block sizes get zero weight.
445 BlockAverageInfo<Stat_> average_info;
446 if (options.block_average_policy == BlockAveragePolicy::MEAN) {
447 average_info = BlockAverageInfo<Stat_>(
449 combo_sizes,
450 options.block_weight_policy,
451 options.variable_block_weight_parameters
452 )
453 );
454 } else {
455 average_info = BlockAverageInfo<Stat_>(options.block_quantile);
456 }
457
458 if (output.auc != NULL || matrix.prefer_rows()) {
459 scan_matrix_by_row_full_auc<single_block_>(
460 matrix,
461 ngroups,
462 group,
463 nblocks,
464 block,
465 ncombos,
466 combo,
467 combo_sizes,
468 average_info,
469 combo_means,
470 combo_vars,
471 combo_detected,
472 output.auc,
473 options.threshold,
474 options.num_threads
475 );
476
477 } else {
478 scan_matrix_by_column(
479 matrix,
480 [&]{
481 if constexpr(single_block_) {
482 return ngroups;
483 } else {
484 return ncombos;
485 }
486 }(),
487 [&]{
488 if constexpr(single_block_) {
489 return group;
490 } else {
491 return combo;
492 }
493 }(),
494 combo_sizes,
495 combo_means,
496 combo_vars,
497 combo_detected,
498 options.num_threads
499 );
500 }
501
502 process_simple_pairwise_effects(
503 matrix.nrow(),
504 ngroups,
505 nblocks,
506 ncombos,
507 combo_means,
508 combo_vars,
509 combo_detected,
510 options.threshold,
511 average_info,
512 output,
513 options.num_threads
514 );
515}
516
517}
592template<typename Value_, typename Index_, typename Group_, typename Stat_>
594 const tatami::Matrix<Value_, Index_>& matrix,
595 const Group_* const group,
596 const ScoreMarkersPairwiseOptions& options,
598) {
599 const Index_ NC = matrix.ncol();
600 const auto group_sizes = tatami_stats::tabulate_groups(group, NC);
601 const auto ngroups = sanisizer::cast<std::size_t>(group_sizes.size());
602
603 internal::score_markers_pairwise<true>(
604 matrix,
605 ngroups,
606 group,
607 1,
608 static_cast<int*>(NULL),
609 ngroups,
610 static_cast<std::size_t*>(NULL),
611 group_sizes,
612 options,
613 output
614 );
615}
616
656template<typename Value_, typename Index_, typename Group_, typename Block_, typename Stat_>
658 const tatami::Matrix<Value_, Index_>& matrix,
659 const Group_* const group,
660 const Block_* const block,
661 const ScoreMarkersPairwiseOptions& options,
663) {
664 const Index_ NC = matrix.ncol();
665 const auto ngroups = output.mean.size();
666 const auto nblocks = tatami_stats::total_groups(block, NC);
667
668 const auto combinations = internal::create_combinations(ngroups, group, nblocks, block, NC);
669 const auto combo_sizes = internal::tabulate_combinations<Index_>(ngroups, nblocks, combinations);
670 const auto ncombos = combo_sizes.size();
671
672 internal::score_markers_pairwise<false>(
673 matrix,
674 sanisizer::cast<std::size_t>(ngroups),
675 group,
676 sanisizer::cast<std::size_t>(nblocks),
677 block,
678 sanisizer::cast<std::size_t>(ncombos),
679 combinations.data(),
680 combo_sizes,
681 options,
682 output
683 );
684}
685
702template<typename Stat_ = double, typename Value_, typename Index_, typename Group_>
704 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
706 auto buffers = internal::preallocate_pairwise_results(matrix.nrow(), ngroups, res, options);
707 score_markers_pairwise(matrix, group, options, buffers);
708 return res;
709}
710
730template<typename Stat_ = double, typename Value_, typename Index_, typename Group_, typename Block_>
732 const tatami::Matrix<Value_, Index_>& matrix,
733 const Group_* const group,
734 const Block_* const block,
735 const ScoreMarkersPairwiseOptions& options)
736{
737 const auto ngroups = tatami_stats::total_groups(group, matrix.ncol());
739 const auto buffers = internal::preallocate_pairwise_results(matrix.nrow(), ngroups, res, options);
740 score_markers_pairwise_blocked(matrix, group, block, options, buffers);
741 return res;
742}
743
744}
745
746#endif
Averaging statistics over blocks.
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:27
void score_markers_pairwise(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const ScoreMarkersPairwiseOptions &options, const ScoreMarkersPairwiseBuffers< Stat_ > &output)
Definition score_markers_pairwise.hpp:593
BlockAveragePolicy
Definition block_averages.hpp:27
void score_markers_pairwise_blocked(const tatami::Matrix< Value_, Index_ > &matrix, const Group_ *const group, const Block_ *const block, const ScoreMarkersPairwiseOptions &options, const ScoreMarkersPairwiseBuffers< Stat_ > &output)
Definition score_markers_pairwise.hpp:657
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Buffers for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:122
Stat_ * delta_mean
Definition score_markers_pairwise.hpp:179
Stat_ * cohens_d
Definition score_markers_pairwise.hpp:156
std::vector< Stat_ * > detected
Definition score_markers_pairwise.hpp:139
Stat_ * delta_detected
Definition score_markers_pairwise.hpp:188
Stat_ * auc
Definition score_markers_pairwise.hpp:170
std::vector< Stat_ * > mean
Definition score_markers_pairwise.hpp:130
Options for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:32
bool compute_group_mean
Definition score_markers_pairwise.hpp:50
scran_blocks::WeightPolicy block_weight_policy
Definition score_markers_pairwise.hpp:101
int num_threads
Definition score_markers_pairwise.hpp:44
bool compute_delta_mean
Definition score_markers_pairwise.hpp:74
double block_quantile
Definition score_markers_pairwise.hpp:114
double threshold
Definition score_markers_pairwise.hpp:38
bool compute_group_detected
Definition score_markers_pairwise.hpp:56
BlockAveragePolicy block_average_policy
Definition score_markers_pairwise.hpp:87
bool compute_cohens_d
Definition score_markers_pairwise.hpp:62
bool compute_auc
Definition score_markers_pairwise.hpp:68
bool compute_delta_detected
Definition score_markers_pairwise.hpp:80
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition score_markers_pairwise.hpp:108
Results for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:196
std::vector< Stat_ > delta_mean
Definition score_markers_pairwise.hpp:238
std::vector< std::vector< Stat_ > > detected
Definition score_markers_pairwise.hpp:211
std::vector< Stat_ > cohens_d
Definition score_markers_pairwise.hpp:220
std::vector< Stat_ > auc
Definition score_markers_pairwise.hpp:229
std::vector< Stat_ > delta_detected
Definition score_markers_pairwise.hpp:247
std::vector< std::vector< Stat_ > > mean
Definition score_markers_pairwise.hpp:203