1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
12#include "tatami_stats/tatami_stats.hpp"
13#include "sanisizer/sanisizer.hpp"
31template<
typename Stat_ =
double,
typename Rank_ =
int>
90template<
typename Stat_ =
double,
typename Rank_ =
int>
96 std::vector<Stat_>
min;
122 std::optional<std::vector<std::vector<Stat_> > >
quantiles;
136inline void validate_quantiles(
const std::optional<std::vector<double> >& probs) {
137 if (!probs.has_value()) {
141 const auto val = probs->front();
142 if (val < 0 || val > 1) {
143 throw std::runtime_error(
"quantile probabilities should be in [0, 1]");
146 const auto nprobs = probs->size();
147 for (I<
decltype(nprobs)> i = 1; i < nprobs; ++i) {
148 const auto val = (*probs)[i];
149 if (val < 0 || val > 1) {
150 throw std::runtime_error(
"quantile probabilities should be in [0, 1]");
152 if (val < (*probs)[i - 1]) {
153 throw std::runtime_error(
"quantile probabilities should be sorted");
158template<
typename Stat_,
class Iterator_>
159class MultipleQuantiles {
161 MultipleQuantiles(
const std::vector<double>& probs,
const std::size_t max_len) :
162 my_stacks(sanisizer::cast<I<decltype(my_stacks.size())> >(max_len)),
165 sanisizer::can_ptrdiff<Iterator_>(max_len);
169 std::vector<std::size_t> index;
170 std::vector<double> lower_fraction;
171 std::vector<double> upper_fraction;
175 std::vector<std::optional<Details> > my_stacks;
176 const std::vector<double>* my_probs;
179 Details& initialize_stack(
const std::size_t len) {
182 auto& raw_stack = my_stacks[len - 1];
183 if (raw_stack.has_value()) {
188 auto& stack = *raw_stack;
189 const auto nprobs = my_probs->size();
190 stack.index.reserve(nprobs);
191 stack.lower_fraction.reserve(nprobs);
192 stack.upper_fraction.reserve(nprobs);
194 for (
const auto prob : *my_probs) {
195 const double frac =
static_cast<double>(len - 1) *
static_cast<double>(prob);
196 const double base = std::floor(frac);
197 stack.index.push_back(base);
198 stack.upper_fraction.push_back(frac - base);
199 stack.lower_fraction.push_back(
static_cast<double>(1) - stack.upper_fraction.back());
206 template<
class OutputFun_>
207 void compute(
const std::size_t len, Iterator_ begin, Iterator_ end, OutputFun_ output) {
211 assert(len ==
static_cast<std::size_t
>(end - begin));
212 auto& stack = initialize_stack(len);
218 std::size_t sorted_up_to = 0;
220 const auto nprobs = stack.index.size();
221 for (I<
decltype(nprobs)> p = 0; p < nprobs; ++p) {
222 const auto curindex = stack.index[p];
223 const auto curlower = stack.lower_fraction[p];
224 const auto curupper = stack.upper_fraction[p];
226 const auto target = begin + curindex;
227 if (curindex >= sorted_up_to) {
228 std::nth_element(begin + sorted_up_to, target, end);
229 sorted_up_to = curindex + 1;
231 const auto lower_val = *target;
234 const auto curindex_p1 = curindex + 1;
235 const auto target_p1 = begin + curindex_p1;
236 if (curindex_p1 >= sorted_up_to) {
238 std::swap(*target_p1, *std::min_element(target_p1, end));
239 sorted_up_to = curindex_p1 + 1;
241 const auto upper_val = *target_p1;
242 output(p, lower_val * curlower + upper_val * curupper);
244 output(p, lower_val);
250template<
typename Stat_>
251using MaybeMultipleQuantiles = std::optional<MultipleQuantiles<Stat_, Stat_*> >;
253template<
typename Stat_>
254MaybeMultipleQuantiles<Stat_> setup_multiple_quantiles(
const std::optional<std::vector<double> >& requested,
const std::size_t ngroups) {
255 MaybeMultipleQuantiles<Stat_> output;
256 if (requested.has_value()) {
257 output.emplace(*requested, ngroups);
262template<
typename Stat_,
typename Gene_,
typename Rank_>
263void summarize_comparisons(
264 const std::size_t ngroups,
265 const Stat_*
const effects,
266 const std::size_t group,
268 const SummaryBuffers<Stat_, Rank_>& output,
269 MaybeMultipleQuantiles<Stat_>& quantile_calculators,
270 std::vector<Stat_>& buffer
273 std::size_t ncomps = 0;
274 for (I<
decltype(ngroups)> r = 0; r < ngroups; ++r) {
275 if (r == group || std::isnan(effects[r])) {
278 buffer[ncomps] = effects[r];
283 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : buffer[0]);
285 output.min[gene] = val;
288 output.mean[gene] = val;
291 output.max[gene] = val;
294 output.median[gene] = val;
296 if (output.quantiles.has_value()) {
297 for (const auto& quan : *(output.quantiles)) {
303 const auto ebegin = buffer.data(), elast = ebegin + ncomps;
305 output.min[gene] = *std::min_element(ebegin, elast);
308 output.mean[gene] = std::accumulate(ebegin, elast,
static_cast<Stat_
>(0)) / ncomps;
311 output.max[gene] = *std::max_element(ebegin, elast);
315 output.median[gene] = tatami_stats::medians::direct(ebegin, ncomps,
false);
317 if (output.quantiles.has_value()) {
318 quantile_calculators->compute(
322 [&](
const std::size_t i,
const Stat_ value) ->
void {
323 (*output.quantiles)[i][gene] = value;
330template<
typename Gene_,
typename Stat_,
typename Rank_>
331void summarize_comparisons(
333 const std::size_t ngroups,
334 const Stat_*
const effects,
335 const std::optional<std::vector<double> >& compute_quantiles,
336 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
340 auto summary_qcalcs = setup_multiple_quantiles<Stat_>(compute_quantiles, ngroups);
341 auto buffer = sanisizer::create<std::vector<Stat_> >(ngroups);
343 for (Gene_ gene = start, end = start + length; gene < end; ++gene) {
344 for (I<
decltype(ngroups)> l = 0; l < ngroups; ++l) {
345 const auto current_effects = effects + sanisizer::nd_offset<std::size_t>(0, ngroups, l, ngroups, gene);
346 summarize_comparisons(ngroups, current_effects, l, gene, output[l], summary_qcalcs, buffer);
352template<
typename Stat_,
typename Gene_>
353Gene_ fill_and_sort_rank_buffer(
const Stat_*
const effects,
const std::size_t stride, std::vector<std::pair<Stat_, Gene_> >& buffer) {
355 for (Gene_ i = 0, end = buffer.size(); i < end; ++i) {
356 const auto cureffect = effects[sanisizer::product_unsafe<std::size_t>(i, stride)];
357 if (!std::isnan(cureffect)) {
358 auto& current = buffer[counter];
359 current.first = cureffect;
367 buffer.begin() + counter,
368 [&](
const std::pair<Stat_, Gene_>& left,
const std::pair<Stat_, Gene_>& right) ->
bool {
370 if (left.first == right.first) {
371 return left.second < right.second;
373 return left.first > right.first;
381template<
typename Stat_,
typename Gene_,
typename Rank_>
382void compute_min_rank_pairwise(
384 const std::size_t ngroups,
385 const Stat_*
const effects,
386 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
387 const bool preserve_ties,
390 const auto ngroups2 = sanisizer::product_unsafe<std::size_t>(ngroups, ngroups);
391 const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
393 tatami::parallelize([&](
const int,
const std::size_t start,
const std::size_t length) ->
void {
394 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
395 for (I<
decltype(start)> g = start, end = start + length; g < end; ++g) {
396 const auto target = output[g].min_rank;
397 if (target == NULL) {
400 std::fill_n(target, ngenes, maxrank_placeholder);
402 for (I<
decltype(ngroups)> g2 = 0; g2 < ngroups; ++g2) {
406 const auto offset = sanisizer::nd_offset<std::size_t>(g2, ngroups, g);
407 const auto used = fill_and_sort_rank_buffer(effects + offset, ngroups2, buffer);
409 if (!preserve_ties) {
411 for (Gene_ i = 0; i < used; ++i) {
412 auto& current = target[buffer[i].second];
413 if (counter < current) {
422 const auto original = i;
423 const auto val = buffer[i].first;
425 auto& current = target[buffer[i].second];
426 if (counter < current) {
430 while (++i < used && buffer[i].first == val) {
431 auto& current = target[buffer[i].second];
432 if (counter < current) {
437 counter += i - original;
442 }, ngroups, threads);
445template<
typename Gene_,
typename Stat_,
typename Rank_>
446SummaryBuffers<Stat_, Rank_> fill_summary_results(
448 SummaryResults<Stat_, Rank_>& out,
449 const bool compute_min,
450 const bool compute_mean,
451 const bool compute_median,
452 const bool compute_max,
453 const std::optional<std::vector<double> >& compute_quantiles,
454 const bool compute_min_rank
456 SummaryBuffers<Stat_, Rank_> ptr;
457 const auto out_len = sanisizer::cast<typename std::vector<Stat_>::size_type>(ngenes);
460 out.min.resize(out_len
461#ifdef SCRAN_MARKERS_TEST_INIT
462 , SCRAN_MARKERS_TEST_INIT
465 ptr.min = out.min.data();
469 out.mean.resize(out_len
470#ifdef SCRAN_MARKERS_TEST_INIT
471 , SCRAN_MARKERS_TEST_INIT
474 ptr.mean = out.mean.data();
477 if (compute_median) {
478 out.median.resize(out_len
479#ifdef SCRAN_MARKERS_TEST_INIT
480 , SCRAN_MARKERS_TEST_INIT
483 ptr.median = out.median.data();
487 out.max.resize(out_len
488#ifdef SCRAN_MARKERS_TEST_INIT
489 , SCRAN_MARKERS_TEST_INIT
492 ptr.max = out.max.data();
495 if (compute_quantiles.has_value()) {
496 out.quantiles.emplace();
497 ptr.quantiles.emplace();
498 out.quantiles->reserve(compute_quantiles->size());
499 ptr.quantiles->reserve(compute_quantiles->size());
500 for ([[maybe_unused]]
const auto quan : *compute_quantiles) {
501 out.quantiles->emplace_back(out_len
502#ifdef SCRAN_MARKERS_TEST_INIT
503 , SCRAN_MARKERS_TEST_INIT
506 ptr.quantiles->push_back(out.quantiles->back().data());
510 if (compute_min_rank) {
511 out.min_rank.resize(out_len
512#ifdef SCRAN_MARKERS_TEST_INIT
513 , SCRAN_MARKERS_TEST_INIT
516 ptr.min_rank = out.min_rank.data();
522template<
typename Gene_,
typename Stat_,
typename Rank_>
523std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
525 const std::size_t ngroups,
526 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
527 const bool compute_min,
528 const bool compute_mean,
529 const bool compute_median,
530 const bool compute_max,
531 const std::optional<std::vector<double> >& compute_quantiles,
532 const bool compute_min_rank
534 sanisizer::resize(outputs, ngroups);
535 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
536 ptrs.reserve(ngroups);
537 for (I<
decltype(ngroups)> g = 0; g < ngroups; ++g) {
539 fill_summary_results(
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:26
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:32
Stat_ * mean
Definition summarize_comparisons.hpp:45
Stat_ * median
Definition summarize_comparisons.hpp:52
Stat_ * min
Definition summarize_comparisons.hpp:38
Stat_ * max
Definition summarize_comparisons.hpp:59
Rank_ * min_rank
Definition summarize_comparisons.hpp:81
std::optional< std::vector< Stat_ * > > quantiles
Definition summarize_comparisons.hpp:74
Container for the summary statistics.
Definition summarize_comparisons.hpp:91
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:102
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:108
std::optional< std::vector< std::vector< Stat_ > > > quantiles
Definition summarize_comparisons.hpp:122
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:96
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:128
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:114