1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
28template<
typename Stat_ =
double,
typename Rank_ =
int>
72template<
typename Stat_ =
double,
typename Rank_ =
int>
78 std::vector<Stat_>
min;
96 std::vector<Stat_>
max;
110template<
typename Stat_,
typename Gene_,
typename Rank_>
111void summarize_comparisons(
112 const std::size_t ngroups,
113 const Stat_*
const effects,
114 const std::size_t group,
117 std::vector<Stat_>& buffer)
120 std::size_t ncomps = 0;
121 for (
decltype(I(ngroups)) r = 0; r < ngroups; ++r) {
122 if (r == group || std::isnan(effects[r])) {
125 buffer[ncomps] = effects[r];
130 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : buffer[0]);
132 output.
min[gene] = val;
135 output.
mean[gene] = val;
138 output.
max[gene] = val;
141 output.
median[gene] = val;
145 const auto ebegin = buffer.data(), elast = ebegin + ncomps;
147 output.
min[gene] = *std::min_element(ebegin, elast);
150 output.
mean[gene] = std::accumulate(ebegin, elast,
static_cast<Stat_
>(0)) / ncomps;
153 output.
max[gene] = *std::max_element(ebegin, elast);
156 output.
median[gene] = tatami_stats::medians::direct(ebegin, ncomps,
false);
161template<
typename Gene_,
typename Stat_,
typename Rank_>
162void summarize_comparisons(
164 const std::size_t ngroups,
165 const Stat_*
const effects,
166 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
170 auto buffer = sanisizer::create<std::vector<Stat_> >(ngroups);
171 for (Gene_ gene = start, end = start + length; gene < end; ++gene) {
172 for (
decltype(I(ngroups)) l = 0; l < ngroups; ++l) {
173 const auto current_effects = effects + sanisizer::nd_offset<std::size_t>(0, ngroups, l, ngroups, gene);
174 summarize_comparisons(ngroups, current_effects, l, gene, output[l], buffer);
180template<
typename Stat_,
typename Gene_>
181Gene_ fill_and_sort_rank_buffer(
const Stat_*
const effects,
const std::size_t stride, std::vector<std::pair<Stat_, Gene_> >& buffer) {
183 for (Gene_ i = 0, end = buffer.size(); i < end; ++i) {
184 const auto cureffect = effects[sanisizer::product_unsafe<std::size_t>(i, stride)];
185 if (!std::isnan(cureffect)) {
186 auto& current = buffer[counter];
187 current.first = cureffect;
195 buffer.begin() + counter,
196 [&](
const std::pair<Stat_, Gene_>& left,
const std::pair<Stat_, Gene_>& right) ->
bool {
198 if (left.first == right.first) {
199 return left.second < right.second;
201 return left.first > right.first;
209template<
typename Stat_,
typename Gene_,
typename Rank_>
210void compute_min_rank_pairwise(
212 const std::size_t ngroups,
213 const Stat_*
const effects,
214 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
215 const bool preserve_ties,
218 const auto ngroups2 = sanisizer::product_unsafe<std::size_t>(ngroups, ngroups);
219 const auto maxrank_placeholder = sanisizer::cast<Rank_>(ngenes);
221 tatami::parallelize([&](
const int,
const std::size_t start,
const std::size_t length) ->
void {
222 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
223 for (
decltype(I(start)) g = start, end = start + length; g < end; ++g) {
224 const auto target = output[g].
min_rank;
225 if (target == NULL) {
228 std::fill_n(target, ngenes, maxrank_placeholder);
230 for (
decltype(I(ngroups)) g2 = 0; g2 < ngroups; ++g2) {
234 const auto offset = sanisizer::nd_offset<std::size_t>(g2, ngroups, g);
235 const auto used = fill_and_sort_rank_buffer(effects + offset, ngroups2, buffer);
237 if (!preserve_ties) {
239 for (Gene_ i = 0; i < used; ++i) {
240 auto& current = target[buffer[i].second];
241 if (counter < current) {
250 const auto original = i;
251 const auto val = buffer[i].first;
253 auto& current = target[buffer[i].second];
254 if (counter < current) {
258 while (++i < used && buffer[i].first == val) {
259 auto& current = target[buffer[i].second];
260 if (counter < current) {
265 counter += i - original;
270 }, ngroups, threads);
273template<
typename Gene_,
typename Stat_,
typename Rank_>
274SummaryBuffers<Stat_, Rank_> fill_summary_results(
276 SummaryResults<Stat_, Rank_>& out,
277 const bool compute_min,
278 const bool compute_mean,
279 const bool compute_median,
280 const bool compute_max,
281 const bool compute_min_rank)
283 SummaryBuffers<Stat_, Rank_> ptr;
284 const auto out_len = sanisizer::cast<typename std::vector<Stat_>::size_type>(ngenes);
287 out.min.resize(out_len
288#ifdef SCRAN_MARKERS_TEST_INIT
289 , SCRAN_MARKERS_TEST_INIT
292 ptr.min = out.min.data();
295 out.mean.resize(out_len
296#ifdef SCRAN_MARKERS_TEST_INIT
297 , SCRAN_MARKERS_TEST_INIT
300 ptr.mean = out.mean.data();
302 if (compute_median) {
303 out.median.resize(out_len
304#ifdef SCRAN_MARKERS_TEST_INIT
305 , SCRAN_MARKERS_TEST_INIT
308 ptr.median = out.median.data();
311 out.max.resize(out_len
312#ifdef SCRAN_MARKERS_TEST_INIT
313 , SCRAN_MARKERS_TEST_INIT
316 ptr.max = out.max.data();
318 if (compute_min_rank) {
319 out.min_rank.resize(out_len
320#ifdef SCRAN_MARKERS_TEST_INIT
321 , SCRAN_MARKERS_TEST_INIT
324 ptr.min_rank = out.min_rank.data();
330template<
typename Gene_,
typename Stat_,
typename Rank_>
331std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
333 const std::size_t ngroups,
334 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
335 const bool compute_min,
336 const bool compute_mean,
337 const bool compute_median,
338 const bool compute_max,
339 const bool compute_min_rank)
341 sanisizer::resize(outputs, ngroups);
342 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
343 ptrs.reserve(ngroups);
344 for (
decltype(I(ngroups)) g = 0; g < ngroups; ++g) {
345 ptrs.emplace_back(fill_summary_results(ngenes, outputs[g], compute_min, compute_mean, compute_median, compute_max, compute_min_rank));
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:29
Stat_ * mean
Definition summarize_comparisons.hpp:42
Stat_ * median
Definition summarize_comparisons.hpp:49
Stat_ * min
Definition summarize_comparisons.hpp:35
Stat_ * max
Definition summarize_comparisons.hpp:56
Rank_ * min_rank
Definition summarize_comparisons.hpp:63
Container for the summary statistics.
Definition summarize_comparisons.hpp:73
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:84
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:90
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:78
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:102
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:96