scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
summarize_comparisons.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
3
4#include <algorithm>
5#include <numeric>
6#include <vector>
7#include <cmath>
8#include <cstddef>
9
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "utils.hpp"
14
20namespace scran_markers {
21
28template<typename Stat_ = double, typename Rank_ = int>
35 Stat_* min = NULL;
36
42 Stat_* mean = NULL;
43
49 Stat_* median = NULL;
50
56 Stat_* max = NULL;
57
63 Rank_* min_rank = NULL;
64};
65
72template<typename Stat_ = double, typename Rank_ = int>
78 std::vector<Stat_> min;
79
84 std::vector<Stat_> mean;
85
90 std::vector<Stat_> median;
91
96 std::vector<Stat_> max;
97
102 std::vector<Rank_> min_rank;
103};
104
108namespace internal {
109
110template<typename Stat_, typename Gene_, typename Rank_>
111void summarize_comparisons(
112 const std::size_t ngroups,
113 const Stat_* const effects,
114 const std::size_t group,
115 const Gene_ gene,
116 const SummaryBuffers<Stat_, Rank_>& output,
117 std::vector<Stat_>& buffer)
118{
119 // Ignoring the self comparison and pruning out NaNs.
120 std::size_t ncomps = 0;
121 for (decltype(I(ngroups)) r = 0; r < ngroups; ++r) {
122 if (r == group || std::isnan(effects[r])) {
123 continue;
124 }
125 buffer[ncomps] = effects[r];
126 ++ncomps;
127 }
128
129 if (ncomps <= 1) {
130 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : buffer[0]);
131 if (output.min) {
132 output.min[gene] = val;
133 }
134 if (output.mean) {
135 output.mean[gene] = val;
136 }
137 if (output.max) {
138 output.max[gene] = val;
139 }
140 if (output.median) {
141 output.median[gene] = val;
142 }
143
144 } else {
145 const auto ebegin = buffer.data(), elast = ebegin + ncomps;
146 if (output.min) {
147 output.min[gene] = *std::min_element(ebegin, elast);
148 }
149 if (output.mean) {
150 output.mean[gene] = std::accumulate(ebegin, elast, static_cast<Stat_>(0)) / ncomps;
151 }
152 if (output.max) {
153 output.max[gene] = *std::max_element(ebegin, elast);
154 }
155 if (output.median) { // this mutates the buffer, so we put this last to avoid surprises.
156 output.median[gene] = tatami_stats::medians::direct(ebegin, ncomps, /* skip_nan = */ false);
157 }
158 }
159}
160
161template<typename Gene_, typename Stat_, typename Rank_>
162void summarize_comparisons(
163 const Gene_ ngenes,
164 const std::size_t ngroups,
165 const Stat_* const effects,
166 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
167 const int threads)
168{
169 tatami::parallelize([&](const int, const Gene_ start, const Gene_ length) -> void {
170 auto buffer = sanisizer::create<std::vector<Stat_> >(ngroups);
171 for (Gene_ gene = start, end = start + length; gene < end; ++gene) {
172 for (decltype(I(ngroups)) l = 0; l < ngroups; ++l) {
173 const auto current_effects = effects + sanisizer::nd_offset<std::size_t>(0, ngroups, l, ngroups, gene);
174 summarize_comparisons(ngroups, current_effects, l, gene, output[l], buffer);
175 }
176 }
177 }, ngenes, threads);
178}
179
180template<typename Stat_, typename Gene_>
181Gene_ fill_and_sort_rank_buffer(const Stat_* const effects, const std::size_t stride, std::vector<std::pair<Stat_, Gene_> >& buffer) {
182 Gene_ counter = 0;
183 for (Gene_ i = 0, end = buffer.size(); i < end; ++i) {
184 const auto cureffect = effects[sanisizer::product_unsafe<std::size_t>(i, stride)];
185 if (!std::isnan(cureffect)) {
186 auto& current = buffer[counter];
187 current.first = cureffect;
188 current.second = i;
189 ++counter;
190 }
191 }
192
193 std::sort(
194 buffer.begin(),
195 buffer.begin() + counter,
196 [&](const std::pair<Stat_, Gene_>& left, const std::pair<Stat_, Gene_>& right) -> bool {
197 // Sort by decreasing first element, then break ties by increasing second element.
198 if (left.first == right.first) {
199 return left.second < right.second;
200 } else {
201 return left.first > right.first;
202 }
203 }
204 );
205
206 return counter;
207}
208
209template<typename Stat_, typename Gene_, typename Rank_>
210void compute_min_rank_internal(const Gene_ use, const std::vector<std::pair<Stat_, Gene_> >& buffer, Rank_* const output) {
211 Rank_ counter = 1;
212 for (Gene_ i = 0; i < use; ++i) {
213 auto& current = output[buffer[i].second];
214 if (counter < current) {
215 current = counter;
216 }
217 ++counter;
218 }
219}
220
221template<typename Stat_, typename Gene_, typename Rank_>
222void compute_min_rank_for_group(const Gene_ ngenes, const std::size_t ngroups, const std::size_t group, const Stat_* const effects, Rank_* const output, const int threads) {
223 std::vector<std::vector<Rank_> > stores(threads - 1);
224 const auto maxrank_placeholder = ngenes; // using the maximum possible rank (i.e., 'ngenes') as the default.
225 std::fill_n(output, ngenes, maxrank_placeholder);
226
227 tatami::parallelize([&](const int t, const std::size_t start, const std::size_t length) -> void {
228 Rank_* curoutput;
229 if (t == 0) {
230 curoutput = output;
231 } else {
232 auto& curstore = stores[t - 1];
233 if (curstore.empty()) {
234 sanisizer::resize(curstore, ngenes, maxrank_placeholder);
235 }
236 curoutput = curstore.data();
237 }
238
239 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
240 for (decltype(I(start)) g = start, end = start + length; g < end; ++g) {
241 if (g == group) {
242 continue;
243 }
244 const auto used = fill_and_sort_rank_buffer(effects + g, ngroups, buffer);
245 compute_min_rank_internal(used, buffer, curoutput);
246 }
247 }, ngroups, threads);
248
249 for (const auto& curstore : stores) {
250 auto copy = output;
251 for (auto x : curstore) {
252 if (x < *copy) {
253 *copy = x;
254 }
255 ++copy;
256 }
257 }
258}
259
260template<typename Stat_, typename Gene_, typename Rank_>
261void compute_min_rank_pairwise(
262 const Gene_ ngenes,
263 const std::size_t ngroups,
264 const Stat_* const effects,
265 const std::vector<SummaryBuffers<Stat_, Rank_> >& output,
266 const int threads)
267{
268 const auto ngroups2 = sanisizer::product_unsafe<std::size_t>(ngroups, ngroups);
269
270 tatami::parallelize([&](const int, const std::size_t start, const std::size_t length) -> void {
271 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
272 for (decltype(I(start)) g = start, end = start + length; g < end; ++g) {
273 const auto target = output[g].min_rank;
274 if (target == NULL) {
275 continue;
276 }
277
278 std::fill_n(target, ngenes, ngenes); // using the maximum rank as the default.
279
280 for (decltype(I(ngroups)) g2 = 0; g2 < ngroups; ++g2) {
281 if (g == g2) {
282 continue;
283 }
284 const auto offset = sanisizer::nd_offset<std::size_t>(g2, ngroups, g);
285 const auto used = fill_and_sort_rank_buffer(effects + offset, ngroups2, buffer);
286 compute_min_rank_internal(used, buffer, target);
287 }
288 }
289 }, ngroups, threads);
290}
291
292template<typename Gene_, typename Stat_, typename Rank_>
293SummaryBuffers<Stat_, Rank_> fill_summary_results(
294 const Gene_ ngenes,
295 SummaryResults<Stat_, Rank_>& out,
296 const bool compute_min,
297 const bool compute_mean,
298 const bool compute_median,
299 const bool compute_max,
300 const bool compute_min_rank)
301{
302 SummaryBuffers<Stat_, Rank_> ptr;
303 const auto out_len = sanisizer::cast<typename std::vector<Stat_>::size_type>(ngenes);
304
305 if (compute_min) {
306 out.min.resize(out_len
307#ifdef SCRAN_MARKERS_TEST_INIT
308 , SCRAN_MARKERS_TEST_INIT
309#endif
310 );
311 ptr.min = out.min.data();
312 }
313 if (compute_mean) {
314 out.mean.resize(out_len
315#ifdef SCRAN_MARKERS_TEST_INIT
316 , SCRAN_MARKERS_TEST_INIT
317#endif
318 );
319 ptr.mean = out.mean.data();
320 }
321 if (compute_median) {
322 out.median.resize(out_len
323#ifdef SCRAN_MARKERS_TEST_INIT
324 , SCRAN_MARKERS_TEST_INIT
325#endif
326 );
327 ptr.median = out.median.data();
328 }
329 if (compute_max) {
330 out.max.resize(out_len
331#ifdef SCRAN_MARKERS_TEST_INIT
332 , SCRAN_MARKERS_TEST_INIT
333#endif
334 );
335 ptr.max = out.max.data();
336 }
337 if (compute_min_rank) {
338 out.min_rank.resize(out_len
339#ifdef SCRAN_MARKERS_TEST_INIT
340 , SCRAN_MARKERS_TEST_INIT
341#endif
342 );
343 ptr.min_rank = out.min_rank.data();
344 }
345
346 return ptr;
347}
348
349template<typename Gene_, typename Stat_, typename Rank_>
350std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
351 Gene_ ngenes,
352 const std::size_t ngroups,
353 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
354 const bool compute_min,
355 const bool compute_mean,
356 const bool compute_median,
357 const bool compute_max,
358 const bool compute_min_rank)
359{
360 sanisizer::resize(outputs, ngroups);
361 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
362 ptrs.reserve(ngroups);
363 for (decltype(I(ngroups)) g = 0; g < ngroups; ++g) {
364 ptrs.emplace_back(fill_summary_results(ngenes, outputs[g], compute_min, compute_mean, compute_median, compute_max, compute_min_rank));
365 }
366 return ptrs;
367}
368
369}
374}
375
376#endif
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void parallelize(Function_ fun, Index_ tasks, int threads)
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:29
Stat_ * mean
Definition summarize_comparisons.hpp:42
Stat_ * median
Definition summarize_comparisons.hpp:49
Stat_ * min
Definition summarize_comparisons.hpp:35
Stat_ * max
Definition summarize_comparisons.hpp:56
Rank_ * min_rank
Definition summarize_comparisons.hpp:63
Container for the summary statistics.
Definition summarize_comparisons.hpp:73
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:84
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:90
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:78
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:102
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:96