scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
summarize_comparisons.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
3
4#include <algorithm>
5#include <numeric>
6#include <vector>
7#include <cmath>
8#include <cstddef>
9
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12
18namespace scran_markers {
19
26template<typename Stat_ = double, typename Rank_ = int>
33 Stat_* min = NULL;
34
40 Stat_* mean = NULL;
41
47 Stat_* median = NULL;
48
54 Stat_* max = NULL;
55
61 Rank_* min_rank = NULL;
62};
63
70template<typename Stat_ = double, typename Rank_ = int>
76 std::vector<Stat_> min;
77
82 std::vector<Stat_> mean;
83
88 std::vector<Stat_> median;
89
94 std::vector<Stat_> max;
95
100 std::vector<Rank_> min_rank;
101};
102
106namespace internal {
107
108template<typename Stat_, typename Gene_, typename Rank_>
109void summarize_comparisons(std::size_t ngroups, const Stat_* effects, std::size_t group, Gene_ gene, const SummaryBuffers<Stat_, Rank_>& output, std::vector<Stat_>& buffer) {
110 // Ignoring the self comparison and pruning out NaNs.
111 std::size_t ncomps = 0;
112 for (decltype(ngroups) r = 0; r < ngroups; ++r) {
113 if (r == group || std::isnan(effects[r])) {
114 continue;
115 }
116 buffer[ncomps] = effects[r];
117 ++ncomps;
118 }
119
120 if (ncomps <= 1) {
121 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : buffer[0]);
122 if (output.min) {
123 output.min[gene] = val;
124 }
125 if (output.mean) {
126 output.mean[gene] = val;
127 }
128 if (output.max) {
129 output.max[gene] = val;
130 }
131 if (output.median) {
132 output.median[gene] = val;
133 }
134
135 } else {
136 auto ebegin = buffer.data(), elast = ebegin + ncomps;
137 if (output.min) {
138 output.min[gene] = *std::min_element(ebegin, elast);
139 }
140 if (output.mean) {
141 output.mean[gene] = std::accumulate(ebegin, elast, static_cast<Stat_>(0)) / ncomps;
142 }
143 if (output.max) {
144 output.max[gene] = *std::max_element(ebegin, elast);
145 }
146 if (output.median) { // this mutates the buffer, so we put this last to avoid surprises.
147 output.median[gene] = tatami_stats::medians::direct(ebegin, ncomps, /* skip_nan = */ false);
148 }
149 }
150}
151
152template<typename Gene_, typename Stat_, typename Rank_>
153void summarize_comparisons(Gene_ ngenes, std::size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
154 tatami::parallelize([&](int, Gene_ start, Gene_ length) -> void {
155 auto buffer = sanisizer::create<std::vector<Stat_> >(ngroups);
156 for (Gene_ gene = start, end = start + length; gene < end; ++gene) {
157 for (decltype(ngroups) l = 0; l < ngroups; ++l) {
158 auto current_effects = effects + sanisizer::nd_offset<std::size_t>(0, ngroups, l, ngroups, gene);
159 summarize_comparisons(ngroups, current_effects, l, gene, output[l], buffer);
160 }
161 }
162 }, ngenes, threads);
163}
164
165template<typename Stat_, typename Gene_>
166Gene_ fill_and_sort_rank_buffer(const Stat_* effects, std::size_t stride, std::vector<std::pair<Stat_, Gene_> >& buffer) {
167 Gene_ counter = 0;
168 for (Gene_ i = 0, end = buffer.size(); i < end; ++i) {
169 auto cureffect = effects[sanisizer::product_unsafe<std::size_t>(i, stride)];
170 if (!std::isnan(cureffect)) {
171 auto& current = buffer[counter];
172 current.first = cureffect;
173 current.second = i;
174 ++counter;
175 }
176 }
177
178 std::sort(
179 buffer.begin(),
180 buffer.begin() + counter,
181 [&](const std::pair<Stat_, Gene_>& left, const std::pair<Stat_, Gene_>& right) -> bool {
182 // Sort by decreasing first element, then break ties by increasing second element.
183 if (left.first == right.first) {
184 return left.second < right.second;
185 } else {
186 return left.first > right.first;
187 }
188 }
189 );
190
191 return counter;
192}
193
194template<typename Stat_, typename Gene_, typename Rank_>
195void compute_min_rank_internal(Gene_ use, const std::vector<std::pair<Stat_, Gene_> >& buffer, Rank_* output) {
196 Rank_ counter = 1;
197 for (Gene_ i = 0; i < use; ++i) {
198 auto& current = output[buffer[i].second];
199 if (counter < current) {
200 current = counter;
201 }
202 ++counter;
203 }
204}
205
206template<typename Stat_, typename Gene_, typename Rank_>
207void compute_min_rank_for_group(Gene_ ngenes, std::size_t ngroups, std::size_t group, const Stat_* effects, Rank_* output, int threads) {
208 std::vector<std::vector<Rank_> > stores(threads - 1);
209 std::fill_n(output, ngenes, ngenes); // using the maximum possible rank (i.e., 'ngenes') as the default.
210
211 tatami::parallelize([&](int t, std::size_t start, std::size_t length) -> void {
212 Rank_* curoutput;
213 if (t == 0) {
214 curoutput = output;
215 } else {
216 auto& curstore = stores[t - 1];
217 if (curstore.empty()) {
218 curstore.resize(ngenes, ngenes + 1);
219 }
220 curoutput = curstore.data();
221 }
222
223 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
224 for (auto g = start, end = start + length; g < end; ++g) {
225 if (g == group) {
226 continue;
227 }
228 auto used = fill_and_sort_rank_buffer(effects + g, ngroups, buffer);
229 compute_min_rank_internal(used, buffer, curoutput);
230 }
231 }, ngroups, threads);
232
233 for (const auto& curstore : stores) {
234 auto copy = output;
235 for (auto x : curstore) {
236 if (x < *copy) {
237 *copy = x;
238 }
239 ++copy;
240 }
241 }
242}
243
244template<typename Stat_, typename Gene_, typename Rank_>
245void compute_min_rank_pairwise(Gene_ ngenes, std::size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
246 const auto ngroups2 = sanisizer::product_unsafe<std::size_t>(ngroups, ngroups);
247
248 tatami::parallelize([&](int, std::size_t start, std::size_t length) -> void {
249 auto buffer = sanisizer::create<std::vector<std::pair<Stat_, Gene_> > >(ngenes);
250 for (auto g = start, end = start + length; g < end; ++g) {
251 auto target = output[g].min_rank;
252 if (target == NULL) {
253 continue;
254 }
255
256 std::fill_n(target, ngenes, ngenes); // using the maximum rank as the default.
257
258 for (decltype(ngroups) g2 = 0; g2 < ngroups; ++g2) {
259 if (g == g2) {
260 continue;
261 }
262 auto offset = sanisizer::nd_offset<std::size_t>(g2, ngroups, g);
263 auto used = fill_and_sort_rank_buffer(effects + offset, ngroups2, buffer);
264 compute_min_rank_internal(used, buffer, target);
265 }
266 }
267 }, ngroups, threads);
268}
269
270template<typename Gene_, typename Stat_, typename Rank_>
271SummaryBuffers<Stat_, Rank_> fill_summary_results(
272 Gene_ ngenes,
273 SummaryResults<Stat_, Rank_>& out,
274 bool compute_min,
275 bool compute_mean,
276 bool compute_median,
277 bool compute_max,
278 bool compute_min_rank)
279{
280 SummaryBuffers<Stat_, Rank_> ptr;
281 auto out_len = sanisizer::cast<typename std::vector<Stat_>::size_type>(ngenes);
282
283 if (compute_min) {
284 out.min.resize(out_len
285#ifdef SCRAN_MARKERS_TEST_INIT
286 , SCRAN_MARKERS_TEST_INIT
287#endif
288 );
289 ptr.min = out.min.data();
290 }
291 if (compute_mean) {
292 out.mean.resize(out_len
293#ifdef SCRAN_MARKERS_TEST_INIT
294 , SCRAN_MARKERS_TEST_INIT
295#endif
296 );
297 ptr.mean = out.mean.data();
298 }
299 if (compute_median) {
300 out.median.resize(out_len
301#ifdef SCRAN_MARKERS_TEST_INIT
302 , SCRAN_MARKERS_TEST_INIT
303#endif
304 );
305 ptr.median = out.median.data();
306 }
307 if (compute_max) {
308 out.max.resize(out_len
309#ifdef SCRAN_MARKERS_TEST_INIT
310 , SCRAN_MARKERS_TEST_INIT
311#endif
312 );
313 ptr.max = out.max.data();
314 }
315 if (compute_min_rank) {
316 out.min_rank.resize(out_len
317#ifdef SCRAN_MARKERS_TEST_INIT
318 , SCRAN_MARKERS_TEST_INIT
319#endif
320 );
321 ptr.min_rank = out.min_rank.data();
322 }
323
324 return ptr;
325}
326
327template<typename Gene_, typename Stat_, typename Rank_>
328std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
329 Gene_ ngenes,
330 std::size_t ngroups,
331 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
332 bool compute_min,
333 bool compute_mean,
334 bool compute_median,
335 bool compute_max,
336 bool compute_min_rank)
337{
338 outputs.resize(sanisizer::cast<decltype(outputs.size())>(ngroups));
339 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
340 ptrs.reserve(ngroups);
341 for (decltype(ngroups) g = 0; g < ngroups; ++g) {
342 ptrs.emplace_back(fill_summary_results(ngenes, outputs[g], compute_min, compute_mean, compute_median, compute_max, compute_min_rank));
343 }
344 return ptrs;
345}
346
347}
352}
353
354#endif
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:25
void parallelize(Function_ fun, Index_ tasks, int threads)
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:27
Stat_ * mean
Definition summarize_comparisons.hpp:40
Stat_ * median
Definition summarize_comparisons.hpp:47
Stat_ * min
Definition summarize_comparisons.hpp:33
Stat_ * max
Definition summarize_comparisons.hpp:54
Rank_ * min_rank
Definition summarize_comparisons.hpp:61
Container for the summary statistics.
Definition summarize_comparisons.hpp:71
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:82
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:88
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:76
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:100
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:94