scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
summarize_comparisons.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
3
4#include "tatami_stats/tatami_stats.hpp"
5
6#include <algorithm>
7#include <numeric>
8#include <vector>
9#include <cmath>
10
16namespace scran_markers {
17
24template<typename Stat_ = double, typename Rank_ = int>
61
68template<typename Stat_ = double, typename Rank_ = int>
74 std::vector<Stat_> min;
75
80 std::vector<Stat_> mean;
81
86 std::vector<Stat_> median;
87
92 std::vector<Stat_> max;
93
98 std::vector<Rank_> min_rank;
99};
100
104namespace internal {
105
106template<typename Stat_, typename Rank_>
107void summarize_comparisons(size_t ngroups, const Stat_* effects, size_t group, size_t gene, const SummaryBuffers<Stat_, Rank_>& output, std::vector<Stat_>& buffer) {
108 auto ebegin = buffer.data();
109 auto elast = ebegin;
110
111 // Ignoring the self comparison and pruning out NaNs.
112 {
113 auto eptr = effects;
114 for (size_t r = 0; r < ngroups; ++r, ++eptr) {
115 if (r == group || std::isnan(*eptr)) {
116 continue;
117 }
118 *elast = *eptr;
119 ++elast;
120 }
121 }
122
123 size_t ncomps = elast - ebegin;
124 if (ncomps <= 1) {
125 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : *ebegin);
126 if (output.min) {
127 output.min[gene] = val;
128 }
129 if (output.mean) {
130 output.mean[gene] = val;
131 }
132 if (output.max) {
133 output.max[gene] = val;
134 }
135 if (output.median) {
136 output.median[gene] = val;
137 }
138
139 } else {
140 if (output.min) {
141 output.min[gene] = *std::min_element(ebegin, elast);
142 }
143 if (output.mean) {
144 output.mean[gene] = std::accumulate(ebegin, elast, 0.0) / ncomps;
145 }
146 if (output.max) {
147 output.max[gene] = *std::max_element(ebegin, elast);
148 }
149 if (output.median) { // this mutates the buffer, so we put this last to avoid surprises.
150 output.median[gene] = tatami_stats::medians::direct(ebegin, ncomps, /* skip_nan = */ false);
151 }
152 }
153}
154
155template<typename Stat_, typename Rank_>
156void summarize_comparisons(size_t ngenes, size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
157 size_t shift = ngroups * ngroups;
158
159 tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
160 std::vector<Stat_> buffer(ngroups);
161 auto effect_ptr = effects + start * shift; // everything is already a size_t, so it's fine.
162
163 for (size_t gene = start, end = start + length; gene < end; ++gene, effect_ptr += shift) {
164 auto current_effects = effect_ptr;
165 for (size_t l = 0; l < ngroups; ++l, current_effects += ngroups) {
166 summarize_comparisons(ngroups, current_effects, l, gene, output[l], buffer);
167 }
168 }
169 }, ngenes, threads);
170}
171
172template<typename Stat_, typename Index_>
173Index_ fill_and_sort_rank_buffer(const Stat_* effects, size_t stride, std::vector<std::pair<Stat_, Index_> >& buffer) {
174 auto bIt = buffer.begin();
175 for (Index_ i = 0, end = buffer.size(); i < end; ++i, effects += stride) {
176 if (!std::isnan(*effects)) {
177 bIt->first = -*effects; // negative to sort by decreasing value.
178 bIt->second = i;
179 ++bIt;
180 }
181 }
182 std::sort(buffer.begin(), bIt);
183 return bIt - buffer.begin();
184}
185
186template<typename Stat_, typename Index_, typename Rank_>
187void compute_min_rank_internal(Index_ use, const std::vector<std::pair<Stat_, Index_> >& buffer, Rank_* output) {
188 Rank_ counter = 1;
189 for (Index_ i = 0; i < use; ++i) {
190 auto& current = output[buffer[i].second];
191 if (counter < current) {
192 current = counter;
193 }
194 ++counter;
195 }
196}
197
198template<typename Stat_, typename Index_, typename Rank_>
199void compute_min_rank_for_group(Index_ ngenes, size_t ngroups, size_t group, const Stat_* effects, Rank_* output, int threads) {
200 std::vector<std::vector<Rank_> > stores(threads - 1);
201 std::fill_n(output, ngenes, ngenes + 1);
202
203 tatami::parallelize([&](size_t t, size_t start, size_t length) {
204 Rank_* curoutput;
205 if (t == 0) {
206 curoutput = output;
207 } else {
208 auto& curstore = stores[t - 1];
209 if (curstore.empty()) {
210 curstore.resize(ngenes, ngenes + 1);
211 }
212 curoutput = curstore.data();
213 }
214 std::vector<std::pair<Stat_, Index_> > buffer(ngenes);
215
216 for (size_t g = start, end = start + length; g < end; ++g) {
217 if (g == group) {
218 continue;
219 }
220 auto used = fill_and_sort_rank_buffer(effects + g, ngroups, buffer);
221 compute_min_rank_internal(used, buffer, curoutput);
222 }
223 }, ngroups, threads);
224
225 for (const auto& curstore : stores) {
226 auto copy = output;
227 for (auto x : curstore) {
228 if (x < *copy) {
229 *copy = x;
230 }
231 ++copy;
232 }
233 }
234}
235
236template<typename Stat_, typename Index_, typename Rank_>
237void compute_min_rank_pairwise(Index_ ngenes, size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
238 size_t shift = ngroups * ngroups;
239
240 tatami::parallelize([&](size_t, size_t start, size_t length) {
241 std::vector<std::pair<Stat_, Index_> > buffer(ngenes);
242 for (size_t g = start, end = start + length; g < end; ++g) {
243 auto target = output[g].min_rank;
244 if (target == NULL) {
245 continue;
246 }
247
248 std::fill_n(target, ngenes, ngenes + 1);
249 auto base = effects + g * ngroups;
250
251 for (size_t g2 = 0; g2 < ngroups; ++g2) {
252 if (g == g2) {
253 continue;
254 }
255 auto used = fill_and_sort_rank_buffer(base + g2, shift, buffer);
256 compute_min_rank_internal(used, buffer, target);
257 }
258 }
259 }, ngroups, threads);
260}
261
262template<typename Stat_, typename Rank_>
263SummaryBuffers<Stat_, Rank_> fill_summary_results(
264 size_t ngenes,
265 SummaryResults<Stat_, Rank_>& out,
266 bool compute_min,
267 bool compute_mean,
268 bool compute_median,
269 bool compute_max,
270 bool compute_min_rank)
271{
272 SummaryBuffers<Stat_, Rank_> ptr;
273
274 if (compute_min) {
275 out.min.resize(ngenes
276#ifdef SCRAN_MARKERS_TEST_INIT
277 , SCRAN_MARKERS_TEST_INIT
278#endif
279 );
280 ptr.min = out.min.data();
281 }
282 if (compute_mean) {
283 out.mean.resize(ngenes
284#ifdef SCRAN_MARKERS_TEST_INIT
285 , SCRAN_MARKERS_TEST_INIT
286#endif
287 );
288 ptr.mean = out.mean.data();
289 }
290 if (compute_median) {
291 out.median.resize(ngenes
292#ifdef SCRAN_MARKERS_TEST_INIT
293 , SCRAN_MARKERS_TEST_INIT
294#endif
295 );
296 ptr.median = out.median.data();
297 }
298 if (compute_max) {
299 out.max.resize(ngenes
300#ifdef SCRAN_MARKERS_TEST_INIT
301 , SCRAN_MARKERS_TEST_INIT
302#endif
303 );
304 ptr.max = out.max.data();
305 }
306 if (compute_min_rank) {
307 out.min_rank.resize(ngenes
308#ifdef SCRAN_MARKERS_TEST_INIT
309 , SCRAN_MARKERS_TEST_INIT
310#endif
311 );
312 ptr.min_rank = out.min_rank.data();
313 }
314
315 return ptr;
316}
317
318template<typename Stat_, typename Rank_>
319std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
320 size_t ngenes,
321 size_t ngroups,
322 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
323 bool compute_min,
324 bool compute_mean,
325 bool compute_median,
326 bool compute_max,
327 bool compute_min_rank)
328{
329 outputs.resize(ngroups);
330 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
331 ptrs.reserve(ngroups);
332 for (size_t g = 0; g < ngroups; ++g) {
333 ptrs.emplace_back(fill_summary_results(ngenes, outputs[g], compute_min, compute_mean, compute_median, compute_max, compute_min_rank));
334 }
335 return ptrs;
336}
337
338}
343}
344
345#endif
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:23
void parallelize(Function_ fun, Index_ tasks, int threads)
Buffers for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:82
std::vector< Stat_ * > mean
Definition score_markers_pairwise.hpp:88
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:25
Stat_ * mean
Definition summarize_comparisons.hpp:38
Stat_ * median
Definition summarize_comparisons.hpp:45
Stat_ * min
Definition summarize_comparisons.hpp:31
Stat_ * max
Definition summarize_comparisons.hpp:52
Rank_ * min_rank
Definition summarize_comparisons.hpp:59
Container for the summary statistics.
Definition summarize_comparisons.hpp:69
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:80
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:86
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:74
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:98
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:92