scran_markers
Marker detection for single-cell data
Loading...
Searching...
No Matches
summarize_comparisons.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_MARKERS_SUMMARIZE_COMPARISONS_HPP
3
4#include "tatami_stats/tatami_stats.hpp"
5
6#include <algorithm>
7#include <numeric>
8#include <vector>
9#include <cmath>
10
16namespace scran_markers {
17
24template<typename Stat_ = double, typename Rank_ = int>
61
68template<typename Stat_ = double, typename Rank_ = int>
74 std::vector<Stat_> min;
75
80 std::vector<Stat_> mean;
81
86 std::vector<Stat_> median;
87
92 std::vector<Stat_> max;
93
98 std::vector<Rank_> min_rank;
99};
100
104namespace internal {
105
106template<typename Stat_, typename Rank_>
107void summarize_comparisons(size_t ngroups, const Stat_* effects, size_t group, size_t gene, const SummaryBuffers<Stat_, Rank_>& output, std::vector<Stat_>& buffer) {
108 auto ebegin = buffer.data();
109 auto elast = ebegin;
110
111 // Ignoring the self comparison and pruning out NaNs.
112 {
113 auto eptr = effects;
114 for (size_t r = 0; r < ngroups; ++r, ++eptr) {
115 if (r == group || std::isnan(*eptr)) {
116 continue;
117 }
118 *elast = *eptr;
119 ++elast;
120 }
121 }
122
123 size_t ncomps = elast - ebegin;
124 if (ncomps <= 1) {
125 Stat_ val = (ncomps == 0 ? std::numeric_limits<Stat_>::quiet_NaN() : *ebegin);
126 if (output.min) {
127 output.min[gene] = val;
128 }
129 if (output.mean) {
130 output.mean[gene] = val;
131 }
132 if (output.max) {
133 output.max[gene] = val;
134 }
135 if (output.median) {
136 output.median[gene] = val;
137 }
138
139 } else {
140 if (output.min) {
141 output.min[gene] = *std::min_element(ebegin, elast);
142 }
143 if (output.mean) {
144 output.mean[gene] = std::accumulate(ebegin, elast, 0.0) / ncomps;
145 }
146 if (output.max) {
147 output.max[gene] = *std::max_element(ebegin, elast);
148 }
149 if (output.median) { // this mutates the buffer, so we put this last to avoid surprises.
150 output.median[gene] = tatami_stats::medians::direct(ebegin, ncomps, /* skip_nan = */ false);
151 }
152 }
153}
154
155template<typename Stat_, typename Rank_>
156void summarize_comparisons(size_t ngenes, size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
157 size_t shift = ngroups * ngroups;
158
159 tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
160 std::vector<Stat_> buffer(ngroups);
161 auto effect_ptr = effects + start * shift; // everything is already a size_t, so it's fine.
162
163 for (size_t gene = start, end = start + length; gene < end; ++gene, effect_ptr += shift) {
164 auto current_effects = effect_ptr;
165 for (size_t l = 0; l < ngroups; ++l, current_effects += ngroups) {
166 summarize_comparisons(ngroups, current_effects, l, gene, output[l], buffer);
167 }
168 }
169 }, ngenes, threads);
170}
171
172template<typename Stat_, typename Index_>
173Index_ fill_and_sort_rank_buffer(const Stat_* effects, size_t stride, std::vector<std::pair<Stat_, Index_> >& buffer) {
174 auto bIt = buffer.begin();
175 for (Index_ i = 0, end = buffer.size(); i < end; ++i, effects += stride) {
176 if (!std::isnan(*effects)) {
177 bIt->first = -*effects; // negative to sort by decreasing value.
178 bIt->second = i;
179 ++bIt;
180 }
181 }
182 std::sort(buffer.begin(), bIt);
183 return bIt - buffer.begin();
184}
185
186template<typename Stat_, typename Index_, typename Rank_>
187void compute_min_rank_internal(Index_ use, const std::vector<std::pair<Stat_, Index_> >& buffer, Rank_* output) {
188 Rank_ counter = 1;
189 for (Index_ i = 0; i < use; ++i) {
190 auto& current = output[buffer[i].second];
191 if (counter < current) {
192 current = counter;
193 }
194 ++counter;
195 }
196}
197
198template<typename Stat_, typename Index_, typename Rank_>
199void compute_min_rank_for_group(Index_ ngenes, size_t ngroups, size_t group, const Stat_* effects, Rank_* output, int threads) {
200 std::vector<std::vector<Rank_> > stores(threads - 1);
201
202 tatami::parallelize([&](size_t t, size_t start, size_t length) {
203 Rank_* curoutput;
204 if (t == 0) {
205 curoutput = output;
206 std::fill_n(curoutput, ngenes, ngenes + 1);
207 } else {
208 stores[t - 1].resize(ngenes, ngenes + 1);
209 curoutput = stores[t - 1].data();
210 }
211 std::vector<std::pair<Stat_, Index_> > buffer(ngenes);
212
213 for (size_t g = start, end = start + length; g < end; ++g) {
214 if (g == group) {
215 continue;
216 }
217 auto used = fill_and_sort_rank_buffer(effects + g, ngroups, buffer);
218 compute_min_rank_internal(used, buffer, curoutput);
219 }
220 }, ngroups, threads);
221
222 for (const auto& curstore : stores) {
223 auto copy = output;
224 for (auto x : curstore) {
225 if (x < *copy) {
226 *copy = x;
227 }
228 ++copy;
229 }
230 }
231}
232
233template<typename Stat_, typename Index_, typename Rank_>
234void compute_min_rank_pairwise(Index_ ngenes, size_t ngroups, const Stat_* effects, const std::vector<SummaryBuffers<Stat_, Rank_> >& output, int threads) {
235 size_t shift = ngroups * ngroups;
236
237 tatami::parallelize([&](size_t, size_t start, size_t length) {
238 std::vector<std::pair<Stat_, Index_> > buffer(ngenes);
239 for (size_t g = start, end = start + length; g < end; ++g) {
240 auto target = output[g].min_rank;
241 if (target == NULL) {
242 continue;
243 }
244
245 std::fill_n(target, ngenes, ngenes + 1);
246 auto base = effects + g * ngroups;
247
248 for (size_t g2 = 0; g2 < ngroups; ++g2) {
249 if (g == g2) {
250 continue;
251 }
252 auto used = fill_and_sort_rank_buffer(base + g2, shift, buffer);
253 compute_min_rank_internal(used, buffer, target);
254 }
255 }
256 }, ngroups, threads);
257}
258
259template<typename Stat_, typename Rank_>
260SummaryBuffers<Stat_, Rank_> fill_summary_results(
261 size_t ngenes,
262 SummaryResults<Stat_, Rank_>& out,
263 bool compute_min,
264 bool compute_mean,
265 bool compute_median,
266 bool compute_max,
267 bool compute_min_rank)
268{
269 SummaryBuffers<Stat_, Rank_> ptr;
270
271 if (compute_min) {
272 out.min.resize(ngenes);
273 ptr.min = out.min.data();
274 }
275 if (compute_mean) {
276 out.mean.resize(ngenes);
277 ptr.mean = out.mean.data();
278 }
279 if (compute_median) {
280 out.median.resize(ngenes);
281 ptr.median = out.median.data();
282 }
283 if (compute_max) {
284 out.max.resize(ngenes);
285 ptr.max = out.max.data();
286 }
287 if (compute_min_rank) {
288 out.min_rank.resize(ngenes);
289 ptr.min_rank = out.min_rank.data();
290 }
291
292 return ptr;
293}
294
295template<typename Stat_, typename Rank_>
296std::vector<SummaryBuffers<Stat_, Rank_> > fill_summary_results(
297 size_t ngenes,
298 size_t ngroups,
299 std::vector<SummaryResults<Stat_, Rank_> >& outputs,
300 bool compute_min,
301 bool compute_mean,
302 bool compute_median,
303 bool compute_max,
304 bool compute_min_rank)
305{
306 outputs.resize(ngroups);
307 std::vector<SummaryBuffers<Stat_, Rank_> > ptrs;
308 ptrs.reserve(ngroups);
309 for (size_t g = 0; g < ngroups; ++g) {
310 ptrs.emplace_back(fill_summary_results(ngenes, outputs[g], compute_min, compute_mean, compute_median, compute_max, compute_min_rank));
311 }
312 return ptrs;
313}
314
315}
320}
321
322#endif
Marker detection for single-cell data.
Definition score_markers_pairwise.hpp:23
void parallelize(Function_ fun, Index_ tasks, int threads)
Buffers for score_markers_pairwise() and friends.
Definition score_markers_pairwise.hpp:82
std::vector< Stat_ * > mean
Definition score_markers_pairwise.hpp:88
Pointers to arrays to hold the summary statistics.
Definition summarize_comparisons.hpp:25
Stat_ * mean
Definition summarize_comparisons.hpp:38
Stat_ * median
Definition summarize_comparisons.hpp:45
Stat_ * min
Definition summarize_comparisons.hpp:31
Stat_ * max
Definition summarize_comparisons.hpp:52
Rank_ * min_rank
Definition summarize_comparisons.hpp:59
Container for the summary statistics.
Definition summarize_comparisons.hpp:69
std::vector< Stat_ > mean
Definition summarize_comparisons.hpp:80
std::vector< Stat_ > median
Definition summarize_comparisons.hpp:86
std::vector< Stat_ > min
Definition summarize_comparisons.hpp:74
std::vector< Rank_ > min_rank
Definition summarize_comparisons.hpp:98
std::vector< Stat_ > max
Definition summarize_comparisons.hpp:92