scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
choose_filter_thresholds.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CHOOSE_FILTER_THRESHOLDS_HPP
2#define SCRAN_QC_CHOOSE_FILTER_THRESHOLDS_HPP
3
4#include <vector>
5#include <limits>
6#include <cmath>
7#include <algorithm>
8#include <cstddef>
9#include <cassert>
10
11#include "sanisizer/sanisizer.hpp"
12#include "quickstats/quickstats.hpp"
13
14#include "utils.hpp"
15
21namespace scran_qc {
22
31 bool lower = true;
32
37 bool upper = true;
38
44 double num_mads = 3;
45
51 double min_diff = 0;
52
70 bool log = false;
71};
72
77template<typename Float_>
84 Float_ lower = 0;
85
91 Float_ upper = 0;
92};
93
97template<typename Float_>
98Float_ unlog_threshold(const Float_ val, const bool was_logged) {
99 if (was_logged) {
100 if (std::isinf(val)) {
101 if (val < 0) {
102 return 0;
103 }
104 } else {
105 return std::exp(val);
106 }
107 }
108 return val;
109}
110
111template<typename Float_>
112ChooseFilterThresholdsResults<Float_> choose_filter_thresholds_internal(
113 std::size_t num_cells,
114 Float_* metrics,
115 const ChooseFilterThresholdsOptions& options
116) {
117 static_assert(std::is_floating_point<Float_>::value);
118
119 // Rotate all the NaNs to the front of the buffer and ignore them.
120 I<decltype(num_cells)> lost = 0;
121 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
122 if (std::isnan(metrics[i])) {
123 std::swap(metrics[i], metrics[lost]);
124 ++lost;
125 }
126 }
127 metrics += lost;
128 num_cells -= lost;
129
130 // Maybe we do some log-transformation, if that's requested.
131 if (options.log) {
132 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
133 auto& val = metrics[i];
134 if (val > 0) {
135 val = std::log(val);
136 } else if (val == 0) {
137 val = -std::numeric_limits<double>::infinity();
138 } else {
139 throw std::runtime_error("cannot log-transform negative values");
140 }
141 }
142 }
143
144 ChooseFilterThresholdsResults<Float_> output;
145 Float_& lthresh = output.lower;
146 Float_& uthresh = output.upper;
147 lthresh = -std::numeric_limits<Float_>::infinity();
148 uthresh = std::numeric_limits<Float_>::infinity();
149
150 const auto median = quickstats::median<Float_>(num_cells, metrics);
151 const auto mad = quickstats::scale_mad_to_sd(quickstats::mad_with_infinities<Float_>(num_cells, metrics, median));
152
153 if (!std::isnan(mad)) {
154 const auto delta = std::max(static_cast<Float_>(options.min_diff), static_cast<Float_>(options.num_mads * mad));
155 if (options.lower) {
156 const auto threshold = median - delta;
157 if (!std::isnan(threshold)) {
158 lthresh = unlog_threshold(threshold, options.log);
159 }
160 }
161 if (options.upper) {
162 const auto threshold = median + delta;
163 if (!std::isnan(threshold)) {
164 uthresh = unlog_threshold(threshold, options.log);
165 }
166 }
167 }
168
169 return output;
170}
193template<typename Value_, typename Float_>
195 const std::size_t num_cells,
196 const Value_* const metrics,
197 Float_* const buffer,
198 const ChooseFilterThresholdsOptions& options
199) {
200 // Only copying it if it's not exactly the same.
201 [&](){
202 if constexpr(std::is_same<Value_, Float_>::value) {
203 if (metrics == buffer) {
204 return;
205 }
206 }
207 std::copy_n(metrics, num_cells, buffer);
208 }();
209 return choose_filter_thresholds_internal(num_cells, buffer, options);
210}
211
219template<typename Float_>
229 template<typename Block_>
230 ChooseFilterThresholdsBlockedWorkspace(const std::size_t num_cells, const Block_* const block, const std::size_t num_blocks) {
231 reset_choose_filter_thresholds_blocked_workspace(*this, num_cells, block, num_blocks);
232 }
233
238
242public:
243 std::vector<Float_> buffer;
244 std::vector<std::size_t> block_starts;
245 std::vector<std::size_t> block_offsets;
249};
250
265template<typename Float_, typename Block_>
268 const std::size_t num_cells,
269 const Block_* const block,
270 const std::size_t num_blocks
271) {
272 work.block_starts.clear();
273
274 sanisizer::resize(work.block_starts, num_blocks);
275 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
276 ++work.block_starts[block[i]];
277 }
278
279 std::size_t sofar = 0;
280 for (auto& s : work.block_starts) {
281 const auto last = sofar;
282 sofar += s;
283 s = last;
284 }
285
286 sanisizer::resize(work.buffer, num_cells
287#ifdef SCRAN_QC_TEST_INIT
288 , SCRAN_QC_TEST_INIT
289#endif
290 );
291
292 sanisizer::resize(work.block_offsets, num_blocks
293#ifdef SCRAN_QC_TEST_INIT
294 , SCRAN_QC_TEST_INIT
295#endif
296 );
297}
298
326template<typename Value_, typename Block_, typename Float_>
327std::vector<ChooseFilterThresholdsResults<Float_> > choose_filter_thresholds_blocked(
328 const std::size_t num_cells,
329 const Value_* const metrics,
330 const Block_* const block,
331 const std::size_t num_blocks,
333 const ChooseFilterThresholdsOptions& options
334) {
335 std::vector<ChooseFilterThresholdsResults<Float_> > output;
336 output.reserve(num_blocks);
337 process_blocks_for_choose_filter_thresholds(
338 num_cells,
339 metrics,
340 block,
341 num_blocks,
342 workspace,
343 [&](const std::size_t len, Float_* const ptr) -> void {
344 output.push_back(choose_filter_thresholds_internal<Float_>(len, ptr, options));
345 }
346 );
347 return output;
348}
349
353template<typename Value_, typename Block_, typename Float_, class Function_>
354void process_blocks_for_choose_filter_thresholds(
355 const std::size_t num_cells,
356 const Value_* const metrics,
357 const Block_* const block,
358 const std::size_t num_blocks,
359 ChooseFilterThresholdsBlockedWorkspace<Float_>& workspace,
360 Function_ fun
361) {
362 assert(num_cells == workspace.buffer.size());
363 assert(num_blocks == workspace.block_starts.size());
364
365 auto& buffer = workspace.buffer;
366 const auto& starts = workspace.block_starts;
367 auto& offsets = workspace.block_offsets;
368 std::copy(starts.begin(), starts.end(), offsets.begin());
369 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
370 auto& pos = offsets[block[i]];
371 buffer[pos] = metrics[i];
372 ++pos;
373 }
374
375 for (I<decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
376 fun(offsets[g] - starts[g], buffer.data() + starts[g]);
377 }
378}
379
380
381
382template<bool lower_, typename Float_>
383std::vector<Float_> extract_filter_thresholds(const std::vector<ChooseFilterThresholdsResults<Float_> >& res) {
384 std::vector<Float_> output;
385 output.reserve(res.size());
386 for (const auto& r : res) {
387 if constexpr(lower_) {
388 output.push_back(r.lower);
389 } else {
390 output.push_back(r.upper);
391 }
392 }
393 return output;
394}
399}
400
401#endif
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::size_t num_cells, const Value_ *const metrics, const Block_ *const block, const std::size_t num_blocks, ChooseFilterThresholdsBlockedWorkspace< Float_ > &workspace, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:327
void reset_choose_filter_thresholds_blocked_workspace(ChooseFilterThresholdsBlockedWorkspace< Float_ > &work, const std::size_t num_cells, const Block_ *const block, const std::size_t num_blocks)
Definition choose_filter_thresholds.hpp:266
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const std::size_t num_cells, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:194
Workspace for choose_filter_thresholds_blocked().
Definition choose_filter_thresholds.hpp:220
ChooseFilterThresholdsBlockedWorkspace(const std::size_t num_cells, const Block_ *const block, const std::size_t num_blocks)
Definition choose_filter_thresholds.hpp:230
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:26
bool upper
Definition choose_filter_thresholds.hpp:37
bool log
Definition choose_filter_thresholds.hpp:70
double num_mads
Definition choose_filter_thresholds.hpp:44
double min_diff
Definition choose_filter_thresholds.hpp:51
bool lower
Definition choose_filter_thresholds.hpp:31
Results of compute_adt_qc_metrics().
Definition choose_filter_thresholds.hpp:78
Float_ upper
Definition choose_filter_thresholds.hpp:91
Float_ lower
Definition choose_filter_thresholds.hpp:84