scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
40template<typename Sum_ = double, typename Detected_ = int>
46 Sum_* sum;
47
52 Detected_* detected;
53
59 std::vector<Sum_*> subset_sum;
60};
61
91template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
94 const std::vector<Subset_>& subsets,
96 const ComputeAdtQcMetricsOptions& options)
97{
99 tmp.sum = output.sum;
100 tmp.detected = output.detected;
101 tmp.subset_sum = output.subset_sum;
102
104 opt.num_threads = options.num_threads;
105 per_cell_qc_metrics(mat, subsets, tmp, opt);
106}
107
113template<typename Sum_ = double, typename Detected_ = int>
118 std::vector<Sum_> sum;
119
123 std::vector<Detected_> detected;
124
129 std::vector<std::vector<Sum_> > subset_sum;
130};
131
149template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
152 const std::vector<Subset_>& subsets,
153 const ComputeAdtQcMetricsOptions& options)
154{
155 auto NC = mat.ncol();
158
159 output.sum.resize(NC);
160 x.sum = output.sum.data();
161
162 output.detected.resize(NC);
163 x.detected = output.detected.data();
164
165 size_t nsubsets = subsets.size();
166 x.subset_sum.resize(nsubsets);
167 output.subset_sum.resize(nsubsets);
168 for (size_t s = 0; s < nsubsets; ++s) {
169 output.subset_sum[s].resize(NC);
170 x.subset_sum[s] = output.subset_sum[s].data();
171 }
172
173 compute_adt_qc_metrics(mat, subsets, x, options);
174 return output;
175}
176
186
191 double detected_min_drop = 0.1;
192
198};
199
203namespace internal {
204
205template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
206void adt_populate(Host_& host, size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& res, BlockSource_ block, const ComputeAdtQcFiltersOptions& options) {
207 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
208 auto buffer = [&]() {
209 if constexpr(unblocked) {
210 return std::vector<Float_>(n);
211 } else {
213 }
214 }();
215
216 {
218 opts.num_mads = options.detected_num_mads;
219 opts.log = true;
220 opts.upper = false;
221 opts.min_diff = -std::log(1 - options.detected_min_drop);
222 host.get_detected() = [&]() {
223 if constexpr(unblocked) {
224 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
225 } else {
226 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
227 }
228 }();
229 }
230
231 {
232 size_t nsubsets = res.subset_sum.size();
233 host.get_subset_sum().resize(nsubsets);
234
236 opts.num_mads = options.subset_sum_num_mads;
237 opts.log = true;
238 opts.lower = false;
239
240 for (size_t s = 0; s < nsubsets; ++s) {
241 auto sub = res.subset_sum[s];
242 host.get_subset_sum()[s] = [&]() {
243 if constexpr(unblocked) {
244 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
245 } else {
246 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
247 }
248 }();
249 }
250 }
251}
252
253template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
255 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
256 std::fill_n(output, n, 1);
257
258 const auto& detected = host.get_detected();
259 for (size_t i = 0; i < n; ++i) {
260 auto thresh = [&]() {
261 if constexpr(unblocked) {
262 return detected;
263 } else {
264 return detected[block[i]];
265 }
266 }();
267 output[i] = output[i] && (metrics.detected[i] >= thresh);
268 }
269
270 size_t nsubsets = metrics.subset_sum.size();
271 for (size_t s = 0; s < nsubsets; ++s) {
272 auto sub = metrics.subset_sum[s];
273 const auto& sthresh = host.get_subset_sum()[s];
274 for (size_t i = 0; i < n; ++i) {
275 auto thresh = [&]() {
276 if constexpr(unblocked) {
277 return sthresh;
278 } else {
279 return sthresh[block[i]];
280 }
281 }();
282 output[i] = output[i] && (sub[i] <= thresh);
283 }
284 }
285}
286
287template<typename Sum_, typename Detected_>
290 buffer.sum = metrics.sum.data();
291 buffer.detected = metrics.detected.data();
292 buffer.subset_sum.reserve(metrics.subset_sum.size());
293 for (const auto& s : metrics.subset_sum) {
294 buffer.subset_sum.push_back(s.data());
295 }
296 return buffer;
297}
298
299}
310template<typename Float_ = double>
312public:
317 return my_detected;
318 }
319
324 const std::vector<Float_>& get_subset_sum() const {
325 return my_subset_sum;
326 }
327
332 return my_detected;
333 }
334
339 std::vector<Float_>& get_subset_sum() {
340 return my_subset_sum;
341 }
342
343private:
344 Float_ my_detected = 0;
345 std::vector<Float_> my_subset_sum;
346
347public:
360 template<typename Sum_, typename Detected_, typename Output_>
362 internal::adt_filter(*this, num, metrics, false, output);
363 }
364
375 template<typename Sum_, typename Detected_, typename Output_>
377 return filter(metrics.detected.size(), internal::to_buffer(metrics), output);
378 }
379
390 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int>
391 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
392 std::vector<Output_> output(metrics.detected.size());
393 filter(metrics, output.data());
394 return output;
395 }
396};
397
419template<typename Float_ = double, typename Sum_, typename Detected_>
425
436template<typename Float_ = double, typename Sum_, typename Detected_>
440
447template<typename Float_ = double>
449public:
454 const std::vector<Float_>& get_detected() const {
455 return my_detected;
456 }
457
463 const std::vector<std::vector<Float_> >& get_subset_sum() const {
464 return my_subset_sum;
465 }
466
471 std::vector<Float_>& get_detected() {
472 return my_detected;
473 }
474
480 std::vector<std::vector<Float_> >& get_subset_sum() {
481 return my_subset_sum;
482 }
483
484private:
485 std::vector<Float_> my_sum;
486 std::vector<Float_> my_detected;
487 std::vector<std::vector<Float_> > my_subset_sum;
488
489public:
504 template<typename Index_, typename Sum_, typename Detected_, typename Block_, typename Output_>
506 internal::adt_filter(*this, num, metrics, block, output);
507 }
508
522 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
524 return filter(metrics.detected.size(), internal::to_buffer(metrics), block, output);
525 }
526
540 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Block_ = int>
541 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* block) const {
542 std::vector<Output_> output(metrics.detected.size());
543 filter(metrics, block, output.data());
544 return output;
545 }
546};
547
565template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
576
589template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
597
598}
599
600#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:448
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *block) const
Definition adt_quality_control.hpp:541
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:480
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:471
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *output) const
Definition adt_quality_control.hpp:523
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:454
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:463
void filter(Index_ num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *output) const
Definition adt_quality_control.hpp:505
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:311
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *output) const
Definition adt_quality_control.hpp:376
void filter(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *output) const
Definition adt_quality_control.hpp:361
Float_ & get_detected()
Definition adt_quality_control.hpp:331
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:391
Float_ get_detected() const
Definition adt_quality_control.hpp:316
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:324
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:339
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:172
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:566
AdtQcFilters< Float_ > compute_adt_qc_filters(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:420
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:92
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:180
double detected_min_drop
Definition adt_quality_control.hpp:191
double detected_num_mads
Definition adt_quality_control.hpp:185
double subset_sum_num_mads
Definition adt_quality_control.hpp:197
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:41
Detected_ * detected
Definition adt_quality_control.hpp:52
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:59
Sum_ * sum
Definition adt_quality_control.hpp:46
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:25
int num_threads
Definition adt_quality_control.hpp:30
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:114
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:123
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:118
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:129
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:75
Sum_ * sum
Definition per_cell_qc_metrics.hpp:90
Detected_ * detected
Definition per_cell_qc_metrics.hpp:96
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:116
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22
int num_threads
Definition per_cell_qc_metrics.hpp:63