scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
40template<typename Sum_ = double, typename Detected_ = int>
46 Sum_* sum;
47
52 Detected_* detected;
53
59 std::vector<Sum_*> subset_sum;
60};
61
93template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
96 const std::vector<Subset_>& subsets,
98 const ComputeAdtQcMetricsOptions& options)
99{
101 tmp.sum = output.sum;
102 tmp.detected = output.detected;
103 tmp.subset_sum = output.subset_sum;
104
106 opt.num_threads = options.num_threads;
107 per_cell_qc_metrics(mat, subsets, tmp, opt);
108}
109
115template<typename Sum_ = double, typename Detected_ = int>
120 std::vector<Sum_> sum;
121
125 std::vector<Detected_> detected;
126
131 std::vector<std::vector<Sum_> > subset_sum;
132};
133
151template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
154 const std::vector<Subset_>& subsets,
155 const ComputeAdtQcMetricsOptions& options)
156{
157 auto NC = mat.ncol();
160
161 output.sum.resize(NC
162#ifdef SCRAN_QC_TEST_INIT
163 , SCRAN_QC_TEST_INIT
164#endif
165 );
166 x.sum = output.sum.data();
167
168 output.detected.resize(NC
169#ifdef SCRAN_QC_TEST_INIT
170 , SCRAN_QC_TEST_INIT
171#endif
172 );
173 x.detected = output.detected.data();
174
175 size_t nsubsets = subsets.size();
176 x.subset_sum.resize(nsubsets);
177 output.subset_sum.resize(nsubsets);
178 for (size_t s = 0; s < nsubsets; ++s) {
179 output.subset_sum[s].resize(NC
180#ifdef SCRAN_QC_TEST_INIT
181 , SCRAN_QC_TEST_INIT
182#endif
183 );
184 x.subset_sum[s] = output.subset_sum[s].data();
185 }
186
187 compute_adt_qc_metrics(mat, subsets, x, options);
188 return output;
189}
190
200
205 double detected_min_drop = 0.1;
206
212};
213
217namespace internal {
218
219template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
220void adt_populate(Host_& host, size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& res, BlockSource_ block, const ComputeAdtQcFiltersOptions& options) {
221 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
222 auto buffer = [&]() {
223 if constexpr(unblocked) {
224 return std::vector<Float_>(n);
225 } else {
227 }
228 }();
229
230 {
232 opts.num_mads = options.detected_num_mads;
233 opts.log = true;
234 opts.upper = false;
235 opts.min_diff = -std::log(1 - options.detected_min_drop);
236 host.get_detected() = [&]() {
237 if constexpr(unblocked) {
238 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
239 } else {
240 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
241 }
242 }();
243 }
244
245 {
246 size_t nsubsets = res.subset_sum.size();
247 host.get_subset_sum().resize(nsubsets);
248
250 opts.num_mads = options.subset_sum_num_mads;
251 opts.log = true;
252 opts.lower = false;
253
254 for (size_t s = 0; s < nsubsets; ++s) {
255 auto sub = res.subset_sum[s];
256 host.get_subset_sum()[s] = [&]() {
257 if constexpr(unblocked) {
258 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
259 } else {
260 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
261 }
262 }();
263 }
264 }
265}
266
267template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
269 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
270 std::fill_n(output, n, 1);
271
272 const auto& detected = host.get_detected();
273 for (size_t i = 0; i < n; ++i) {
274 auto thresh = [&]() {
275 if constexpr(unblocked) {
276 return detected;
277 } else {
278 return detected[block[i]];
279 }
280 }();
281 output[i] = output[i] && (metrics.detected[i] >= thresh);
282 }
283
284 size_t nsubsets = metrics.subset_sum.size();
285 for (size_t s = 0; s < nsubsets; ++s) {
286 auto sub = metrics.subset_sum[s];
287 const auto& sthresh = host.get_subset_sum()[s];
288 for (size_t i = 0; i < n; ++i) {
289 auto thresh = [&]() {
290 if constexpr(unblocked) {
291 return sthresh;
292 } else {
293 return sthresh[block[i]];
294 }
295 }();
296 output[i] = output[i] && (sub[i] <= thresh);
297 }
298 }
299}
300
301template<typename Sum_, typename Detected_>
304 buffer.sum = metrics.sum.data();
305 buffer.detected = metrics.detected.data();
306 buffer.subset_sum.reserve(metrics.subset_sum.size());
307 for (const auto& s : metrics.subset_sum) {
308 buffer.subset_sum.push_back(s.data());
309 }
310 return buffer;
311}
312
313}
324template<typename Float_ = double>
326public:
331 return my_detected;
332 }
333
338 const std::vector<Float_>& get_subset_sum() const {
339 return my_subset_sum;
340 }
341
346 return my_detected;
347 }
348
353 std::vector<Float_>& get_subset_sum() {
354 return my_subset_sum;
355 }
356
357private:
358 Float_ my_detected = 0;
359 std::vector<Float_> my_subset_sum;
360
361public:
374 template<typename Sum_, typename Detected_, typename Output_>
376 internal::adt_filter(*this, num, metrics, false, output);
377 }
378
389 template<typename Sum_, typename Detected_, typename Output_>
391 return filter(metrics.detected.size(), internal::to_buffer(metrics), output);
392 }
393
404 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int>
405 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
406 std::vector<Output_> output(metrics.detected.size()
409#endif
410 );
411 filter(metrics, output.data());
412 return output;
413 }
414};
415
437template<typename Float_ = double, typename Sum_, typename Detected_>
443
454template<typename Float_ = double, typename Sum_, typename Detected_>
458
465template<typename Float_ = double>
467public:
472 const std::vector<Float_>& get_detected() const {
473 return my_detected;
474 }
475
481 const std::vector<std::vector<Float_> >& get_subset_sum() const {
482 return my_subset_sum;
483 }
484
489 std::vector<Float_>& get_detected() {
490 return my_detected;
491 }
492
498 std::vector<std::vector<Float_> >& get_subset_sum() {
499 return my_subset_sum;
500 }
501
502private:
503 std::vector<Float_> my_sum;
504 std::vector<Float_> my_detected;
505 std::vector<std::vector<Float_> > my_subset_sum;
506
507public:
522 template<typename Index_, typename Sum_, typename Detected_, typename Block_, typename Output_>
524 internal::adt_filter(*this, num, metrics, block, output);
525 }
526
540 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
542 return filter(metrics.detected.size(), internal::to_buffer(metrics), block, output);
543 }
544
558 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Block_ = int>
559 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* block) const {
560 std::vector<Output_> output(metrics.detected.size()
563#endif
564 );
565 filter(metrics, block, output.data());
566 return output;
567 }
568};
569
587template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
598
611template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
619
620}
621
622#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:466
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *block) const
Definition adt_quality_control.hpp:559
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:498
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:489
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *output) const
Definition adt_quality_control.hpp:541
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:472
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:481
void filter(Index_ num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *output) const
Definition adt_quality_control.hpp:523
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:325
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *output) const
Definition adt_quality_control.hpp:390
void filter(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *output) const
Definition adt_quality_control.hpp:375
Float_ & get_detected()
Definition adt_quality_control.hpp:345
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:405
Float_ get_detected() const
Definition adt_quality_control.hpp:330
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:338
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:353
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:176
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:588
AdtQcFilters< Float_ > compute_adt_qc_filters(size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:438
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:94
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:194
double detected_min_drop
Definition adt_quality_control.hpp:205
double detected_num_mads
Definition adt_quality_control.hpp:199
double subset_sum_num_mads
Definition adt_quality_control.hpp:211
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:41
Detected_ * detected
Definition adt_quality_control.hpp:52
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:59
Sum_ * sum
Definition adt_quality_control.hpp:46
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:25
int num_threads
Definition adt_quality_control.hpp:30
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:116
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:125
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:120
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:131
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:75
Sum_ * sum
Definition per_cell_qc_metrics.hpp:90
Detected_ * detected
Definition per_cell_qc_metrics.hpp:96
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:116
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22
int num_threads
Definition per_cell_qc_metrics.hpp:63