scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "find_median_mad.hpp"
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
43template<typename Sum_ = double, typename Detected_ = int>
49 Sum_* sum;
50
55 Detected_* detected;
56
62 std::vector<Sum_*> subset_sum;
63};
64
96template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
99 const std::vector<Subset_>& subsets,
101 const ComputeAdtQcMetricsOptions& options)
102{
104 tmp.sum = output.sum;
105 tmp.detected = output.detected;
106 tmp.subset_sum = output.subset_sum;
107
109 opt.num_threads = options.num_threads;
110 per_cell_qc_metrics(mat, subsets, tmp, opt);
111}
112
118template<typename Sum_ = double, typename Detected_ = int>
123 std::vector<Sum_> sum;
124
128 std::vector<Detected_> detected;
129
134 std::vector<std::vector<Sum_> > subset_sum;
135};
136
154template<typename Sum_ = double, typename Detected_ = int, typename Value_, typename Index_, typename Subset_>
157 const std::vector<Subset_>& subsets,
158 const ComputeAdtQcMetricsOptions& options)
159{
160 const auto NC = mat.ncol();
163
165#ifdef SCRAN_QC_TEST_INIT
166 , SCRAN_QC_TEST_INIT
167#endif
168 );
169 x.sum = output.sum.data();
170
172#ifdef SCRAN_QC_TEST_INIT
173 , SCRAN_QC_TEST_INIT
174#endif
175 );
176 x.detected = output.detected.data();
177
178 const auto nsubsets = subsets.size();
179 sanisizer::resize(x.subset_sum, nsubsets);
180 sanisizer::resize(output.subset_sum, nsubsets);
181 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
183#ifdef SCRAN_QC_TEST_INIT
184 , SCRAN_QC_TEST_INIT
185#endif
186 );
187 x.subset_sum[s] = output.subset_sum[s].data();
188 }
189
190 compute_adt_qc_metrics(mat, subsets, x, options);
191 return output;
192}
193
203
208 double detected_min_drop = 0.1;
209
215};
216
220namespace internal {
221
222template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
223void adt_populate(Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& res, BlockSource_ block, const ComputeAdtQcFiltersOptions& options) {
224 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
225 auto buffer = [&]{
226 if constexpr(unblocked) {
227 return sanisizer::create<std::vector<Float_> >(n);
228 } else {
229 return FindMedianMadWorkspace<Float_>(n, block);
230 }
231 }();
232
233 {
235 opts.num_mads = options.detected_num_mads;
236 opts.log = true;
237 opts.upper = false;
238 opts.min_diff = -std::log(1 - options.detected_min_drop);
239 host.get_detected() = [&]{
240 if constexpr(unblocked) {
241 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
242 } else {
243 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
244 }
245 }();
246 }
247
248 {
249 const auto nsubsets = res.subset_sum.size();
250 auto& host_subsets = host.get_subset_sum();
251 sanisizer::resize(host_subsets, nsubsets);
252
254 opts.num_mads = options.subset_sum_num_mads;
255 opts.log = true;
256 opts.lower = false;
257
258 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
259 const auto sub = res.subset_sum[s];
260 host.get_subset_sum()[s] = [&]{
261 if constexpr(unblocked) {
262 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
263 } else {
264 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
265 }
266 }();
267 }
268 }
269}
270
271template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
272void adt_filter(const Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, BlockSource_ block, Output_* const output) {
273 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
274 std::fill_n(output, n, 1);
275
276 const auto& detected = host.get_detected();
277 for (decltype(I(n)) i = 0; i < n; ++i) {
278 auto thresh = [&]{
279 if constexpr(unblocked) {
280 return detected;
281 } else {
282 return detected[block[i]];
283 }
284 }();
285 output[i] = output[i] && (metrics.detected[i] >= thresh);
286 }
287
288 const auto nsubsets = metrics.subset_sum.size();
289 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
290 const auto sub = metrics.subset_sum[s];
291 const auto& sthresh = host.get_subset_sum()[s];
292 for (decltype(I(n)) i = 0; i < n; ++i) {
293 auto thresh = [&]{
294 if constexpr(unblocked) {
295 return sthresh;
296 } else {
297 return sthresh[block[i]];
298 }
299 }();
300 output[i] = output[i] && (sub[i] <= thresh);
301 }
302 }
303}
304
305template<typename Sum_, typename Detected_>
306ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> to_buffer(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) {
307 ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> buffer;
308 buffer.sum = metrics.sum.data();
309 buffer.detected = metrics.detected.data();
310 buffer.subset_sum.reserve(metrics.subset_sum.size());
311 for (const auto& s : metrics.subset_sum) {
312 buffer.subset_sum.push_back(s.data());
313 }
314 return buffer;
315}
316
317}
328template<typename Float_ = double>
330public:
334 Float_ get_detected() const {
335 return my_detected;
336 }
337
342 const std::vector<Float_>& get_subset_sum() const {
343 return my_subset_sum;
344 }
345
349 Float_& get_detected() {
350 return my_detected;
351 }
352
357 std::vector<Float_>& get_subset_sum() {
358 return my_subset_sum;
359 }
360
361private:
362 Float_ my_detected = 0;
363 std::vector<Float_> my_subset_sum;
364
365public:
378 template<typename Sum_, typename Detected_, typename Output_>
379 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, Output_* const output) const {
380 internal::adt_filter(*this, num, metrics, false, output);
381 }
382
393 template<typename Sum_, typename Detected_, typename Output_>
394 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, Output_* const output) const {
395 return filter(metrics.detected.size(), internal::to_buffer(metrics), output);
396 }
397
408 template<typename Output_ = unsigned char, typename Sum_, typename Detected_>
409 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
410 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
411#ifdef SCRAN_QC_TEST_INIT
412 , SCRAN_QC_TEST_INIT
413#endif
414 );
415 filter(metrics, output.data());
416 return output;
417 }
418};
419
444template<typename Float_ = double, typename Sum_, typename Detected_>
447 internal::adt_populate<Float_>(output, num, metrics, false, options);
448 return output;
449}
450
461template<typename Float_ = double, typename Sum_, typename Detected_>
463 return compute_adt_qc_filters(metrics.detected.size(), internal::to_buffer(metrics), options);
464}
465
472template<typename Float_ = double>
474public:
479 const std::vector<Float_>& get_detected() const {
480 return my_detected;
481 }
482
488 const std::vector<std::vector<Float_> >& get_subset_sum() const {
489 return my_subset_sum;
490 }
491
496 std::vector<Float_>& get_detected() {
497 return my_detected;
498 }
499
505 std::vector<std::vector<Float_> >& get_subset_sum() {
506 return my_subset_sum;
507 }
508
509private:
510 std::vector<Float_> my_sum;
511 std::vector<Float_> my_detected;
512 std::vector<std::vector<Float_> > my_subset_sum;
513
514public:
529 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
530 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, const Block_* block, Output_* const output) const {
531 internal::adt_filter(*this, num, metrics, block, output);
532 }
533
547 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
548 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block, Output_* const output) const {
549 return filter(metrics.detected.size(), internal::to_buffer(metrics), block, output);
550 }
551
565 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Block_>
566 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block) const {
567 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
568#ifdef SCRAN_QC_TEST_INIT
569 , SCRAN_QC_TEST_INIT
570#endif
571 );
572 filter(metrics, block, output.data());
573 return output;
574 }
575};
576
594template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
596 const std::size_t num,
598 const Block_* const block,
599 const ComputeAdtQcFiltersOptions& options)
600{
602 internal::adt_populate<Float_>(output, num, metrics, block, options);
603 return output;
604}
605
618template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
621 const Block_* const block,
622 const ComputeAdtQcFiltersOptions& options)
623{
624 return compute_adt_qc_filters_blocked(metrics.detected.size(), internal::to_buffer(metrics), block, options);
625}
626
627}
628
629#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:473
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition adt_quality_control.hpp:548
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:505
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:496
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *const output) const
Definition adt_quality_control.hpp:530
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:479
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block) const
Definition adt_quality_control.hpp:566
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:488
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:329
Float_ & get_detected()
Definition adt_quality_control.hpp:349
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:379
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:409
Float_ get_detected() const
Definition adt_quality_control.hpp:334
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:342
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:357
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:394
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:218
AdtQcFilters< Float_ > compute_adt_qc_filters(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:445
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:595
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:97
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:136
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:59
double num_mads
Definition choose_filter_thresholds.hpp:39
double min_diff
Definition choose_filter_thresholds.hpp:46
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:197
double detected_min_drop
Definition adt_quality_control.hpp:208
double detected_num_mads
Definition adt_quality_control.hpp:202
double subset_sum_num_mads
Definition adt_quality_control.hpp:214
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:44
Detected_ * detected
Definition adt_quality_control.hpp:55
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:62
Sum_ * sum
Definition adt_quality_control.hpp:49
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:28
int num_threads
Definition adt_quality_control.hpp:33
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:119
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:128
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:123
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:134
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:122
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66