scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "find_median_mad.hpp"
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
45template<typename Sum_ = double, typename Detected_ = int>
51 Sum_* sum;
52
57 Detected_* detected;
58
64 std::vector<Sum_*> subset_sum;
65};
66
100template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
103 const std::vector<Subset_>& subsets,
105 const ComputeAdtQcMetricsOptions& options)
106{
108 tmp.sum = output.sum;
109 tmp.detected = output.detected;
110 tmp.subset_sum = output.subset_sum;
111
113 opt.num_threads = options.num_threads;
114 per_cell_qc_metrics(mat, subsets, tmp, opt);
115}
116
124template<typename Sum_ = double, typename Detected_ = int>
129 std::vector<Sum_> sum;
130
134 std::vector<Detected_> detected;
135
140 std::vector<std::vector<Sum_> > subset_sum;
141};
142
162template<typename Sum_ = double, typename Detected_ = int, typename Value_, typename Index_, typename Subset_>
165 const std::vector<Subset_>& subsets,
166 const ComputeAdtQcMetricsOptions& options)
167{
168 const auto NC = mat.ncol();
171
173#ifdef SCRAN_QC_TEST_INIT
174 , SCRAN_QC_TEST_INIT
175#endif
176 );
177 x.sum = output.sum.data();
178
180#ifdef SCRAN_QC_TEST_INIT
181 , SCRAN_QC_TEST_INIT
182#endif
183 );
184 x.detected = output.detected.data();
185
186 const auto nsubsets = subsets.size();
187 sanisizer::resize(x.subset_sum, nsubsets);
188 sanisizer::resize(output.subset_sum, nsubsets);
189 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
191#ifdef SCRAN_QC_TEST_INIT
192 , SCRAN_QC_TEST_INIT
193#endif
194 );
195 x.subset_sum[s] = output.subset_sum[s].data();
196 }
197
198 compute_adt_qc_metrics(mat, subsets, x, options);
199 return output;
200}
201
211
216 double detected_min_drop = 0.1;
217
223};
224
228namespace internal {
229
230template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
231void adt_populate(Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& res, BlockSource_ block, const ComputeAdtQcFiltersOptions& options) {
232 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
233 auto buffer = [&]{
234 if constexpr(unblocked) {
235 return sanisizer::create<std::vector<Float_> >(n);
236 } else {
237 return FindMedianMadWorkspace<Float_>(n, block);
238 }
239 }();
240
241 {
243 opts.num_mads = options.detected_num_mads;
244 opts.log = true;
245 opts.upper = false;
246 opts.min_diff = -std::log(1 - options.detected_min_drop);
247 host.get_detected() = [&]{
248 if constexpr(unblocked) {
249 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
250 } else {
251 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
252 }
253 }();
254 }
255
256 {
257 const auto nsubsets = res.subset_sum.size();
258 auto& host_subsets = host.get_subset_sum();
259 sanisizer::resize(host_subsets, nsubsets);
260
262 opts.num_mads = options.subset_sum_num_mads;
263 opts.log = true;
264 opts.lower = false;
265
266 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
267 const auto sub = res.subset_sum[s];
268 host.get_subset_sum()[s] = [&]{
269 if constexpr(unblocked) {
270 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
271 } else {
272 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
273 }
274 }();
275 }
276 }
277}
278
279template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
280void adt_filter(const Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, BlockSource_ block, Output_* const output) {
281 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
282 std::fill_n(output, n, 1);
283
284 const auto& detected = host.get_detected();
285 for (I<decltype(n)> i = 0; i < n; ++i) {
286 auto thresh = [&]{
287 if constexpr(unblocked) {
288 return detected;
289 } else {
290 return detected[block[i]];
291 }
292 }();
293 output[i] = output[i] && (metrics.detected[i] >= thresh);
294 }
295
296 const auto nsubsets = metrics.subset_sum.size();
297 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
298 const auto sub = metrics.subset_sum[s];
299 const auto& sthresh = host.get_subset_sum()[s];
300 for (I<decltype(n)> i = 0; i < n; ++i) {
301 auto thresh = [&]{
302 if constexpr(unblocked) {
303 return sthresh;
304 } else {
305 return sthresh[block[i]];
306 }
307 }();
308 output[i] = output[i] && (sub[i] <= thresh);
309 }
310 }
311}
312
313template<typename Sum_, typename Detected_>
314ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> to_buffer(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) {
315 ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> buffer;
316 buffer.sum = metrics.sum.data();
317 buffer.detected = metrics.detected.data();
318 buffer.subset_sum.reserve(metrics.subset_sum.size());
319 for (const auto& s : metrics.subset_sum) {
320 buffer.subset_sum.push_back(s.data());
321 }
322 return buffer;
323}
324
325}
336template<typename Float_ = double>
338public:
342 Float_ get_detected() const {
343 return my_detected;
344 }
345
350 const std::vector<Float_>& get_subset_sum() const {
351 return my_subset_sum;
352 }
353
357 Float_& get_detected() {
358 return my_detected;
359 }
360
365 std::vector<Float_>& get_subset_sum() {
366 return my_subset_sum;
367 }
368
369private:
370 Float_ my_detected = 0;
371 std::vector<Float_> my_subset_sum;
372
373public:
386 template<typename Sum_, typename Detected_, typename Output_>
387 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, Output_* const output) const {
388 internal::adt_filter(*this, num, metrics, false, output);
389 }
390
401 template<typename Sum_, typename Detected_, typename Output_>
402 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, Output_* const output) const {
403 return filter(metrics.detected.size(), internal::to_buffer(metrics), output);
404 }
405
416 template<typename Output_ = unsigned char, typename Sum_, typename Detected_>
417 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
418 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
419#ifdef SCRAN_QC_TEST_INIT
420 , SCRAN_QC_TEST_INIT
421#endif
422 );
423 filter(metrics, output.data());
424 return output;
425 }
426};
427
453template<typename Float_ = double, typename Sum_, typename Detected_>
456 internal::adt_populate<Float_>(output, num, metrics, false, options);
457 return output;
458}
459
470template<typename Float_ = double, typename Sum_, typename Detected_>
472 return compute_adt_qc_filters(metrics.detected.size(), internal::to_buffer(metrics), options);
473}
474
481template<typename Float_ = double>
483public:
488 const std::vector<Float_>& get_detected() const {
489 return my_detected;
490 }
491
497 const std::vector<std::vector<Float_> >& get_subset_sum() const {
498 return my_subset_sum;
499 }
500
505 std::vector<Float_>& get_detected() {
506 return my_detected;
507 }
508
514 std::vector<std::vector<Float_> >& get_subset_sum() {
515 return my_subset_sum;
516 }
517
518private:
519 std::vector<Float_> my_sum;
520 std::vector<Float_> my_detected;
521 std::vector<std::vector<Float_> > my_subset_sum;
522
523public:
538 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
539 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, const Block_* block, Output_* const output) const {
540 internal::adt_filter(*this, num, metrics, block, output);
541 }
542
556 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
557 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block, Output_* const output) const {
558 return filter(metrics.detected.size(), internal::to_buffer(metrics), block, output);
559 }
560
574 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Block_>
575 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block) const {
576 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
577#ifdef SCRAN_QC_TEST_INIT
578 , SCRAN_QC_TEST_INIT
579#endif
580 );
581 filter(metrics, block, output.data());
582 return output;
583 }
584};
585
603template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
605 const std::size_t num,
607 const Block_* const block,
608 const ComputeAdtQcFiltersOptions& options)
609{
611 internal::adt_populate<Float_>(output, num, metrics, block, options);
612 return output;
613}
614
627template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
630 const Block_* const block,
631 const ComputeAdtQcFiltersOptions& options)
632{
633 return compute_adt_qc_filters_blocked(metrics.detected.size(), internal::to_buffer(metrics), block, options);
634}
635
636}
637
638#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:482
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition adt_quality_control.hpp:557
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:514
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:505
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *const output) const
Definition adt_quality_control.hpp:539
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:488
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block) const
Definition adt_quality_control.hpp:575
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:497
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:337
Float_ & get_detected()
Definition adt_quality_control.hpp:357
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:387
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:417
Float_ get_detected() const
Definition adt_quality_control.hpp:342
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:350
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:365
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:402
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:225
AdtQcFilters< Float_ > compute_adt_qc_filters(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:454
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:604
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:101
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:834
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:143
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:66
double num_mads
Definition choose_filter_thresholds.hpp:39
double min_diff
Definition choose_filter_thresholds.hpp:46
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:205
double detected_min_drop
Definition adt_quality_control.hpp:216
double detected_num_mads
Definition adt_quality_control.hpp:210
double subset_sum_num_mads
Definition adt_quality_control.hpp:222
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:46
Detected_ * detected
Definition adt_quality_control.hpp:57
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:64
Sum_ * sum
Definition adt_quality_control.hpp:51
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:28
int num_threads
Definition adt_quality_control.hpp:33
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:125
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:134
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:129
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:140
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:80
Sum_ * sum
Definition per_cell_qc_metrics.hpp:98
Detected_ * detected
Definition per_cell_qc_metrics.hpp:104
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:124
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66