scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "find_median_mad.hpp"
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
43template<typename Sum_ = double, typename Detected_ = int>
49 Sum_* sum;
50
55 Detected_* detected;
56
62 std::vector<Sum_*> subset_sum;
63};
64
96template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
99 const std::vector<Subset_>& subsets,
101 const ComputeAdtQcMetricsOptions& options)
102{
104 tmp.sum = output.sum;
105 tmp.detected = output.detected;
106 tmp.subset_sum = output.subset_sum;
107
109 opt.num_threads = options.num_threads;
110 per_cell_qc_metrics(mat, subsets, tmp, opt);
111}
112
118template<typename Sum_ = double, typename Detected_ = int>
123 std::vector<Sum_> sum;
124
128 std::vector<Detected_> detected;
129
134 std::vector<std::vector<Sum_> > subset_sum;
135};
136
154template<typename Sum_ = double, typename Detected_ = int, typename Value_, typename Index_, typename Subset_>
157 const std::vector<Subset_>& subsets,
158 const ComputeAdtQcMetricsOptions& options)
159{
160 const auto NC = mat.ncol();
163
165#ifdef SCRAN_QC_TEST_INIT
166 , SCRAN_QC_TEST_INIT
167#endif
168 );
169 x.sum = output.sum.data();
170
172#ifdef SCRAN_QC_TEST_INIT
173 , SCRAN_QC_TEST_INIT
174#endif
175 );
176 x.detected = output.detected.data();
177
178 const auto nsubsets = subsets.size();
179 sanisizer::resize(x.subset_sum, nsubsets);
180 sanisizer::resize(output.subset_sum, nsubsets);
181 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
183#ifdef SCRAN_QC_TEST_INIT
184 , SCRAN_QC_TEST_INIT
185#endif
186 );
187 x.subset_sum[s] = output.subset_sum[s].data();
188 }
189
190 compute_adt_qc_metrics(mat, subsets, x, options);
191 return output;
192}
193
203
208 double detected_min_drop = 0.1;
209
215};
216
220namespace internal {
221
222template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
223void adt_populate(Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& res, BlockSource_ block, const ComputeAdtQcFiltersOptions& options) {
224 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
225 auto buffer = [&]{
226 if constexpr(unblocked) {
227 return sanisizer::create<std::vector<Float_> >(n);
228 } else {
229 return FindMedianMadWorkspace<Float_>(n, block);
230 }
231 }();
232
233 {
235 opts.num_mads = options.detected_num_mads;
236 opts.log = true;
237 opts.upper = false;
238 opts.min_diff = -std::log(1 - options.detected_min_drop);
239 host.get_detected() = [&]{
240 if constexpr(unblocked) {
241 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
242 } else {
243 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
244 }
245 }();
246 }
247
248 {
249 const auto nsubsets = res.subset_sum.size();
250 auto& host_subsets = host.get_subset_sum();
251 sanisizer::resize(host_subsets, nsubsets);
252
254 opts.num_mads = options.subset_sum_num_mads;
255 opts.log = true;
256 opts.lower = false;
257
258 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
259 const auto sub = res.subset_sum[s];
260 host.get_subset_sum()[s] = [&]{
261 if constexpr(unblocked) {
262 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
263 } else {
264 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
265 }
266 }();
267 }
268 }
269}
270
271template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
272void adt_filter(const Host_& host, const std::size_t n, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, BlockSource_ block, Output_* const output) {
273 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
274 std::fill_n(output, n, 1);
275
276 const auto& detected = host.get_detected();
277 for (decltype(I(n)) i = 0; i < n; ++i) {
278 auto thresh = [&]{
279 if constexpr(unblocked) {
280 return detected;
281 } else {
282 return detected[block[i]];
283 }
284 }();
285 output[i] = output[i] && (metrics.detected[i] >= thresh);
286 }
287
288 const auto nsubsets = metrics.subset_sum.size();
289 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
290 const auto sub = metrics.subset_sum[s];
291 const auto& sthresh = host.get_subset_sum()[s];
292 for (decltype(I(n)) i = 0; i < n; ++i) {
293 auto thresh = [&]{
294 if constexpr(unblocked) {
295 return sthresh;
296 } else {
297 return sthresh[block[i]];
298 }
299 }();
300 output[i] = output[i] && (sub[i] <= thresh);
301 }
302 }
303}
304
305template<typename Sum_, typename Detected_>
306ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> to_buffer(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) {
307 ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> buffer;
308 buffer.sum = metrics.sum.data();
309 buffer.detected = metrics.detected.data();
310 buffer.subset_sum.reserve(metrics.subset_sum.size());
311 for (const auto& s : metrics.subset_sum) {
312 buffer.subset_sum.push_back(s.data());
313 }
314 return buffer;
315}
316
317}
328template<typename Float_ = double>
330public:
334 Float_ get_detected() const {
335 return my_detected;
336 }
337
342 const std::vector<Float_>& get_subset_sum() const {
343 return my_subset_sum;
344 }
345
349 Float_& get_detected() {
350 return my_detected;
351 }
352
357 std::vector<Float_>& get_subset_sum() {
358 return my_subset_sum;
359 }
360
361private:
362 Float_ my_detected = 0;
363 std::vector<Float_> my_subset_sum;
364
365public:
378 template<typename Sum_, typename Detected_, typename Output_>
379 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, Output_* const output) const {
380 internal::adt_filter(*this, num, metrics, false, output);
381 }
382
393 template<typename Sum_, typename Detected_, typename Output_>
394 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, Output_* const output) const {
395 return filter(metrics.detected.size(), internal::to_buffer(metrics), output);
396 }
397
408 template<typename Output_ = unsigned char, typename Sum_, typename Detected_>
409 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
410 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
411#ifdef SCRAN_QC_TEST_INIT
412 , SCRAN_QC_TEST_INIT
413#endif
414 );
415 filter(metrics, output.data());
416 return output;
417 }
418};
419
445template<typename Float_ = double, typename Sum_, typename Detected_>
448 internal::adt_populate<Float_>(output, num, metrics, false, options);
449 return output;
450}
451
462template<typename Float_ = double, typename Sum_, typename Detected_>
464 return compute_adt_qc_filters(metrics.detected.size(), internal::to_buffer(metrics), options);
465}
466
473template<typename Float_ = double>
475public:
480 const std::vector<Float_>& get_detected() const {
481 return my_detected;
482 }
483
489 const std::vector<std::vector<Float_> >& get_subset_sum() const {
490 return my_subset_sum;
491 }
492
497 std::vector<Float_>& get_detected() {
498 return my_detected;
499 }
500
506 std::vector<std::vector<Float_> >& get_subset_sum() {
507 return my_subset_sum;
508 }
509
510private:
511 std::vector<Float_> my_sum;
512 std::vector<Float_> my_detected;
513 std::vector<std::vector<Float_> > my_subset_sum;
514
515public:
530 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
531 void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, const Block_* block, Output_* const output) const {
532 internal::adt_filter(*this, num, metrics, block, output);
533 }
534
548 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
549 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block, Output_* const output) const {
550 return filter(metrics.detected.size(), internal::to_buffer(metrics), block, output);
551 }
552
566 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Block_>
567 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block) const {
568 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
569#ifdef SCRAN_QC_TEST_INIT
570 , SCRAN_QC_TEST_INIT
571#endif
572 );
573 filter(metrics, block, output.data());
574 return output;
575 }
576};
577
595template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
597 const std::size_t num,
599 const Block_* const block,
600 const ComputeAdtQcFiltersOptions& options)
601{
603 internal::adt_populate<Float_>(output, num, metrics, block, options);
604 return output;
605}
606
619template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
622 const Block_* const block,
623 const ComputeAdtQcFiltersOptions& options)
624{
625 return compute_adt_qc_filters_blocked(metrics.detected.size(), internal::to_buffer(metrics), block, options);
626}
627
628}
629
630#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:474
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition adt_quality_control.hpp:549
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:506
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:497
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *const output) const
Definition adt_quality_control.hpp:531
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:480
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block) const
Definition adt_quality_control.hpp:567
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:489
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:329
Float_ & get_detected()
Definition adt_quality_control.hpp:349
void filter(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:379
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:409
Float_ get_detected() const
Definition adt_quality_control.hpp:334
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:342
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:357
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:394
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:225
AdtQcFilters< Float_ > compute_adt_qc_filters(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:446
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(const std::size_t num, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:596
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:97
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:143
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:66
double num_mads
Definition choose_filter_thresholds.hpp:39
double min_diff
Definition choose_filter_thresholds.hpp:46
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:197
double detected_min_drop
Definition adt_quality_control.hpp:208
double detected_num_mads
Definition adt_quality_control.hpp:202
double subset_sum_num_mads
Definition adt_quality_control.hpp:214
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:44
Detected_ * detected
Definition adt_quality_control.hpp:55
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:62
Sum_ * sum
Definition adt_quality_control.hpp:49
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:28
int num_threads
Definition adt_quality_control.hpp:33
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:119
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:128
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:123
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:134
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:122
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66