scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
adt_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_ADT_QUALITY_CONTROL_HPP
2#define SCRAN_QC_ADT_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
15#include "utils.hpp"
16
22namespace scran_qc {
23
40
50template<typename Sum_ = double, typename Detected_ = int>
57 Sum_* sum;
58
64 Detected_* detected;
65
72 std::vector<Sum_*> subset_sum;
73};
74
108template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
111 const std::vector<Subset_>& subsets,
113 const ComputeAdtQcMetricsOptions& options)
114{
116 tmp.sum = output.sum;
117 tmp.detected = output.detected;
118 tmp.subset_sum = output.subset_sum;
119
121 opt.num_threads = options.num_threads;
123 per_cell_qc_metrics(mat, subsets, tmp, opt);
124}
125
133template<typename Sum_ = double, typename Detected_ = int>
139 std::vector<Sum_> sum;
140
145 std::vector<Detected_> detected;
146
152 std::vector<std::vector<Sum_> > subset_sum;
153};
154
174template<typename Sum_ = double, typename Detected_ = int, typename Value_, typename Index_, typename Subset_>
177 const std::vector<Subset_>& subsets,
178 const ComputeAdtQcMetricsOptions& options
179) {
180 const auto NC = mat.ncol();
183
185#ifdef SCRAN_QC_TEST_INIT
186 , SCRAN_QC_TEST_INIT
187#endif
188 );
189 x.sum = output.sum.data();
190
192#ifdef SCRAN_QC_TEST_INIT
193 , SCRAN_QC_TEST_INIT
194#endif
195 );
196 x.detected = output.detected.data();
197
198 const auto nsubsets = subsets.size();
199 sanisizer::resize(x.subset_sum, nsubsets);
200 sanisizer::resize(output.subset_sum, nsubsets);
201 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
203#ifdef SCRAN_QC_TEST_INIT
204 , SCRAN_QC_TEST_INIT
205#endif
206 );
207 x.subset_sum[s] = output.subset_sum[s].data();
208 }
209
210 compute_adt_qc_metrics(mat, subsets, x, options);
211 return output;
212}
213
223
228 double detected_min_drop = 0.1;
229
235};
236
240template<typename Float_, class Host_, typename Sum_, typename Detected_, typename BlockSource_>
241void compute_adt_qc_filters_internal(
242 Host_& host,
243 const std::size_t num_cells,
245 BlockSource_ block,
246 const std::size_t num_blocks,
247 const ComputeAdtQcFiltersOptions& options
248) {
249 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
250 auto buffer = [&]{
251 if constexpr(unblocked) {
252 return sanisizer::create<std::vector<Float_> >(num_cells);
253 } else {
254 return ChooseFilterThresholdsBlockedWorkspace<Float_>(num_cells, block, num_blocks);
255 }
256 }();
257
258 {
259 ChooseFilterThresholdsOptions opts;
260 opts.num_mads = options.detected_num_mads;
261 opts.log = true;
262 opts.upper = false;
263 opts.min_diff = -std::log(1 - options.detected_min_drop);
264 host.get_detected() = [&]{
265 if constexpr(unblocked) {
266 return choose_filter_thresholds(num_cells, res.detected, buffer.data(), opts).lower;
267 } else {
268 return extract_filter_thresholds<true>(choose_filter_thresholds_blocked(num_cells, res.detected, block, num_blocks, buffer, opts));
269 }
270 }();
271 }
272
273 {
274 const auto nsubsets = res.subset_sum.size();
275 auto& host_subsets = host.get_subset_sum();
276 sanisizer::resize(host_subsets, nsubsets);
277
278 ChooseFilterThresholdsOptions opts;
279 opts.num_mads = options.subset_sum_num_mads;
280 opts.log = true;
281 opts.lower = false;
282
283 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
284 const auto sub = res.subset_sum[s];
285 host.get_subset_sum()[s] = [&]{
286 if constexpr(unblocked) {
287 return choose_filter_thresholds(num_cells, sub, buffer.data(), opts).upper;
288 } else {
289 return extract_filter_thresholds<false>(choose_filter_thresholds_blocked(num_cells, sub, block, num_blocks, buffer, opts));
290 }
291 }();
292 }
293 }
294}
295
296template<class Host_, typename Sum_, typename Detected_, typename BlockSource_, typename Output_>
297void apply_adt_qc_filters_internal(
298 const Host_& host,
299 const std::size_t num_cells,
300 const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics,
301 BlockSource_ block,
302 Output_* const output
303) {
304 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
305 std::fill_n(output, num_cells, 1);
306
307 const auto& detected = host.get_detected();
308 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
309 auto thresh = [&]{
310 if constexpr(unblocked) {
311 return detected;
312 } else {
313 return detected[block[i]];
314 }
315 }();
316 output[i] = output[i] && (metrics.detected[i] >= thresh);
317 }
318
319 const auto nsubsets = metrics.subset_sum.size();
320 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
321 const auto sub = metrics.subset_sum[s];
322 const auto& sthresh = host.get_subset_sum()[s];
323 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
324 auto thresh = [&]{
325 if constexpr(unblocked) {
326 return sthresh;
327 } else {
328 return sthresh[block[i]];
329 }
330 }();
331 output[i] = output[i] && (sub[i] <= thresh);
332 }
333 }
334}
335
336template<typename Sum_, typename Detected_>
337ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> adt_qc_results_to_buffers(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) {
338 ComputeAdtQcMetricsBuffers<const Sum_, const Detected_> buffer;
339 buffer.sum = metrics.sum.data();
340 buffer.detected = metrics.detected.data();
341 buffer.subset_sum.reserve(metrics.subset_sum.size());
342 for (const auto& s : metrics.subset_sum) {
343 buffer.subset_sum.push_back(s.data());
344 }
345 return buffer;
346}
357template<typename Float_ = double>
359public:
363 Float_ get_detected() const {
364 return my_detected;
365 }
366
371 const std::vector<Float_>& get_subset_sum() const {
372 return my_subset_sum;
373 }
374
378 Float_& get_detected() {
379 return my_detected;
380 }
381
386 std::vector<Float_>& get_subset_sum() {
387 return my_subset_sum;
388 }
389
390private:
391 Float_ my_detected = 0;
392 std::vector<Float_> my_subset_sum;
393
394public:
407 template<typename Sum_, typename Detected_, typename Output_>
408 void filter(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, Output_* const output) const {
409 apply_adt_qc_filters_internal(*this, num_cells, metrics, false, output);
410 }
411
422 template<typename Sum_, typename Detected_, typename Output_>
423 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, Output_* const output) const {
424 return filter(metrics.detected.size(), adt_qc_results_to_buffers(metrics), output);
425 }
426
437 template<typename Output_ = unsigned char, typename Sum_, typename Detected_>
438 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics) const {
439 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
440#ifdef SCRAN_QC_TEST_INIT
441 , SCRAN_QC_TEST_INIT
442#endif
443 );
444 filter(metrics, output.data());
445 return output;
446 }
447};
448
474template<typename Float_ = double, typename Sum_, typename Detected_>
476 const std::size_t num_cells,
478 const ComputeAdtQcFiltersOptions& options
479) {
481 compute_adt_qc_filters_internal<Float_>(output, num_cells, metrics, false, 0, options);
482 return output;
483}
484
495template<typename Float_ = double, typename Sum_, typename Detected_>
497 return compute_adt_qc_filters(metrics.detected.size(), adt_qc_results_to_buffers(metrics), options);
498}
499
506template<typename Float_ = double>
508public:
513 const std::vector<Float_>& get_detected() const {
514 return my_detected;
515 }
516
522 const std::vector<std::vector<Float_> >& get_subset_sum() const {
523 return my_subset_sum;
524 }
525
530 std::vector<Float_>& get_detected() {
531 return my_detected;
532 }
533
539 std::vector<std::vector<Float_> >& get_subset_sum() {
540 return my_subset_sum;
541 }
542
543private:
544 std::vector<Float_> my_sum;
545 std::vector<Float_> my_detected;
546 std::vector<std::vector<Float_> > my_subset_sum;
547
548public:
563 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
564 void filter(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers<Sum_, Detected_>& metrics, const Block_* block, Output_* const output) const {
565 apply_adt_qc_filters_internal(*this, num_cells, metrics, block, output);
566 }
567
581 template<typename Sum_, typename Detected_, typename Block_, typename Output_>
582 void filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block, Output_* const output) const {
583 return filter(metrics.detected.size(), adt_qc_results_to_buffers(metrics), block, output);
584 }
585
599 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Block_>
600 std::vector<Output_> filter(const ComputeAdtQcMetricsResults<Sum_, Detected_>& metrics, const Block_* const block) const {
601 auto output = sanisizer::create<std::vector<Output_> >(metrics.detected.size()
602#ifdef SCRAN_QC_TEST_INIT
603 , SCRAN_QC_TEST_INIT
604#endif
605 );
606 filter(metrics, block, output.data());
607 return output;
608 }
609};
610
629template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
631 const std::size_t num_cells,
633 const Block_* const block,
634 const std::size_t num_blocks,
635 const ComputeAdtQcFiltersOptions& options
636) {
638 compute_adt_qc_filters_internal<Float_>(output, num_cells, metrics, block, num_blocks, options);
639 return output;
640}
641
655template<typename Float_ = double, typename Sum_, typename Detected_, typename Block_>
658 const Block_* const block,
659 const std::size_t num_blocks,
660 const ComputeAdtQcFiltersOptions& options
661) {
662 return compute_adt_qc_filters_blocked(metrics.detected.size(), adt_qc_results_to_buffers(metrics), block, num_blocks, options);
663}
664
665}
666
667#endif
Define QC filter thresholds using a MAD-based approach.
Filter on ADT-based QC metrics with blocking.
Definition adt_quality_control.hpp:507
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition adt_quality_control.hpp:582
std::vector< std::vector< Float_ > > & get_subset_sum()
Definition adt_quality_control.hpp:539
void filter(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *block, Output_ *const output) const
Definition adt_quality_control.hpp:564
std::vector< Float_ > & get_detected()
Definition adt_quality_control.hpp:530
const std::vector< Float_ > & get_detected() const
Definition adt_quality_control.hpp:513
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, const Block_ *const block) const
Definition adt_quality_control.hpp:600
const std::vector< std::vector< Float_ > > & get_subset_sum() const
Definition adt_quality_control.hpp:522
Filter for high-quality cells using ADT-based metrics.
Definition adt_quality_control.hpp:358
Float_ & get_detected()
Definition adt_quality_control.hpp:378
std::vector< Output_ > filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics) const
Definition adt_quality_control.hpp:438
Float_ get_detected() const
Definition adt_quality_control.hpp:363
const std::vector< Float_ > & get_subset_sum() const
Definition adt_quality_control.hpp:371
std::vector< Float_ > & get_subset_sum()
Definition adt_quality_control.hpp:386
void filter(const ComputeAdtQcMetricsResults< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:423
void filter(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, Output_ *const output) const
Definition adt_quality_control.hpp:408
virtual Index_ ncol() const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::size_t num_cells, const Value_ *const metrics, const Block_ *const block, const std::size_t num_blocks, ChooseFilterThresholdsBlockedWorkspace< Float_ > &workspace, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:327
AdtQcFilters< Float_ > compute_adt_qc_filters(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:475
AdtQcBlockedFilters< Float_ > compute_adt_qc_filters_blocked(const std::size_t num_cells, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &metrics, const Block_ *const block, const std::size_t num_blocks, const ComputeAdtQcFiltersOptions &options)
Definition adt_quality_control.hpp:630
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const std::size_t num_cells, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:194
void compute_adt_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeAdtQcMetricsBuffers< Sum_, Detected_ > &output, const ComputeAdtQcMetricsOptions &options)
Definition adt_quality_control.hpp:109
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:1001
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Workspace for choose_filter_thresholds_blocked().
Definition choose_filter_thresholds.hpp:220
Options for compute_adt_qc_filters().
Definition adt_quality_control.hpp:217
double detected_min_drop
Definition adt_quality_control.hpp:228
double detected_num_mads
Definition adt_quality_control.hpp:222
double subset_sum_num_mads
Definition adt_quality_control.hpp:234
Buffers for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:51
Detected_ * detected
Definition adt_quality_control.hpp:64
std::vector< Sum_ * > subset_sum
Definition adt_quality_control.hpp:72
Sum_ * sum
Definition adt_quality_control.hpp:57
Options for compute_adt_qc_metrics().
Definition adt_quality_control.hpp:27
bool subset_containers_have_indices
Definition adt_quality_control.hpp:38
int num_threads
Definition adt_quality_control.hpp:32
Results of compute_adt_qc_metrics().
Definition adt_quality_control.hpp:134
std::vector< Detected_ > detected
Definition adt_quality_control.hpp:145
std::vector< Sum_ > sum
Definition adt_quality_control.hpp:139
std::vector< std::vector< Sum_ > > subset_sum
Definition adt_quality_control.hpp:152
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:97
Sum_ * sum
Definition per_cell_qc_metrics.hpp:115
Detected_ * detected
Definition per_cell_qc_metrics.hpp:121
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:142
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool subset_containers_have_indices
Definition per_cell_qc_metrics.hpp:77
int num_threads
Definition per_cell_qc_metrics.hpp:83