scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11
12#include "find_median_mad.hpp"
15#include "utils.hpp"
16
22namespace scran_qc {
23
34
45template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
51 Sum_* sum = NULL;
52
57 Detected_* detected = NULL;
58
64 std::vector<Proportion_*> subset_proportion;
65};
66
97template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
100 const std::vector<Subset_>& subsets,
102 const ComputeRnaQcMetricsOptions& options)
103{
104 const auto NC = mat.ncol();
105 const auto nsubsets = subsets.size();
106
108 tmp.sum = output.sum;
109 tmp.detected = output.detected;
110
111 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
112 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
113
114 if (output.subset_proportion.size()) {
115 // Providing space for the subset sums if they're not the same type.
116 if constexpr(same_type) {
117 tmp.subset_sum = output.subset_proportion;
118 } else {
119 sanisizer::resize(placeholder_subset, nsubsets);
120 sanisizer::resize(tmp.subset_sum, nsubsets);
121 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
122 auto& b = placeholder_subset[s];
124 tmp.subset_sum[s] = b.data();
125 }
126 }
127 }
128
130 opt.num_threads = options.num_threads;
131 per_cell_qc_metrics(mat, subsets, tmp, opt);
132
133 for (I<decltype(nsubsets)> s = 0 ; s < nsubsets; ++s) {
134 const auto dest = output.subset_proportion[s];
135 if (dest) {
136 const auto src = tmp.subset_sum[s];
137 for (Index_ c = 0; c < NC; ++c) {
138 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
139 }
140 }
141 }
142}
143
152template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
157 std::vector<Sum_> sum;
158
162 std::vector<Detected_> detected;
163
168 std::vector<std::vector<Proportion_> > subset_proportion;
169};
170
192template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_, typename Index_, typename Subset_>
194 const auto NC = mat.ncol();
197
199#ifdef SCRAN_QC_TEST_INIT
200 , SCRAN_QC_TEST_INIT
201#endif
202 );
203 buffers.sum = output.sum.data();
204
206#ifdef SCRAN_QC_TEST_INIT
207 , SCRAN_QC_TEST_INIT
208#endif
209 );
210 buffers.detected = output.detected.data();
211
212 const auto nsubsets = subsets.size();
213 sanisizer::resize(buffers.subset_proportion, nsubsets);
214 sanisizer::resize(output.subset_proportion, nsubsets);
215 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
217#ifdef SCRAN_QC_TEST_INIT
218 , SCRAN_QC_TEST_INIT
219#endif
220 );
221 buffers.subset_proportion[s] = output.subset_proportion[s].data();
222 }
223
224 compute_rna_qc_metrics(mat, subsets, buffers, options);
225 return output;
226}
227
250
254namespace internal {
255
256template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
257void rna_populate(Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& res, BlockSource_ block, const ComputeRnaQcFiltersOptions& options) {
258 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
259 auto buffer = [&]{
260 if constexpr(unblocked) {
261 return sanisizer::create<std::vector<Float_> >(n);
262 } else {
263 return FindMedianMadWorkspace<Float_>(n, block);
264 }
265 }();
266
267 {
269 opts.num_mads = options.sum_num_mads;
270 opts.log = true;
271 opts.upper = false;
272 host.get_sum() = [&]{
273 if constexpr(unblocked) {
274 return choose_filter_thresholds(n, res.sum, buffer.data(), opts).lower;
275 } else {
276 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.sum, block, &buffer, opts));
277 }
278 }();
279 }
280
281 {
283 opts.num_mads = options.detected_num_mads;
284 opts.log = true;
285 opts.upper = false;
286 host.get_detected() = [&]{
287 if constexpr(unblocked) {
288 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
289 } else {
290 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
291 }
292 }();
293 }
294
295 {
298 opts.lower = false;
299
300 const auto nsubsets = res.subset_proportion.size();
301 auto& subhost = host.get_subset_proportion();
302 sanisizer::resize(subhost, nsubsets);
303 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
304 const auto sub = res.subset_proportion[s];
305 subhost[s] = [&]{
306 if constexpr(unblocked) {
307 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
308 } else {
309 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
310 }
311 }();
312 }
313 }
314}
315
316template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
317void rna_filter(const Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, BlockSource_ block, Output_* const output) {
318 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
319 std::fill_n(output, n, 1);
320
321 const auto& sum = host.get_sum();
322 for (I<decltype(n)> i = 0; i < n; ++i) {
323 auto thresh = [&]{
324 if constexpr(unblocked) {
325 return sum;
326 } else {
327 return sum[block[i]];
328 }
329 }();
330 output[i] = output[i] && (metrics.sum[i] >= thresh);
331 }
332
333 const auto& detected = host.get_detected();
334 for (I<decltype(n)> i = 0; i < n; ++i) {
335 auto thresh = [&]{
336 if constexpr(unblocked) {
337 return detected;
338 } else {
339 return detected[block[i]];
340 }
341 }();
342 output[i] = output[i] && (metrics.detected[i] >= thresh);
343 }
344
345 const auto nsubsets = metrics.subset_proportion.size();
346 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
347 const auto sub = metrics.subset_proportion[s];
348 const auto& sthresh = host.get_subset_proportion()[s];
349 for (I<decltype(n)> i = 0; i < n; ++i) {
350 const auto thresh = [&]{
351 if constexpr(unblocked) {
352 return sthresh;
353 } else {
354 return sthresh[block[i]];
355 }
356 }();
357 output[i] = output[i] && (sub[i] <= thresh);
358 }
359 }
360}
361
362template<typename Sum_, typename Detected_, typename Proportion_>
363ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> to_buffer(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) {
364 ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> buffer;
365 buffer.sum = metrics.sum.data();
366 buffer.detected = metrics.detected.data();
367 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
368 for (const auto& s : metrics.subset_proportion) {
369 buffer.subset_proportion.push_back(s.data());
370 }
371 return buffer;
372}
373
374}
383template<typename Float_ = double>
385public:
389 Float_ get_sum() const {
390 return my_sum;
391 }
392
396 Float_ get_detected() const {
397 return my_detected;
398 }
399
404 const std::vector<Float_>& get_subset_proportion() const {
405 return my_subset_proportion;
406 }
407
411 Float_& get_sum() {
412 return my_sum;
413 }
414
418 Float_& get_detected() {
419 return my_detected;
420 }
421
426 std::vector<Float_>& get_subset_proportion() {
427 return my_subset_proportion;
428 }
429
430private:
431 Float_ my_sum = 0;
432 Float_ my_detected = 0;
433 std::vector<Float_> my_subset_proportion;
434
435public:
447 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
448 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
449 internal::rna_filter(*this, num, metrics, false, output);
450 }
451
462 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
463 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
464 return filter(metrics.sum.size(), internal::to_buffer(metrics), output);
465 }
466
476 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_>
477 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) const {
478 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
479#ifdef SCRAN_QC_TEST_INIT
480 , SCRAN_QC_TEST_INIT
481#endif
482 );
483 filter(metrics, output.data());
484 return output;
485 }
486};
487
506template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
509 internal::rna_populate<Float_>(output, num, metrics, false, options);
510 return output;
511}
512
528template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
530 return compute_rna_qc_filters(metrics.sum.size(), internal::to_buffer(metrics), options);
531}
532
537template<typename Float_ = double>
539public:
544 const std::vector<Float_>& get_sum() const {
545 return my_sum;
546 }
547
552 const std::vector<Float_>& get_detected() const {
553 return my_detected;
554 }
555
561 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
562 return my_subset_proportion;
563 }
564
569 std::vector<Float_>& get_sum() {
570 return my_sum;
571 }
572
577 std::vector<Float_>& get_detected() {
578 return my_detected;
579 }
580
586 std::vector<std::vector<Float_> >& get_subset_proportion() {
587 return my_subset_proportion;
588 }
589
590private:
591 std::vector<Float_> my_sum;
592 std::vector<Float_> my_detected;
593 std::vector<std::vector<Float_> > my_subset_proportion;
594
595public:
611 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
612 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
613 internal::rna_filter(*this, num, metrics, block, output);
614 }
615
630 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
631 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
632 return filter(metrics.sum.size(), internal::to_buffer(metrics), block, output);
633 }
634
649 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
650 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block) const {
651 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
652#ifdef SCRAN_QC_TEST_INIT
653 , SCRAN_QC_TEST_INIT
654#endif
655 );
656 filter(metrics, block, output.data());
657 return output;
658 }
659};
660
675template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
677 const std::size_t num,
679 const Block_* const block,
680 const ComputeRnaQcFiltersOptions& options)
681{
683 internal::rna_populate<Float_>(output, num, metrics, block, options);
684 return output;
685}
686
700template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
703 const Block_* const block,
704 const ComputeRnaQcFiltersOptions& options)
705{
706 return compute_rna_qc_filters_blocked(metrics.sum.size(), internal::to_buffer(metrics), block, options);
707}
708
709}
710
711#endif
Define QC filter thresholds using a MAD-based approach.
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:538
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:612
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block) const
Definition rna_quality_control.hpp:650
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:577
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:586
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:569
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:631
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:552
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:544
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:561
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:384
Float_ & get_detected()
Definition rna_quality_control.hpp:418
Float_ get_detected() const
Definition rna_quality_control.hpp:396
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:448
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:463
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:404
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:426
Float_ get_sum() const
Definition rna_quality_control.hpp:389
Float_ & get_sum()
Definition rna_quality_control.hpp:411
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:477
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:98
RnaQcFilters< Float_ > compute_rna_qc_filters(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:507
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:676
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:225
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:834
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:143
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:66
double num_mads
Definition choose_filter_thresholds.hpp:39
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:231
double detected_num_mads
Definition rna_quality_control.hpp:236
double subset_proportion_num_mads
Definition rna_quality_control.hpp:248
double sum_num_mads
Definition rna_quality_control.hpp:242
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:46
Detected_ * detected
Definition rna_quality_control.hpp:57
Sum_ * sum
Definition rna_quality_control.hpp:51
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:64
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:27
int num_threads
Definition rna_quality_control.hpp:32
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:153
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:157
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:162
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:168
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:80
Sum_ * sum
Definition per_cell_qc_metrics.hpp:98
Detected_ * detected
Definition per_cell_qc_metrics.hpp:104
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:124
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66