scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11
12#include "find_median_mad.hpp"
15#include "utils.hpp"
16
22namespace scran_qc {
23
34
43template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
49 Sum_* sum = NULL;
50
55 Detected_* detected = NULL;
56
62 std::vector<Proportion_*> subset_proportion;
63};
64
93template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
96 const std::vector<Subset_>& subsets,
98 const ComputeRnaQcMetricsOptions& options)
99{
100 const auto NC = mat.ncol();
101 const auto nsubsets = subsets.size();
102
104 tmp.sum = output.sum;
105 tmp.detected = output.detected;
106
107 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
108 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
109
110 if (output.subset_proportion.size()) {
111 // Providing space for the subset sums if they're not the same type.
112 if constexpr(same_type) {
113 tmp.subset_sum = output.subset_proportion;
114 } else {
115 sanisizer::resize(placeholder_subset, nsubsets);
116 sanisizer::resize(tmp.subset_sum, nsubsets);
117 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
118 auto& b = placeholder_subset[s];
120 tmp.subset_sum[s] = b.data();
121 }
122 }
123 }
124
126 opt.num_threads = options.num_threads;
127 per_cell_qc_metrics(mat, subsets, tmp, opt);
128
129 for (decltype(I(nsubsets)) s = 0 ; s < nsubsets; ++s) {
130 const auto dest = output.subset_proportion[s];
131 if (dest) {
132 const auto src = tmp.subset_sum[s];
133 for (Index_ c = 0; c < NC; ++c) {
134 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
135 }
136 }
137 }
138}
139
146template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
151 std::vector<Sum_> sum;
152
156 std::vector<Detected_> detected;
157
162 std::vector<std::vector<Proportion_> > subset_proportion;
163};
164
184template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_, typename Index_, typename Subset_>
186 const auto NC = mat.ncol();
189
191#ifdef SCRAN_QC_TEST_INIT
192 , SCRAN_QC_TEST_INIT
193#endif
194 );
195 buffers.sum = output.sum.data();
196
198#ifdef SCRAN_QC_TEST_INIT
199 , SCRAN_QC_TEST_INIT
200#endif
201 );
202 buffers.detected = output.detected.data();
203
204 const auto nsubsets = subsets.size();
205 sanisizer::resize(buffers.subset_proportion, nsubsets);
206 sanisizer::resize(output.subset_proportion, nsubsets);
207 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
209#ifdef SCRAN_QC_TEST_INIT
210 , SCRAN_QC_TEST_INIT
211#endif
212 );
213 buffers.subset_proportion[s] = output.subset_proportion[s].data();
214 }
215
216 compute_rna_qc_metrics(mat, subsets, buffers, options);
217 return output;
218}
219
242
246namespace internal {
247
248template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
249void rna_populate(Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& res, BlockSource_ block, const ComputeRnaQcFiltersOptions& options) {
250 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
251 auto buffer = [&]{
252 if constexpr(unblocked) {
253 return sanisizer::create<std::vector<Float_> >(n);
254 } else {
255 return FindMedianMadWorkspace<Float_>(n, block);
256 }
257 }();
258
259 {
261 opts.num_mads = options.sum_num_mads;
262 opts.log = true;
263 opts.upper = false;
264 host.get_sum() = [&]{
265 if constexpr(unblocked) {
266 return choose_filter_thresholds(n, res.sum, buffer.data(), opts).lower;
267 } else {
268 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.sum, block, &buffer, opts));
269 }
270 }();
271 }
272
273 {
275 opts.num_mads = options.detected_num_mads;
276 opts.log = true;
277 opts.upper = false;
278 host.get_detected() = [&]{
279 if constexpr(unblocked) {
280 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
281 } else {
282 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
283 }
284 }();
285 }
286
287 {
290 opts.lower = false;
291
292 const auto nsubsets = res.subset_proportion.size();
293 auto& subhost = host.get_subset_proportion();
294 sanisizer::resize(subhost, nsubsets);
295 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
296 const auto sub = res.subset_proportion[s];
297 subhost[s] = [&]{
298 if constexpr(unblocked) {
299 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
300 } else {
301 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
302 }
303 }();
304 }
305 }
306}
307
308template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
309void rna_filter(const Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, BlockSource_ block, Output_* const output) {
310 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
311 std::fill_n(output, n, 1);
312
313 const auto& sum = host.get_sum();
314 for (decltype(I(n)) i = 0; i < n; ++i) {
315 auto thresh = [&]{
316 if constexpr(unblocked) {
317 return sum;
318 } else {
319 return sum[block[i]];
320 }
321 }();
322 output[i] = output[i] && (metrics.sum[i] >= thresh);
323 }
324
325 const auto& detected = host.get_detected();
326 for (decltype(I(n)) i = 0; i < n; ++i) {
327 auto thresh = [&]{
328 if constexpr(unblocked) {
329 return detected;
330 } else {
331 return detected[block[i]];
332 }
333 }();
334 output[i] = output[i] && (metrics.detected[i] >= thresh);
335 }
336
337 const auto nsubsets = metrics.subset_proportion.size();
338 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
339 const auto sub = metrics.subset_proportion[s];
340 const auto& sthresh = host.get_subset_proportion()[s];
341 for (decltype(I(n)) i = 0; i < n; ++i) {
342 const auto thresh = [&]{
343 if constexpr(unblocked) {
344 return sthresh;
345 } else {
346 return sthresh[block[i]];
347 }
348 }();
349 output[i] = output[i] && (sub[i] <= thresh);
350 }
351 }
352}
353
354template<typename Sum_, typename Detected_, typename Proportion_>
355ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> to_buffer(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) {
356 ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> buffer;
357 buffer.sum = metrics.sum.data();
358 buffer.detected = metrics.detected.data();
359 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
360 for (const auto& s : metrics.subset_proportion) {
361 buffer.subset_proportion.push_back(s.data());
362 }
363 return buffer;
364}
365
366}
375template<typename Float_ = double>
377public:
381 Float_ get_sum() const {
382 return my_sum;
383 }
384
388 Float_ get_detected() const {
389 return my_detected;
390 }
391
396 const std::vector<Float_>& get_subset_proportion() const {
397 return my_subset_proportion;
398 }
399
403 Float_& get_sum() {
404 return my_sum;
405 }
406
410 Float_& get_detected() {
411 return my_detected;
412 }
413
418 std::vector<Float_>& get_subset_proportion() {
419 return my_subset_proportion;
420 }
421
422private:
423 Float_ my_sum = 0;
424 Float_ my_detected = 0;
425 std::vector<Float_> my_subset_proportion;
426
427public:
439 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
440 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
441 internal::rna_filter(*this, num, metrics, false, output);
442 }
443
454 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
455 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
456 return filter(metrics.sum.size(), internal::to_buffer(metrics), output);
457 }
458
468 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_>
469 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) const {
470 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
471#ifdef SCRAN_QC_TEST_INIT
472 , SCRAN_QC_TEST_INIT
473#endif
474 );
475 filter(metrics, output.data());
476 return output;
477 }
478};
479
497template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
500 internal::rna_populate<Float_>(output, num, metrics, false, options);
501 return output;
502}
503
519template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
521 return compute_rna_qc_filters(metrics.sum.size(), internal::to_buffer(metrics), options);
522}
523
528template<typename Float_ = double>
530public:
535 const std::vector<Float_>& get_sum() const {
536 return my_sum;
537 }
538
543 const std::vector<Float_>& get_detected() const {
544 return my_detected;
545 }
546
552 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
553 return my_subset_proportion;
554 }
555
560 std::vector<Float_>& get_sum() {
561 return my_sum;
562 }
563
568 std::vector<Float_>& get_detected() {
569 return my_detected;
570 }
571
577 std::vector<std::vector<Float_> >& get_subset_proportion() {
578 return my_subset_proportion;
579 }
580
581private:
582 std::vector<Float_> my_sum;
583 std::vector<Float_> my_detected;
584 std::vector<std::vector<Float_> > my_subset_proportion;
585
586public:
602 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
603 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
604 internal::rna_filter(*this, num, metrics, block, output);
605 }
606
621 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
622 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
623 return filter(metrics.sum.size(), internal::to_buffer(metrics), block, output);
624 }
625
640 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
641 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block) const {
642 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
643#ifdef SCRAN_QC_TEST_INIT
644 , SCRAN_QC_TEST_INIT
645#endif
646 );
647 filter(metrics, block, output.data());
648 return output;
649 }
650};
651
666template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
668 const std::size_t num,
670 const Block_* const block,
671 const ComputeRnaQcFiltersOptions& options)
672{
674 internal::rna_populate<Float_>(output, num, metrics, block, options);
675 return output;
676}
677
691template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
694 const Block_* const block,
695 const ComputeRnaQcFiltersOptions& options)
696{
697 return compute_rna_qc_filters_blocked(metrics.sum.size(), internal::to_buffer(metrics), block, options);
698}
699
700}
701
702#endif
Define QC filter thresholds using a MAD-based approach.
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:529
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:603
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block) const
Definition rna_quality_control.hpp:641
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:568
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:577
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:560
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:622
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:543
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:535
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:552
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:376
Float_ & get_detected()
Definition rna_quality_control.hpp:410
Float_ get_detected() const
Definition rna_quality_control.hpp:388
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:440
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:455
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:396
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:418
Float_ get_sum() const
Definition rna_quality_control.hpp:381
Float_ & get_sum()
Definition rna_quality_control.hpp:403
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:469
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:94
RnaQcFilters< Float_ > compute_rna_qc_filters(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:498
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:667
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:218
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:136
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:59
double num_mads
Definition choose_filter_thresholds.hpp:39
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:223
double detected_num_mads
Definition rna_quality_control.hpp:228
double subset_proportion_num_mads
Definition rna_quality_control.hpp:240
double sum_num_mads
Definition rna_quality_control.hpp:234
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:44
Detected_ * detected
Definition rna_quality_control.hpp:55
Sum_ * sum
Definition rna_quality_control.hpp:49
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:62
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:27
int num_threads
Definition rna_quality_control.hpp:32
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:147
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:151
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:156
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:162
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:122
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66