scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11
12#include "find_median_mad.hpp"
15#include "utils.hpp"
16
22namespace scran_qc {
23
34
43template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
49 Sum_* sum = NULL;
50
55 Detected_* detected = NULL;
56
62 std::vector<Proportion_*> subset_proportion;
63};
64
93template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
96 const std::vector<Subset_>& subsets,
98 const ComputeRnaQcMetricsOptions& options)
99{
100 const auto NC = mat.ncol();
101 const auto nsubsets = subsets.size();
102
104 tmp.sum = output.sum;
105 tmp.detected = output.detected;
106
107 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
108 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
109
110 if (output.subset_proportion.size()) {
111 // Providing space for the subset sums if they're not the same type.
112 if constexpr(same_type) {
113 tmp.subset_sum = output.subset_proportion;
114 } else {
115 sanisizer::resize(placeholder_subset, nsubsets);
116 sanisizer::resize(tmp.subset_sum, nsubsets);
117 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
118 auto& b = placeholder_subset[s];
120 tmp.subset_sum[s] = b.data();
121 }
122 }
123 }
124
126 opt.num_threads = options.num_threads;
127 per_cell_qc_metrics(mat, subsets, tmp, opt);
128
129 for (decltype(I(nsubsets)) s = 0 ; s < nsubsets; ++s) {
130 const auto dest = output.subset_proportion[s];
131 if (dest) {
132 const auto src = tmp.subset_sum[s];
133 for (Index_ c = 0; c < NC; ++c) {
134 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
135 }
136 }
137 }
138}
139
146template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
151 std::vector<Sum_> sum;
152
156 std::vector<Detected_> detected;
157
162 std::vector<std::vector<Proportion_> > subset_proportion;
163};
164
184template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_, typename Index_, typename Subset_>
186 const auto NC = mat.ncol();
189
191#ifdef SCRAN_QC_TEST_INIT
192 , SCRAN_QC_TEST_INIT
193#endif
194 );
195 buffers.sum = output.sum.data();
196
198#ifdef SCRAN_QC_TEST_INIT
199 , SCRAN_QC_TEST_INIT
200#endif
201 );
202 buffers.detected = output.detected.data();
203
204 const auto nsubsets = subsets.size();
205 sanisizer::resize(buffers.subset_proportion, nsubsets);
206 sanisizer::resize(output.subset_proportion, nsubsets);
207 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
209#ifdef SCRAN_QC_TEST_INIT
210 , SCRAN_QC_TEST_INIT
211#endif
212 );
213 buffers.subset_proportion[s] = output.subset_proportion[s].data();
214 }
215
216 compute_rna_qc_metrics(mat, subsets, buffers, options);
217 return output;
218}
219
242
246namespace internal {
247
248template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
249void rna_populate(Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& res, BlockSource_ block, const ComputeRnaQcFiltersOptions& options) {
250 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
251 auto buffer = [&]{
252 if constexpr(unblocked) {
253 return sanisizer::create<std::vector<Float_> >(n);
254 } else {
255 return FindMedianMadWorkspace<Float_>(n, block);
256 }
257 }();
258
259 {
261 opts.num_mads = options.sum_num_mads;
262 opts.log = true;
263 opts.upper = false;
264 host.get_sum() = [&]{
265 if constexpr(unblocked) {
266 return choose_filter_thresholds(n, res.sum, buffer.data(), opts).lower;
267 } else {
268 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.sum, block, &buffer, opts));
269 }
270 }();
271 }
272
273 {
275 opts.num_mads = options.detected_num_mads;
276 opts.log = true;
277 opts.upper = false;
278 host.get_detected() = [&]{
279 if constexpr(unblocked) {
280 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
281 } else {
282 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
283 }
284 }();
285 }
286
287 {
290 opts.lower = false;
291
292 const auto nsubsets = res.subset_proportion.size();
293 auto& subhost = host.get_subset_proportion();
294 sanisizer::resize(subhost, nsubsets);
295 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
296 const auto sub = res.subset_proportion[s];
297 subhost[s] = [&]{
298 if constexpr(unblocked) {
299 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
300 } else {
301 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
302 }
303 }();
304 }
305 }
306}
307
308template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
309void rna_filter(const Host_& host, const std::size_t n, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, BlockSource_ block, Output_* const output) {
310 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
311 std::fill_n(output, n, 1);
312
313 const auto& sum = host.get_sum();
314 for (decltype(I(n)) i = 0; i < n; ++i) {
315 auto thresh = [&]{
316 if constexpr(unblocked) {
317 return sum;
318 } else {
319 return sum[block[i]];
320 }
321 }();
322 output[i] = output[i] && (metrics.sum[i] >= thresh);
323 }
324
325 const auto& detected = host.get_detected();
326 for (decltype(I(n)) i = 0; i < n; ++i) {
327 auto thresh = [&]{
328 if constexpr(unblocked) {
329 return detected;
330 } else {
331 return detected[block[i]];
332 }
333 }();
334 output[i] = output[i] && (metrics.detected[i] >= thresh);
335 }
336
337 const auto nsubsets = metrics.subset_proportion.size();
338 for (decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
339 const auto sub = metrics.subset_proportion[s];
340 const auto& sthresh = host.get_subset_proportion()[s];
341 for (decltype(I(n)) i = 0; i < n; ++i) {
342 const auto thresh = [&]{
343 if constexpr(unblocked) {
344 return sthresh;
345 } else {
346 return sthresh[block[i]];
347 }
348 }();
349 output[i] = output[i] && (sub[i] <= thresh);
350 }
351 }
352}
353
354template<typename Sum_, typename Detected_, typename Proportion_>
355ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> to_buffer(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) {
356 ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> buffer;
357 buffer.sum = metrics.sum.data();
358 buffer.detected = metrics.detected.data();
359 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
360 for (const auto& s : metrics.subset_proportion) {
361 buffer.subset_proportion.push_back(s.data());
362 }
363 return buffer;
364}
365
366}
375template<typename Float_ = double>
377public:
381 Float_ get_sum() const {
382 return my_sum;
383 }
384
388 Float_ get_detected() const {
389 return my_detected;
390 }
391
396 const std::vector<Float_>& get_subset_proportion() const {
397 return my_subset_proportion;
398 }
399
403 Float_& get_sum() {
404 return my_sum;
405 }
406
410 Float_& get_detected() {
411 return my_detected;
412 }
413
418 std::vector<Float_>& get_subset_proportion() {
419 return my_subset_proportion;
420 }
421
422private:
423 Float_ my_sum = 0;
424 Float_ my_detected = 0;
425 std::vector<Float_> my_subset_proportion;
426
427public:
439 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
440 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
441 internal::rna_filter(*this, num, metrics, false, output);
442 }
443
454 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
455 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
456 return filter(metrics.sum.size(), internal::to_buffer(metrics), output);
457 }
458
468 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_>
469 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) const {
470 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
471#ifdef SCRAN_QC_TEST_INIT
472 , SCRAN_QC_TEST_INIT
473#endif
474 );
475 filter(metrics, output.data());
476 return output;
477 }
478};
479
498template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
501 internal::rna_populate<Float_>(output, num, metrics, false, options);
502 return output;
503}
504
520template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
522 return compute_rna_qc_filters(metrics.sum.size(), internal::to_buffer(metrics), options);
523}
524
529template<typename Float_ = double>
531public:
536 const std::vector<Float_>& get_sum() const {
537 return my_sum;
538 }
539
544 const std::vector<Float_>& get_detected() const {
545 return my_detected;
546 }
547
553 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
554 return my_subset_proportion;
555 }
556
561 std::vector<Float_>& get_sum() {
562 return my_sum;
563 }
564
569 std::vector<Float_>& get_detected() {
570 return my_detected;
571 }
572
578 std::vector<std::vector<Float_> >& get_subset_proportion() {
579 return my_subset_proportion;
580 }
581
582private:
583 std::vector<Float_> my_sum;
584 std::vector<Float_> my_detected;
585 std::vector<std::vector<Float_> > my_subset_proportion;
586
587public:
603 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
604 void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
605 internal::rna_filter(*this, num, metrics, block, output);
606 }
607
622 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
623 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
624 return filter(metrics.sum.size(), internal::to_buffer(metrics), block, output);
625 }
626
641 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
642 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block) const {
643 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
644#ifdef SCRAN_QC_TEST_INIT
645 , SCRAN_QC_TEST_INIT
646#endif
647 );
648 filter(metrics, block, output.data());
649 return output;
650 }
651};
652
667template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
669 const std::size_t num,
671 const Block_* const block,
672 const ComputeRnaQcFiltersOptions& options)
673{
675 internal::rna_populate<Float_>(output, num, metrics, block, options);
676 return output;
677}
678
692template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
695 const Block_* const block,
696 const ComputeRnaQcFiltersOptions& options)
697{
698 return compute_rna_qc_filters_blocked(metrics.sum.size(), internal::to_buffer(metrics), block, options);
699}
700
701}
702
703#endif
Define QC filter thresholds using a MAD-based approach.
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:530
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:604
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block) const
Definition rna_quality_control.hpp:642
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:569
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:578
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:561
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:623
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:544
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:536
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:553
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:376
Float_ & get_detected()
Definition rna_quality_control.hpp:410
Float_ get_detected() const
Definition rna_quality_control.hpp:388
void filter(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:440
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:455
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:396
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:418
Float_ get_sum() const
Definition rna_quality_control.hpp:381
Float_ & get_sum()
Definition rna_quality_control.hpp:403
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:469
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:94
RnaQcFilters< Float_ > compute_rna_qc_filters(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:499
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(const std::size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:668
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:225
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:143
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:66
double num_mads
Definition choose_filter_thresholds.hpp:39
bool lower
Definition choose_filter_thresholds.hpp:26
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:223
double detected_num_mads
Definition rna_quality_control.hpp:228
double subset_proportion_num_mads
Definition rna_quality_control.hpp:240
double sum_num_mads
Definition rna_quality_control.hpp:234
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:44
Detected_ * detected
Definition rna_quality_control.hpp:55
Sum_ * sum
Definition rna_quality_control.hpp:49
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:62
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:27
int num_threads
Definition rna_quality_control.hpp:32
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:147
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:151
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:156
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:162
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:122
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66