scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
41template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
48
54
60 std::vector<Proportion_*> subset_proportion;
61};
62
91template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
94 const std::vector<Subset_>& subsets,
97{
98 auto NC = mat.ncol();
99 size_t nsubsets = subsets.size();
100
102 tmp.sum = output.sum;
103 tmp.detected = output.detected;
104
105 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
106 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
107
108 if (output.subset_proportion.size()) {
109 // Providing space for the subset sums if they're not the same type.
110 if constexpr(same_type) {
111 tmp.subset_sum = output.subset_proportion;
112 } else {
114 tmp.subset_sum.resize(nsubsets);
115 for (size_t s = 0; s < nsubsets; ++s) {
116 auto& b = placeholder_subset[s];
117 b.resize(NC);
118 tmp.subset_sum[s] = b.data();
119 }
120 }
121 }
122
124 opt.num_threads = options.num_threads;
126
127 for (size_t s = 0 ; s < nsubsets; ++s) {
128 auto dest = output.subset_proportion[s];
129 if (dest) {
130 auto src = tmp.subset_sum[s];
131 for (Index_ c = 0; c < NC; ++c) {
132 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
133 }
134 }
135 }
136}
137
144template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
149 std::vector<Sum_> sum;
150
154 std::vector<Detected_> detected;
155
160 std::vector<std::vector<Proportion_> > subset_proportion;
161};
162
182template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
184 auto NC = mat.ncol();
187
188 output.sum.resize(NC
191#endif
192 );
193 x.sum = output.sum.data();
194
195 output.detected.resize(NC
198#endif
199 );
200 x.detected = output.detected.data();
201
202 size_t nsubsets = subsets.size();
203 x.subset_proportion.resize(nsubsets);
204 output.subset_proportion.resize(nsubsets);
205 for (size_t s = 0; s < nsubsets; ++s) {
206 output.subset_proportion[s].resize(NC
209#endif
210 );
211 x.subset_proportion[s] = output.subset_proportion[s].data();
212 }
213
215 return output;
216}
217
240
244namespace internal {
245
246template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
248 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
249 auto buffer = [&]() {
250 if constexpr(unblocked) {
251 return std::vector<Float_>(n);
252 } else {
254 }
255 }();
256
257 {
259 opts.num_mads = options.sum_num_mads;
260 opts.log = true;
261 opts.upper = false;
262 host.get_sum() = [&]() {
263 if constexpr(unblocked) {
264 return choose_filter_thresholds(n, res.sum, buffer.data(), opts).lower;
265 } else {
266 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.sum, block, &buffer, opts));
267 }
268 }();
269 }
270
271 {
273 opts.num_mads = options.detected_num_mads;
274 opts.log = true;
275 opts.upper = false;
276 host.get_detected() = [&]() {
277 if constexpr(unblocked) {
278 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
279 } else {
280 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
281 }
282 }();
283 }
284
285 {
287 opts.num_mads = options.subset_proportion_num_mads;
288 opts.lower = false;
289
290 size_t nsubsets = res.subset_proportion.size();
291 host.get_subset_proportion().resize(nsubsets);
292 for (size_t s = 0; s < nsubsets; ++s) {
293 auto sub = res.subset_proportion[s];
294 host.get_subset_proportion()[s] = [&]() {
295 if constexpr(unblocked) {
296 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
297 } else {
298 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
299 }
300 }();
301 }
302 }
303}
304
305template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
307 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
308 std::fill_n(output, n, 1);
309
310 const auto& sum = host.get_sum();
311 for (size_t i = 0; i < n; ++i) {
312 auto thresh = [&]() {
313 if constexpr(unblocked) {
314 return sum;
315 } else {
316 return sum[block[i]];
317 }
318 }();
319 output[i] = output[i] && (metrics.sum[i] >= thresh);
320 }
321
322 const auto& detected = host.get_detected();
323 for (size_t i = 0; i < n; ++i) {
324 auto thresh = [&]() {
325 if constexpr(unblocked) {
326 return detected;
327 } else {
328 return detected[block[i]];
329 }
330 }();
331 output[i] = output[i] && (metrics.detected[i] >= thresh);
332 }
333
334 size_t nsubsets = metrics.subset_proportion.size();
335 for (size_t s = 0; s < nsubsets; ++s) {
336 auto sub = metrics.subset_proportion[s];
337 const auto& sthresh = host.get_subset_proportion()[s];
338 for (size_t i = 0; i < n; ++i) {
339 auto thresh = [&]() {
340 if constexpr(unblocked) {
341 return sthresh;
342 } else {
343 return sthresh[block[i]];
344 }
345 }();
346 output[i] = output[i] && (sub[i] <= thresh);
347 }
348 }
349}
350
351template<typename Sum_, typename Detected_, typename Proportion_>
354 buffer.sum = metrics.sum.data();
355 buffer.detected = metrics.detected.data();
356 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
357 for (const auto& s : metrics.subset_proportion) {
358 buffer.subset_proportion.push_back(s.data());
359 }
360 return buffer;
361}
362
363}
372template<typename Float_ = double>
374public:
378 Float_ get_sum() const {
379 return my_sum;
380 }
381
386 return my_detected;
387 }
388
393 const std::vector<Float_>& get_subset_proportion() const {
394 return my_subset_proportion;
395 }
396
401 return my_sum;
402 }
403
408 return my_detected;
409 }
410
415 std::vector<Float_>& get_subset_proportion() {
416 return my_subset_proportion;
417 }
418
419private:
420 Float_ my_sum = 0;
421 Float_ my_detected = 0;
422 std::vector<Float_> my_subset_proportion;
423
424public:
436 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
438 internal::rna_filter(*this, num, metrics, false, output);
439 }
440
451 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
453 return filter(metrics.sum.size(), internal::to_buffer(metrics), output);
454 }
455
465 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
467 std::vector<Output_> output(metrics.sum.size()
470#endif
471 );
472 filter(metrics, output.data());
473 return output;
474 }
475};
476
494template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
500
516template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
520
525template<typename Float_ = double>
527public:
532 const std::vector<Float_>& get_sum() const {
533 return my_sum;
534 }
535
540 const std::vector<Float_>& get_detected() const {
541 return my_detected;
542 }
543
549 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
550 return my_subset_proportion;
551 }
552
557 std::vector<Float_>& get_sum() {
558 return my_sum;
559 }
560
565 std::vector<Float_>& get_detected() {
566 return my_detected;
567 }
568
574 std::vector<std::vector<Float_> >& get_subset_proportion() {
575 return my_subset_proportion;
576 }
577
578private:
579 std::vector<Float_> my_sum;
580 std::vector<Float_> my_detected;
581 std::vector<std::vector<Float_> > my_subset_proportion;
582
583public:
599 template<typename Index_, typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
601 internal::rna_filter(*this, num, metrics, block, output);
602 }
603
618 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
620 return filter(metrics.sum.size(), internal::to_buffer(metrics), block, output);
621 }
622
637 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Block_ = int>
639 std::vector<Output_> output(metrics.sum.size()
642#endif
643 );
644 filter(metrics, block, output.data());
645 return output;
646 }
647};
648
663template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
674
688template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
696
697}
698
699#endif
Define QC filter thresholds using a MAD-based approach.
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:176
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:526
void filter(Index_ num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, Output_ *output) const
Definition rna_quality_control.hpp:600
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:565
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, Output_ *output) const
Definition rna_quality_control.hpp:619
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block) const
Definition rna_quality_control.hpp:638
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:574
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:557
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:540
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:532
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:549
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:373
Float_ & get_detected()
Definition rna_quality_control.hpp:407
Float_ get_detected() const
Definition rna_quality_control.hpp:385
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:393
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:415
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *output) const
Definition rna_quality_control.hpp:452
Float_ get_sum() const
Definition rna_quality_control.hpp:378
void filter(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *output) const
Definition rna_quality_control.hpp:437
Float_ & get_sum()
Definition rna_quality_control.hpp:400
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:466
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:92
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:664
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
RnaQcFilters< Float_ > compute_rna_qc_filters(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:495
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:221
double detected_num_mads
Definition rna_quality_control.hpp:226
double subset_proportion_num_mads
Definition rna_quality_control.hpp:238
double sum_num_mads
Definition rna_quality_control.hpp:232
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:42
Detected_ * detected
Definition rna_quality_control.hpp:53
Sum_ * sum
Definition rna_quality_control.hpp:47
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:60
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:25
int num_threads
Definition rna_quality_control.hpp:30
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:145
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:149
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:154
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:160
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22