scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
41template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
48
54
60 std::vector<Proportion_*> subset_proportion;
61};
62
91template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
94 const std::vector<Subset_>& subsets,
97{
98 auto NC = mat.ncol();
99 size_t nsubsets = subsets.size();
100
102 tmp.sum = output.sum;
103 tmp.detected = output.detected;
104
105 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
106 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
107
108 if (output.subset_proportion.size()) {
109 // Providing space for the subset sums if they're not the same type.
110 if constexpr(same_type) {
111 tmp.subset_sum = output.subset_proportion;
112 } else {
114 tmp.subset_sum.resize(nsubsets);
115 for (size_t s = 0; s < nsubsets; ++s) {
116 auto& b = placeholder_subset[s];
117 b.resize(NC);
118 tmp.subset_sum[s] = b.data();
119 }
120 }
121 }
122
124 opt.num_threads = options.num_threads;
126
127 for (size_t s = 0 ; s < nsubsets; ++s) {
128 auto dest = output.subset_proportion[s];
129 if (dest) {
130 auto src = tmp.subset_sum[s];
131 for (Index_ c = 0; c < NC; ++c) {
132 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
133 }
134 }
135 }
136}
137
144template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
149 std::vector<Sum_> sum;
150
154 std::vector<Detected_> detected;
155
160 std::vector<std::vector<Proportion_> > subset_proportion;
161};
162
182template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_ = double, typename Index_ = int, typename Subset_ = const uint8_t*>
184 auto NC = mat.ncol();
187
188 output.sum.resize(NC);
189 x.sum = output.sum.data();
190
191 output.detected.resize(NC);
192 x.detected = output.detected.data();
193
194 size_t nsubsets = subsets.size();
195 x.subset_proportion.resize(nsubsets);
196 output.subset_proportion.resize(nsubsets);
197 for (size_t s = 0; s < nsubsets; ++s) {
198 output.subset_proportion[s].resize(NC);
199 x.subset_proportion[s] = output.subset_proportion[s].data();
200 }
201
203 return output;
204}
205
228
232namespace internal {
233
234template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
236 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
237 auto buffer = [&]() {
238 if constexpr(unblocked) {
239 return std::vector<Float_>(n);
240 } else {
242 }
243 }();
244
245 {
247 opts.num_mads = options.sum_num_mads;
248 opts.log = true;
249 opts.upper = false;
250 host.get_sum() = [&]() {
251 if constexpr(unblocked) {
252 return choose_filter_thresholds(n, res.sum, buffer.data(), opts).lower;
253 } else {
254 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.sum, block, &buffer, opts));
255 }
256 }();
257 }
258
259 {
261 opts.num_mads = options.detected_num_mads;
262 opts.log = true;
263 opts.upper = false;
264 host.get_detected() = [&]() {
265 if constexpr(unblocked) {
266 return choose_filter_thresholds(n, res.detected, buffer.data(), opts).lower;
267 } else {
268 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, res.detected, block, &buffer, opts));
269 }
270 }();
271 }
272
273 {
275 opts.num_mads = options.subset_proportion_num_mads;
276 opts.lower = false;
277
278 size_t nsubsets = res.subset_proportion.size();
279 host.get_subset_proportion().resize(nsubsets);
280 for (size_t s = 0; s < nsubsets; ++s) {
281 auto sub = res.subset_proportion[s];
282 host.get_subset_proportion()[s] = [&]() {
283 if constexpr(unblocked) {
284 return choose_filter_thresholds(n, sub, buffer.data(), opts).upper;
285 } else {
286 return internal::strip_threshold<false>(choose_filter_thresholds_blocked(n, sub, block, &buffer, opts));
287 }
288 }();
289 }
290 }
291}
292
293template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
295 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
296 std::fill_n(output, n, 1);
297
298 const auto& sum = host.get_sum();
299 for (size_t i = 0; i < n; ++i) {
300 auto thresh = [&]() {
301 if constexpr(unblocked) {
302 return sum;
303 } else {
304 return sum[block[i]];
305 }
306 }();
307 output[i] = output[i] && (metrics.sum[i] >= thresh);
308 }
309
310 const auto& detected = host.get_detected();
311 for (size_t i = 0; i < n; ++i) {
312 auto thresh = [&]() {
313 if constexpr(unblocked) {
314 return detected;
315 } else {
316 return detected[block[i]];
317 }
318 }();
319 output[i] = output[i] && (metrics.detected[i] >= thresh);
320 }
321
322 size_t nsubsets = metrics.subset_proportion.size();
323 for (size_t s = 0; s < nsubsets; ++s) {
324 auto sub = metrics.subset_proportion[s];
325 const auto& sthresh = host.get_subset_proportion()[s];
326 for (size_t i = 0; i < n; ++i) {
327 auto thresh = [&]() {
328 if constexpr(unblocked) {
329 return sthresh;
330 } else {
331 return sthresh[block[i]];
332 }
333 }();
334 output[i] = output[i] && (sub[i] <= thresh);
335 }
336 }
337}
338
339template<typename Sum_, typename Detected_, typename Proportion_>
342 buffer.sum = metrics.sum.data();
343 buffer.detected = metrics.detected.data();
344 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
345 for (const auto& s : metrics.subset_proportion) {
346 buffer.subset_proportion.push_back(s.data());
347 }
348 return buffer;
349}
350
351}
360template<typename Float_ = double>
362public:
366 Float_ get_sum() const {
367 return my_sum;
368 }
369
374 return my_detected;
375 }
376
381 const std::vector<Float_>& get_subset_proportion() const {
382 return my_subset_proportion;
383 }
384
389 return my_sum;
390 }
391
396 return my_detected;
397 }
398
403 std::vector<Float_>& get_subset_proportion() {
404 return my_subset_proportion;
405 }
406
407private:
408 Float_ my_sum = 0;
409 Float_ my_detected = 0;
410 std::vector<Float_> my_subset_proportion;
411
412public:
424 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
426 internal::rna_filter(*this, num, metrics, false, output);
427 }
428
439 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
441 return filter(metrics.sum.size(), internal::to_buffer(metrics), output);
442 }
443
453 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
455 std::vector<Output_> output(metrics.sum.size());
456 filter(metrics, output.data());
457 return output;
458 }
459};
460
478template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
484
500template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
504
509template<typename Float_ = double>
511public:
516 const std::vector<Float_>& get_sum() const {
517 return my_sum;
518 }
519
524 const std::vector<Float_>& get_detected() const {
525 return my_detected;
526 }
527
533 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
534 return my_subset_proportion;
535 }
536
541 std::vector<Float_>& get_sum() {
542 return my_sum;
543 }
544
549 std::vector<Float_>& get_detected() {
550 return my_detected;
551 }
552
558 std::vector<std::vector<Float_> >& get_subset_proportion() {
559 return my_subset_proportion;
560 }
561
562private:
563 std::vector<Float_> my_sum;
564 std::vector<Float_> my_detected;
565 std::vector<std::vector<Float_> > my_subset_proportion;
566
567public:
583 template<typename Index_, typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
585 internal::rna_filter(*this, num, metrics, block, output);
586 }
587
602 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
604 return filter(metrics.sum.size(), internal::to_buffer(metrics), block, output);
605 }
606
621 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Block_ = int>
623 std::vector<Output_> output(metrics.sum.size());
624 filter(metrics, block, output.data());
625 return output;
626 }
627};
628
643template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
654
668template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
676
677}
678
679#endif
Define QC filter thresholds using a MAD-based approach.
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:172
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:510
void filter(Index_ num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, Output_ *output) const
Definition rna_quality_control.hpp:584
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:549
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, Output_ *output) const
Definition rna_quality_control.hpp:603
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block) const
Definition rna_quality_control.hpp:622
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:558
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:541
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:524
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:516
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:533
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:361
Float_ & get_detected()
Definition rna_quality_control.hpp:395
Float_ get_detected() const
Definition rna_quality_control.hpp:373
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:381
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:403
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *output) const
Definition rna_quality_control.hpp:440
Float_ get_sum() const
Definition rna_quality_control.hpp:366
void filter(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *output) const
Definition rna_quality_control.hpp:425
Float_ & get_sum()
Definition rna_quality_control.hpp:388
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:454
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:92
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *block, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:644
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
RnaQcFilters< Float_ > compute_rna_qc_filters(size_t num, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:479
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:209
double detected_num_mads
Definition rna_quality_control.hpp:214
double subset_proportion_num_mads
Definition rna_quality_control.hpp:226
double sum_num_mads
Definition rna_quality_control.hpp:220
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:42
Detected_ * detected
Definition rna_quality_control.hpp:53
Sum_ * sum
Definition rna_quality_control.hpp:47
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:60
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:25
int num_threads
Definition rna_quality_control.hpp:30
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:145
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:149
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:154
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:160
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22