scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
rna_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_RNA_QUALITY_CONTROL_HPP
2#define SCRAN_QC_RNA_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11
14#include "utils.hpp"
15
21namespace scran_qc {
22
39
50template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
57 Sum_* sum = NULL;
58
64 Detected_* detected = NULL;
65
72 std::vector<Proportion_*> subset_proportion;
73};
74
105template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_, typename Proportion_>
108 const std::vector<Subset_>& subsets,
110 const ComputeRnaQcMetricsOptions& options)
111{
112 const auto NC = mat.ncol();
113 const auto nsubsets = subsets.size();
114
116 tmp.sum = output.sum;
117 tmp.detected = output.detected;
118
119 constexpr bool same_type = std::is_same<Sum_, Proportion_>::value;
120 typename std::conditional<same_type, bool, std::vector<std::vector<Sum_> > >::type placeholder_subset;
121
122 if (output.subset_proportion.size()) {
123 // Providing space for the subset sums if they're not the same type.
124 if constexpr(same_type) {
125 tmp.subset_sum = output.subset_proportion;
126 } else {
127 sanisizer::resize(placeholder_subset, nsubsets);
128 sanisizer::resize(tmp.subset_sum, nsubsets);
129 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
130 auto& b = placeholder_subset[s];
132 tmp.subset_sum[s] = b.data();
133 }
134 }
135 }
136
138 opt.num_threads = options.num_threads;
140 per_cell_qc_metrics(mat, subsets, tmp, opt);
141
142 for (I<decltype(nsubsets)> s = 0 ; s < nsubsets; ++s) {
143 const auto dest = output.subset_proportion[s];
144 if (dest) {
145 const auto src = tmp.subset_sum[s];
146 for (Index_ c = 0; c < NC; ++c) {
147 dest[c] = static_cast<Proportion_>(src[c]) / static_cast<Proportion_>(tmp.sum[c]);
148 }
149 }
150 }
151}
152
161template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double>
167 std::vector<Sum_> sum;
168
173 std::vector<Detected_> detected;
174
180 std::vector<std::vector<Proportion_> > subset_proportion;
181};
182
204template<typename Sum_ = double, typename Detected_ = int, typename Proportion_ = double, typename Value_, typename Index_, typename Subset_>
207 const std::vector<Subset_>& subsets,
208 const ComputeRnaQcMetricsOptions& options
209) {
210 const auto NC = mat.ncol();
213
215#ifdef SCRAN_QC_TEST_INIT
216 , SCRAN_QC_TEST_INIT
217#endif
218 );
219 buffers.sum = output.sum.data();
220
222#ifdef SCRAN_QC_TEST_INIT
223 , SCRAN_QC_TEST_INIT
224#endif
225 );
226 buffers.detected = output.detected.data();
227
228 const auto nsubsets = subsets.size();
229 sanisizer::resize(buffers.subset_proportion, nsubsets);
230 sanisizer::resize(output.subset_proportion, nsubsets);
231 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
233#ifdef SCRAN_QC_TEST_INIT
234 , SCRAN_QC_TEST_INIT
235#endif
236 );
237 buffers.subset_proportion[s] = output.subset_proportion[s].data();
238 }
239
240 compute_rna_qc_metrics(mat, subsets, buffers, options);
241 return output;
242}
243
266
270template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_>
271void compute_rna_qc_filters_internal(
272 Host_& host,
273 const std::size_t num_cells,
275 BlockSource_ block,
276 const std::size_t num_blocks,
277 const ComputeRnaQcFiltersOptions& options
278) {
279 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
280 auto buffer = [&]{
281 if constexpr(unblocked) {
282 return sanisizer::create<std::vector<Float_> >(num_cells);
283 } else {
284 return ChooseFilterThresholdsBlockedWorkspace<Float_>(num_cells, block, num_blocks);
285 }
286 }();
287
288 {
289 ChooseFilterThresholdsOptions opts;
290 opts.num_mads = options.sum_num_mads;
291 opts.log = true;
292 opts.upper = false;
293 host.get_sum() = [&]{
294 if constexpr(unblocked) {
295 return choose_filter_thresholds(num_cells, res.sum, buffer.data(), opts).lower;
296 } else {
297 return extract_filter_thresholds<true>(choose_filter_thresholds_blocked(num_cells, res.sum, block, num_blocks, buffer, opts));
298 }
299 }();
300 }
301
302 {
303 ChooseFilterThresholdsOptions opts;
304 opts.num_mads = options.detected_num_mads;
305 opts.log = true;
306 opts.upper = false;
307 host.get_detected() = [&]{
308 if constexpr(unblocked) {
309 return choose_filter_thresholds(num_cells, res.detected, buffer.data(), opts).lower;
310 } else {
311 return extract_filter_thresholds<true>(choose_filter_thresholds_blocked(num_cells, res.detected, block, num_blocks, buffer, opts));
312 }
313 }();
314 }
315
316 {
317 ChooseFilterThresholdsOptions opts;
318 opts.num_mads = options.subset_proportion_num_mads;
319 opts.lower = false;
320
321 const auto nsubsets = res.subset_proportion.size();
322 auto& subhost = host.get_subset_proportion();
323 sanisizer::resize(subhost, nsubsets);
324 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
325 const auto sub = res.subset_proportion[s];
326 subhost[s] = [&]{
327 if constexpr(unblocked) {
328 return choose_filter_thresholds(num_cells, sub, buffer.data(), opts).upper;
329 } else {
330 return extract_filter_thresholds<false>(choose_filter_thresholds_blocked(num_cells, sub, block, num_blocks, buffer, opts));
331 }
332 }();
333 }
334 }
335}
336
337template<class Host_, typename Sum_, typename Detected_, typename Proportion_, typename BlockSource_, typename Output_>
338void apply_rna_qc_filters_internal(
339 const Host_& host,
340 const std::size_t num_cells,
341 const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics,
342 BlockSource_ block,
343 Output_* const output
344) {
345 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
346 std::fill_n(output, num_cells, 1);
347
348 const auto& sum = host.get_sum();
349 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
350 auto thresh = [&]{
351 if constexpr(unblocked) {
352 return sum;
353 } else {
354 return sum[block[i]];
355 }
356 }();
357 output[i] = output[i] && (metrics.sum[i] >= thresh);
358 }
359
360 const auto& detected = host.get_detected();
361 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
362 auto thresh = [&]{
363 if constexpr(unblocked) {
364 return detected;
365 } else {
366 return detected[block[i]];
367 }
368 }();
369 output[i] = output[i] && (metrics.detected[i] >= thresh);
370 }
371
372 const auto nsubsets = metrics.subset_proportion.size();
373 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
374 const auto sub = metrics.subset_proportion[s];
375 const auto& sthresh = host.get_subset_proportion()[s];
376 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
377 const auto thresh = [&]{
378 if constexpr(unblocked) {
379 return sthresh;
380 } else {
381 return sthresh[block[i]];
382 }
383 }();
384 output[i] = output[i] && (sub[i] <= thresh);
385 }
386 }
387}
388
389template<typename Sum_, typename Detected_, typename Proportion_>
390ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> rna_qc_results_to_buffers(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) {
391 ComputeRnaQcMetricsBuffers<const Sum_, const Detected_, const Proportion_> buffer;
392 buffer.sum = metrics.sum.data();
393 buffer.detected = metrics.detected.data();
394 buffer.subset_proportion.reserve(metrics.subset_proportion.size());
395 for (const auto& s : metrics.subset_proportion) {
396 buffer.subset_proportion.push_back(s.data());
397 }
398 return buffer;
399}
408template<typename Float_ = double>
410public:
414 Float_ get_sum() const {
415 return my_sum;
416 }
417
421 Float_ get_detected() const {
422 return my_detected;
423 }
424
429 const std::vector<Float_>& get_subset_proportion() const {
430 return my_subset_proportion;
431 }
432
436 Float_& get_sum() {
437 return my_sum;
438 }
439
443 Float_& get_detected() {
444 return my_detected;
445 }
446
451 std::vector<Float_>& get_subset_proportion() {
452 return my_subset_proportion;
453 }
454
455private:
456 Float_ my_sum = 0;
457 Float_ my_detected = 0;
458 std::vector<Float_> my_subset_proportion;
459
460public:
472 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
473 void filter(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
474 apply_rna_qc_filters_internal(*this, num_cells, metrics, false, output);
475 }
476
487 template<typename Sum_, typename Detected_, typename Proportion_, typename Output_>
488 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, Output_* const output) const {
489 return filter(metrics.sum.size(), rna_qc_results_to_buffers(metrics), output);
490 }
491
501 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_>
502 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics) const {
503 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
504#ifdef SCRAN_QC_TEST_INIT
505 , SCRAN_QC_TEST_INIT
506#endif
507 );
508 filter(metrics, output.data());
509 return output;
510 }
511};
512
531template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
533 const std::size_t num_cells,
535 const ComputeRnaQcFiltersOptions& options
536) {
538 compute_rna_qc_filters_internal<Float_>(output, num_cells, metrics, false, 0, options);
539 return output;
540}
541
555template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_>
558 const ComputeRnaQcFiltersOptions& options
559) {
560 return compute_rna_qc_filters(metrics.sum.size(), rna_qc_results_to_buffers(metrics), options);
561}
562
567template<typename Float_ = double>
569public:
574 const std::vector<Float_>& get_sum() const {
575 return my_sum;
576 }
577
582 const std::vector<Float_>& get_detected() const {
583 return my_detected;
584 }
585
591 const std::vector<std::vector<Float_> >& get_subset_proportion() const {
592 return my_subset_proportion;
593 }
594
599 std::vector<Float_>& get_sum() {
600 return my_sum;
601 }
602
607 std::vector<Float_>& get_detected() {
608 return my_detected;
609 }
610
616 std::vector<std::vector<Float_> >& get_subset_proportion() {
617 return my_subset_proportion;
618 }
619
620private:
621 std::vector<Float_> my_sum;
622 std::vector<Float_> my_detected;
623 std::vector<std::vector<Float_> > my_subset_proportion;
624
625public:
641 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
642 void filter(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
643 apply_rna_qc_filters_internal(*this, num_cells, metrics, block, output);
644 }
645
660 template<typename Sum_, typename Detected_, typename Proportion_, typename Block_, typename Output_>
661 void filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block, Output_* const output) const {
662 return filter(metrics.sum.size(), rna_qc_results_to_buffers(metrics), block, output);
663 }
664
679 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
680 std::vector<Output_> filter(const ComputeRnaQcMetricsResults<Sum_, Detected_, Proportion_>& metrics, const Block_* const block) const {
681 auto output = sanisizer::create<std::vector<Output_> >(metrics.sum.size()
682#ifdef SCRAN_QC_TEST_INIT
683 , SCRAN_QC_TEST_INIT
684#endif
685 );
686 filter(metrics, block, output.data());
687 return output;
688 }
689};
690
706template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
708 const std::size_t num_cells,
710 const Block_* const block,
711 const std::size_t num_blocks,
712 const ComputeRnaQcFiltersOptions& options
713) {
715 compute_rna_qc_filters_internal<Float_>(output, num_cells, metrics, block, num_blocks, options);
716 return output;
717}
718
733template<typename Float_ = double, typename Sum_, typename Detected_, typename Proportion_, typename Block_>
736 const Block_* const block,
737 const std::size_t num_blocks,
738 const ComputeRnaQcFiltersOptions& options
739) {
740 return compute_rna_qc_filters_blocked(metrics.sum.size(), rna_qc_results_to_buffers(metrics), block, num_blocks, options);
741}
742
743}
744
745#endif
Define QC filter thresholds using a MAD-based approach.
Filter for high-quality cells using RNA-based metrics with blocking.
Definition rna_quality_control.hpp:568
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block) const
Definition rna_quality_control.hpp:680
std::vector< Float_ > & get_detected()
Definition rna_quality_control.hpp:607
std::vector< std::vector< Float_ > > & get_subset_proportion()
Definition rna_quality_control.hpp:616
std::vector< Float_ > & get_sum()
Definition rna_quality_control.hpp:599
void filter(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:642
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition rna_quality_control.hpp:661
const std::vector< Float_ > & get_detected() const
Definition rna_quality_control.hpp:582
const std::vector< Float_ > & get_sum() const
Definition rna_quality_control.hpp:574
const std::vector< std::vector< Float_ > > & get_subset_proportion() const
Definition rna_quality_control.hpp:591
Filter for high-quality cells using RNA-based metrics.
Definition rna_quality_control.hpp:409
void filter(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:473
Float_ & get_detected()
Definition rna_quality_control.hpp:443
Float_ get_detected() const
Definition rna_quality_control.hpp:421
void filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics, Output_ *const output) const
Definition rna_quality_control.hpp:488
const std::vector< Float_ > & get_subset_proportion() const
Definition rna_quality_control.hpp:429
std::vector< Float_ > & get_subset_proportion()
Definition rna_quality_control.hpp:451
Float_ get_sum() const
Definition rna_quality_control.hpp:414
Float_ & get_sum()
Definition rna_quality_control.hpp:436
std::vector< Output_ > filter(const ComputeRnaQcMetricsResults< Sum_, Detected_, Proportion_ > &metrics) const
Definition rna_quality_control.hpp:502
virtual Index_ ncol() const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
void compute_rna_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &output, const ComputeRnaQcMetricsOptions &options)
Definition rna_quality_control.hpp:106
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::size_t num_cells, const Value_ *const metrics, const Block_ *const block, const std::size_t num_blocks, ChooseFilterThresholdsBlockedWorkspace< Float_ > &workspace, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:327
RnaQcFilters< Float_ > compute_rna_qc_filters(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:532
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const std::size_t num_cells, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:194
RnaQcBlockedFilters< Float_ > compute_rna_qc_filters_blocked(const std::size_t num_cells, const ComputeRnaQcMetricsBuffers< Sum_, Detected_, Proportion_ > &metrics, const Block_ *const block, const std::size_t num_blocks, const ComputeRnaQcFiltersOptions &options)
Definition rna_quality_control.hpp:707
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:1001
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Workspace for choose_filter_thresholds_blocked().
Definition choose_filter_thresholds.hpp:220
Options for compute_rna_qc_filters().
Definition rna_quality_control.hpp:247
double detected_num_mads
Definition rna_quality_control.hpp:252
double subset_proportion_num_mads
Definition rna_quality_control.hpp:264
double sum_num_mads
Definition rna_quality_control.hpp:258
Buffers for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:51
Detected_ * detected
Definition rna_quality_control.hpp:64
Sum_ * sum
Definition rna_quality_control.hpp:57
std::vector< Proportion_ * > subset_proportion
Definition rna_quality_control.hpp:72
Options for compute_rna_qc_metrics().
Definition rna_quality_control.hpp:26
bool subset_containers_have_indices
Definition rna_quality_control.hpp:37
int num_threads
Definition rna_quality_control.hpp:31
Results of compute_rna_qc_metrics().
Definition rna_quality_control.hpp:162
std::vector< Sum_ > sum
Definition rna_quality_control.hpp:167
std::vector< Detected_ > detected
Definition rna_quality_control.hpp:173
std::vector< std::vector< Proportion_ > > subset_proportion
Definition rna_quality_control.hpp:180
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:97
Sum_ * sum
Definition per_cell_qc_metrics.hpp:115
Detected_ * detected
Definition per_cell_qc_metrics.hpp:121
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:142
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool subset_containers_have_indices
Definition per_cell_qc_metrics.hpp:77
int num_threads
Definition per_cell_qc_metrics.hpp:83