scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
crispr_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
2#define SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "find_median_mad.hpp"
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
48template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
54 Sum_* sum;
55
60 Detected_* detected;
61
66 Value_* max_value;
67
72 Index_* max_index;
73};
74
102template<typename Value_, typename Index_, typename Sum_, typename Detected_>
106 const ComputeCrisprQcMetricsOptions& options)
107{
109 tmp.sum = output.sum;
110 tmp.detected = output.detected;
111 tmp.max_value = output.max_value;
112 tmp.max_index = output.max_index;
113
115 opt.num_threads = options.num_threads;
116 per_cell_qc_metrics(mat, std::vector<const unsigned char*>{}, tmp, opt);
117}
118
131template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
136 std::vector<Sum_> sum;
137
141 std::vector<Detected_> detected;
142
146 std::vector<Value_> max_value;
147
151 std::vector<Index_> max_index;
152};
153
171template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
174 const ComputeCrisprQcMetricsOptions& options)
175{
176 const auto NC = mat.ncol();
179
181#ifdef SCRAN_QC_TEST_INIT
182 , SCRAN_QC_TEST_INIT
183#endif
184 );
185 x.sum = output.sum.data();
186
188#ifdef SCRAN_QC_TEST_INIT
189 , SCRAN_QC_TEST_INIT
190#endif
191 );
192 x.detected = output.detected.data();
193
195#ifdef SCRAN_QC_TEST_INIT
196 , SCRAN_QC_TEST_INIT
197#endif
198 );
199 x.max_value = output.max_value.data();
200
202#ifdef SCRAN_QC_TEST_INIT
203 , SCRAN_QC_TEST_INIT
204#endif
205 );
206 x.max_index = output.max_index.data();
207
208 compute_crispr_qc_metrics(mat, x, options);
209 return output;
210}
211
222
226namespace internal {
227
228template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_>
229void crispr_populate(Host_& host, const std::size_t n, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& res, BlockSource_ block, const ComputeCrisprQcFiltersOptions& options) {
230 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
231 auto buffer = [&]{
232 if constexpr(unblocked) {
233 return sanisizer::create<std::vector<Float_> >(n);
234 } else {
235 return FindMedianMadWorkspace<Float_>(n, block);
236 }
237 }();
238
239 // Subsetting to the observations in the top 50% of proportions.
240 static_assert(std::is_floating_point<Float_>::value);
241 std::vector<Float_> maxprop;
242 maxprop.reserve(n);
243 for (I<decltype(n)> i = 0; i < n; ++i) {
244 maxprop.push_back(static_cast<Float_>(res.max_value[i]) / static_cast<Float_>(res.sum[i]));
245 }
246
248 fopt.median_only = true;
249 auto prop_res = [&]{
250 if constexpr(unblocked) {
251 return find_median_mad(n, maxprop.data(), buffer.data(), fopt);
252 } else {
253 return find_median_mad_blocked(n, maxprop.data(), block, &buffer, fopt);
254 }
255 }();
256
257 for (I<decltype(n)> i = 0; i < n; ++i) {
258 auto limit = [&]{
259 if constexpr(unblocked){
260 return prop_res.median;
261 } else {
262 return prop_res[block[i]].median;
263 }
264 }();
265 if (maxprop[i] >= limit) {
266 maxprop[i] = res.max_value[i];
267 } else {
268 maxprop[i] = std::numeric_limits<Float_>::quiet_NaN(); // ignored during threshold calculation.
269 }
270 }
271
272 // Filtering on the max counts.
274 copt.num_mads = options.max_value_num_mads;
275 copt.log = true;
276 copt.upper = false;
277 host.get_max_value() = [&]{
278 if constexpr(unblocked) {
279 return choose_filter_thresholds(n, maxprop.data(), buffer.data(), copt).lower;
280 } else {
281 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, maxprop.data(), block, &buffer, copt));
282 }
283 }();
284}
285
286template<class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_, typename Output_>
287void crispr_filter(const Host_& host, const std::size_t n, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, BlockSource_ block, Output_* const output) {
288 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
289 std::fill_n(output, n, 1);
290
291 const auto& mv = host.get_max_value();
292 for (I<decltype(n)> i = 0; i < n; ++i) {
293 auto thresh = [&]{
294 if constexpr(unblocked) {
295 return mv;
296 } else {
297 return mv[block[i]];
298 }
299 }();
300 output[i] = output[i] && (metrics.max_value[i] >= thresh);
301 }
302}
303
304template<typename Sum_, typename Detected_, typename Value_, typename Index_>
305ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> to_buffer(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics) {
306 ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> buffer;
307 buffer.sum = metrics.sum.data();
308 buffer.detected = metrics.detected.data();
309 buffer.max_value = metrics.max_value.data();
310 buffer.max_index = metrics.max_index.data();
311 return buffer;
312}
313
314}
325template<typename Float_ = double>
327public:
331 Float_ get_max_value() const {
332 return my_max_value;
333 }
334
338 Float_& get_max_value() {
339 return my_max_value;
340 }
341
342private:
343 Float_ my_max_value = 0;
344
345public:
358 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
359 void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
360 internal::crispr_filter(*this, num, metrics, false, output);
361 }
362
374 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
375 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
376 return filter(metrics.max_value.size(), internal::to_buffer(metrics), output);
377 }
378
389 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_>
391 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
392#ifdef SCRAN_QC_TEST_INIT
393 , SCRAN_QC_TEST_INIT
394#endif
395 );
396 filter(metrics, output.data());
397 return output;
398 }
399};
400
434template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
436 const std::size_t num,
438 const ComputeCrisprQcFiltersOptions& options)
439{
441 internal::crispr_populate<Float_>(output, num, metrics, false, options);
442 return output;
443}
444
457template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
460 const ComputeCrisprQcFiltersOptions& options)
461{
462 return compute_crispr_qc_filters(metrics.max_value.size(), internal::to_buffer(metrics), options);
463}
464
470template<typename Float_ = double>
472public:
477 const std::vector<Float_>& get_max_value() const {
478 return my_max_value;
479 }
480
485 std::vector<Float_>& get_max_value() {
486 return my_max_value;
487 }
488
489private:
490 std::vector<Float_> my_sum;
491 std::vector<Float_> my_max_value;
492
493public:
509 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
510 void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
511 internal::crispr_filter(*this, num, metrics, block, output);
512 }
513
528 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
529 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
530 filter(metrics.max_value.size(), internal::to_buffer(metrics), block, output);
531 }
532
547 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
548 std::vector<Output_> filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block) const {
549 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
550#ifdef SCRAN_QC_TEST_INIT
551 , SCRAN_QC_TEST_INIT
552#endif
553 );
554 filter(metrics, block, output.data());
555 return output;
556 }
557};
558
578template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
580 const std::size_t num,
582 const Block_* const block,
583 const ComputeCrisprQcFiltersOptions& options)
584{
586 internal::crispr_populate<Float_>(output, num, metrics, block, options);
587 return output;
588}
589
604template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
607 const Block_* const block,
608 const ComputeCrisprQcFiltersOptions& options)
609{
610 return compute_crispr_qc_filters_blocked(metrics.max_value.size(), internal::to_buffer(metrics), block, options);
611}
612
613}
614
615#endif
Define QC filter thresholds using a MAD-based approach.
Filter on using CRISPR-based QC metrics with blocking.
Definition crispr_quality_control.hpp:471
void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:510
std::vector< Float_ > & get_max_value()
Definition crispr_quality_control.hpp:485
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:529
const std::vector< Float_ > & get_max_value() const
Definition crispr_quality_control.hpp:477
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block) const
Definition crispr_quality_control.hpp:548
Filter for high-quality cells using CRISPR-based metrics.
Definition crispr_quality_control.hpp:326
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics) const
Definition crispr_quality_control.hpp:390
Float_ get_max_value() const
Definition crispr_quality_control.hpp:331
Float_ & get_max_value()
Definition crispr_quality_control.hpp:338
void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:359
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:375
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:579
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:225
void compute_crispr_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
Definition crispr_quality_control.hpp:103
CrisprQcFilters< Float_ > compute_crispr_qc_filters(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:435
FindMedianMadResults< Float_ > find_median_mad(std::size_t num, Float_ *metrics, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:81
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:834
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked(const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Output_ > *workspace, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:273
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:143
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:66
double num_mads
Definition choose_filter_thresholds.hpp:39
Options for compute_crispr_qc_filters().
Definition crispr_quality_control.hpp:215
double max_value_num_mads
Definition crispr_quality_control.hpp:220
Buffers for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:49
Sum_ * sum
Definition crispr_quality_control.hpp:54
Detected_ * detected
Definition crispr_quality_control.hpp:60
Index_ * max_index
Definition crispr_quality_control.hpp:72
Value_ * max_value
Definition crispr_quality_control.hpp:66
Options for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:28
int num_threads
Definition crispr_quality_control.hpp:33
Results of compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:132
std::vector< Index_ > max_index
Definition crispr_quality_control.hpp:151
std::vector< Value_ > max_value
Definition crispr_quality_control.hpp:146
std::vector< Detected_ > detected
Definition crispr_quality_control.hpp:141
std::vector< Sum_ > sum
Definition crispr_quality_control.hpp:136
Options for find_median_mad().
Definition find_median_mad.hpp:25
bool median_only
Definition find_median_mad.hpp:37
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:80
Value_ * max_value
Definition per_cell_qc_metrics.hpp:116
Index_ * max_index
Definition per_cell_qc_metrics.hpp:110
Sum_ * sum
Definition per_cell_qc_metrics.hpp:98
Detected_ * detected
Definition per_cell_qc_metrics.hpp:104
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66