scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
crispr_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
2#define SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "find_median_mad.hpp"
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
45template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
51 Sum_* sum;
52
57 Detected_* detected;
58
63 Value_* max_value;
64
69 Index_* max_index;
70};
71
97template<typename Value_, typename Index_, typename Sum_, typename Detected_>
101 const ComputeCrisprQcMetricsOptions& options)
102{
104 tmp.sum = output.sum;
105 tmp.detected = output.detected;
106 tmp.max_value = output.max_value;
107 tmp.max_index = output.max_index;
108
110 opt.num_threads = options.num_threads;
111 per_cell_qc_metrics(mat, std::vector<const unsigned char*>{}, tmp, opt);
112}
113
123template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
128 std::vector<Sum_> sum;
129
133 std::vector<Detected_> detected;
134
138 std::vector<Value_> max_value;
139
143 std::vector<Index_> max_index;
144};
145
161template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
164 const ComputeCrisprQcMetricsOptions& options)
165{
166 const auto NC = mat.ncol();
169
171#ifdef SCRAN_QC_TEST_INIT
172 , SCRAN_QC_TEST_INIT
173#endif
174 );
175 x.sum = output.sum.data();
176
178#ifdef SCRAN_QC_TEST_INIT
179 , SCRAN_QC_TEST_INIT
180#endif
181 );
182 x.detected = output.detected.data();
183
185#ifdef SCRAN_QC_TEST_INIT
186 , SCRAN_QC_TEST_INIT
187#endif
188 );
189 x.max_value = output.max_value.data();
190
192#ifdef SCRAN_QC_TEST_INIT
193 , SCRAN_QC_TEST_INIT
194#endif
195 );
196 x.max_index = output.max_index.data();
197
198 compute_crispr_qc_metrics(mat, x, options);
199 return output;
200}
201
212
216namespace internal {
217
218template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_>
219void crispr_populate(Host_& host, const std::size_t n, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& res, BlockSource_ block, const ComputeCrisprQcFiltersOptions& options) {
220 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
221 auto buffer = [&]{
222 if constexpr(unblocked) {
223 return sanisizer::create<std::vector<Float_> >(n);
224 } else {
225 return FindMedianMadWorkspace<Float_>(n, block);
226 }
227 }();
228
229 // Subsetting to the observations in the top 50% of proportions.
230 static_assert(std::is_floating_point<Float_>::value);
231 std::vector<Float_> maxprop;
232 maxprop.reserve(n);
233 for (decltype(I(n)) i = 0; i < n; ++i) {
234 maxprop.push_back(static_cast<Float_>(res.max_value[i]) / static_cast<Float_>(res.sum[i]));
235 }
236
238 fopt.median_only = true;
239 auto prop_res = [&]{
240 if constexpr(unblocked) {
241 return find_median_mad(n, maxprop.data(), buffer.data(), fopt);
242 } else {
243 return find_median_mad_blocked(n, maxprop.data(), block, &buffer, fopt);
244 }
245 }();
246
247 for (decltype(I(n)) i = 0; i < n; ++i) {
248 auto limit = [&]{
249 if constexpr(unblocked){
250 return prop_res.median;
251 } else {
252 return prop_res[block[i]].median;
253 }
254 }();
255 if (maxprop[i] >= limit) {
256 maxprop[i] = res.max_value[i];
257 } else {
258 maxprop[i] = std::numeric_limits<Float_>::quiet_NaN(); // ignored during threshold calculation.
259 }
260 }
261
262 // Filtering on the max counts.
264 copt.num_mads = options.max_value_num_mads;
265 copt.log = true;
266 copt.upper = false;
267 host.get_max_value() = [&]{
268 if constexpr(unblocked) {
269 return choose_filter_thresholds(n, maxprop.data(), buffer.data(), copt).lower;
270 } else {
271 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, maxprop.data(), block, &buffer, copt));
272 }
273 }();
274}
275
276template<class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_, typename Output_>
277void crispr_filter(const Host_& host, const std::size_t n, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, BlockSource_ block, Output_* const output) {
278 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
279 std::fill_n(output, n, 1);
280
281 const auto& mv = host.get_max_value();
282 for (decltype(I(n)) i = 0; i < n; ++i) {
283 auto thresh = [&]{
284 if constexpr(unblocked) {
285 return mv;
286 } else {
287 return mv[block[i]];
288 }
289 }();
290 output[i] = output[i] && (metrics.max_value[i] >= thresh);
291 }
292}
293
294template<typename Sum_, typename Detected_, typename Value_, typename Index_>
295ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> to_buffer(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics) {
296 ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> buffer;
297 buffer.sum = metrics.sum.data();
298 buffer.detected = metrics.detected.data();
299 buffer.max_value = metrics.max_value.data();
300 buffer.max_index = metrics.max_index.data();
301 return buffer;
302}
303
304}
315template<typename Float_ = double>
317public:
321 Float_ get_max_value() const {
322 return my_max_value;
323 }
324
328 Float_& get_max_value() {
329 return my_max_value;
330 }
331
332private:
333 Float_ my_max_value = 0;
334
335public:
348 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
349 void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
350 internal::crispr_filter(*this, num, metrics, false, output);
351 }
352
364 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
365 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
366 return filter(metrics.max_value.size(), internal::to_buffer(metrics), output);
367 }
368
379 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_>
381 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
382#ifdef SCRAN_QC_TEST_INIT
383 , SCRAN_QC_TEST_INIT
384#endif
385 );
386 filter(metrics, output.data());
387 return output;
388 }
389};
390
424template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
426 const std::size_t num,
428 const ComputeCrisprQcFiltersOptions& options)
429{
431 internal::crispr_populate<Float_>(output, num, metrics, false, options);
432 return output;
433}
434
447template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
450 const ComputeCrisprQcFiltersOptions& options)
451{
452 return compute_crispr_qc_filters(metrics.max_value.size(), internal::to_buffer(metrics), options);
453}
454
460template<typename Float_ = double>
462public:
467 const std::vector<Float_>& get_max_value() const {
468 return my_max_value;
469 }
470
475 std::vector<Float_>& get_max_value() {
476 return my_max_value;
477 }
478
479private:
480 std::vector<Float_> my_sum;
481 std::vector<Float_> my_max_value;
482
483public:
499 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
500 void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
501 internal::crispr_filter(*this, num, metrics, block, output);
502 }
503
518 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
519 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
520 filter(metrics.max_value.size(), internal::to_buffer(metrics), block, output);
521 }
522
537 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
538 std::vector<Output_> filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block) const {
539 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
540#ifdef SCRAN_QC_TEST_INIT
541 , SCRAN_QC_TEST_INIT
542#endif
543 );
544 filter(metrics, block, output.data());
545 return output;
546 }
547};
548
568template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
570 const std::size_t num,
572 const Block_* const block,
573 const ComputeCrisprQcFiltersOptions& options)
574{
576 internal::crispr_populate<Float_>(output, num, metrics, block, options);
577 return output;
578}
579
594template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
597 const Block_* const block,
598 const ComputeCrisprQcFiltersOptions& options)
599{
600 return compute_crispr_qc_filters_blocked(metrics.max_value.size(), internal::to_buffer(metrics), block, options);
601}
602
603}
604
605#endif
Define QC filter thresholds using a MAD-based approach.
Filter on using CRISPR-based QC metrics with blocking.
Definition crispr_quality_control.hpp:461
void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:500
std::vector< Float_ > & get_max_value()
Definition crispr_quality_control.hpp:475
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:519
const std::vector< Float_ > & get_max_value() const
Definition crispr_quality_control.hpp:467
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block) const
Definition crispr_quality_control.hpp:538
Filter for high-quality cells using CRISPR-based metrics.
Definition crispr_quality_control.hpp:316
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics) const
Definition crispr_quality_control.hpp:380
Float_ get_max_value() const
Definition crispr_quality_control.hpp:321
Float_ & get_max_value()
Definition crispr_quality_control.hpp:328
void filter(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:349
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:365
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:175
virtual Index_ ncol() const=0
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:569
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > &mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:218
void compute_crispr_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
Definition crispr_quality_control.hpp:98
CrisprQcFilters< Float_ > compute_crispr_qc_filters(const std::size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:425
FindMedianMadResults< Float_ > find_median_mad(std::size_t num, Float_ *metrics, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:81
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked(const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Output_ > *workspace, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:273
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:136
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:21
bool upper
Definition choose_filter_thresholds.hpp:32
bool log
Definition choose_filter_thresholds.hpp:59
double num_mads
Definition choose_filter_thresholds.hpp:39
Options for compute_crispr_qc_filters().
Definition crispr_quality_control.hpp:205
double max_value_num_mads
Definition crispr_quality_control.hpp:210
Buffers for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:46
Sum_ * sum
Definition crispr_quality_control.hpp:51
Detected_ * detected
Definition crispr_quality_control.hpp:57
Index_ * max_index
Definition crispr_quality_control.hpp:69
Value_ * max_value
Definition crispr_quality_control.hpp:63
Options for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:28
int num_threads
Definition crispr_quality_control.hpp:33
Results of compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:124
std::vector< Index_ > max_index
Definition crispr_quality_control.hpp:143
std::vector< Value_ > max_value
Definition crispr_quality_control.hpp:138
std::vector< Detected_ > detected
Definition crispr_quality_control.hpp:133
std::vector< Sum_ > sum
Definition crispr_quality_control.hpp:128
Options for find_median_mad().
Definition find_median_mad.hpp:25
bool median_only
Definition find_median_mad.hpp:37
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Value_ * max_value
Definition per_cell_qc_metrics.hpp:114
Index_ * max_index
Definition per_cell_qc_metrics.hpp:108
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:66