scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
crispr_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
2#define SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12#include "quickstats/quickstats.hpp"
13
16#include "utils.hpp"
17
23namespace scran_qc {
24
35
48template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
55 Sum_* sum;
56
62 Detected_* detected;
63
69 Value_* max_value;
70
75 Index_* max_index;
76};
77
105template<typename Value_, typename Index_, typename Sum_, typename Detected_>
109 const ComputeCrisprQcMetricsOptions& options)
110{
112 tmp.sum = output.sum;
113 tmp.detected = output.detected;
114 tmp.max_value = output.max_value;
115 tmp.max_index = output.max_index;
116
118 opt.num_threads = options.num_threads;
119 per_cell_qc_metrics(mat, std::vector<const unsigned char*>{}, tmp, opt);
120}
121
134template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
140 std::vector<Sum_> sum;
141
146 std::vector<Detected_> detected;
147
152 std::vector<Value_> max_value;
153
157 std::vector<Index_> max_index;
158};
159
177template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
180 const ComputeCrisprQcMetricsOptions& options)
181{
182 const auto NC = mat.ncol();
185
187#ifdef SCRAN_QC_TEST_INIT
188 , SCRAN_QC_TEST_INIT
189#endif
190 );
191 x.sum = output.sum.data();
192
194#ifdef SCRAN_QC_TEST_INIT
195 , SCRAN_QC_TEST_INIT
196#endif
197 );
198 x.detected = output.detected.data();
199
201#ifdef SCRAN_QC_TEST_INIT
202 , SCRAN_QC_TEST_INIT
203#endif
204 );
205 x.max_value = output.max_value.data();
206
208#ifdef SCRAN_QC_TEST_INIT
209 , SCRAN_QC_TEST_INIT
210#endif
211 );
212 x.max_index = output.max_index.data();
213
214 compute_crispr_qc_metrics(mat, x, options);
215 return output;
216}
217
228
232template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_>
233void compute_crispr_qc_filters_internal(
234 Host_& host,
235 const std::size_t num_cells,
237 BlockSource_ block,
238 const std::size_t num_blocks,
239 const ComputeCrisprQcFiltersOptions& options
240) {
241 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
242 auto buffer = [&]{
243 if constexpr(unblocked) {
244 return sanisizer::create<std::vector<Float_> >(num_cells);
245 } else {
246 return ChooseFilterThresholdsBlockedWorkspace<Float_>(num_cells, block, num_blocks);
247 }
248 }();
249
250 // Subsetting to the observations in the top 50% of proportions.
251 static_assert(std::is_floating_point<Float_>::value);
252 std::vector<Float_> maxprop;
253 maxprop.reserve(num_cells);
254 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
255 maxprop.push_back(static_cast<Float_>(res.max_value[i]) / static_cast<Float_>(res.sum[i]));
256 }
257
258 auto prop_res = [&]{
259 if constexpr(unblocked) {
260 std::copy_n(maxprop.begin(), num_cells, buffer.begin());
261 return quickstats::median<Float_>(num_cells, buffer.data());
262 } else {
263 std::vector<Float_> output;
264 output.reserve(num_blocks);
265 process_blocks_for_choose_filter_thresholds(
266 num_cells,
267 maxprop.data(),
268 block,
269 num_blocks,
270 buffer,
271 [&](const std::size_t len, Float_* const ptr) -> void {
272 output.push_back(quickstats::median<Float_>(len, ptr));
273 }
274 );
275 return output;
276 }
277 }();
278
279 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
280 auto limit = [&]{
281 if constexpr(unblocked){
282 return prop_res;
283 } else {
284 return prop_res[block[i]];
285 }
286 }();
287 if (maxprop[i] >= limit) {
288 maxprop[i] = res.max_value[i];
289 } else {
290 maxprop[i] = std::numeric_limits<Float_>::quiet_NaN(); // ignored during threshold calculation.
291 }
292 }
293
294 // Filtering on the max counts.
295 ChooseFilterThresholdsOptions copt;
296 copt.num_mads = options.max_value_num_mads;
297 copt.log = true;
298 copt.upper = false;
299 host.get_max_value() = [&]{
300 if constexpr(unblocked) {
301 return choose_filter_thresholds(num_cells, maxprop.data(), buffer.data(), copt).lower;
302 } else {
303 return extract_filter_thresholds<true>(choose_filter_thresholds_blocked(num_cells, maxprop.data(), block, num_blocks, buffer, copt));
304 }
305 }();
306}
307
308template<class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_, typename Output_>
309void apply_crispr_qc_filters_internal(
310 const Host_& host,
311 const std::size_t n,
312 const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics,
313 BlockSource_ block,
314 Output_* const output
315) {
316 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
317 std::fill_n(output, n, 1);
318
319 const auto& mv = host.get_max_value();
320 for (I<decltype(n)> i = 0; i < n; ++i) {
321 auto thresh = [&]{
322 if constexpr(unblocked) {
323 return mv;
324 } else {
325 return mv[block[i]];
326 }
327 }();
328 output[i] = output[i] && (metrics.max_value[i] >= thresh);
329 }
330}
331
332template<typename Sum_, typename Detected_, typename Value_, typename Index_>
333ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> crispr_qc_results_to_buffers(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics) {
334 ComputeCrisprQcMetricsBuffers<const Sum_, const Detected_, const Value_, const Index_> buffer;
335 buffer.sum = metrics.sum.data();
336 buffer.detected = metrics.detected.data();
337 buffer.max_value = metrics.max_value.data();
338 buffer.max_index = metrics.max_index.data();
339 return buffer;
340}
351template<typename Float_ = double>
353public:
357 Float_ get_max_value() const {
358 return my_max_value;
359 }
360
364 Float_& get_max_value() {
365 return my_max_value;
366 }
367
368private:
369 Float_ my_max_value = 0;
370
371public:
384 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
385 void filter(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
386 apply_crispr_qc_filters_internal(*this, num_cells, metrics, false, output);
387 }
388
400 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
401 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, Output_* const output) const {
402 return filter(metrics.max_value.size(), crispr_qc_results_to_buffers(metrics), output);
403 }
404
415 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_>
417 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
418#ifdef SCRAN_QC_TEST_INIT
419 , SCRAN_QC_TEST_INIT
420#endif
421 );
422 filter(metrics, output.data());
423 return output;
424 }
425};
426
460template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
462 const std::size_t num_cells,
464 const ComputeCrisprQcFiltersOptions& options)
465{
467 compute_crispr_qc_filters_internal<Float_>(output, num_cells, metrics, false, 0, options);
468 return output;
469}
470
483template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
486 const ComputeCrisprQcFiltersOptions& options)
487{
488 return compute_crispr_qc_filters(metrics.max_value.size(), crispr_qc_results_to_buffers(metrics), options);
489}
490
496template<typename Float_ = double>
498public:
503 const std::vector<Float_>& get_max_value() const {
504 return my_max_value;
505 }
506
511 std::vector<Float_>& get_max_value() {
512 return my_max_value;
513 }
514
515private:
516 std::vector<Float_> my_sum;
517 std::vector<Float_> my_max_value;
518
519public:
535 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
536 void filter(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
537 apply_crispr_qc_filters_internal(*this, num_cells, metrics, block, output);
538 }
539
554 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
555 void filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block, Output_* const output) const {
556 filter(metrics.max_value.size(), crispr_qc_results_to_buffers(metrics), block, output);
557 }
558
573 template<typename Output_ = unsigned char, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
574 std::vector<Output_> filter(const ComputeCrisprQcMetricsResults<Sum_, Detected_, Value_, Index_>& metrics, const Block_* const block) const {
575 auto output = sanisizer::create<std::vector<Output_> >(metrics.max_value.size()
576#ifdef SCRAN_QC_TEST_INIT
577 , SCRAN_QC_TEST_INIT
578#endif
579 );
580 filter(metrics, block, output.data());
581 return output;
582 }
583};
584
605template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
607 const std::size_t num_cells,
609 const Block_* const block,
610 const std::size_t num_blocks,
611 const ComputeCrisprQcFiltersOptions& options
612) {
614 compute_crispr_qc_filters_internal<Float_>(output, num_cells, metrics, block, num_blocks, options);
615 return output;
616}
617
633template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
636 const Block_* const block,
637 const std::size_t num_blocks,
638 const ComputeCrisprQcFiltersOptions& options
639) {
640 return compute_crispr_qc_filters_blocked(metrics.max_value.size(), crispr_qc_results_to_buffers(metrics), block, num_blocks, options);
641}
642
643}
644
645#endif
Define QC filter thresholds using a MAD-based approach.
Filter on using CRISPR-based QC metrics with blocking.
Definition crispr_quality_control.hpp:497
std::vector< Float_ > & get_max_value()
Definition crispr_quality_control.hpp:511
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:555
void filter(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, Output_ *const output) const
Definition crispr_quality_control.hpp:536
const std::vector< Float_ > & get_max_value() const
Definition crispr_quality_control.hpp:503
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block) const
Definition crispr_quality_control.hpp:574
Filter for high-quality cells using CRISPR-based metrics.
Definition crispr_quality_control.hpp:352
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics) const
Definition crispr_quality_control.hpp:416
Float_ get_max_value() const
Definition crispr_quality_control.hpp:357
Float_ & get_max_value()
Definition crispr_quality_control.hpp:364
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:401
void filter(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *const output) const
Definition crispr_quality_control.hpp:385
virtual Index_ ncol() const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::size_t num_cells, const Value_ *const metrics, const Block_ *const block, const std::size_t num_blocks, ChooseFilterThresholdsBlockedWorkspace< Float_ > &workspace, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:327
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const std::size_t num_cells, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:194
void compute_crispr_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
Definition crispr_quality_control.hpp:106
CrisprQcFilters< Float_ > compute_crispr_qc_filters(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:461
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked(const std::size_t num_cells, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *const block, const std::size_t num_blocks, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:606
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:1001
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Compute per-cell quality control metrics.
Workspace for choose_filter_thresholds_blocked().
Definition choose_filter_thresholds.hpp:220
Options for compute_crispr_qc_filters().
Definition crispr_quality_control.hpp:221
double max_value_num_mads
Definition crispr_quality_control.hpp:226
Buffers for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:49
Sum_ * sum
Definition crispr_quality_control.hpp:55
Detected_ * detected
Definition crispr_quality_control.hpp:62
Index_ * max_index
Definition crispr_quality_control.hpp:75
Value_ * max_value
Definition crispr_quality_control.hpp:69
Options for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:28
int num_threads
Definition crispr_quality_control.hpp:33
Results of compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:135
std::vector< Index_ > max_index
Definition crispr_quality_control.hpp:157
std::vector< Value_ > max_value
Definition crispr_quality_control.hpp:152
std::vector< Detected_ > detected
Definition crispr_quality_control.hpp:146
std::vector< Sum_ > sum
Definition crispr_quality_control.hpp:140
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:97
Value_ * max_value
Definition per_cell_qc_metrics.hpp:127
Index_ * max_index
Definition per_cell_qc_metrics.hpp:134
Sum_ * sum
Definition per_cell_qc_metrics.hpp:115
Detected_ * detected
Definition per_cell_qc_metrics.hpp:121
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
int num_threads
Definition per_cell_qc_metrics.hpp:83