scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
crispr_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
2#define SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
42template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
68
98template<typename Value_, typename Index_, typename Sum_, typename Detected_>
103{
105 tmp.sum = output.sum;
106 tmp.detected = output.detected;
107 tmp.max_value = output.max_value;
108 tmp.max_index = output.max_index;
109
111 opt.num_threads = options.num_threads;
112 per_cell_qc_metrics(mat, std::vector<uint8_t*>{}, tmp, opt);
113}
114
122template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
127 std::vector<Sum_> sum;
128
132 std::vector<Detected_> detected;
133
137 std::vector<Value_> max_value;
138
142 std::vector<Index_> max_index;
143};
144
160template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
164{
165 auto NC = mat.ncol();
168
169 output.sum.resize(NC);
170 x.sum = output.sum.data();
171
172 output.detected.resize(NC);
173 x.detected = output.detected.data();
174
175 output.max_value.resize(NC);
176 x.max_value = output.max_value.data();
177
178 output.max_index.resize(NC);
179 x.max_index = output.max_index.data();
180
182 return output;
183}
184
195
199namespace internal {
200
201template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_>
203 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
204 auto buffer = [&]() {
205 if constexpr(unblocked) {
206 return std::vector<Float_>(n);
207 } else {
209 }
210 }();
211
212 // Subsetting to the observations in the top 50% of proportions.
213 static_assert(std::is_floating_point<Float_>::value);
214 std::vector<Float_> maxprop;
215 maxprop.reserve(n);
216 for (size_t i = 0; i < n; ++i) {
217 maxprop.push_back(static_cast<Float_>(res.max_value[i]) / static_cast<Float_>(res.sum[i]));
218 }
219
221 fopt.median_only = true;
222 auto prop_res = [&]() {
223 if constexpr(unblocked) {
224 return find_median_mad(n, maxprop.data(), buffer.data(), fopt);
225 } else {
226 return find_median_mad_blocked(n, maxprop.data(), block, &buffer, fopt);
227 }
228 }();
229
230 for (size_t i = 0; i < n; ++i) {
231 auto limit = [&]() {
232 if constexpr(unblocked){
233 return prop_res.median;
234 } else {
235 return prop_res[block[i]].median;
236 }
237 }();
238 if (maxprop[i] >= limit) {
239 maxprop[i] = res.max_value[i];
240 } else {
241 maxprop[i] = std::numeric_limits<Float_>::quiet_NaN(); // ignored during threshold calculation.
242 }
243 }
244
245 // Filtering on the max counts.
247 copt.num_mads = options.max_value_num_mads;
248 copt.log = true;
249 copt.upper = false;
250 host.get_max_value() = [&]() {
251 if constexpr(unblocked) {
252 return choose_filter_thresholds(n, maxprop.data(), buffer.data(), copt).lower;
253 } else {
254 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, maxprop.data(), block, &buffer, copt));
255 }
256 }();
257}
258
259template<class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_, typename Output_>
261 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
262 std::fill_n(output, n, 1);
263
264 const auto& mv = host.get_max_value();
265 for (size_t i = 0; i < n; ++i) {
266 auto thresh = [&]() {
267 if constexpr(unblocked) {
268 return mv;
269 } else {
270 return mv[block[i]];
271 }
272 }();
273 output[i] = output[i] && (metrics.max_value[i] >= thresh);
274 }
275}
276
277template<typename Sum_, typename Detected_, typename Value_, typename Index_>
280 buffer.sum = metrics.sum.data();
281 buffer.detected = metrics.detected.data();
282 buffer.max_value = metrics.max_value.data();
283 buffer.max_index = metrics.max_index.data();
284 return buffer;
285}
286
287}
298template<typename Float_ = double>
300public:
305 return my_max_value;
306 }
307
312 return my_max_value;
313 }
314
315private:
316 Float_ my_max_value = 0;
317
318public:
331 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
333 internal::crispr_filter(*this, num, metrics, false, output);
334 }
335
347 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
349 return filter(metrics.max_value.size(), internal::to_buffer(metrics), output);
350 }
351
362 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
364 std::vector<Output_> output(metrics.max_value.size());
365 filter(metrics, output.data());
366 return output;
367 }
368};
369
403template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
413
426template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
433
439template<typename Float_ = double>
441public:
446 const std::vector<Float_>& get_max_value() const {
447 return my_max_value;
448 }
449
454 std::vector<Float_>& get_max_value() {
455 return my_max_value;
456 }
457
458private:
459 std::vector<Float_> my_sum;
460 std::vector<Float_> my_max_value;
461
462public:
478 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
480 internal::crispr_filter(*this, num, metrics, block, output);
481 }
482
497 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
499 filter(metrics.max_value.size(), internal::to_buffer(metrics), block, output);
500 }
501
516 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Block_ = int>
518 std::vector<Output_> output(metrics.max_value.size());
519 filter(metrics, block, output.data());
520 return output;
521 }
522};
523
543template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
554
569template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
577
578}
579
580#endif
Define QC filter thresholds using a MAD-based approach.
Filter on using CRISPR-based QC metrics with blocking.
Definition crispr_quality_control.hpp:440
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block) const
Definition crispr_quality_control.hpp:517
void filter(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, Output_ *output) const
Definition crispr_quality_control.hpp:479
std::vector< Float_ > & get_max_value()
Definition crispr_quality_control.hpp:454
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, Output_ *output) const
Definition crispr_quality_control.hpp:498
const std::vector< Float_ > & get_max_value() const
Definition crispr_quality_control.hpp:446
Filter for high-quality cells using CRISPR-based metrics.
Definition crispr_quality_control.hpp:299
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics) const
Definition crispr_quality_control.hpp:363
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *output) const
Definition crispr_quality_control.hpp:348
Float_ get_max_value() const
Definition crispr_quality_control.hpp:304
Float_ & get_max_value()
Definition crispr_quality_control.hpp:311
void filter(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *output) const
Definition crispr_quality_control.hpp:332
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:172
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:544
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
void compute_crispr_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
Definition crispr_quality_control.hpp:99
FindMedianMadResults< Float_ > find_median_mad(Index_ num, Float_ *metrics, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:79
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
CrisprQcFilters< Float_ > compute_crispr_qc_filters(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:404
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked(Index_ num, const Value_ *metrics, const Block_ *block, FindMedianMadWorkspace< Output_, Index_ > *workspace, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:261
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_crispr_qc_filters().
Definition crispr_quality_control.hpp:188
double max_value_num_mads
Definition crispr_quality_control.hpp:193
Buffers for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:43
Sum_ * sum
Definition crispr_quality_control.hpp:48
Detected_ * detected
Definition crispr_quality_control.hpp:54
Index_ * max_index
Definition crispr_quality_control.hpp:66
Value_ * max_value
Definition crispr_quality_control.hpp:60
Options for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:25
int num_threads
Definition crispr_quality_control.hpp:30
Results of compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:123
std::vector< Index_ > max_index
Definition crispr_quality_control.hpp:142
std::vector< Value_ > max_value
Definition crispr_quality_control.hpp:137
std::vector< Detected_ > detected
Definition crispr_quality_control.hpp:132
std::vector< Sum_ > sum
Definition crispr_quality_control.hpp:127
Options for find_median_mad().
Definition find_median_mad.hpp:22
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22