scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
crispr_quality_control.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
2#define SCRAN_QC_CRISPR_QUALITY_CONTROL_HPP
3
4#include <vector>
5#include <limits>
6#include <algorithm>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10
11#include "find_median_mad.hpp"
14
20namespace scran_qc {
21
32
42template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
68
98template<typename Value_, typename Index_, typename Sum_, typename Detected_>
103{
105 tmp.sum = output.sum;
106 tmp.detected = output.detected;
107 tmp.max_value = output.max_value;
108 tmp.max_index = output.max_index;
109
111 opt.num_threads = options.num_threads;
112 per_cell_qc_metrics(mat, std::vector<uint8_t*>{}, tmp, opt);
113}
114
122template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
127 std::vector<Sum_> sum;
128
132 std::vector<Detected_> detected;
133
137 std::vector<Value_> max_value;
138
142 std::vector<Index_> max_index;
143};
144
160template<typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
164{
165 auto NC = mat.ncol();
168
169 output.sum.resize(NC
172#endif
173 );
174 x.sum = output.sum.data();
175
176 output.detected.resize(NC
179#endif
180 );
181 x.detected = output.detected.data();
182
183 output.max_value.resize(NC
186#endif
187 );
188 x.max_value = output.max_value.data();
189
190 output.max_index.resize(NC
193#endif
194 );
195 x.max_index = output.max_index.data();
196
198 return output;
199}
200
211
215namespace internal {
216
217template<typename Float_, class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_>
219 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
220 auto buffer = [&]() {
221 if constexpr(unblocked) {
222 return std::vector<Float_>(n);
223 } else {
225 }
226 }();
227
228 // Subsetting to the observations in the top 50% of proportions.
229 static_assert(std::is_floating_point<Float_>::value);
230 std::vector<Float_> maxprop;
231 maxprop.reserve(n);
232 for (size_t i = 0; i < n; ++i) {
233 maxprop.push_back(static_cast<Float_>(res.max_value[i]) / static_cast<Float_>(res.sum[i]));
234 }
235
237 fopt.median_only = true;
238 auto prop_res = [&]() {
239 if constexpr(unblocked) {
240 return find_median_mad(n, maxprop.data(), buffer.data(), fopt);
241 } else {
242 return find_median_mad_blocked(n, maxprop.data(), block, &buffer, fopt);
243 }
244 }();
245
246 for (size_t i = 0; i < n; ++i) {
247 auto limit = [&]() {
248 if constexpr(unblocked){
249 return prop_res.median;
250 } else {
251 return prop_res[block[i]].median;
252 }
253 }();
254 if (maxprop[i] >= limit) {
255 maxprop[i] = res.max_value[i];
256 } else {
257 maxprop[i] = std::numeric_limits<Float_>::quiet_NaN(); // ignored during threshold calculation.
258 }
259 }
260
261 // Filtering on the max counts.
263 copt.num_mads = options.max_value_num_mads;
264 copt.log = true;
265 copt.upper = false;
266 host.get_max_value() = [&]() {
267 if constexpr(unblocked) {
268 return choose_filter_thresholds(n, maxprop.data(), buffer.data(), copt).lower;
269 } else {
270 return internal::strip_threshold<true>(choose_filter_thresholds_blocked(n, maxprop.data(), block, &buffer, copt));
271 }
272 }();
273}
274
275template<class Host_, typename Sum_, typename Detected_, typename Value_, typename Index_, typename BlockSource_, typename Output_>
277 constexpr bool unblocked = std::is_same<BlockSource_, bool>::value;
278 std::fill_n(output, n, 1);
279
280 const auto& mv = host.get_max_value();
281 for (size_t i = 0; i < n; ++i) {
282 auto thresh = [&]() {
283 if constexpr(unblocked) {
284 return mv;
285 } else {
286 return mv[block[i]];
287 }
288 }();
289 output[i] = output[i] && (metrics.max_value[i] >= thresh);
290 }
291}
292
293template<typename Sum_, typename Detected_, typename Value_, typename Index_>
296 buffer.sum = metrics.sum.data();
297 buffer.detected = metrics.detected.data();
298 buffer.max_value = metrics.max_value.data();
299 buffer.max_index = metrics.max_index.data();
300 return buffer;
301}
302
303}
314template<typename Float_ = double>
316public:
321 return my_max_value;
322 }
323
328 return my_max_value;
329 }
330
331private:
332 Float_ my_max_value = 0;
333
334public:
347 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
349 internal::crispr_filter(*this, num, metrics, false, output);
350 }
351
363 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Output_>
365 return filter(metrics.max_value.size(), internal::to_buffer(metrics), output);
366 }
367
378 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
380 std::vector<Output_> output(metrics.max_value.size()
383#endif
384 );
385 filter(metrics, output.data());
386 return output;
387 }
388};
389
423template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_>
433
446template<typename Float_ = double, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int>
453
459template<typename Float_ = double>
461public:
466 const std::vector<Float_>& get_max_value() const {
467 return my_max_value;
468 }
469
474 std::vector<Float_>& get_max_value() {
475 return my_max_value;
476 }
477
478private:
479 std::vector<Float_> my_sum;
480 std::vector<Float_> my_max_value;
481
482public:
498 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
500 internal::crispr_filter(*this, num, metrics, block, output);
501 }
502
517 template<typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_, typename Output_>
519 filter(metrics.max_value.size(), internal::to_buffer(metrics), block, output);
520 }
521
536 template<typename Output_ = uint8_t, typename Sum_ = double, typename Detected_ = int, typename Value_ = double, typename Index_ = int, typename Block_ = int>
538 std::vector<Output_> output(metrics.max_value.size()
541#endif
542 );
543 filter(metrics, block, output.data());
544 return output;
545 }
546};
547
567template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
578
593template<typename Float_ = double, typename Sum_, typename Detected_, typename Value_, typename Index_, typename Block_>
601
602}
603
604#endif
Define QC filter thresholds using a MAD-based approach.
Filter on using CRISPR-based QC metrics with blocking.
Definition crispr_quality_control.hpp:460
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block) const
Definition crispr_quality_control.hpp:537
void filter(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, Output_ *output) const
Definition crispr_quality_control.hpp:499
std::vector< Float_ > & get_max_value()
Definition crispr_quality_control.hpp:474
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, Output_ *output) const
Definition crispr_quality_control.hpp:518
const std::vector< Float_ > & get_max_value() const
Definition crispr_quality_control.hpp:466
Filter for high-quality cells using CRISPR-based metrics.
Definition crispr_quality_control.hpp:315
std::vector< Output_ > filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics) const
Definition crispr_quality_control.hpp:379
void filter(const ComputeCrisprQcMetricsResults< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *output) const
Definition crispr_quality_control.hpp:364
Float_ get_max_value() const
Definition crispr_quality_control.hpp:320
Float_ & get_max_value()
Definition crispr_quality_control.hpp:327
void filter(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, Output_ *output) const
Definition crispr_quality_control.hpp:348
Temporary data structures for find_median_mad_blocked().
Definition find_median_mad.hpp:176
Compute the median and MAD from an array of values.
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
CrisprQcBlockedFilters< Float_ > compute_crispr_qc_filters_blocked(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const Block_ *block, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:568
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::vector< FindMedianMadResults< Float_ > > mms, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:217
void compute_crispr_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const ComputeCrisprQcMetricsOptions &options)
Definition crispr_quality_control.hpp:99
FindMedianMadResults< Float_ > find_median_mad(Index_ num, Float_ *metrics, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:79
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
CrisprQcFilters< Float_ > compute_crispr_qc_filters(size_t num, const ComputeCrisprQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &metrics, const ComputeCrisprQcFiltersOptions &options)
Definition crispr_quality_control.hpp:424
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const FindMedianMadResults< Float_ > &mm, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:133
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked(Index_ num, const Value_ *metrics, const Block_ *block, FindMedianMadWorkspace< Output_, Index_ > *workspace, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:269
Compute per-cell quality control metrics.
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:20
Options for compute_crispr_qc_filters().
Definition crispr_quality_control.hpp:204
double max_value_num_mads
Definition crispr_quality_control.hpp:209
Buffers for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:43
Sum_ * sum
Definition crispr_quality_control.hpp:48
Detected_ * detected
Definition crispr_quality_control.hpp:54
Index_ * max_index
Definition crispr_quality_control.hpp:66
Value_ * max_value
Definition crispr_quality_control.hpp:60
Options for compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:25
int num_threads
Definition crispr_quality_control.hpp:30
Results of compute_crispr_qc_metrics().
Definition crispr_quality_control.hpp:123
std::vector< Index_ > max_index
Definition crispr_quality_control.hpp:142
std::vector< Value_ > max_value
Definition crispr_quality_control.hpp:137
std::vector< Detected_ > detected
Definition crispr_quality_control.hpp:132
std::vector< Sum_ > sum
Definition crispr_quality_control.hpp:127
Options for find_median_mad().
Definition find_median_mad.hpp:22
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22