1#ifndef SCRAN_QC_CHOOSE_FILTER_THRESHOLDS_HPP
2#define SCRAN_QC_CHOOSE_FILTER_THRESHOLDS_HPP
11#include "sanisizer/sanisizer.hpp"
12#include "quickstats/quickstats.hpp"
77template<
typename Float_>
97template<
typename Float_>
98Float_ unlog_threshold(
const Float_ val,
const bool was_logged) {
100 if (std::isinf(val)) {
105 return std::exp(val);
111template<
typename Float_>
112ChooseFilterThresholdsResults<Float_> choose_filter_thresholds_internal(
113 std::size_t num_cells,
115 const ChooseFilterThresholdsOptions& options
117 static_assert(std::is_floating_point<Float_>::value);
120 I<
decltype(num_cells)> lost = 0;
121 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
122 if (std::isnan(metrics[i])) {
123 std::swap(metrics[i], metrics[lost]);
132 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
133 auto& val = metrics[i];
136 }
else if (val == 0) {
137 val = -std::numeric_limits<double>::infinity();
139 throw std::runtime_error(
"cannot log-transform negative values");
144 ChooseFilterThresholdsResults<Float_> output;
145 Float_& lthresh = output.
lower;
146 Float_& uthresh = output.upper;
147 lthresh = -std::numeric_limits<Float_>::infinity();
148 uthresh = std::numeric_limits<Float_>::infinity();
150 const auto median = quickstats::median<Float_>(num_cells, metrics);
151 const auto mad = quickstats::scale_mad_to_sd(quickstats::mad_with_infinities<Float_>(num_cells, metrics, median));
153 if (!std::isnan(mad)) {
154 const auto delta = std::max(
static_cast<Float_
>(options.min_diff),
static_cast<Float_
>(options.num_mads * mad));
156 const auto threshold = median - delta;
157 if (!std::isnan(threshold)) {
158 lthresh = unlog_threshold(threshold, options.log);
162 const auto threshold = median + delta;
163 if (!std::isnan(threshold)) {
164 uthresh = unlog_threshold(threshold, options.log);
193template<
typename Value_,
typename Float_>
195 const std::size_t num_cells,
196 const Value_*
const metrics,
197 Float_*
const buffer,
202 if constexpr(std::is_same<Value_, Float_>::value) {
203 if (metrics == buffer) {
207 std::copy_n(metrics, num_cells, buffer);
209 return choose_filter_thresholds_internal(num_cells, buffer, options);
219template<
typename Float_>
229 template<
typename Block_>
243 std::vector<Float_> buffer;
244 std::vector<std::size_t> block_starts;
245 std::vector<std::size_t> block_offsets;
265template<
typename Float_,
typename Block_>
268 const std::size_t num_cells,
269 const Block_*
const block,
270 const std::size_t num_blocks
272 work.block_starts.clear();
274 sanisizer::resize(work.block_starts, num_blocks);
275 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
276 ++work.block_starts[block[i]];
279 std::size_t sofar = 0;
280 for (
auto& s : work.block_starts) {
281 const auto last = sofar;
286 sanisizer::resize(work.buffer, num_cells
287#ifdef SCRAN_QC_TEST_INIT
292 sanisizer::resize(work.block_offsets, num_blocks
293#ifdef SCRAN_QC_TEST_INIT
326template<
typename Value_,
typename Block_,
typename Float_>
328 const std::size_t num_cells,
329 const Value_*
const metrics,
330 const Block_*
const block,
331 const std::size_t num_blocks,
335 std::vector<ChooseFilterThresholdsResults<Float_> > output;
336 output.reserve(num_blocks);
337 process_blocks_for_choose_filter_thresholds(
343 [&](
const std::size_t len, Float_*
const ptr) ->
void {
344 output.push_back(choose_filter_thresholds_internal<Float_>(len, ptr, options));
353template<
typename Value_,
typename Block_,
typename Float_,
class Function_>
354void process_blocks_for_choose_filter_thresholds(
355 const std::size_t num_cells,
356 const Value_*
const metrics,
357 const Block_*
const block,
358 const std::size_t num_blocks,
359 ChooseFilterThresholdsBlockedWorkspace<Float_>& workspace,
362 assert(num_cells == workspace.buffer.size());
363 assert(num_blocks == workspace.block_starts.size());
365 auto& buffer = workspace.buffer;
366 const auto& starts = workspace.block_starts;
367 auto& offsets = workspace.block_offsets;
368 std::copy(starts.begin(), starts.end(), offsets.begin());
369 for (I<
decltype(num_cells)> i = 0; i < num_cells; ++i) {
370 auto& pos = offsets[block[i]];
371 buffer[pos] = metrics[i];
375 for (I<
decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
376 fun(offsets[g] - starts[g], buffer.data() + starts[g]);
382template<
bool lower_,
typename Float_>
383std::vector<Float_> extract_filter_thresholds(
const std::vector<ChooseFilterThresholdsResults<Float_> >& res) {
384 std::vector<Float_> output;
385 output.reserve(res.size());
386 for (
const auto& r : res) {
387 if constexpr(lower_) {
388 output.push_back(r.lower);
390 output.push_back(r.upper);
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
std::vector< ChooseFilterThresholdsResults< Float_ > > choose_filter_thresholds_blocked(const std::size_t num_cells, const Value_ *const metrics, const Block_ *const block, const std::size_t num_blocks, ChooseFilterThresholdsBlockedWorkspace< Float_ > &workspace, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:327
void reset_choose_filter_thresholds_blocked_workspace(ChooseFilterThresholdsBlockedWorkspace< Float_ > &work, const std::size_t num_cells, const Block_ *const block, const std::size_t num_blocks)
Definition choose_filter_thresholds.hpp:266
ChooseFilterThresholdsResults< Float_ > choose_filter_thresholds(const std::size_t num_cells, const Value_ *const metrics, Float_ *const buffer, const ChooseFilterThresholdsOptions &options)
Definition choose_filter_thresholds.hpp:194
Workspace for choose_filter_thresholds_blocked().
Definition choose_filter_thresholds.hpp:220
ChooseFilterThresholdsBlockedWorkspace()=default
ChooseFilterThresholdsBlockedWorkspace(const std::size_t num_cells, const Block_ *const block, const std::size_t num_blocks)
Definition choose_filter_thresholds.hpp:230
Options for choose_filter_thresholds().
Definition choose_filter_thresholds.hpp:26
bool upper
Definition choose_filter_thresholds.hpp:37
bool log
Definition choose_filter_thresholds.hpp:70
double num_mads
Definition choose_filter_thresholds.hpp:44
double min_diff
Definition choose_filter_thresholds.hpp:51
bool lower
Definition choose_filter_thresholds.hpp:31
Results of compute_adt_qc_metrics().
Definition choose_filter_thresholds.hpp:78
Float_ upper
Definition choose_filter_thresholds.hpp:91
Float_ lower
Definition choose_filter_thresholds.hpp:84