1#ifndef SCRAN_QC_FIND_MEDIAN_MAD_H
2#define SCRAN_QC_FIND_MEDIAN_MAD_H
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
44template<
typename Float_>
80template<
typename Float_>
82 static_assert(std::is_floating_point<Float_>::value);
85 decltype(I(num)) lost = 0;
86 for (
decltype(I(num)) i = 0; i < num; ++i) {
87 if (std::isnan(metrics[i])) {
88 std::swap(metrics[i], metrics[lost]);
96 for (
decltype(I(num)) i = 0; i < num; ++i) {
97 auto& val = metrics[i];
100 }
else if (val == 0) {
101 val = -std::numeric_limits<double>::infinity();
103 throw std::runtime_error(
"cannot log-transform negative values");
109 const auto median = tatami_stats::medians::direct<Float_>(metrics, num,
false);
114 }
else if (std::isinf(median)) {
127 for (
decltype(I(num)) i = 0; i < num; ++i) {
128 metrics[i] = std::abs(metrics[i] - median);
130 auto mad = tatami_stats::medians::direct<Float_>(metrics, num,
false);
152template<
typename Float_ =
double,
typename Value_>
154 std::vector<Float_> xbuffer;
155 if (buffer == NULL) {
156 sanisizer::resize(xbuffer, num
157#ifdef SCRAN_QC_TEST_INIT
161 buffer = xbuffer.data();
163 std::copy_n(metrics, num, buffer);
174template<
typename Float_>
184 template<
typename Block_>
186 my_buffer(sanisizer::cast<decltype(I(my_buffer.size()))>(num))
203 template<
typename Block_>
204 void set(
const std::size_t num,
const Block_*
const block) {
205 my_block_starts.clear();
208 for (
decltype(I(num)) i = 0; i < num; ++i) {
209 const auto candidate = block[i];
210 if (sanisizer::is_greater_than_or_equal(candidate, my_block_starts.size())) {
211 my_block_starts.resize(sanisizer::sum<
decltype(I(my_block_starts.size()))>(candidate, 1));
213 ++my_block_starts[candidate];
216 std::size_t sofar = 0;
217 for (
auto& s : my_block_starts) {
218 const auto last = sofar;
224 sanisizer::resize(my_buffer, num
225#ifdef SCRAN_QC_TEST_INIT
229 sanisizer::resize(my_block_ends, num
230#ifdef SCRAN_QC_TEST_INIT
242 std::vector<Float_> my_buffer;
243 std::vector<std::size_t> my_block_starts;
244 std::vector<std::size_t> my_block_ends;
272template<
typename Output_ =
double,
typename Value_,
typename Block_>
274 const std::size_t num,
275 const Value_*
const metrics,
276 const Block_*
const block,
280 std::unique_ptr<FindMedianMadWorkspace<Output_> > xworkspace;
281 if (workspace == NULL) {
282 xworkspace = std::make_unique<FindMedianMadWorkspace<Output_> >(num, block);
283 workspace = xworkspace.get();
286 std::vector<FindMedianMadResults<Output_> > output;
288 auto& buffer = workspace->my_buffer;
290 std::copy_n(metrics, num, buffer.begin());
295 const auto& starts = workspace->my_block_starts;
296 auto& ends = workspace->my_block_ends;
297 std::copy(starts.begin(), starts.end(), ends.begin());
298 for (
decltype(I(num)) i = 0; i < num; ++i) {
299 auto& pos = ends[block[i]];
300 buffer[pos] = metrics[i];
305 const auto nblocks = starts.size();
306 output.reserve(nblocks);
307 for (
decltype(I(nblocks)) g = 0; g < nblocks; ++g) {
308 output.push_back(
find_median_mad(ends[g] - starts[g], buffer.data() + starts[g], options));
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
FindMedianMadResults< Float_ > find_median_mad(std::size_t num, Float_ *metrics, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:81
std::vector< FindMedianMadResults< Output_ > > find_median_mad_blocked(const std::size_t num, const Value_ *const metrics, const Block_ *const block, FindMedianMadWorkspace< Output_ > *workspace, const FindMedianMadOptions &options)
Definition find_median_mad.hpp:273