1#ifndef SCRAN_QC_PER_CELL_QC_METRICS_HPP
2#define SCRAN_QC_PER_CELL_QC_METRICS_HPP
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
77template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
138template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
139void compute_qc_direct_dense(
141 const std::vector<Subset_>& subsets,
143 const int num_threads)
145 std::vector<std::vector<Index_> > subset_indices;
147 if constexpr(std::is_pointer<Subset_>::value) {
148 const auto nsubsets = subsets.size();
149 sanisizer::resize(subset_indices, nsubsets);
150 const auto NR = mat.
nrow();
152 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
153 auto& current = subset_indices[s];
154 const auto& source = subsets[s];
155 for (
decltype(I(NR)) i = 0; i < NR; ++i) {
157 current.push_back(i);
165 const auto NR = mat.
nrow();
171 const auto nsubsets = subsets.size();
173 for (Index_ c = start, end = start + length; c < end; ++c) {
174 auto ptr = ext->fetch(c, vbuffer.data());
177 output.
sum[c] = std::accumulate(ptr, ptr + NR,
static_cast<Sum_
>(0));
182 for (
decltype(I(NR)) r = 0; r < NR; ++r) {
183 count += (ptr[r] != 0);
189 Index_ max_index = 0;
190 Value_ max_value = 0;
194 for (
decltype(I(NR)) r = 1; r < NR; ++r) {
195 if (max_value < ptr[r]) {
211 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
212 const auto& sub = [&]() ->
const auto& {
213 if constexpr(std::is_pointer<Subset_>::value) {
214 return subset_indices[s];
222 for (
const auto r : sub) {
229 Detected_ current = 0;
230 for (
const auto r : sub) {
231 current += ptr[r] != 0;
238 }, mat.
ncol(), num_threads);
241template<
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_,
typename Value_>
242std::vector<std::vector<unsigned char> > boolify_subsets(
const Index_ NR,
const std::vector<Subset_>& subsets,
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output) {
243 std::vector<std::vector<unsigned char> > is_in_subset;
245 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
246 if constexpr(!std::is_pointer<Subset_>::value) {
247 const auto nsubsets = subsets.size();
248 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
249 is_in_subset.emplace_back(NR);
250 auto& last = is_in_subset.back();
251 for (
const auto i : subsets[s]) {
261template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
262void compute_qc_direct_sparse(
264 const std::vector<Subset_>& subsets,
265 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
266 const int num_threads)
268 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
271 const auto NR = mat.
nrow();
276 const bool do_max = output.max_index || output.max_value;
278 const auto nsubsets = subsets.size();
280 for (Index_ c = start, end = start + length; c < end; ++c) {
281 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
284 output.sum[c] = std::accumulate(range.value, range.value + range.number,
static_cast<Sum_
>(0));
287 if (output.detected) {
288 Detected_ current = 0;
289 for (Index_ i = 0; i < range.number; ++i) {
290 current += (range.value[i] != 0);
292 output.detected[c] = current;
296 Index_ max_index = 0;
297 Value_ max_value = 0;
300 max_value = range.value[0];
301 max_index = range.index[0];
302 for (Index_ i = 1; i < range.number; ++i) {
303 if (max_value < range.value[i]) {
304 max_value = range.value[i];
305 max_index = range.index[i];
309 if (max_value <= 0 && range.number < NR) {
310 if (output.max_index) {
313 for (Index_ i = 0; i < range.number; ++i) {
314 if (range.index[i] > last) {
316 }
else if (range.value[i] == 0) {
319 last = range.index[i] + 1;
329 if (output.max_index) {
330 output.max_index[c] = max_index;
332 if (output.max_value) {
333 output.max_value[c] = max_value;
337 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
338 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
339 const auto& sub = [&]() ->
const auto& {
340 if constexpr(std::is_pointer<Subset_>::value) {
343 return is_in_subset[s];
347 if (!output.subset_sum.empty() && output.subset_sum[s]) {
349 for (Index_ i = 0; i < range.number; ++i) {
350 current += (sub[range.index[i]] != 0) * range.value[i];
352 output.subset_sum[s][c] = current;
355 if (!output.subset_detected.empty() && output.subset_detected[s]) {
356 Detected_ current = 0;
357 for (Index_ i = 0; i < range.number; ++i) {
358 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
360 output.subset_detected[s][c] = current;
365 }, mat.
ncol(), num_threads);
368template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
369class PerCellQcMetricsRunningBuffers {
371 PerCellQcMetricsRunningBuffers(
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
const int thread,
const Index_ start,
const Index_ len) {
373 my_sum = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.sum);
376 if (output.detected) {
377 my_detected = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.detected);
380 if (output.max_value) {
381 my_max_value = tatami_stats::LocalOutputBuffer<Value_>(thread, start, len, output.max_value);
382 }
else if (output.max_index) {
386 if (output.max_index) {
387 my_max_index = tatami_stats::LocalOutputBuffer<Index_>(thread, start, len, output.max_index);
391 const auto nsubsets = output.subset_sum.size();
392 sanisizer::resize(my_subset_sum, nsubsets);
393 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
394 if (output.subset_sum[s]) {
395 my_subset_sum[s] = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.subset_sum[s]);
401 const auto nsubsets = output.subset_detected.size();
402 sanisizer::resize(my_subset_detected, nsubsets);
403 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
404 if (output.subset_detected[s]) {
405 my_subset_detected[s] = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.subset_detected[s]);
412 tatami_stats::LocalOutputBuffer<Sum_> my_sum;
413 tatami_stats::LocalOutputBuffer<Detected_> my_detected;
415 tatami_stats::LocalOutputBuffer<Value_> my_max_value;
416 std::vector<Value_> my_holding_max_value;
417 tatami_stats::LocalOutputBuffer<Index_> my_max_index;
419 std::vector<tatami_stats::LocalOutputBuffer<Sum_> > my_subset_sum;
420 std::vector<tatami_stats::LocalOutputBuffer<Detected_> > my_subset_detected;
424 return my_sum.data();
427 Detected_* detected_data() {
428 return my_detected.data();
431 Value_* max_value_data() {
432 auto dptr = my_max_value.data();
433 return (dptr ? dptr : my_holding_max_value.data());
436 Index_* max_index_data() {
437 return my_max_index.data();
440 std::vector<Sum_*> subset_sum_data() {
441 std::vector<Sum_*> output;
442 output.reserve(my_subset_sum.size());
443 for (
auto& s : my_subset_sum) {
444 output.push_back(s.data());
449 std::vector<Detected_*> subset_detected_data() {
450 std::vector<Detected_*> output;
451 output.reserve(my_subset_detected.size());
452 for (
auto& s : my_subset_detected) {
453 output.push_back(s.data());
462 if (my_detected.data()) {
463 my_detected.transfer();
466 if (my_max_value.data()) {
467 my_max_value.transfer();
469 if (my_max_index.data()) {
470 my_max_index.transfer();
473 for (
auto& s : my_subset_sum) {
479 for (
auto& s : my_subset_detected) {
487template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
488void compute_qc_running_dense(
490 const std::vector<Subset_>& subsets,
491 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
492 const int num_threads)
494 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
497 const auto NR = mat.
nrow();
501 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
502 const auto outt = locals.sum_data();
503 const auto outd = locals.detected_data();
504 const auto outmi = locals.max_index_data();
505 const auto outmc = locals.max_value_data();
506 const bool do_max = (outmi || outmc);
507 const auto outst = locals.subset_sum_data();
508 const auto outsd = locals.subset_detected_data();
510 const auto nsubsets = subsets.size();
512 for (Index_ r = 0; r < NR; ++r) {
513 auto ptr = ext->fetch(vbuffer.data());
516 for (Index_ i = 0; i < len; ++i) {
522 for (Index_ i = 0; i < len; ++i) {
523 outd[i] += (ptr[i] != 0);
529 std::copy_n(ptr, len, outmc);
531 std::fill_n(outmi, len, 0);
534 for (Index_ i = 0; i < len; ++i) {
535 auto& curmax = outmc[i];
536 if (curmax < ptr[i]) {
546 if (!outst.empty() || !outsd.empty()) {
547 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
548 const auto& sub = [&]() ->
const auto& {
549 if constexpr(std::is_pointer<Subset_>::value) {
552 return is_in_subset[s];
559 if (!outst.empty() && outst[s]) {
560 auto current = outst[s];
561 for (Index_ i = 0; i < len; ++i) {
562 current[i] += ptr[i];
566 if (!outsd.empty() && outsd[s]) {
567 auto current = outsd[s];
568 for (Index_ i = 0; i < len; ++i) {
569 current[i] += (ptr[i] != 0);
577 }, mat.
ncol(), num_threads);
580template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
581void compute_qc_running_sparse(
583 const std::vector<Subset_>& subsets,
584 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
585 const int num_threads)
589 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
592 const auto NR = mat.
nrow();
597 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
598 const auto outt = locals.sum_data();
599 const auto outd = locals.detected_data();
600 const auto outmi = locals.max_index_data();
601 const auto outmc = locals.max_value_data();
602 const bool do_max = (outmi || outmc);
603 const auto outst = locals.subset_sum_data();
604 const auto outsd = locals.subset_detected_data();
606 const auto nsubsets = subsets.size();
608 std::vector<Index_> last_consecutive_nonzero;
613 for (Index_ r = 0; r < NR; ++r) {
614 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
617 for (Index_ i = 0; i < range.number; ++i) {
618 outt[range.index[i] - start] += range.value[i];
623 for (Index_ i = 0; i < range.number; ++i) {
624 outd[range.index[i] - start] += (range.value[i] != 0);
630 std::fill_n(outmc, len, 0);
631 for (Index_ i = 0; i < range.number; ++i) {
632 auto j = range.index[i] - start;
633 outmc[j] = range.value[i];
634 last_consecutive_nonzero[j] = 1;
637 std::fill_n(outmi, len, 0);
641 for (Index_ i = 0; i < range.number; ++i) {
642 auto j = range.index[i] - start;
643 auto& curmax = outmc[j];
645 auto val = range.value[i];
656 auto& last = last_consecutive_nonzero[j];
664 if (!outst.empty() || !outsd.empty()) {
665 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
666 const auto& sub = [&]() ->
const auto& {
667 if constexpr(std::is_pointer<Subset_>::value) {
670 return is_in_subset[s];
677 if (!outst.empty() && outst[s]) {
678 auto current = outst[s];
679 for (Index_ i = 0; i < range.number; ++i) {
680 current[range.index[i] - start] += range.value[i];
684 if (!outsd.empty() && outsd[s]) {
685 auto current = outsd[s];
686 for (Index_ i = 0; i < range.number; ++i) {
687 current[range.index[i] - start] += (range.value[i] != 0);
695 const auto NR = mat.
nrow();
699 for (Index_ c = 0; c < len; ++c) {
700 auto last_nz = last_consecutive_nonzero[c];
705 auto& current = outmc[c];
718 }, mat.
ncol(), num_threads);
737template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
827template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
830 const std::vector<Subset_>& subsets,
836 internal::compute_qc_running_sparse(mat, subsets, output, options.
num_threads);
838 internal::compute_qc_direct_sparse(mat, subsets, output, options.
num_threads);
842 internal::compute_qc_running_dense(mat, subsets, output, options.
num_threads);
844 internal::compute_qc_direct_dense(mat, subsets, output, options.
num_threads);
868template<
typename Sum_ =
double,
typename Detected_ =
int,
typename Value_,
typename Index_,
typename Subset_>
871 const std::vector<Subset_>& subsets,
876 const auto ncells = mat.
ncol();
880#ifdef SCRAN_QC_TEST_INIT
884 buffers.
sum = output.
sum.data();
888#ifdef SCRAN_QC_TEST_INIT
896#ifdef SCRAN_QC_TEST_INIT
904#ifdef SCRAN_QC_TEST_INIT
911 const auto nsubsets = subsets.size();
914 sanisizer::resize(output.
subset_sum, nsubsets);
915 sanisizer::resize(buffers.
subset_sum, nsubsets);
916 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
918#ifdef SCRAN_QC_TEST_INIT
929 for (
decltype(I(nsubsets)) s = 0; s < nsubsets; ++s) {
931#ifdef SCRAN_QC_TEST_INIT
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:828
void parallelize(Function_ fun, Index_ tasks, int threads)
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:78
Value_ * max_value
Definition per_cell_qc_metrics.hpp:114
Index_ * max_index
Definition per_cell_qc_metrics.hpp:108
Sum_ * sum
Definition per_cell_qc_metrics.hpp:96
Detected_ * detected
Definition per_cell_qc_metrics.hpp:102
std::vector< Detected_ * > subset_detected
Definition per_cell_qc_metrics.hpp:130
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:122
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool compute_sum
Definition per_cell_qc_metrics.hpp:30
bool compute_subset_sum
Definition per_cell_qc_metrics.hpp:54
bool compute_max_value
Definition per_cell_qc_metrics.hpp:42
bool compute_subset_detected
Definition per_cell_qc_metrics.hpp:60
bool compute_detected
Definition per_cell_qc_metrics.hpp:36
bool compute_max_index
Definition per_cell_qc_metrics.hpp:48
int num_threads
Definition per_cell_qc_metrics.hpp:66
Result store for QC metric calculations.
Definition per_cell_qc_metrics.hpp:738
std::vector< Index_ > max_index
Definition per_cell_qc_metrics.hpp:769
std::vector< Value_ > max_value
Definition per_cell_qc_metrics.hpp:775
std::vector< std::vector< Detected_ > > subset_detected
Definition per_cell_qc_metrics.hpp:789
std::vector< Detected_ > detected
Definition per_cell_qc_metrics.hpp:762
std::vector< Sum_ > sum
Definition per_cell_qc_metrics.hpp:756
std::vector< std::vector< Sum_ > > subset_sum
Definition per_cell_qc_metrics.hpp:782
bool sparse_ordered_index