1#ifndef SCRAN_QC_PER_CELL_QC_METRICS_HPP
2#define SCRAN_QC_PER_CELL_QC_METRICS_HPP
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
79template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
140template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
141void compute_qc_direct_dense(
143 const std::vector<Subset_>& subsets,
145 const int num_threads)
147 std::vector<std::vector<Index_> > subset_indices;
149 if constexpr(std::is_pointer<Subset_>::value) {
150 const auto nsubsets = subsets.size();
151 sanisizer::resize(subset_indices, nsubsets);
152 const auto NR = mat.
nrow();
154 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
155 auto& current = subset_indices[s];
156 const auto& source = subsets[s];
157 for (I<
decltype(NR)> i = 0; i < NR; ++i) {
159 current.push_back(i);
167 const auto NR = mat.
nrow();
173 const auto nsubsets = subsets.size();
175 for (Index_ c = start, end = start + length; c < end; ++c) {
176 auto ptr = ext->fetch(c, vbuffer.data());
179 output.
sum[c] = std::accumulate(ptr, ptr + NR,
static_cast<Sum_
>(0));
184 for (I<
decltype(NR)> r = 0; r < NR; ++r) {
185 count += (ptr[r] != 0);
191 Index_ max_index = 0;
192 Value_ max_value = 0;
196 for (I<
decltype(NR)> r = 1; r < NR; ++r) {
197 if (max_value < ptr[r]) {
213 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
214 const auto& sub = [&]() ->
const auto& {
215 if constexpr(std::is_pointer<Subset_>::value) {
216 return subset_indices[s];
224 for (
const auto r : sub) {
231 Detected_ current = 0;
232 for (
const auto r : sub) {
233 current += ptr[r] != 0;
240 }, mat.
ncol(), num_threads);
243template<
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_,
typename Value_>
244std::vector<std::vector<unsigned char> > boolify_subsets(
const Index_ NR,
const std::vector<Subset_>& subsets,
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output) {
245 std::vector<std::vector<unsigned char> > is_in_subset;
247 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
248 if constexpr(!std::is_pointer<Subset_>::value) {
249 const auto nsubsets = subsets.size();
250 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
251 is_in_subset.emplace_back(NR);
252 auto& last = is_in_subset.back();
253 for (
const auto i : subsets[s]) {
263template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
264void compute_qc_direct_sparse(
266 const std::vector<Subset_>& subsets,
267 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
268 const int num_threads)
270 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
273 const auto NR = mat.
nrow();
278 const bool do_max = output.max_index || output.max_value;
280 const auto nsubsets = subsets.size();
282 for (Index_ c = start, end = start + length; c < end; ++c) {
283 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
286 output.sum[c] = std::accumulate(range.value, range.value + range.number,
static_cast<Sum_
>(0));
289 if (output.detected) {
290 Detected_ current = 0;
291 for (Index_ i = 0; i < range.number; ++i) {
292 current += (range.value[i] != 0);
294 output.detected[c] = current;
298 Index_ max_index = 0;
299 Value_ max_value = 0;
302 max_value = range.value[0];
303 max_index = range.index[0];
304 for (Index_ i = 1; i < range.number; ++i) {
305 if (max_value < range.value[i]) {
306 max_value = range.value[i];
307 max_index = range.index[i];
311 if (max_value <= 0 && range.number < NR) {
312 if (output.max_index) {
315 for (Index_ i = 0; i < range.number; ++i) {
316 if (range.index[i] > last) {
318 }
else if (range.value[i] == 0) {
321 last = range.index[i] + 1;
331 if (output.max_index) {
332 output.max_index[c] = max_index;
334 if (output.max_value) {
335 output.max_value[c] = max_value;
339 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
340 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
341 const auto& sub = [&]() ->
const auto& {
342 if constexpr(std::is_pointer<Subset_>::value) {
345 return is_in_subset[s];
349 if (!output.subset_sum.empty() && output.subset_sum[s]) {
351 for (Index_ i = 0; i < range.number; ++i) {
352 current += (sub[range.index[i]] != 0) * range.value[i];
354 output.subset_sum[s][c] = current;
357 if (!output.subset_detected.empty() && output.subset_detected[s]) {
358 Detected_ current = 0;
359 for (Index_ i = 0; i < range.number; ++i) {
360 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
362 output.subset_detected[s][c] = current;
367 }, mat.
ncol(), num_threads);
370template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
371class PerCellQcMetricsRunningBuffers {
373 PerCellQcMetricsRunningBuffers(
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
const int thread,
const Index_ start,
const Index_ len) {
375 my_sum = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.sum);
378 if (output.detected) {
379 my_detected = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.detected);
382 if (output.max_value) {
383 my_max_value = tatami_stats::LocalOutputBuffer<Value_>(thread, start, len, output.max_value);
384 }
else if (output.max_index) {
388 if (output.max_index) {
389 my_max_index = tatami_stats::LocalOutputBuffer<Index_>(thread, start, len, output.max_index);
393 const auto nsubsets = output.subset_sum.size();
394 sanisizer::resize(my_subset_sum, nsubsets);
395 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
396 if (output.subset_sum[s]) {
397 my_subset_sum[s] = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.subset_sum[s]);
403 const auto nsubsets = output.subset_detected.size();
404 sanisizer::resize(my_subset_detected, nsubsets);
405 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
406 if (output.subset_detected[s]) {
407 my_subset_detected[s] = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.subset_detected[s]);
414 tatami_stats::LocalOutputBuffer<Sum_> my_sum;
415 tatami_stats::LocalOutputBuffer<Detected_> my_detected;
417 tatami_stats::LocalOutputBuffer<Value_> my_max_value;
418 std::vector<Value_> my_holding_max_value;
419 tatami_stats::LocalOutputBuffer<Index_> my_max_index;
421 std::vector<tatami_stats::LocalOutputBuffer<Sum_> > my_subset_sum;
422 std::vector<tatami_stats::LocalOutputBuffer<Detected_> > my_subset_detected;
426 return my_sum.data();
429 Detected_* detected_data() {
430 return my_detected.data();
433 Value_* max_value_data() {
434 auto dptr = my_max_value.data();
435 return (dptr ? dptr : my_holding_max_value.data());
438 Index_* max_index_data() {
439 return my_max_index.data();
442 std::vector<Sum_*> subset_sum_data() {
443 std::vector<Sum_*> output;
444 output.reserve(my_subset_sum.size());
445 for (
auto& s : my_subset_sum) {
446 output.push_back(s.data());
451 std::vector<Detected_*> subset_detected_data() {
452 std::vector<Detected_*> output;
453 output.reserve(my_subset_detected.size());
454 for (
auto& s : my_subset_detected) {
455 output.push_back(s.data());
464 if (my_detected.data()) {
465 my_detected.transfer();
468 if (my_max_value.data()) {
469 my_max_value.transfer();
471 if (my_max_index.data()) {
472 my_max_index.transfer();
475 for (
auto& s : my_subset_sum) {
481 for (
auto& s : my_subset_detected) {
489template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
490void compute_qc_running_dense(
492 const std::vector<Subset_>& subsets,
493 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
494 const int num_threads)
496 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
499 const auto NR = mat.
nrow();
503 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
504 const auto outt = locals.sum_data();
505 const auto outd = locals.detected_data();
506 const auto outmi = locals.max_index_data();
507 const auto outmc = locals.max_value_data();
508 const bool do_max = (outmi || outmc);
509 const auto outst = locals.subset_sum_data();
510 const auto outsd = locals.subset_detected_data();
512 const auto nsubsets = subsets.size();
514 for (Index_ r = 0; r < NR; ++r) {
515 auto ptr = ext->fetch(vbuffer.data());
518 for (Index_ i = 0; i < len; ++i) {
524 for (Index_ i = 0; i < len; ++i) {
525 outd[i] += (ptr[i] != 0);
531 std::copy_n(ptr, len, outmc);
533 std::fill_n(outmi, len, 0);
536 for (Index_ i = 0; i < len; ++i) {
537 auto& curmax = outmc[i];
538 if (curmax < ptr[i]) {
548 if (!outst.empty() || !outsd.empty()) {
549 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
550 const auto& sub = [&]() ->
const auto& {
551 if constexpr(std::is_pointer<Subset_>::value) {
554 return is_in_subset[s];
561 if (!outst.empty() && outst[s]) {
562 auto current = outst[s];
563 for (Index_ i = 0; i < len; ++i) {
564 current[i] += ptr[i];
568 if (!outsd.empty() && outsd[s]) {
569 auto current = outsd[s];
570 for (Index_ i = 0; i < len; ++i) {
571 current[i] += (ptr[i] != 0);
579 }, mat.
ncol(), num_threads);
582template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
583void compute_qc_running_sparse(
585 const std::vector<Subset_>& subsets,
586 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
587 const int num_threads)
591 const auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
594 const auto NR = mat.
nrow();
599 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
600 const auto outt = locals.sum_data();
601 const auto outd = locals.detected_data();
602 const auto outmi = locals.max_index_data();
603 const auto outmc = locals.max_value_data();
604 const bool do_max = (outmi || outmc);
605 const auto outst = locals.subset_sum_data();
606 const auto outsd = locals.subset_detected_data();
608 const auto nsubsets = subsets.size();
610 std::vector<Index_> last_consecutive_nonzero;
615 for (Index_ r = 0; r < NR; ++r) {
616 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
619 for (Index_ i = 0; i < range.number; ++i) {
620 outt[range.index[i] - start] += range.value[i];
625 for (Index_ i = 0; i < range.number; ++i) {
626 outd[range.index[i] - start] += (range.value[i] != 0);
632 std::fill_n(outmc, len, 0);
633 for (Index_ i = 0; i < range.number; ++i) {
634 auto j = range.index[i] - start;
635 outmc[j] = range.value[i];
636 last_consecutive_nonzero[j] = 1;
639 std::fill_n(outmi, len, 0);
643 for (Index_ i = 0; i < range.number; ++i) {
644 auto j = range.index[i] - start;
645 auto& curmax = outmc[j];
647 auto val = range.value[i];
658 auto& last = last_consecutive_nonzero[j];
666 if (!outst.empty() || !outsd.empty()) {
667 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
668 const auto& sub = [&]() ->
const auto& {
669 if constexpr(std::is_pointer<Subset_>::value) {
672 return is_in_subset[s];
679 if (!outst.empty() && outst[s]) {
680 auto current = outst[s];
681 for (Index_ i = 0; i < range.number; ++i) {
682 current[range.index[i] - start] += range.value[i];
686 if (!outsd.empty() && outsd[s]) {
687 auto current = outsd[s];
688 for (Index_ i = 0; i < range.number; ++i) {
689 current[range.index[i] - start] += (range.value[i] != 0);
697 const auto NR = mat.
nrow();
701 for (Index_ c = 0; c < len; ++c) {
702 auto last_nz = last_consecutive_nonzero[c];
707 auto& current = outmc[c];
720 }, mat.
ncol(), num_threads);
741template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
833template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
836 const std::vector<Subset_>& subsets,
842 internal::compute_qc_running_sparse(mat, subsets, output, options.
num_threads);
844 internal::compute_qc_direct_sparse(mat, subsets, output, options.
num_threads);
848 internal::compute_qc_running_dense(mat, subsets, output, options.
num_threads);
850 internal::compute_qc_direct_dense(mat, subsets, output, options.
num_threads);
876template<
typename Sum_ =
double,
typename Detected_ =
int,
typename Value_,
typename Index_,
typename Subset_>
879 const std::vector<Subset_>& subsets,
884 const auto ncells = mat.
ncol();
888#ifdef SCRAN_QC_TEST_INIT
892 buffers.
sum = output.
sum.data();
896#ifdef SCRAN_QC_TEST_INIT
904#ifdef SCRAN_QC_TEST_INIT
912#ifdef SCRAN_QC_TEST_INIT
919 const auto nsubsets = subsets.size();
922 sanisizer::resize(output.
subset_sum, nsubsets);
923 sanisizer::resize(buffers.
subset_sum, nsubsets);
924 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
926#ifdef SCRAN_QC_TEST_INIT
937 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
939#ifdef SCRAN_QC_TEST_INIT
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:23
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:834
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:80
Value_ * max_value
Definition per_cell_qc_metrics.hpp:116
Index_ * max_index
Definition per_cell_qc_metrics.hpp:110
Sum_ * sum
Definition per_cell_qc_metrics.hpp:98
Detected_ * detected
Definition per_cell_qc_metrics.hpp:104
std::vector< Detected_ * > subset_detected
Definition per_cell_qc_metrics.hpp:132
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:124
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool compute_sum
Definition per_cell_qc_metrics.hpp:30
bool compute_subset_sum
Definition per_cell_qc_metrics.hpp:54
bool compute_max_value
Definition per_cell_qc_metrics.hpp:42
bool compute_subset_detected
Definition per_cell_qc_metrics.hpp:60
bool compute_detected
Definition per_cell_qc_metrics.hpp:36
bool compute_max_index
Definition per_cell_qc_metrics.hpp:48
int num_threads
Definition per_cell_qc_metrics.hpp:66
Result store for QC metric calculations.
Definition per_cell_qc_metrics.hpp:742
std::vector< Index_ > max_index
Definition per_cell_qc_metrics.hpp:773
std::vector< Value_ > max_value
Definition per_cell_qc_metrics.hpp:779
std::vector< std::vector< Detected_ > > subset_detected
Definition per_cell_qc_metrics.hpp:793
std::vector< Detected_ > detected
Definition per_cell_qc_metrics.hpp:766
std::vector< Sum_ > sum
Definition per_cell_qc_metrics.hpp:760
std::vector< std::vector< Sum_ > > subset_sum
Definition per_cell_qc_metrics.hpp:786
bool sparse_ordered_index