1#ifndef SCRAN_QC_PER_CELL_QC_METRICS_HPP
2#define SCRAN_QC_PER_CELL_QC_METRICS_HPP
11#include "sanisizer/sanisizer.hpp"
96template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
156template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
157void per_cell_qc_metrics_direct_dense(
159 const std::vector<Subset_>& subsets,
163 const auto NR = mat.
nrow();
165 const auto nsubsets = subsets.size();
168 std::optional<std::vector<std::vector<Index_> > > subset_indices;
169 if (report_subsets) {
171 if constexpr(!std::is_pointer<Subset_>::value) {
177 subset_indices.emplace(sanisizer::cast<I<
decltype(subset_indices->size())> >(nsubsets));
178 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
179 auto& current = (*subset_indices)[s];
180 const auto& source = subsets[s];
181 for (I<
decltype(NR)> i = 0; i < NR; ++i) {
183 current.push_back(i);
194 for (Index_ c = start, end = start + length; c < end; ++c) {
195 auto ptr = ext->fetch(c, vbuffer.data());
198 output.
sum[c] = std::accumulate(ptr, ptr + NR,
static_cast<Sum_
>(0));
203 for (I<
decltype(NR)> r = 0; r < NR; ++r) {
204 count += (ptr[r] != 0);
211 const auto it = std::max_element(ptr, ptr + NR);
228 if (report_subsets) {
229 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
230 if constexpr(!std::is_pointer<Subset_>::value) {
234 const auto& sub = subsets[s];
237 for (
const auto r : sub) {
243 Detected_ current = 0;
244 for (
const auto r : sub) {
245 current += ptr[r] != 0;
253 const auto& sub = (*subset_indices)[s];
256 for (
const auto r : sub) {
262 Detected_ current = 0;
263 for (
const auto r : sub) {
264 current += ptr[r] != 0;
274template<
typename Index_,
typename Subset_>
275std::vector<std::vector<unsigned char> > boolify_subsets(
const Index_ NR,
const std::vector<Subset_>& subsets) {
276 const auto nsubsets = subsets.size();
277 auto output = sanisizer::create<std::vector<std::vector<unsigned char> > >(nsubsets);
278 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
279 auto& current = output[s];
281 for (
const auto i : subsets[s]) {
288template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
289void per_cell_qc_metrics_direct_sparse(
291 const std::vector<Subset_>& subsets,
292 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
293 const PerCellQcMetricsOptions& options
295 const auto NR = mat.
nrow();
296 const bool report_max = output.max_value || output.max_index;
297 const auto nsubsets = subsets.size();
298 const bool report_subsets = output.subset_sum.size() || output.subset_detected.size();
300 std::optional<std::vector<std::vector<unsigned char> > > is_in_subset;
301 if (report_subsets) {
302 if constexpr(!std::is_pointer<Subset_>::value) {
303 if (options.subset_containers_have_indices) {
304 is_in_subset = boolify_subsets(NR, subsets);
314 for (Index_ c = start, end = start + length; c < end; ++c) {
315 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
318 output.sum[c] = std::accumulate(range.value, range.value + range.number,
static_cast<Sum_
>(0));
321 if (output.detected) {
322 Detected_ current = 0;
323 for (Index_ i = 0; i < range.number; ++i) {
324 current += (range.value[i] != 0);
326 output.detected[c] = current;
331 const auto it = std::max_element(range.value, range.value + range.number);
332 if (*it > 0 || range.number == NR) {
333 if (output.max_value) {
334 output.max_value[c] = *it;
336 if (output.max_index) {
337 output.max_index[c] = range.index[it - range.value];
340 if (output.max_value) {
341 output.max_value[c] = 0;
343 if (output.max_index) {
346 output.max_index[c] = range.number;
347 for (Index_ i = 0; i < range.number; ++i) {
348 if (range.index[i] != i) {
349 output.max_index[c] = i;
355 const Index_ candidate = it - range.value;
356 output.max_index[c] = range.index[candidate];
357 for (Index_ i = 0; i <= candidate; ++i) {
358 if (range.index[i] != i) {
359 output.max_index[c] = i;
367 if (output.max_value) {
368 output.max_value[c] = 0;
370 if (output.max_index) {
371 output.max_index[c] = 0;
376 if (report_subsets) {
377 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
378 if constexpr(!std::is_pointer<Subset_>::value) {
379 if (options.subset_containers_have_indices) {
380 const auto& sub = (*is_in_subset)[s];
381 if (!output.subset_sum.empty() && output.subset_sum[s]) {
383 for (Index_ i = 0; i < range.number; ++i) {
384 current += (sub[range.index[i]] != 0) * range.value[i];
386 output.subset_sum[s][c] = current;
388 if (!output.subset_detected.empty() && output.subset_detected[s]) {
389 Detected_ current = 0;
390 for (Index_ i = 0; i < range.number; ++i) {
391 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
393 output.subset_detected[s][c] = current;
399 const auto& sub = subsets[s];
400 if (!output.subset_sum.empty() && output.subset_sum[s]) {
402 for (Index_ i = 0; i < range.number; ++i) {
403 current += (sub[range.index[i]] != 0) * range.value[i];
405 output.subset_sum[s][c] = current;
407 if (!output.subset_detected.empty() && output.subset_detected[s]) {
408 Detected_ current = 0;
409 for (Index_ i = 0; i < range.number; ++i) {
410 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
412 output.subset_detected[s][c] = current;
417 }, mat.
ncol(), options.num_threads);
420template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
421void per_cell_qc_metrics_running(
423 const std::vector<Subset_>& subsets,
424 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
425 const PerCellQcMetricsOptions& options
427 const auto NR = mat.
nrow();
428 const auto NC = mat.
ncol();
435 const bool report_max = output.max_value || output.max_index;
436 std::optional<std::vector<Value_> > tmp_max_value;
437 Value_* max_value_output_ptr;
439 if (!output.max_value) {
441 max_value_output_ptr = tmp_max_value->data();
443 max_value_output_ptr = output.max_value;
447 const bool do_parallel = options.num_threads > 1;
448 std::optional<std::vector<std::optional<std::vector<Sum_> > > > partial_sum;
449 std::optional<std::vector<std::optional<std::vector<Detected_> > > > partial_detected;
450 std::optional<std::vector<std::optional<std::vector<Value_> > > > partial_max_value;
451 std::optional<std::vector<std::optional<std::vector<Index_> > > > partial_max_index;
452 std::optional<std::vector<std::optional<std::vector<std::vector<Sum_> > > > > partial_subset_sum;
453 std::optional<std::vector<std::optional<std::vector<std::vector<Detected_> > > > > partial_subset_detected;
458 if (output.detected) {
463 if (output.max_index) {
467 if (output.subset_sum.size()) {
470 if (output.subset_detected.size()) {
477 std::fill_n(output.sum, NC, 0);
479 if (output.detected) {
480 std::fill_n(output.detected, NC, 0);
482 if (report_max && NR == 0) {
483 std::fill_n(output.max_value, NC, 0);
484 if (output.max_index) {
485 std::fill_n(output.max_index, NC, 0);
488 for (
const auto sptr : output.subset_sum) {
490 std::fill_n(sptr, NC, 0);
493 for (
const auto sptr : output.subset_detected) {
495 std::fill_n(sptr, NC, 0);
499 const auto nsubsets = subsets.size();
500 const bool report_subsets = output.subset_sum.size() || output.subset_detected.size();
501 std::optional<std::vector<std::vector<unsigned char> > > is_in_subset;
502 if (report_subsets) {
503 if constexpr(!std::is_pointer<Subset_>::value) {
504 if (options.subset_containers_have_indices) {
505 is_in_subset = boolify_subsets(NR, subsets);
510 const auto num_used =
tatami::parallelize([&](
int thread, Index_ start, Index_ len) ->
void {
515 Sum_* sum_ptr = NULL;
516 std::optional<std::vector<Sum_> > sum_buffer;
517 Detected_* detected_ptr = NULL;
518 std::optional<std::vector<Detected_> > detected_buffer;
519 Value_* max_value_ptr = NULL;
520 std::optional<std::vector<Value_> > max_value_buffer;
521 Index_* max_index_ptr = NULL;
522 std::optional<std::vector<Index_> > max_index_buffer;
524 Sum_*
const * subset_sum_ptr = NULL;
525 std::optional<std::vector<Sum_*> > subset_sum_ptrs;
526 std::optional<std::vector<std::vector<Sum_> > > subset_sum_buffers;
527 Detected_*
const * subset_detected_ptr = NULL;
528 std::optional<std::vector<Detected_*> > subset_detected_ptrs;
529 std::optional<std::vector<std::vector<Detected_> > > subset_detected_buffers;
531 if (!do_parallel || thread == 0) {
532 sum_ptr = output.sum;
533 detected_ptr = output.detected;
534 max_value_ptr = max_value_output_ptr;
535 max_index_ptr = output.max_index;
536 if (output.subset_sum.size()) {
537 subset_sum_ptr = output.subset_sum.data();
539 if (output.subset_detected.size()) {
540 subset_detected_ptr = output.subset_detected.data();
546 sum_ptr = sum_buffer->data();
548 if (output.detected) {
550 detected_ptr = detected_buffer->data();
554 max_value_ptr = max_value_buffer->data();
555 if (output.max_index) {
557 max_index_ptr = max_index_buffer->data();
560 if (output.subset_sum.size()) {
563 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
564 if (output.subset_sum[s]) {
566 (*subset_sum_ptrs)[s] = (*subset_sum_buffers)[s].data();
568 (*subset_sum_ptrs)[s] = NULL;
573 if (output.subset_detected.size()) {
576 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
577 if (output.subset_detected[s]) {
579 (*subset_detected_ptrs)[s] = (*subset_detected_buffers)[s].data();
581 (*subset_detected_ptrs)[s] = NULL;
599 std::optional<std::vector<Index_> > nonzeros_at_start;
604 for (Index_ r = 0; r < len; ++r) {
605 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
607 for (Index_ i = 0; i < range.number; ++i) {
608 sum_ptr[range.index[i]] += range.value[i];
612 for (Index_ i = 0; i < range.number; ++i) {
613 detected_ptr[range.index[i]] += (range.value[i] != 0);
619 std::fill_n(max_value_ptr, NC, 0);
620 for (Index_ i = 0; i < range.number; ++i) {
621 const auto j = range.index[i];
622 max_value_ptr[j] = range.value[i];
623 (*nonzeros_at_start)[j] = 1;
626 std::fill_n(max_index_ptr, NC, start);
630 for (Index_ i = 0; i < range.number; ++i) {
631 const auto val = range.value[i];
632 const auto j = range.index[i];
633 auto& curmax = max_value_ptr[j];
637 max_index_ptr[j] = start + r;
640 auto& last = (*nonzeros_at_start)[j];
649 if (report_subsets) {
650 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
651 const auto in_subset = [&]() ->
bool {
652 if constexpr(!std::is_pointer<Subset_>::value) {
653 if (options.subset_containers_have_indices) {
654 return (*is_in_subset)[s][r];
657 return subsets[s][r];
663 if (subset_sum_ptr && subset_sum_ptr[s]) {
664 const auto current = subset_sum_ptr[s];
665 for (Index_ i = 0; i < range.number; ++i) {
666 current[range.index[i]] += range.value[i];
670 if (subset_detected_ptr && subset_detected_ptr[s]) {
671 const auto current = subset_detected_ptr[s];
672 for (Index_ i = 0; i < range.number; ++i) {
673 current[range.index[i]] += (range.value[i] != 0);
682 for (Index_ c = 0; c < NC; ++c) {
683 const auto last_nz = (*nonzeros_at_start)[c];
684 if (last_nz == len) {
687 auto& current = max_value_ptr[c];
695 max_index_ptr[c] = start + last_nz;
701 const Index_ first_structural_zero = start + last_nz;
702 if (first_structural_zero < max_index_ptr[c]) {
703 max_index_ptr[c] = first_structural_zero;
717 for (Index_ r = 0; r < len; ++r) {
718 auto ptr = ext->fetch(vbuffer.data());
721 for (Index_ i = 0; i < NC; ++i) {
722 sum_ptr[i] += ptr[i];
727 for (Index_ i = 0; i < NC; ++i) {
728 detected_ptr[i] += (ptr[i] != 0);
734 std::copy_n(ptr, NC, max_value_ptr);
736 std::fill_n(max_index_ptr, NC, start);
739 for (Index_ i = 0; i < NC; ++i) {
740 auto& curmax = max_value_ptr[i];
741 if (curmax < ptr[i]) {
744 max_index_ptr[i] = start + r;
751 if (report_subsets) {
752 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
753 const auto in_subset = [&]() ->
bool {
754 if constexpr(!std::is_pointer<Subset_>::value) {
755 if (options.subset_containers_have_indices) {
756 return (*is_in_subset)[s][r];
759 return subsets[s][r];
765 if (subset_sum_ptr && subset_sum_ptr[s]) {
766 const auto current = subset_sum_ptr[s];
767 for (Index_ i = 0; i < NC; ++i) {
768 current[i] += ptr[i];
772 if (subset_detected_ptr && subset_detected_ptr[s]) {
773 const auto current = subset_detected_ptr[s];
774 for (Index_ i = 0; i < NC; ++i) {
775 current[i] += (ptr[i] != 0);
790 (*partial_sum)[thread - 1] = std::move(sum_buffer);
792 if (output.detected) {
793 (*partial_detected)[thread - 1] = std::move(detected_buffer);
796 (*partial_max_value)[thread - 1] = std::move(max_value_buffer);
797 if (output.max_index) {
798 (*partial_max_index)[thread - 1] = std::move(max_index_buffer);
801 if (output.subset_sum.size()) {
802 (*partial_subset_sum)[thread - 1] = std::move(subset_sum_buffers);
804 if (output.subset_detected.size()) {
805 (*partial_subset_detected)[thread - 1] = std::move(subset_detected_buffers);
809 }, NR, options.num_threads);
817 for (
int u = 1; u < num_used; ++u) {
818 const auto& cursum = *((*partial_sum)[u - 1]);
819 for (Index_ c = 0; c < NC; ++c) {
820 output.sum[c] += cursum[c];
825 if (output.detected) {
826 for (
int u = 1; u < num_used; ++u) {
827 const auto& curdetected = *((*partial_detected)[u - 1]);
828 for (Index_ c = 0; c < NC; ++c) {
829 output.detected[c] += curdetected[c];
835 if (output.max_index) {
836 for (
int u = 1; u < num_used; ++u) {
837 const auto& curmaxval = *((*partial_max_value)[u - 1]);
838 const auto& curmaxidx = *((*partial_max_index)[u - 1]);
839 for (Index_ c = 0; c < NC; ++c) {
840 if (curmaxval[c] > max_value_output_ptr[c]) {
841 max_value_output_ptr[c] = curmaxval[c];
842 output.max_index[c] = curmaxidx[c];
846 }
else if (output.max_value) {
847 for (
int u = 1; u < num_used; ++u) {
848 const auto& curmaxval = *((*partial_max_value)[u - 1]);
849 for (Index_ c = 0; c < NC; ++c) {
850 if (curmaxval[c] > output.max_value[c]) {
851 output.max_value[c] = curmaxval[c];
857 if (output.subset_sum.size()) {
858 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
859 const auto outptr = output.subset_sum[s];
860 if (outptr == NULL) {
863 for (
int u = 1; u < num_used; ++u) {
864 const auto& cursubset = (*((*partial_subset_sum)[u - 1]))[s];
865 for (Index_ c = 0; c < NC; ++c) {
866 outptr[c] += cursubset[c];
872 if (output.subset_detected.size()) {
873 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
874 const auto outptr = output.subset_detected[s];
875 if (outptr == NULL) {
878 for (
int u = 1; u < num_used; ++u) {
879 const auto& cursubset = (*((*partial_subset_detected)[u - 1]))[s];
880 for (Index_ c = 0; c < NC; ++c) {
881 outptr[c] += cursubset[c];
905template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
1000template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
1003 const std::vector<Subset_>& subsets,
1008 per_cell_qc_metrics_running(mat, subsets, output, options);
1011 per_cell_qc_metrics_direct_sparse(mat, subsets, output, options);
1013 per_cell_qc_metrics_direct_dense(mat, subsets, output, options);
1039template<
typename Sum_ =
double,
typename Detected_ =
int,
typename Value_,
typename Index_,
typename Subset_>
1042 const std::vector<Subset_>& subsets,
1047 const auto ncells = mat.
ncol();
1051#ifdef SCRAN_QC_TEST_INIT
1052 , SCRAN_QC_TEST_INIT
1055 buffers.
sum = output.
sum.data();
1060#ifdef SCRAN_QC_TEST_INIT
1061 , SCRAN_QC_TEST_INIT
1069#ifdef SCRAN_QC_TEST_INIT
1070 , SCRAN_QC_TEST_INIT
1077#ifdef SCRAN_QC_TEST_INIT
1078 , SCRAN_QC_TEST_INIT
1084 const auto nsubsets = subsets.size();
1087 sanisizer::resize(output.
subset_sum, nsubsets);
1088 sanisizer::resize(buffers.
subset_sum, nsubsets);
1089 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
1091#ifdef SCRAN_QC_TEST_INIT
1092 , SCRAN_QC_TEST_INIT
1102 for (I<
decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
1104#ifdef SCRAN_QC_TEST_INIT
1105 , SCRAN_QC_TEST_INIT
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual bool is_sparse() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:1001
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
int parallelize(Function_ fun, const Index_ tasks, const int workers)
I< decltype(std::declval< Container_ >().size())> cast_Index_to_container_size(const Index_ x)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:97
Value_ * max_value
Definition per_cell_qc_metrics.hpp:127
Index_ * max_index
Definition per_cell_qc_metrics.hpp:134
Sum_ * sum
Definition per_cell_qc_metrics.hpp:115
Detected_ * detected
Definition per_cell_qc_metrics.hpp:121
std::vector< Detected_ * > subset_detected
Definition per_cell_qc_metrics.hpp:150
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:142
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool compute_sum
Definition per_cell_qc_metrics.hpp:30
bool compute_subset_sum
Definition per_cell_qc_metrics.hpp:54
bool compute_max_value
Definition per_cell_qc_metrics.hpp:42
bool subset_containers_have_indices
Definition per_cell_qc_metrics.hpp:77
bool compute_subset_detected
Definition per_cell_qc_metrics.hpp:60
bool compute_detected
Definition per_cell_qc_metrics.hpp:36
bool compute_max_index
Definition per_cell_qc_metrics.hpp:48
int num_threads
Definition per_cell_qc_metrics.hpp:83
Result store for QC metric calculations.
Definition per_cell_qc_metrics.hpp:906
std::vector< Index_ > max_index
Definition per_cell_qc_metrics.hpp:943
std::vector< Value_ > max_value
Definition per_cell_qc_metrics.hpp:936
std::vector< std::vector< Detected_ > > subset_detected
Definition per_cell_qc_metrics.hpp:957
std::vector< Detected_ > detected
Definition per_cell_qc_metrics.hpp:930
std::vector< Sum_ > sum
Definition per_cell_qc_metrics.hpp:924
std::vector< std::vector< Sum_ > > subset_sum
Definition per_cell_qc_metrics.hpp:950
bool sparse_ordered_index