1#ifndef SCRAN_QC_PER_CELL_QC_METRICS_HPP
2#define SCRAN_QC_PER_CELL_QC_METRICS_HPP
10#include "tatami_stats/tatami_stats.hpp"
74template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
132template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
133void compute_qc_direct_dense(
135 const std::vector<Subset_>& subsets,
139 std::vector<std::vector<Index_> > subset_indices;
141 if constexpr(std::is_pointer<Subset_>::value) {
142 size_t nsubsets = subsets.size();
143 subset_indices.resize(nsubsets);
144 Index_ NR = mat.
nrow();
146 for (
size_t s = 0; s < nsubsets; ++s) {
147 auto& current = subset_indices[s];
148 const auto& source = subsets[s];
149 for (Index_ i = 0; i < NR; ++i) {
151 current.push_back(i);
159 auto NR = mat.
nrow();
161 std::vector<Value_> vbuffer(NR);
165 size_t nsubsets = subsets.size();
167 for (Index_ c = start, end = start + length; c < end; ++c) {
168 auto ptr = ext->fetch(c, vbuffer.data());
171 output.
sum[c] = std::accumulate(ptr, ptr + NR,
static_cast<Sum_
>(0));
176 for (Index_ r = 0; r < NR; ++r) {
177 count += (ptr[r] != 0);
183 Index_ max_index = 0;
184 Value_ max_value = 0;
188 for (Index_ r = 1; r < NR; ++r) {
189 if (max_value < ptr[r]) {
205 for (
size_t s = 0; s < nsubsets; ++s) {
206 const auto& sub = [&]() ->
const auto& {
207 if constexpr(std::is_pointer<Subset_>::value) {
208 return subset_indices[s];
223 Detected_ current = 0;
225 current += ptr[r] != 0;
232 }, mat.
ncol(), num_threads);
235template<
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_,
typename Value_>
236std::vector<std::vector<uint8_t> > boolify_subsets(Index_ NR,
const std::vector<Subset_>& subsets,
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output) {
237 std::vector<std::vector<uint8_t> > is_in_subset;
239 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
240 if constexpr(!std::is_pointer<Subset_>::value) {
241 size_t nsubsets = subsets.size();
242 for (
size_t s = 0; s < nsubsets; ++s) {
243 is_in_subset.emplace_back(NR);
244 auto& last = is_in_subset.back();
245 for (
auto i : subsets[s]) {
255template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
256void compute_qc_direct_sparse(
258 const std::vector<Subset_>& subsets,
259 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
262 auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
265 auto NR = mat.
nrow();
267 std::vector<Value_> vbuffer(NR);
268 std::vector<Index_> ibuffer(NR);
270 bool do_max = output.max_index || output.max_value;
272 size_t nsubsets = subsets.size();
274 for (Index_ c = start, end = start + length; c < end; ++c) {
275 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
278 output.sum[c] = std::accumulate(range.value, range.value + range.number,
static_cast<Sum_
>(0));
281 if (output.detected) {
282 Detected_ current = 0;
283 for (Index_ i = 0; i < range.number; ++i) {
284 current += (range.value[i] != 0);
286 output.detected[c] = current;
290 Index_ max_index = 0;
291 Value_ max_value = 0;
294 max_value = range.value[0];
295 max_index = range.index[0];
296 for (Index_ i = 1; i < range.number; ++i) {
297 if (max_value < range.value[i]) {
298 max_value = range.value[i];
299 max_index = range.index[i];
303 if (max_value <= 0 && range.number < NR) {
304 if (output.max_index) {
307 for (Index_ i = 0; i < range.number; ++i) {
308 if (range.index[i] > last) {
310 }
else if (range.value[i] == 0) {
313 last = range.index[i] + 1;
323 if (output.max_index) {
324 output.max_index[c] = max_index;
326 if (output.max_value) {
327 output.max_value[c] = max_value;
331 if (!output.subset_sum.empty() || !output.subset_detected.empty()) {
332 for (
size_t s = 0; s < nsubsets; ++s) {
333 const auto& sub = [&]() ->
const auto& {
334 if constexpr(std::is_pointer<Subset_>::value) {
337 return is_in_subset[s];
341 if (!output.subset_sum.empty() && output.subset_sum[s]) {
343 for (Index_ i = 0; i < range.number; ++i) {
344 current += (sub[range.index[i]] != 0) * range.value[i];
346 output.subset_sum[s][c] = current;
349 if (!output.subset_detected.empty() && output.subset_detected[s]) {
350 Detected_ current = 0;
351 for (Index_ i = 0; i < range.number; ++i) {
352 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
354 output.subset_detected[s][c] = current;
359 }, mat.
ncol(), num_threads);
362template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
363class PerCellQcMetricsRunningBuffers {
365 PerCellQcMetricsRunningBuffers(
const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
int thread, Index_ start, Index_ len) {
367 my_sum = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.sum);
370 if (output.detected) {
371 my_detected = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.detected);
374 if (output.max_value) {
375 my_max_value = tatami_stats::LocalOutputBuffer<Value_>(thread, start, len, output.max_value);
376 }
else if (output.max_index) {
377 my_holding_max_value.resize(len);
380 if (output.max_index) {
381 my_max_index = tatami_stats::LocalOutputBuffer<Index_>(thread, start, len, output.max_index);
384 my_subset_sum.resize(output.subset_sum.size());
385 for (
size_t s = 0, send = output.subset_sum.size(); s < send; ++s) {
386 if (output.subset_sum[s]) {
387 my_subset_sum[s] = tatami_stats::LocalOutputBuffer<Sum_>(thread, start, len, output.subset_sum[s]);
391 my_subset_detected.resize(output.subset_detected.size());
392 for (
size_t s = 0, send = output.subset_detected.size(); s < send; ++s) {
393 if (output.subset_detected[s]) {
394 my_subset_detected[s] = tatami_stats::LocalOutputBuffer<Detected_>(thread, start, len, output.subset_detected[s]);
400 tatami_stats::LocalOutputBuffer<Sum_> my_sum;
401 tatami_stats::LocalOutputBuffer<Detected_> my_detected;
403 tatami_stats::LocalOutputBuffer<Value_> my_max_value;
404 std::vector<Value_> my_holding_max_value;
405 tatami_stats::LocalOutputBuffer<Index_> my_max_index;
407 std::vector<tatami_stats::LocalOutputBuffer<Sum_> > my_subset_sum;
408 std::vector<tatami_stats::LocalOutputBuffer<Detected_> > my_subset_detected;
412 return my_sum.data();
415 Detected_* detected_data() {
416 return my_detected.data();
419 Value_* max_value_data() {
420 auto dptr = my_max_value.data();
421 return (dptr ? dptr : my_holding_max_value.data());
424 Index_* max_index_data() {
425 return my_max_index.data();
428 std::vector<Sum_*> subset_sum_data() {
429 std::vector<Sum_*> output;
430 output.reserve(my_subset_sum.size());
431 for (
auto& s : my_subset_sum) {
432 output.push_back(s.data());
437 std::vector<Detected_*> subset_detected_data() {
438 std::vector<Detected_*> output;
439 output.reserve(my_subset_detected.size());
440 for (
auto& s : my_subset_detected) {
441 output.push_back(s.data());
450 if (my_detected.data()) {
451 my_detected.transfer();
454 if (my_max_value.data()) {
455 my_max_value.transfer();
457 if (my_max_index.data()) {
458 my_max_index.transfer();
461 for (
auto& s : my_subset_sum) {
467 for (
auto& s : my_subset_detected) {
475template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
476void compute_qc_running_dense(
478 const std::vector<Subset_>& subsets,
479 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
482 auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
485 auto NR = mat.
nrow();
487 std::vector<Value_> vbuffer(len);
489 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
490 auto outt = locals.sum_data();
491 auto outd = locals.detected_data();
492 auto outmi = locals.max_index_data();
493 auto outmc = locals.max_value_data();
494 bool do_max = (outmi || outmc);
495 auto outst = locals.subset_sum_data();
496 auto outsd = locals.subset_detected_data();
498 size_t nsubsets = subsets.size();
500 for (Index_ r = 0; r < NR; ++r) {
501 auto ptr = ext->fetch(vbuffer.data());
504 for (Index_ i = 0; i < len; ++i) {
510 for (Index_ i = 0; i < len; ++i) {
511 outd[i] += (ptr[i] != 0);
517 std::copy_n(ptr, len, outmc);
519 std::fill_n(outmi, len, 0);
522 for (Index_ i = 0; i < len; ++i) {
523 auto& curmax = outmc[i];
524 if (curmax < ptr[i]) {
534 if (!outst.empty() || !outsd.empty()) {
535 for (
size_t s = 0; s < nsubsets; ++s) {
536 const auto& sub = [&]() ->
const auto& {
537 if constexpr(std::is_pointer<Subset_>::value) {
540 return is_in_subset[s];
547 if (!outst.empty() && outst[s]) {
548 auto current = outst[s];
549 for (Index_ i = 0; i < len; ++i) {
550 current[i] += ptr[i];
554 if (!outsd.empty() && outsd[s]) {
555 auto current = outsd[s];
556 for (Index_ i = 0; i < len; ++i) {
557 current[i] += (ptr[i] != 0);
565 }, mat.
ncol(), num_threads);
568template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
569void compute_qc_running_sparse(
571 const std::vector<Subset_>& subsets,
572 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
577 auto is_in_subset = boolify_subsets(mat.
nrow(), subsets, output);
580 auto NR = mat.
nrow();
582 std::vector<Value_> vbuffer(len);
583 std::vector<Index_> ibuffer(len);
585 PerCellQcMetricsRunningBuffers<Sum_, Detected_, Value_, Index_> locals(output, thread, start, len);
586 auto outt = locals.sum_data();
587 auto outd = locals.detected_data();
588 auto outmi = locals.max_index_data();
589 auto outmc = locals.max_value_data();
590 bool do_max = (outmi || outmc);
591 auto outst = locals.subset_sum_data();
592 auto outsd = locals.subset_detected_data();
594 size_t nsubsets = subsets.size();
596 std::vector<Index_> last_consecutive_nonzero(do_max ? len : 0);
598 for (Index_ r = 0; r < NR; ++r) {
599 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
602 for (Index_ i = 0; i < range.number; ++i) {
603 outt[range.index[i] - start] += range.value[i];
608 for (Index_ i = 0; i < range.number; ++i) {
609 outd[range.index[i] - start] += (range.value[i] != 0);
615 std::fill_n(outmc, len, 0);
616 for (Index_ i = 0; i < range.number; ++i) {
617 auto j = range.index[i] - start;
618 outmc[j] = range.value[i];
619 last_consecutive_nonzero[j] = 1;
622 std::fill_n(outmi, len, 0);
626 for (Index_ i = 0; i < range.number; ++i) {
627 auto j = range.index[i] - start;
628 auto& curmax = outmc[j];
630 auto val = range.value[i];
641 auto& last = last_consecutive_nonzero[j];
649 if (!outst.empty() || !outsd.empty()) {
650 for (
size_t s = 0; s < nsubsets; ++s) {
651 const auto& sub = [&]() ->
const auto& {
652 if constexpr(std::is_pointer<Subset_>::value) {
655 return is_in_subset[s];
662 if (!outst.empty() && outst[s]) {
663 auto current = outst[s];
664 for (Index_ i = 0; i < range.number; ++i) {
665 current[range.index[i] - start] += range.value[i];
669 if (!outsd.empty() && outsd[s]) {
670 auto current = outsd[s];
671 for (Index_ i = 0; i < range.number; ++i) {
672 current[range.index[i] - start] += (range.value[i] != 0);
680 auto NR = mat.
nrow();
684 for (Index_ c = 0; c < len; ++c) {
685 auto last_nz = last_consecutive_nonzero[c];
690 auto& current = outmc[c];
703 }, mat.
ncol(), num_threads);
722template<
typename Sum_,
typename Detected_,
typename Value_,
typename Index_>
809template<
typename Value_,
typename Index_,
typename Subset_,
typename Sum_,
typename Detected_>
812 const std::vector<Subset_>& subsets,
818 internal::compute_qc_running_sparse(mat, subsets, output, options.
num_threads);
820 internal::compute_qc_direct_sparse(mat, subsets, output, options.
num_threads);
824 internal::compute_qc_running_dense(mat, subsets, output, options.
num_threads);
826 internal::compute_qc_direct_dense(mat, subsets, output, options.
num_threads);
850template<
typename Sum_ =
double,
typename Detected_ =
int,
typename Value_,
typename Index_,
typename Subset_>
853 const std::vector<Subset_>& subsets,
858 auto ncells = mat.
ncol();
861 output.
sum.resize(ncells
862#ifdef SCRAN_QC_TEST_INIT
866 buffers.
sum = output.
sum.data();
870#ifdef SCRAN_QC_TEST_INIT
878#ifdef SCRAN_QC_TEST_INIT
886#ifdef SCRAN_QC_TEST_INIT
893 size_t nsubsets = subsets.size();
898 for (
size_t s = 0; s < nsubsets; ++s) {
900#ifdef SCRAN_QC_TEST_INIT
911 for (
size_t s = 0; s < nsubsets; ++s) {
913#ifdef SCRAN_QC_TEST_INIT
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:20
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:810
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:75
Value_ * max_value
Definition per_cell_qc_metrics.hpp:108
Index_ * max_index
Definition per_cell_qc_metrics.hpp:102
Sum_ * sum
Definition per_cell_qc_metrics.hpp:90
Detected_ * detected
Definition per_cell_qc_metrics.hpp:96
std::vector< Detected_ * > subset_detected
Definition per_cell_qc_metrics.hpp:124
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:116
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:22
bool compute_sum
Definition per_cell_qc_metrics.hpp:27
bool compute_subset_sum
Definition per_cell_qc_metrics.hpp:51
bool compute_max_value
Definition per_cell_qc_metrics.hpp:39
bool compute_subset_detected
Definition per_cell_qc_metrics.hpp:57
bool compute_detected
Definition per_cell_qc_metrics.hpp:33
bool compute_max_index
Definition per_cell_qc_metrics.hpp:45
int num_threads
Definition per_cell_qc_metrics.hpp:63
Result store for QC metric calculations.
Definition per_cell_qc_metrics.hpp:723
std::vector< Index_ > max_index
Definition per_cell_qc_metrics.hpp:751
std::vector< Value_ > max_value
Definition per_cell_qc_metrics.hpp:757
std::vector< std::vector< Detected_ > > subset_detected
Definition per_cell_qc_metrics.hpp:771
std::vector< Detected_ > detected
Definition per_cell_qc_metrics.hpp:744
std::vector< Sum_ > sum
Definition per_cell_qc_metrics.hpp:738
std::vector< std::vector< Sum_ > > subset_sum
Definition per_cell_qc_metrics.hpp:764
bool sparse_ordered_index