scran_qc
Simple quality control on single-cell data
Loading...
Searching...
No Matches
per_cell_qc_metrics.hpp
Go to the documentation of this file.
1#ifndef SCRAN_QC_PER_CELL_QC_METRICS_HPP
2#define SCRAN_QC_PER_CELL_QC_METRICS_HPP
3
4#include <vector>
5#include <algorithm>
6#include <limits>
7#include <cstddef>
8#include <optional>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "utils.hpp"
14
20namespace scran_qc {
21
30 bool compute_sum = true;
31
36 bool compute_detected = true;
37
42 bool compute_max_value = true;
43
48 bool compute_max_index = true;
49
54 bool compute_subset_sum = true;
55
61
78
83 int num_threads = 1;
84};
85
96template<typename Sum_, typename Detected_, typename Value_, typename Index_>
101 PerCellQcMetricsBuffers() = default;
102
103 PerCellQcMetricsBuffers(const std::size_t nsubsets) :
104 subset_sum(sanisizer::cast<I<decltype(subset_sum.size())> >(nsubsets), NULL),
105 subset_detected(sanisizer::cast<I<decltype(subset_detected.size())> >(nsubsets), NULL)
106 {}
115 Sum_* sum = NULL;
116
121 Detected_* detected = NULL;
122
127 Value_* max_value = NULL;
128
134 Index_* max_index = NULL;
135
142 std::vector<Sum_*> subset_sum;
143
150 std::vector<Detected_*> subset_detected;
151};
152
156template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
157void per_cell_qc_metrics_direct_dense(
159 const std::vector<Subset_>& subsets,
161 const PerCellQcMetricsOptions& options
162) {
163 const auto NR = mat.nrow();
164 const bool report_max = output.max_value || output.max_index;
165 const auto nsubsets = subsets.size();
166 const bool report_subsets = output.subset_sum.size() || output.subset_detected.size();
167
168 std::optional<std::vector<std::vector<Index_> > > subset_indices;
169 if (report_subsets) {
170 [&](){ // use an IIFE for easy control via return.
171 if constexpr(!std::is_pointer<Subset_>::value) {
172 if (options.subset_containers_have_indices) {
173 return;
174 }
175 }
176
177 subset_indices.emplace(sanisizer::cast<I<decltype(subset_indices->size())> >(nsubsets));
178 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
179 auto& current = (*subset_indices)[s];
180 const auto& source = subsets[s];
181 for (I<decltype(NR)> i = 0; i < NR; ++i) {
182 if (source[i]) {
183 current.push_back(i);
184 }
185 }
186 }
187 }();
188 }
189
190 tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
191 auto ext = tatami::consecutive_extractor<false>(mat, false, start, length);
193
194 for (Index_ c = start, end = start + length; c < end; ++c) {
195 auto ptr = ext->fetch(c, vbuffer.data());
196
197 if (output.sum) {
198 output.sum[c] = std::accumulate(ptr, ptr + NR, static_cast<Sum_>(0));
199 }
200
201 if (output.detected) {
202 Detected_ count = 0;
203 for (I<decltype(NR)> r = 0; r < NR; ++r) {
204 count += (ptr[r] != 0);
205 }
206 output.detected[c] = count;
207 }
208
209 if (report_max) {
210 if (NR) {
211 const auto it = std::max_element(ptr, ptr + NR);
212 if (output.max_value) {
213 output.max_value[c] = *it;
214 }
215 if (output.max_index) {
216 output.max_index[c] = it - ptr;
217 }
218 } else {
219 if (output.max_value) {
220 output.max_value[c] = 0;
221 }
222 if (output.max_index) {
223 output.max_index[c] = 0;
224 }
225 }
226 }
227
228 if (report_subsets) {
229 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
230 if constexpr(!std::is_pointer<Subset_>::value) {
231 // Seeing if the subsets are already indices, in which case we use it directly.
232 // It is assumed that there are no duplicate values here.
233 if (options.subset_containers_have_indices) {
234 const auto& sub = subsets[s];
235 if (!output.subset_sum.empty() && output.subset_sum[s]) {
236 Sum_ current = 0;
237 for (const auto r : sub) {
238 current += ptr[r];
239 }
240 output.subset_sum[s][c] = current;
241 }
242 if (!output.subset_detected.empty() && output.subset_detected[s]) {
243 Detected_ current = 0;
244 for (const auto r : sub) {
245 current += ptr[r] != 0;
246 }
247 output.subset_detected[s][c] = current;
248 }
249 continue;
250 }
251 }
252
253 const auto& sub = (*subset_indices)[s];
254 if (!output.subset_sum.empty() && output.subset_sum[s]) {
255 Sum_ current = 0;
256 for (const auto r : sub) {
257 current += ptr[r];
258 }
259 output.subset_sum[s][c] = current;
260 }
261 if (!output.subset_detected.empty() && output.subset_detected[s]) {
262 Detected_ current = 0;
263 for (const auto r : sub) {
264 current += ptr[r] != 0;
265 }
266 output.subset_detected[s][c] = current;
267 }
268 }
269 }
270 }
271 }, mat.ncol(), options.num_threads);
272}
273
274template<typename Index_, typename Subset_>
275std::vector<std::vector<unsigned char> > boolify_subsets(const Index_ NR, const std::vector<Subset_>& subsets) {
276 const auto nsubsets = subsets.size();
277 auto output = sanisizer::create<std::vector<std::vector<unsigned char> > >(nsubsets);
278 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
279 auto& current = output[s];
281 for (const auto i : subsets[s]) {
282 current[i] = 1;
283 }
284 }
285 return output;
286}
287
288template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
289void per_cell_qc_metrics_direct_sparse(
291 const std::vector<Subset_>& subsets,
292 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
293 const PerCellQcMetricsOptions& options
294) {
295 const auto NR = mat.nrow();
296 const bool report_max = output.max_value || output.max_index;
297 const auto nsubsets = subsets.size();
298 const bool report_subsets = output.subset_sum.size() || output.subset_detected.size();
299
300 std::optional<std::vector<std::vector<unsigned char> > > is_in_subset;
301 if (report_subsets) {
302 if constexpr(!std::is_pointer<Subset_>::value) {
303 if (options.subset_containers_have_indices) {
304 is_in_subset = boolify_subsets(NR, subsets);
305 }
306 }
307 }
308
309 tatami::parallelize([&](const int, const Index_ start, const Index_ length) -> void {
310 auto ext = tatami::consecutive_extractor<true>(mat, false, start, length);
313
314 for (Index_ c = start, end = start + length; c < end; ++c) {
315 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
316
317 if (output.sum) {
318 output.sum[c] = std::accumulate(range.value, range.value + range.number, static_cast<Sum_>(0));
319 }
320
321 if (output.detected) {
322 Detected_ current = 0;
323 for (Index_ i = 0; i < range.number; ++i) {
324 current += (range.value[i] != 0);
325 }
326 output.detected[c] = current;
327 }
328
329 if (report_max) {
330 if (range.number) {
331 const auto it = std::max_element(range.value, range.value + range.number);
332 if (*it > 0 || range.number == NR) {
333 if (output.max_value) {
334 output.max_value[c] = *it;
335 }
336 if (output.max_index) {
337 output.max_index[c] = range.index[it - range.value];
338 }
339 } else {
340 if (output.max_value) {
341 output.max_value[c] = 0;
342 }
343 if (output.max_index) {
344 if (*it < 0) {
345 // Find the first structural zero.
346 output.max_index[c] = range.number;
347 for (Index_ i = 0; i < range.number; ++i) {
348 if (range.index[i] != i) {
349 output.max_index[c] = i;
350 break;
351 }
352 }
353 } else {
354 // Find the first structural zero that occurs before the structural non-zero with a value of zero.
355 const Index_ candidate = it - range.value;
356 output.max_index[c] = range.index[candidate];
357 for (Index_ i = 0; i <= candidate; ++i) {
358 if (range.index[i] != i) {
359 output.max_index[c] = i;
360 break;
361 }
362 }
363 }
364 }
365 }
366 } else {
367 if (output.max_value) {
368 output.max_value[c] = 0;
369 }
370 if (output.max_index) {
371 output.max_index[c] = 0;
372 }
373 }
374 }
375
376 if (report_subsets) {
377 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
378 if constexpr(!std::is_pointer<Subset_>::value) {
379 if (options.subset_containers_have_indices) {
380 const auto& sub = (*is_in_subset)[s];
381 if (!output.subset_sum.empty() && output.subset_sum[s]) {
382 Sum_ current = 0;
383 for (Index_ i = 0; i < range.number; ++i) {
384 current += (sub[range.index[i]] != 0) * range.value[i];
385 }
386 output.subset_sum[s][c] = current;
387 }
388 if (!output.subset_detected.empty() && output.subset_detected[s]) {
389 Detected_ current = 0;
390 for (Index_ i = 0; i < range.number; ++i) {
391 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
392 }
393 output.subset_detected[s][c] = current;
394 }
395 continue;
396 }
397 }
398
399 const auto& sub = subsets[s];
400 if (!output.subset_sum.empty() && output.subset_sum[s]) {
401 Sum_ current = 0;
402 for (Index_ i = 0; i < range.number; ++i) {
403 current += (sub[range.index[i]] != 0) * range.value[i];
404 }
405 output.subset_sum[s][c] = current;
406 }
407 if (!output.subset_detected.empty() && output.subset_detected[s]) {
408 Detected_ current = 0;
409 for (Index_ i = 0; i < range.number; ++i) {
410 current += (sub[range.index[i]] != 0) * (range.value[i] != 0);
411 }
412 output.subset_detected[s][c] = current;
413 }
414 }
415 }
416 }
417 }, mat.ncol(), options.num_threads);
418}
419
420template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
421void per_cell_qc_metrics_running(
423 const std::vector<Subset_>& subsets,
424 const PerCellQcMetricsBuffers<Sum_, Detected_, Value_, Index_>& output,
425 const PerCellQcMetricsOptions& options
426) {
427 const auto NR = mat.nrow();
428 const auto NC = mat.ncol();
429 const bool is_sparse = mat.is_sparse();
430
431 /************************************
432 *** Setting up result containers ***
433 ************************************/
434
435 const bool report_max = output.max_value || output.max_index;
436 std::optional<std::vector<Value_> > tmp_max_value;
437 Value_* max_value_output_ptr;
438 if (report_max) {
439 if (!output.max_value) {
440 tmp_max_value.emplace(tatami::cast_Index_to_container_size<std::vector<Index_> >(NC));
441 max_value_output_ptr = tmp_max_value->data();
442 } else {
443 max_value_output_ptr = output.max_value;
444 }
445 }
446
447 const bool do_parallel = options.num_threads > 1;
448 std::optional<std::vector<std::optional<std::vector<Sum_> > > > partial_sum;
449 std::optional<std::vector<std::optional<std::vector<Detected_> > > > partial_detected;
450 std::optional<std::vector<std::optional<std::vector<Value_> > > > partial_max_value;
451 std::optional<std::vector<std::optional<std::vector<Index_> > > > partial_max_index;
452 std::optional<std::vector<std::optional<std::vector<std::vector<Sum_> > > > > partial_subset_sum;
453 std::optional<std::vector<std::optional<std::vector<std::vector<Detected_> > > > > partial_subset_detected;
454 if (do_parallel) {
455 if (output.sum) {
456 partial_sum.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_sum)> >(options.num_threads - 1));
457 }
458 if (output.detected) {
459 partial_detected.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_detected)> >(options.num_threads - 1));
460 }
461 if (report_max) {
462 partial_max_value.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_max_value)> >(options.num_threads - 1));
463 if (output.max_index) {
464 partial_max_index.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_max_index)> >(options.num_threads - 1));
465 }
466 }
467 if (output.subset_sum.size()) {
468 partial_subset_sum.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_subset_sum)> >(options.num_threads - 1));
469 }
470 if (output.subset_detected.size()) {
471 partial_subset_detected.emplace(tatami::cast_Index_to_container_size<I<decltype(*partial_subset_detected)> >(options.num_threads - 1));
472 }
473 }
474
475 // Zeroing the output arrays.
476 if (output.sum) {
477 std::fill_n(output.sum, NC, 0);
478 }
479 if (output.detected) {
480 std::fill_n(output.detected, NC, 0);
481 }
482 if (report_max && NR == 0) { // no need to zero if it's not empty, as it'll get filled by thread 0 upon encountering the first column.
483 std::fill_n(output.max_value, NC, 0);
484 if (output.max_index) {
485 std::fill_n(output.max_index, NC, 0);
486 }
487 }
488 for (const auto sptr : output.subset_sum) {
489 if (sptr) {
490 std::fill_n(sptr, NC, 0);
491 }
492 }
493 for (const auto sptr : output.subset_detected) {
494 if (sptr) {
495 std::fill_n(sptr, NC, 0);
496 }
497 }
498
499 const auto nsubsets = subsets.size();
500 const bool report_subsets = output.subset_sum.size() || output.subset_detected.size();
501 std::optional<std::vector<std::vector<unsigned char> > > is_in_subset;
502 if (report_subsets) {
503 if constexpr(!std::is_pointer<Subset_>::value) {
504 if (options.subset_containers_have_indices) {
505 is_in_subset = boolify_subsets(NR, subsets);
506 }
507 }
508 }
509
510 const auto num_used = tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
511 /*********************************************************
512 *** Thread-local containers to mitigate false sharing ***
513 *********************************************************/
514
515 Sum_* sum_ptr = NULL;
516 std::optional<std::vector<Sum_> > sum_buffer;
517 Detected_* detected_ptr = NULL;
518 std::optional<std::vector<Detected_> > detected_buffer;
519 Value_* max_value_ptr = NULL;
520 std::optional<std::vector<Value_> > max_value_buffer;
521 Index_* max_index_ptr = NULL;
522 std::optional<std::vector<Index_> > max_index_buffer;
523
524 Sum_* const * subset_sum_ptr = NULL;
525 std::optional<std::vector<Sum_*> > subset_sum_ptrs;
526 std::optional<std::vector<std::vector<Sum_> > > subset_sum_buffers;
527 Detected_* const * subset_detected_ptr = NULL;
528 std::optional<std::vector<Detected_*> > subset_detected_ptrs;
529 std::optional<std::vector<std::vector<Detected_> > > subset_detected_buffers;
530
531 if (!do_parallel || thread == 0) {
532 sum_ptr = output.sum;
533 detected_ptr = output.detected;
534 max_value_ptr = max_value_output_ptr;
535 max_index_ptr = output.max_index;
536 if (output.subset_sum.size()) {
537 subset_sum_ptr = output.subset_sum.data();
538 }
539 if (output.subset_detected.size()) {
540 subset_detected_ptr = output.subset_detected.data();
541 }
542
543 } else {
544 if (output.sum) {
545 sum_buffer.emplace(tatami::cast_Index_to_container_size<I<decltype(*sum_buffer)> >(NC));
546 sum_ptr = sum_buffer->data();
547 }
548 if (output.detected) {
549 detected_buffer.emplace(tatami::cast_Index_to_container_size<I<decltype(*detected_buffer)> >(NC));
550 detected_ptr = detected_buffer->data();
551 }
552 if (report_max) {
553 max_value_buffer.emplace(tatami::cast_Index_to_container_size<I<decltype(*max_value_buffer)> >(NC));
554 max_value_ptr = max_value_buffer->data();
555 if (output.max_index) {
556 max_index_buffer.emplace(tatami::cast_Index_to_container_size<I<decltype(*max_index_buffer)> >(NC));
557 max_index_ptr = max_index_buffer->data();
558 }
559 }
560 if (output.subset_sum.size()) {
561 subset_sum_ptrs.emplace(tatami::cast_Index_to_container_size<I<decltype(*subset_sum_ptrs)> >(nsubsets));
562 subset_sum_buffers.emplace(tatami::cast_Index_to_container_size<I<decltype(*subset_sum_buffers)> >(nsubsets));
563 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
564 if (output.subset_sum[s]) {
565 tatami::resize_container_to_Index_size((*subset_sum_buffers)[s], NC);
566 (*subset_sum_ptrs)[s] = (*subset_sum_buffers)[s].data();
567 } else {
568 (*subset_sum_ptrs)[s] = NULL;
569 }
570
571 }
572 }
573 if (output.subset_detected.size()) {
574 subset_detected_ptrs.emplace(tatami::cast_Index_to_container_size<I<decltype(*subset_detected_ptrs)> >(nsubsets));
575 subset_detected_buffers.emplace(tatami::cast_Index_to_container_size<I<decltype(*subset_detected_buffers)> >(nsubsets));
576 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
577 if (output.subset_detected[s]) {
578 tatami::resize_container_to_Index_size((*subset_detected_buffers)[s], NC);
579 (*subset_detected_ptrs)[s] = (*subset_detected_buffers)[s].data();
580 } else {
581 (*subset_detected_ptrs)[s] = NULL;
582 }
583 }
584 }
585 }
586
587 if (is_sparse) {
588 /*****************************
589 *** Sparse loop over rows ***
590 *****************************/
591
592 tatami::Options opt;
593 opt.sparse_ordered_index = false;
594 auto ext = tatami::consecutive_extractor<true>(mat, true, start, len, opt);
597
598 // nonzeros_at_start contains the number of consecutive non-zero elements at the start, i.e., from r = 0.
599 std::optional<std::vector<Index_> > nonzeros_at_start;
600 if (max_value_ptr) {
601 nonzeros_at_start.emplace(tatami::cast_Index_to_container_size<std::vector<Index_> >(NC));
602 }
603
604 for (Index_ r = 0; r < len; ++r) {
605 auto range = ext->fetch(vbuffer.data(), ibuffer.data());
606 if (sum_ptr) {
607 for (Index_ i = 0; i < range.number; ++i) {
608 sum_ptr[range.index[i]] += range.value[i];
609 }
610 }
611 if (detected_ptr) {
612 for (Index_ i = 0; i < range.number; ++i) {
613 detected_ptr[range.index[i]] += (range.value[i] != 0);
614 }
615 }
616
617 if (report_max) {
618 if (r == 0) {
619 std::fill_n(max_value_ptr, NC, 0);
620 for (Index_ i = 0; i < range.number; ++i) {
621 const auto j = range.index[i];
622 max_value_ptr[j] = range.value[i];
623 (*nonzeros_at_start)[j] = 1;
624 }
625 if (max_index_ptr) {
626 std::fill_n(max_index_ptr, NC, start);
627 }
628
629 } else {
630 for (Index_ i = 0; i < range.number; ++i) {
631 const auto val = range.value[i];
632 const auto j = range.index[i];
633 auto& curmax = max_value_ptr[j];
634 if (curmax < val) {
635 curmax = val;
636 if (max_index_ptr) {
637 max_index_ptr[j] = start + r;
638 }
639 }
640 auto& last = (*nonzeros_at_start)[j];
641 if (last == r) {
642 // If we don't have an unbroken run of structural non-zeros from the start, we stop incrementing.
643 ++last;
644 }
645 }
646 }
647 }
648
649 if (report_subsets) {
650 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
651 const auto in_subset = [&]() -> bool {
652 if constexpr(!std::is_pointer<Subset_>::value) {
653 if (options.subset_containers_have_indices) {
654 return (*is_in_subset)[s][r];
655 }
656 }
657 return subsets[s][r];
658 }();
659 if (!in_subset) {
660 continue;
661 }
662
663 if (subset_sum_ptr && subset_sum_ptr[s]) {
664 const auto current = subset_sum_ptr[s];
665 for (Index_ i = 0; i < range.number; ++i) {
666 current[range.index[i]] += range.value[i];
667 }
668 }
669
670 if (subset_detected_ptr && subset_detected_ptr[s]) {
671 const auto current = subset_detected_ptr[s];
672 for (Index_ i = 0; i < range.number; ++i) {
673 current[range.index[i]] += (range.value[i] != 0);
674 }
675 }
676 }
677 }
678 }
679
680 if (report_max) {
681 // Checking anything with non-positive maximum, and replacing it with zero if there are any structural zeros.
682 for (Index_ c = 0; c < NC; ++c) {
683 const auto last_nz = (*nonzeros_at_start)[c];
684 if (last_nz == len) { // i.e., no structural zeros.
685 continue;
686 }
687 auto& current = max_value_ptr[c];
688 if (current > 0) { // doesn't defeat the current maximum.
689 continue;
690 }
691
692 if (current < 0) {
693 current = 0;
694 if (max_index_ptr) {
695 max_index_ptr[c] = start + last_nz;
696 }
697 } else {
698 // Sometimes, structural non-zeros have a value of zero.
699 // This check ensures that we return the first occurrence of any zero.
700 if (max_index_ptr) {
701 const Index_ first_structural_zero = start + last_nz;
702 if (first_structural_zero < max_index_ptr[c]) {
703 max_index_ptr[c] = first_structural_zero;
704 }
705 }
706 }
707 }
708 }
709
710 } else {
711 /****************************
712 *** Dense loop over rows ***
713 ****************************/
714
715 auto ext = tatami::consecutive_extractor<false>(mat, true, start, len);
717 for (Index_ r = 0; r < len; ++r) {
718 auto ptr = ext->fetch(vbuffer.data());
719
720 if (sum_ptr) {
721 for (Index_ i = 0; i < NC; ++i) {
722 sum_ptr[i] += ptr[i];
723 }
724 }
725
726 if (detected_ptr) {
727 for (Index_ i = 0; i < NC; ++i) {
728 detected_ptr[i] += (ptr[i] != 0);
729 }
730 }
731
732 if (report_max) {
733 if (r == 0) {
734 std::copy_n(ptr, NC, max_value_ptr);
735 if (max_index_ptr) {
736 std::fill_n(max_index_ptr, NC, start);
737 }
738 } else {
739 for (Index_ i = 0; i < NC; ++i) {
740 auto& curmax = max_value_ptr[i];
741 if (curmax < ptr[i]) {
742 curmax = ptr[i];
743 if (max_index_ptr) {
744 max_index_ptr[i] = start + r;
745 }
746 }
747 }
748 }
749 }
750
751 if (report_subsets) {
752 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
753 const auto in_subset = [&]() -> bool {
754 if constexpr(!std::is_pointer<Subset_>::value) {
755 if (options.subset_containers_have_indices) {
756 return (*is_in_subset)[s][r];
757 }
758 }
759 return subsets[s][r];
760 }();
761 if (!in_subset) {
762 continue;
763 }
764
765 if (subset_sum_ptr && subset_sum_ptr[s]) {
766 const auto current = subset_sum_ptr[s];
767 for (Index_ i = 0; i < NC; ++i) {
768 current[i] += ptr[i];
769 }
770 }
771
772 if (subset_detected_ptr && subset_detected_ptr[s]) {
773 const auto current = subset_detected_ptr[s];
774 for (Index_ i = 0; i < NC; ++i) {
775 current[i] += (ptr[i] != 0);
776 }
777 }
778 }
779 }
780 }
781 }
782
783 /********************************************
784 *** Migrate results to serial containers ***
785 ********************************************/
786
787 if (do_parallel) {
788 if (thread > 0) {
789 if (output.sum) {
790 (*partial_sum)[thread - 1] = std::move(sum_buffer);
791 }
792 if (output.detected) {
793 (*partial_detected)[thread - 1] = std::move(detected_buffer);
794 }
795 if (report_max) {
796 (*partial_max_value)[thread - 1] = std::move(max_value_buffer);
797 if (output.max_index) {
798 (*partial_max_index)[thread - 1] = std::move(max_index_buffer);
799 }
800 }
801 if (output.subset_sum.size()) {
802 (*partial_subset_sum)[thread - 1] = std::move(subset_sum_buffers);
803 }
804 if (output.subset_detected.size()) {
805 (*partial_subset_detected)[thread - 1] = std::move(subset_detected_buffers);
806 }
807 }
808 }
809 }, NR, options.num_threads);
810
811 /************************************
812 *** Reduction into output arrays ***
813 ************************************/
814
815 if (do_parallel) {
816 if (output.sum) {
817 for (int u = 1; u < num_used; ++u) {
818 const auto& cursum = *((*partial_sum)[u - 1]);
819 for (Index_ c = 0; c < NC; ++c) {
820 output.sum[c] += cursum[c];
821 }
822 }
823 }
824
825 if (output.detected) {
826 for (int u = 1; u < num_used; ++u) {
827 const auto& curdetected = *((*partial_detected)[u - 1]);
828 for (Index_ c = 0; c < NC; ++c) {
829 output.detected[c] += curdetected[c];
830 }
831 }
832 }
833
834 // All used threads will have processed non-empty ranges, so we don't need to worry about the validity of the maxima from each thread.
835 if (output.max_index) {
836 for (int u = 1; u < num_used; ++u) {
837 const auto& curmaxval = *((*partial_max_value)[u - 1]);
838 const auto& curmaxidx = *((*partial_max_index)[u - 1]);
839 for (Index_ c = 0; c < NC; ++c) {
840 if (curmaxval[c] > max_value_output_ptr[c]) {
841 max_value_output_ptr[c] = curmaxval[c];
842 output.max_index[c] = curmaxidx[c];
843 }
844 }
845 }
846 } else if (output.max_value) {
847 for (int u = 1; u < num_used; ++u) {
848 const auto& curmaxval = *((*partial_max_value)[u - 1]);
849 for (Index_ c = 0; c < NC; ++c) {
850 if (curmaxval[c] > output.max_value[c]) {
851 output.max_value[c] = curmaxval[c];
852 }
853 }
854 }
855 }
856
857 if (output.subset_sum.size()) {
858 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
859 const auto outptr = output.subset_sum[s];
860 if (outptr == NULL) {
861 continue;
862 }
863 for (int u = 1; u < num_used; ++u) {
864 const auto& cursubset = (*((*partial_subset_sum)[u - 1]))[s];
865 for (Index_ c = 0; c < NC; ++c) {
866 outptr[c] += cursubset[c];
867 }
868 }
869 }
870 }
871
872 if (output.subset_detected.size()) {
873 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
874 const auto outptr = output.subset_detected[s];
875 if (outptr == NULL) {
876 continue;
877 }
878 for (int u = 1; u < num_used; ++u) {
879 const auto& cursubset = (*((*partial_subset_detected)[u - 1]))[s];
880 for (Index_ c = 0; c < NC; ++c) {
881 outptr[c] += cursubset[c];
882 }
883 }
884 }
885 }
886 }
887}
905template<typename Sum_, typename Detected_, typename Value_, typename Index_>
910 PerCellQcMetricsResults() = default;
911
912 PerCellQcMetricsResults(const std::size_t nsubsets) :
913 subset_sum(sanisizer::cast<I<decltype(subset_sum.size())> >(nsubsets)),
914 subset_detected(sanisizer::cast<I<decltype(subset_detected.size())> >(nsubsets))
915 {}
924 std::vector<Sum_> sum;
925
930 std::vector<Detected_> detected;
931
936 std::vector<Value_> max_value;
937
943 std::vector<Index_> max_index;
944
950 std::vector<std::vector<Sum_> > subset_sum;
951
957 std::vector<std::vector<Detected_> > subset_detected;
958};
959
1000template<typename Value_, typename Index_, typename Subset_, typename Sum_, typename Detected_>
1003 const std::vector<Subset_>& subsets,
1005 const PerCellQcMetricsOptions& options)
1006{
1007 if (mat.prefer_rows()) {
1008 per_cell_qc_metrics_running(mat, subsets, output, options);
1009 } else {
1010 if (mat.sparse()) {
1011 per_cell_qc_metrics_direct_sparse(mat, subsets, output, options);
1012 } else {
1013 per_cell_qc_metrics_direct_dense(mat, subsets, output, options);
1014 }
1015 }
1016}
1017
1039template<typename Sum_ = double, typename Detected_ = int, typename Value_, typename Index_, typename Subset_>
1042 const std::vector<Subset_>& subsets,
1043 const PerCellQcMetricsOptions& options)
1044{
1047 const auto ncells = mat.ncol();
1048
1049 if (options.compute_sum) {
1051#ifdef SCRAN_QC_TEST_INIT
1052 , SCRAN_QC_TEST_INIT
1053#endif
1054 );
1055 buffers.sum = output.sum.data();
1056 }
1057
1058 if (options.compute_detected) {
1060#ifdef SCRAN_QC_TEST_INIT
1061 , SCRAN_QC_TEST_INIT
1062#endif
1063 );
1064 buffers.detected = output.detected.data();
1065 }
1066
1067 if (options.compute_max_value) {
1069#ifdef SCRAN_QC_TEST_INIT
1070 , SCRAN_QC_TEST_INIT
1071#endif
1072 );
1073 buffers.max_value = output.max_value.data();
1074 }
1075 if (options.compute_max_index) {
1077#ifdef SCRAN_QC_TEST_INIT
1078 , SCRAN_QC_TEST_INIT
1079#endif
1080 );
1081 buffers.max_index = output.max_index.data();
1082 }
1083
1084 const auto nsubsets = subsets.size();
1085
1086 if (options.compute_subset_sum) {
1087 sanisizer::resize(output.subset_sum, nsubsets);
1088 sanisizer::resize(buffers.subset_sum, nsubsets);
1089 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
1091#ifdef SCRAN_QC_TEST_INIT
1092 , SCRAN_QC_TEST_INIT
1093#endif
1094 );
1095 buffers.subset_sum[s] = output.subset_sum[s].data();
1096 }
1097 }
1098
1099 if (options.compute_subset_detected) {
1100 sanisizer::resize(output.subset_detected, nsubsets);
1101 sanisizer::resize(buffers.subset_detected, nsubsets);
1102 for (I<decltype(nsubsets)> s = 0; s < nsubsets; ++s) {
1104#ifdef SCRAN_QC_TEST_INIT
1105 , SCRAN_QC_TEST_INIT
1106#endif
1107 );
1108 buffers.subset_detected[s] = output.subset_detected[s].data();
1109 }
1110 }
1111
1112 per_cell_qc_metrics(mat, subsets, buffers, options);
1113 return output;
1114}
1115
1116}
1117
1118#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual bool is_sparse() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Simple quality control for single-cell data.
Definition adt_quality_control.hpp:22
void per_cell_qc_metrics(const tatami::Matrix< Value_, Index_ > &mat, const std::vector< Subset_ > &subsets, const PerCellQcMetricsBuffers< Sum_, Detected_, Value_, Index_ > &output, const PerCellQcMetricsOptions &options)
Definition per_cell_qc_metrics.hpp:1001
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
int parallelize(Function_ fun, const Index_ tasks, const int workers)
I< decltype(std::declval< Container_ >().size())> cast_Index_to_container_size(const Index_ x)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Buffers for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:97
Value_ * max_value
Definition per_cell_qc_metrics.hpp:127
Index_ * max_index
Definition per_cell_qc_metrics.hpp:134
Sum_ * sum
Definition per_cell_qc_metrics.hpp:115
Detected_ * detected
Definition per_cell_qc_metrics.hpp:121
std::vector< Detected_ * > subset_detected
Definition per_cell_qc_metrics.hpp:150
std::vector< Sum_ * > subset_sum
Definition per_cell_qc_metrics.hpp:142
Options for per_cell_qc_metrics().
Definition per_cell_qc_metrics.hpp:25
bool compute_sum
Definition per_cell_qc_metrics.hpp:30
bool compute_subset_sum
Definition per_cell_qc_metrics.hpp:54
bool compute_max_value
Definition per_cell_qc_metrics.hpp:42
bool subset_containers_have_indices
Definition per_cell_qc_metrics.hpp:77
bool compute_subset_detected
Definition per_cell_qc_metrics.hpp:60
bool compute_detected
Definition per_cell_qc_metrics.hpp:36
bool compute_max_index
Definition per_cell_qc_metrics.hpp:48
int num_threads
Definition per_cell_qc_metrics.hpp:83
Result store for QC metric calculations.
Definition per_cell_qc_metrics.hpp:906
std::vector< Index_ > max_index
Definition per_cell_qc_metrics.hpp:943
std::vector< Value_ > max_value
Definition per_cell_qc_metrics.hpp:936
std::vector< std::vector< Detected_ > > subset_detected
Definition per_cell_qc_metrics.hpp:957
std::vector< Detected_ > detected
Definition per_cell_qc_metrics.hpp:930
std::vector< Sum_ > sum
Definition per_cell_qc_metrics.hpp:924
std::vector< std::vector< Sum_ > > subset_sum
Definition per_cell_qc_metrics.hpp:950
bool sparse_ordered_index