scran_aggregate
Aggregate expression values across cells
Loading...
Searching...
No Matches
aggregate_across_cells.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
2#define SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
3
4#include <algorithm>
5#include <vector>
6#include <cstddef>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12
18namespace scran_aggregate {
19
28 bool compute_sums = true;
29
34 bool compute_detected = true;
35
40 int num_threads = 1;
41};
42
48template <typename Sum_, typename Detected_>
57 std::vector<Sum_*> sums;
58
66 std::vector<Detected_*> detected;
67
68};
69
75template <typename Sum_, typename Detected_>
84 std::vector<std::vector<Sum_> > sums;
85
93 std::vector<std::vector<Detected_> > detected;
94};
95
99namespace internal {
100
101template<bool sparse_, typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
102void compute_aggregate_by_row(
104 const Factor_* factor,
106 const AggregateAcrossCellsOptions& options)
107{
108 tatami::Options opt;
109 opt.sparse_ordered_index = false;
110
111 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
112 auto ext = tatami::consecutive_extractor<sparse_>(&p, true, s, l, opt);
113 auto nsums = buffers.sums.size();
114 auto tmp_sums = sanisizer::create<std::vector<Sum_> >(nsums);
115 auto ndetected = buffers.detected.size();
116 auto tmp_detected = sanisizer::create<std::vector<Detected_> >(ndetected);
117
118 auto NC = p.ncol();
120 auto ibuffer = [&]{
121 if constexpr(sparse_) {
123 } else {
124 return false;
125 }
126 }();
127
128 for (Index_ x = s, end = s + l; x < end; ++x) {
129 auto row = [&]{
130 if constexpr(sparse_) {
131 return ext->fetch(vbuffer.data(), ibuffer.data());
132 } else {
133 return ext->fetch(vbuffer.data());
134 }
135 }();
136
137 if (nsums) {
138 std::fill(tmp_sums.begin(), tmp_sums.end(), 0);
139
140 if constexpr(sparse_) {
141 for (Index_ j = 0; j < row.number; ++j) {
142 tmp_sums[factor[row.index[j]]] += row.value[j];
143 }
144 } else {
145 for (Index_ j = 0; j < NC; ++j) {
146 tmp_sums[factor[j]] += row[j];
147 }
148 }
149
150 // Computing before transferring for more cache-friendliness.
151 for (decltype(nsums) l = 0; l < nsums; ++l) {
152 buffers.sums[l][x] = tmp_sums[l];
153 }
154 }
155
156 if (ndetected) {
157 std::fill(tmp_detected.begin(), tmp_detected.end(), 0);
158
159 if constexpr(sparse_) {
160 for (Index_ j = 0; j < row.number; ++j) {
161 tmp_detected[factor[row.index[j]]] += (row.value[j] > 0);
162 }
163 } else {
164 for (Index_ j = 0; j < NC; ++j) {
165 tmp_detected[factor[j]] += (row[j] > 0);
166 }
167 }
168
169 for (decltype(ndetected) l = 0; l < ndetected; ++l) {
170 buffers.detected[l][x] = tmp_detected[l];
171 }
172 }
173 }
174 }, p.nrow(), options.num_threads);
175}
176
177template<bool sparse_, typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
178void compute_aggregate_by_column(
180 const Factor_* factor,
181 const AggregateAcrossCellsBuffers<Sum_, Detected_>& buffers,
182 const AggregateAcrossCellsOptions& options)
183{
184 tatami::Options opt;
185 opt.sparse_ordered_index = false;
186
187 tatami::parallelize([&](int t, Index_ start, Index_ length) -> void {
188 auto NC = p.ncol();
189 auto ext = tatami::consecutive_extractor<sparse_>(&p, false, static_cast<Index_>(0), NC, start, length, opt);
191 auto ibuffer = [&]{
192 if constexpr(sparse_) {
194 } else {
195 return false;
196 }
197 }();
198
199 auto num_sums = buffers.sums.size();
200 auto get_sum = [&](Index_ i) -> Sum_* { return buffers.sums[i]; };
201 tatami_stats::LocalOutputBuffers<Sum_, decltype(get_sum)> local_sums(t, num_sums, start, length, std::move(get_sum));
202 auto get_detected = [&](Index_ i) -> Detected_* { return buffers.detected[i]; };
203 auto num_detected = buffers.detected.size();
204 tatami_stats::LocalOutputBuffers<Detected_, decltype(get_detected)> local_detected(t, num_detected, start, length, std::move(get_detected));
205
206 for (Index_ x = 0; x < NC; ++x) {
207 auto current = factor[x];
208
209 if constexpr(sparse_) {
210 auto col = ext->fetch(vbuffer.data(), ibuffer.data());
211 if (num_sums) {
212 auto cursum = local_sums.data(current);
213 for (Index_ i = 0; i < col.number; ++i) {
214 cursum[col.index[i] - start] += col.value[i];
215 }
216 }
217 if (num_detected) {
218 auto curdetected = local_detected.data(current);
219 for (Index_ i = 0; i < col.number; ++i) {
220 curdetected[col.index[i] - start] += (col.value[i] > 0);
221 }
222 }
223
224 } else {
225 auto col = ext->fetch(vbuffer.data());
226 if (num_sums) {
227 auto cursum = local_sums.data(current);
228 for (Index_ i = 0; i < length; ++i) {
229 cursum[i] += col[i];
230 }
231 }
232 if (num_detected) {
233 auto curdetected = local_detected.data(current);
234 for (Index_ i = 0; i < length; ++i) {
235 curdetected[i] += (col[i] > 0);
236 }
237 }
238 }
239 }
240
241 local_sums.transfer();
242 local_detected.transfer();
243 }, p.nrow(), options.num_threads);
244}
245
246}
270template<typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
273 const Factor_* factor,
275 const AggregateAcrossCellsOptions& options)
276{
277 if (input.prefer_rows()) {
278 if (input.sparse()) {
279 internal::compute_aggregate_by_row<true>(input, factor, buffers, options);
280 } else {
281 internal::compute_aggregate_by_row<false>(input, factor, buffers, options);
282 }
283 } else {
284 if (input.sparse()) {
285 internal::compute_aggregate_by_column<true>(input, factor, buffers, options);
286 } else {
287 internal::compute_aggregate_by_column<false>(input, factor, buffers, options);
288 }
289 }
290}
291
309template<typename Sum_ = double, typename Detected_ = int, typename Data_, typename Index_, typename Factor_>
312 const Factor_* factor,
313 const AggregateAcrossCellsOptions& options)
314{
315 Index_ NR = input.nrow();
316 Index_ NC = input.ncol();
317 std::size_t nlevels = [&]{
318 if (NC) {
319 return sanisizer::sum<std::size_t>(*std::max_element(factor, factor + NC), 1);
320 } else {
321 return static_cast<std::size_t>(0);
322 }
323 }();
324
327
328 if (options.compute_sums) {
329 output.sums.resize(
330 sanisizer::cast<decltype(output.sums.size())>(nlevels),
331 tatami::create_container_of_Index_size<std::vector<Sum_> >(NR
332#ifdef SCRAN_AGGREGATE_TEST_INIT
333 , SCRAN_AGGREGATE_TEST_INIT
334#endif
335 )
336 );
337 buffers.sums.resize(
338 sanisizer::cast<decltype(buffers.sums.size())>(nlevels)
339 );
340 for (decltype(nlevels) l = 0; l < nlevels; ++l) {
341 buffers.sums[l] = output.sums[l].data();
342 }
343 }
344
345 if (options.compute_detected) {
346 output.detected.resize(
347 sanisizer::cast<decltype(output.detected.size())>(nlevels),
348 tatami::create_container_of_Index_size<std::vector<Detected_> >(NR
349#ifdef SCRAN_AGGREGATE_TEST_INIT
350 , SCRAN_AGGREGATE_TEST_INIT
351#endif
352 )
353 );
354 buffers.detected.resize(
355 sanisizer::cast<decltype(buffers.detected.size())>(nlevels)
356 );
357 for (decltype(nlevels) l = 0; l < nlevels; ++l) {
358 buffers.detected[l] = output.detected[l].data();
359 }
360 }
361
362 aggregate_across_cells(input, factor, buffers, options);
363 return output;
364}
365
366}
367
368#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:18
void aggregate_across_cells(const tatami::Matrix< Data_, Index_ > &input, const Factor_ *factor, const AggregateAcrossCellsBuffers< Sum_, Detected_ > &buffers, const AggregateAcrossCellsOptions &options)
Definition aggregate_across_cells.hpp:271
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Buffers for aggregate_across_cells().
Definition aggregate_across_cells.hpp:49
std::vector< Detected_ * > detected
Definition aggregate_across_cells.hpp:66
std::vector< Sum_ * > sums
Definition aggregate_across_cells.hpp:57
Options for aggregate_across_cells().
Definition aggregate_across_cells.hpp:23
int num_threads
Definition aggregate_across_cells.hpp:40
bool compute_detected
Definition aggregate_across_cells.hpp:34
bool compute_sums
Definition aggregate_across_cells.hpp:28
Results of aggregate_across_cells().
Definition aggregate_across_cells.hpp:76
std::vector< std::vector< Detected_ > > detected
Definition aggregate_across_cells.hpp:93
std::vector< std::vector< Sum_ > > sums
Definition aggregate_across_cells.hpp:84
bool sparse_ordered_index