scran_aggregate
Aggregate expression values across cells
Loading...
Searching...
No Matches
aggregate_across_cells.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
2#define SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
3
4#include <algorithm>
5#include <vector>
6#include <cstddef>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10#include "tatami_stats/tatami_stats.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "utils.hpp"
14
20namespace scran_aggregate {
21
/**
 * @brief Options for `aggregate_across_cells()`.
 */
struct AggregateAcrossCellsOptions {
    /**
     * Whether to compute the per-group sums.
     * The allocating overload of `aggregate_across_cells()` only sets up the sum buffers when this is `true`.
     */
    bool compute_sums = true;

    /**
     * Whether to compute the per-group number of detected (i.e., positive) values.
     * The allocating overload of `aggregate_across_cells()` only sets up the detected buffers when this is `true`.
     */
    bool compute_detected = true;

    /**
     * Number of threads, forwarded as the last argument of `tatami::parallelize()`.
     */
    int num_threads = 1;
};
44
/**
 * @brief Buffers for `aggregate_across_cells()`.
 *
 * @tparam Sum_ Type of the per-group sums.
 * @tparam Detected_ Type of the per-group counts of detected values.
 */
template <typename Sum_, typename Detected_>
struct AggregateAcrossCellsBuffers {
    /**
     * One pointer per group; `sums[g][r]` receives the sum of row `r` over all columns assigned to group `g`.
     * Each pointer must therefore address an array with at least as many entries as the matrix has rows,
     * and the vector must have more entries than the largest group identifier.
     * An empty vector skips the computation of the sums altogether.
     */
    std::vector<Sum_*> sums;

    /**
     * One pointer per group; `detected[g][r]` receives the number of values greater than zero
     * in row `r` over all columns assigned to group `g`.
     * The same size requirements as for `sums` apply.
     * An empty vector skips the computation of the detected counts altogether.
     */
    std::vector<Detected_*> detected;
};
71
// Results of aggregate_across_cells().
// NOTE(review): the struct declaration line (listing line 78) is absent from this listing,
// so the type's name is not visible here - restore it from the original header.
//
// Sum_: element type of the per-group sums.
// Detected_: element type of the per-group detected counts.
77template <typename Sum_, typename Detected_>
// Nested vector of sums; presumably one inner vector per group with one entry per row,
// matching how the allocating overload sizes `output.sums` - TODO confirm against the header.
86 std::vector<std::vector<Sum_> > sums;
87
// Nested vector of detected counts; presumably laid out like `sums` - TODO confirm.
95 std::vector<std::vector<Detected_> > detected;
96};
97
// Row-wise aggregation: for every row, accumulate per-group sums and per-group counts of
// positive values, then write them to buffers.sums[group][row] / buffers.detected[group][row].
//
// sparse_: selects the sparse or dense extraction path at compile time.
// group: array of group identifiers indexed by column; each value is used as an index into
//        the per-group accumulators.
// options: only num_threads is read here.
//
// NOTE(review): listing lines 103, 105, 119 and 122 are absent from this listing. The
// declarations of `p`, `buffers` and `vbuffer`, as well as the sparse initializer of `ibuffer`,
// are therefore not visible below and must be taken from the original header.
101template<bool sparse_, typename Data_, typename Index_, typename Group_, typename Sum_, typename Detected_>
102void aggregate_across_cells_by_row(
104 const Group_* const group,
106 const AggregateAcrossCellsOptions& options)
107{
108 tatami::Options opt;
// Ordered sparse indices are not requested; the accumulation below does not depend on element order.
109 opt.sparse_ordered_index = false;
110
// Work is split over rows (p.nrow() tasks); `s` and `l` are the start and length of this worker's row range.
111 tatami::parallelize([&](const int, const Index_ s, const Index_ l) -> void {
// Extractor created with row = true over the rows [s, s + l).
112 auto ext = tatami::consecutive_extractor<sparse_>(p, true, s, l, opt);
// Scratch accumulators local to this lambda invocation, one entry per output buffer.
113 const auto nsums = buffers.sums.size();
114 auto tmp_sums = sanisizer::create<std::vector<Sum_> >(nsums);
115 const auto ndetected = buffers.detected.size();
116 auto tmp_detected = sanisizer::create<std::vector<Detected_> >(ndetected);
117
118 const auto NC = p.ncol();
// Index buffer for sparse extraction; the dense path only needs a placeholder value.
120 auto ibuffer = [&]{
121 if constexpr(sparse_) {
123 } else {
124 return false;
125 }
126 }();
127
128 for (Index_ x = s, end = s + l; x < end; ++x) {
// Fetch the next row, as a sparse range (value/index/number) or as a dense pointer.
129 const auto row = [&]{
130 if constexpr(sparse_) {
131 return ext->fetch(vbuffer.data(), ibuffer.data());
132 } else {
133 return ext->fetch(vbuffer.data());
134 }
135 }();
136
// Sums are skipped entirely when no sum buffers were supplied.
137 if (nsums) {
138 std::fill(tmp_sums.begin(), tmp_sums.end(), 0);
139
140 if constexpr(sparse_) {
// row.index[j] is the column of the j-th stored value, which selects its group.
141 for (Index_ j = 0; j < row.number; ++j) {
142 tmp_sums[group[row.index[j]]] += row.value[j];
143 }
144 } else {
145 for (Index_ j = 0; j < NC; ++j) {
146 tmp_sums[group[j]] += row[j];
147 }
148 }
149
150 // Computing before transferring for more cache-friendliness.
// Note: this `l` is the group index and shadows the lambda's `l` (range length) inside the loop.
151 for (I<decltype(nsums)> l = 0; l < nsums; ++l) {
152 buffers.sums[l][x] = tmp_sums[l];
153 }
154 }
155
// Detected counts are skipped entirely when no detected buffers were supplied.
156 if (ndetected) {
157 std::fill(tmp_detected.begin(), tmp_detected.end(), 0);
158
159 if constexpr(sparse_) {
// A value counts as detected only if it is strictly greater than zero.
160 for (Index_ j = 0; j < row.number; ++j) {
161 tmp_detected[group[row.index[j]]] += (row.value[j] > 0);
162 }
163 } else {
164 for (Index_ j = 0; j < NC; ++j) {
165 tmp_detected[group[j]] += (row[j] > 0);
166 }
167 }
168
169 for (I<decltype(ndetected)> l = 0; l < ndetected; ++l) {
170 buffers.detected[l][x] = tmp_detected[l];
171 }
172 }
173 }
174 }, p.nrow(), options.num_threads);
175}
176
// Column-wise aggregation: each worker handles a block of rows, walks over all columns, and adds
// every column's values for that row block into the buffer of the column's group.
//
// sparse_: selects the sparse or dense extraction path at compile time.
// group: array of group identifiers indexed by column.
// buffers: per-group output pointers for the sums and detected counts.
// options: only num_threads is read here.
//
// NOTE(review): listing lines 179, 190 and 193 are absent from this listing. The declarations
// of `p` and `vbuffer`, as well as the sparse initializer of `ibuffer`, are therefore not visible
// below and must be taken from the original header.
177template<bool sparse_, typename Data_, typename Index_, typename Group_, typename Sum_, typename Detected_>
178void aggregate_across_cells_by_column(
180 const Group_* const group,
181 const AggregateAcrossCellsBuffers<Sum_, Detected_>& buffers,
182 const AggregateAcrossCellsOptions& options)
183{
184 tatami::Options opt;
// Ordered sparse indices are not requested; the accumulation below does not depend on element order.
185 opt.sparse_ordered_index = false;
186
// Work is split over rows (p.nrow() tasks): `t` is the first lambda argument (presumably the
// thread index - TODO confirm), `start`/`length` describe this worker's row block.
187 tatami::parallelize([&](const int t, const Index_ start, const Index_ length) -> void {
188 const auto NC = p.ncol();
// Extractor created with row = false, iterating over columns [0, NC); `start` and `length` are
// forwarded as well, presumably to restrict each column to this worker's row block - TODO confirm.
189 auto ext = tatami::consecutive_extractor<sparse_>(p, false, static_cast<Index_>(0), NC, start, length, opt);
// Index buffer for sparse extraction; the dense path only needs a placeholder value.
191 auto ibuffer = [&]{
192 if constexpr(sparse_) {
194 } else {
195 return false;
196 }
197 }();
198
// Per-worker views of the output buffers; data(g) yields the accumulation target for group g and
// transfer() is called once at the end (presumably to copy local results back - TODO confirm).
// `I<...>` is not defined in this listing; presumably a type helper from utils.hpp.
199 const auto num_sums = buffers.sums.size();
200 auto get_sum = [&](Index_ i) -> Sum_* { return buffers.sums[i]; };
201 tatami_stats::LocalOutputBuffers<Sum_, I<decltype(get_sum)>> local_sums(t, num_sums, start, length, std::move(get_sum));
202
203 const auto num_detected = buffers.detected.size();
204 auto get_detected = [&](Index_ i) -> Detected_* { return buffers.detected[i]; };
205 tatami_stats::LocalOutputBuffers<Detected_, I<decltype(get_detected)>> local_detected(t, num_detected, start, length, std::move(get_detected));
206
207 for (Index_ x = 0; x < NC; ++x) {
// Group of the current column.
208 const auto current = group[x];
209
210 if constexpr(sparse_) {
211 const auto col = ext->fetch(vbuffer.data(), ibuffer.data());
212 if (num_sums) {
213 const auto cursum = local_sums.data(current);
// Indices are shifted by `start` so that they are relative to this worker's row block.
214 for (Index_ i = 0; i < col.number; ++i) {
215 cursum[col.index[i] - start] += col.value[i];
216 }
217 }
218 if (num_detected) {
219 const auto curdetected = local_detected.data(current);
// A value counts as detected only if it is strictly greater than zero.
220 for (Index_ i = 0; i < col.number; ++i) {
221 curdetected[col.index[i] - start] += (col.value[i] > 0);
222 }
223 }
224
225 } else {
226 const auto col = ext->fetch(vbuffer.data());
227 if (num_sums) {
228 const auto cursum = local_sums.data(current);
// Dense values are already relative to the row block, so no offset is needed.
229 for (Index_ i = 0; i < length; ++i) {
230 cursum[i] += col[i];
231 }
232 }
233 if (num_detected) {
234 const auto curdetected = local_detected.data(current);
235 for (Index_ i = 0; i < length; ++i) {
236 curdetected[i] += (col[i] > 0);
237 }
238 }
239 }
240 }
241
242 local_sums.transfer();
243 local_detected.transfer();
244 }, p.nrow(), options.num_threads);
245}
268template<typename Data_, typename Index_, typename Group_, typename Sum_, typename Detected_>
271 const Group_* const group,
273 const AggregateAcrossCellsOptions& options)
274{
275 if (input.prefer_rows()) {
276 if (input.sparse()) {
277 aggregate_across_cells_by_row<true>(input, group, buffers, options);
278 } else {
279 aggregate_across_cells_by_row<false>(input, group, buffers, options);
280 }
281 } else {
282 if (input.sparse()) {
283 aggregate_across_cells_by_column<true>(input, group, buffers, options);
284 } else {
285 aggregate_across_cells_by_column<false>(input, group, buffers, options);
286 }
287 }
288}
289
// Allocating convenience overload: sizes the result vectors, points the buffers at them,
// delegates to the buffer-filling aggregate_across_cells() and returns the results.
//
// Sum_ / Detected_: element types of the results, defaulting to double and int.
// group: array of group identifiers indexed by column of `input`.
// options: compute_sums / compute_detected decide which results are allocated and computed.
//
// NOTE(review): listing lines 308-309 and 323-324 are absent from this listing. The return
// type, the function name, the `input` parameter and the declarations of `output` and
// `buffers` are therefore not visible below and must be taken from the original header.
307template<typename Sum_ = double, typename Detected_ = int, typename Data_, typename Index_, typename Group_>
310 const Group_* const group,
311 const AggregateAcrossCellsOptions& options)
312{
313 const Index_ NR = input.nrow();
314 const Index_ NC = input.ncol();
// Number of groups is the largest identifier plus one, or zero when there are no columns
// (max_element must not be dereferenced on an empty range).
315 const std::size_t ngroups = [&]{
316 if (NC) {
317 return sanisizer::sum<std::size_t>(*std::max_element(group, group + NC), 1);
318 } else {
319 return static_cast<std::size_t>(0);
320 }
321 }();
322
325
326 if (options.compute_sums) {
327 sanisizer::resize(output.sums, ngroups);
328 sanisizer::resize(buffers.sums, ngroups);
// One vector of NR entries per group; the buffer pointer aliases the vector's storage.
329 for (I<decltype(ngroups)> l = 0; l < ngroups; ++l) {
330 auto& cursum = output.sums[l];
331 tatami::resize_container_to_Index_size<I<decltype(cursum)>>(cursum, NR
// When SCRAN_AGGREGATE_TEST_INIT is defined, its value is passed as an extra resize argument.
332#ifdef SCRAN_AGGREGATE_TEST_INIT
333 , SCRAN_AGGREGATE_TEST_INIT
334#endif
335 );
336 buffers.sums[l] = cursum.data();
337 }
338 }
339
340 if (options.compute_detected) {
341 sanisizer::resize(output.detected, ngroups);
342 sanisizer::resize(buffers.detected, ngroups);
// Same layout as for the sums: one vector of NR entries per group.
343 for (I<decltype(ngroups)> l = 0; l < ngroups; ++l) {
344 auto& curdet = output.detected[l];
345 tatami::resize_container_to_Index_size<I<decltype(curdet)>>(curdet, NR
346#ifdef SCRAN_AGGREGATE_TEST_INIT
347 , SCRAN_AGGREGATE_TEST_INIT
348#endif
349 );
350 buffers.detected[l] = curdet.data();
351 }
352 }
353
// Buffers left empty (option disabled) cause the corresponding statistic to be skipped.
354 aggregate_across_cells(input, group, buffers, options);
355 return output;
356}
357
358}
359
360#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:20
void aggregate_across_cells(const tatami::Matrix< Data_, Index_ > &input, const Group_ *const group, const AggregateAcrossCellsBuffers< Sum_, Detected_ > &buffers, const AggregateAcrossCellsOptions &options)
Definition aggregate_across_cells.hpp:269
void parallelize(Function_ fun, const Index_ tasks, const int threads)
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Buffers for aggregate_across_cells().
Definition aggregate_across_cells.hpp:51
std::vector< Detected_ * > detected
Definition aggregate_across_cells.hpp:68
std::vector< Sum_ * > sums
Definition aggregate_across_cells.hpp:59
Options for aggregate_across_cells().
Definition aggregate_across_cells.hpp:25
int num_threads
Definition aggregate_across_cells.hpp:42
bool compute_detected
Definition aggregate_across_cells.hpp:36
bool compute_sums
Definition aggregate_across_cells.hpp:30
Results of aggregate_across_cells().
Definition aggregate_across_cells.hpp:78
std::vector< std::vector< Detected_ > > detected
Definition aggregate_across_cells.hpp:95
std::vector< std::vector< Sum_ > > sums
Definition aggregate_across_cells.hpp:86
bool sparse_ordered_index