scran_aggregate
Aggregate expression values across cells
Loading...
Searching...
No Matches
aggregate_across_cells.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
2#define SCRAN_AGGREGATE_AGGREGATE_ACROSS_CELLS_HPP
3
4#include <algorithm>
5#include <vector>
6#include "tatami/tatami.hpp"
7
13namespace scran_aggregate {
14
23 bool compute_sums = true;
24
29 bool compute_detected = true;
30
35 int num_threads = 1;
36};
37
43template <typename Sum_, typename Detected_>
52 std::vector<Sum_*> sums;
53
61 std::vector<Detected_*> detected;
62
63};
64
70template <typename Sum_, typename Detected_>
79 std::vector<std::vector<Sum_> > sums;
80
88 std::vector<std::vector<Detected_> > detected;
89};
90
94namespace internal {
95
96template<bool sparse_, typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
97void compute_aggregate_by_row(
99 const Factor_* factor,
101 const AggregateAcrossCellsOptions& options)
102{
103 tatami::Options opt;
104 opt.sparse_ordered_index = false;
105
106 tatami::parallelize([&](size_t, Index_ s, Index_ l) {
107 auto ext = tatami::consecutive_extractor<sparse_>(&p, true, s, l, opt);
108 size_t nsums = buffers.sums.size();
109 std::vector<Sum_> tmp_sums(nsums);
110 size_t ndetected = buffers.detected.size();
111 std::vector<Detected_> tmp_detected(ndetected);
112
113 auto NC = p.ncol();
114 std::vector<Data_> vbuffer(NC);
115 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(NC);
116
117 for (Index_ x = s, end = s + l; x < end; ++x) {
118 auto row = [&]() {
119 if constexpr(sparse_) {
120 return ext->fetch(vbuffer.data(), ibuffer.data());
121 } else {
122 return ext->fetch(vbuffer.data());
123 }
124 }();
125
126 if (nsums) {
127 std::fill(tmp_sums.begin(), tmp_sums.end(), 0);
128
129 if constexpr(sparse_) {
130 for (Index_ j = 0; j < row.number; ++j) {
131 tmp_sums[factor[row.index[j]]] += row.value[j];
132 }
133 } else {
134 for (Index_ j = 0; j < NC; ++j) {
135 tmp_sums[factor[j]] += row[j];
136 }
137 }
138
139 // Computing before transferring for more cache-friendliness.
140 for (size_t l = 0; l < nsums; ++l) {
141 buffers.sums[l][x] = tmp_sums[l];
142 }
143 }
144
145 if (ndetected) {
146 std::fill(tmp_detected.begin(), tmp_detected.end(), 0);
147
148 if constexpr(sparse_) {
149 for (Index_ j = 0; j < row.number; ++j) {
150 tmp_detected[factor[row.index[j]]] += (row.value[j] > 0);
151 }
152 } else {
153 for (Index_ j = 0; j < NC; ++j) {
154 tmp_detected[factor[j]] += (row[j] > 0);
155 }
156 }
157
158 for (size_t l = 0; l < ndetected; ++l) {
159 buffers.detected[l][x] = tmp_detected[l];
160 }
161 }
162 }
163 }, p.nrow(), options.num_threads);
164}
165
166template<bool sparse_, typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
167void compute_aggregate_by_column(
169 const Factor_* factor,
170 const AggregateAcrossCellsBuffers<Sum_, Detected_>& buffers,
171 const AggregateAcrossCellsOptions& options)
172{
173 tatami::Options opt;
174 opt.sparse_ordered_index = false;
175
176 tatami::parallelize([&](size_t, Index_ s, Index_ l) {
177 auto NC = p.ncol();
178 auto ext = tatami::consecutive_extractor<sparse_>(&p, false, static_cast<Index_>(0), NC, s, l, opt);
179 std::vector<Data_> vbuffer(l);
180 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(l);
181
182 for (Index_ x = 0; x < NC; ++x) {
183 auto current = factor[x];
184
185 if constexpr(sparse_) {
186 auto col = ext->fetch(vbuffer.data(), ibuffer.data());
187 if (buffers.sums.size()) {
188 auto& cursum = buffers.sums[current];
189 for (Index_ i = 0; i < col.number; ++i) {
190 cursum[col.index[i]] += col.value[i];
191 }
192 }
193
194 if (buffers.detected.size()) {
195 auto& curdetected = buffers.detected[current];
196 for (Index_ i = 0; i < col.number; ++i) {
197 curdetected[col.index[i]] += (col.value[i] > 0);
198 }
199 }
200
201 } else {
202 auto col = ext->fetch(vbuffer.data());
203
204 if (buffers.sums.size()) {
205 auto cursum = buffers.sums[current] + s;
206 for (Index_ i = 0; i < l; ++i) {
207 cursum[i] += col[i];
208 }
209 }
210
211 if (buffers.detected.size()) {
212 auto curdetected = buffers.detected[current] + s;
213 for (Index_ i = 0; i < l; ++i) {
214 curdetected[i] += (col[i] > 0);
215 }
216 }
217 }
218 }
219 }, p.nrow(), options.num_threads);
220}
221
222}
246template<typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
249 const Factor_* factor,
251 const AggregateAcrossCellsOptions& options)
252{
253 if (input.prefer_rows()) {
254 if (input.sparse()) {
255 internal::compute_aggregate_by_row<true>(input, factor, buffers, options);
256 } else {
257 internal::compute_aggregate_by_row<false>(input, factor, buffers, options);
258 }
259 } else {
260 if (input.sparse()) {
261 internal::compute_aggregate_by_column<true>(input, factor, buffers, options);
262 } else {
263 internal::compute_aggregate_by_column<false>(input, factor, buffers, options);
264 }
265 }
266}
267
285template<typename Sum_ = double, typename Detected_ = int, typename Data_, typename Index_, typename Factor_>
288 const Factor_* factor,
289 const AggregateAcrossCellsOptions& options)
290{
291 size_t NC = input.ncol();
292 size_t nlevels = (NC ? *std::max_element(factor, factor + NC) + 1 : 0);
293 size_t ngenes = input.nrow();
294
297
298 if (options.compute_sums) {
299 output.sums.resize(nlevels, std::vector<Sum_>(ngenes));
300 buffers.sums.resize(nlevels);
301 for (size_t l = 0; l < nlevels; ++l) {
302 buffers.sums[l] = output.sums[l].data();
303 }
304 }
305
306 if (options.compute_detected) {
307 output.detected.resize(nlevels, std::vector<Detected_>(ngenes));
308 buffers.detected.resize(nlevels);
309 for (size_t l = 0; l < nlevels; ++l) {
310 buffers.detected[l] = output.detected[l].data();
311 }
312 }
313
314 aggregate_across_cells(input, factor, buffers, options);
315 return output;
316}
317
318}
319
320#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:13
void aggregate_across_cells(const tatami::Matrix< Data_, Index_ > &input, const Factor_ *factor, const AggregateAcrossCellsBuffers< Sum_, Detected_ > &buffers, const AggregateAcrossCellsOptions &options)
Definition aggregate_across_cells.hpp:247
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Buffers for aggregate_across_cells().
Definition aggregate_across_cells.hpp:44
std::vector< Detected_ * > detected
Definition aggregate_across_cells.hpp:61
std::vector< Sum_ * > sums
Definition aggregate_across_cells.hpp:52
Options for aggregate_across_cells().
Definition aggregate_across_cells.hpp:18
int num_threads
Definition aggregate_across_cells.hpp:35
bool compute_detected
Definition aggregate_across_cells.hpp:29
bool compute_sums
Definition aggregate_across_cells.hpp:23
Results of aggregate_across_cells().
Definition aggregate_across_cells.hpp:71
std::vector< std::vector< Detected_ > > detected
Definition aggregate_across_cells.hpp:88
std::vector< std::vector< Sum_ > > sums
Definition aggregate_across_cells.hpp:79
bool sparse_ordered_index