1#ifndef SCRAN_AGGREGATE_AGGREGATE_ACROSS_GENES_HPP
2#define SCRAN_AGGREGATE_AGGREGATE_ACROSS_GENES_HPP
6#include <unordered_set>
11#include "tatami_stats/tatami_stats.hpp"
12#include "sanisizer/sanisizer.hpp"
44template <
typename Sum_>
51 std::vector<Sum_*>
sum;
58template <
typename Sum_>
65 std::vector<std::vector<Sum_> >
sum;
71namespace aggregate_across_genes_internal {
73template<
typename Index_,
typename Gene_,
typename Weight_>
74std::vector<Gene_> create_subset(
const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
const Index_ nrow) {
75 std::unordered_set<Gene_> of_interest;
76 for (
const auto& set : gene_sets) {
77 const auto set_size = std::get<0>(set);
78 const auto set_genes = std::get<1>(set);
79 of_interest.insert(set_genes, set_genes + set_size);
82 std::vector<Index_> subset(of_interest.begin(), of_interest.end());
83 if (!subset.empty()) {
84 std::sort(subset.begin(), subset.end());
85 if (subset.front() < 0 || subset.back() >= nrow) {
86 throw std::runtime_error(
"set indices are out of range");
93template<
typename Index_>
94std::pair<std::vector<Index_>, Index_> create_subset_mapping(
const std::vector<Index_>& subset) {
95 const Index_ offset = subset.front();
96 const Index_ span = subset.back() - offset + 1;
98 const auto nsubs = subset.size();
99 for (
decltype(I(nsubs)) i = 0; i < nsubs; ++i) {
100 mapping[subset[i] - offset] = i;
102 return std::make_pair(std::move(mapping), offset);
105template<
typename Data_,
typename Index_,
typename Gene_,
typename Weight_,
typename Sum_>
106void compute_aggregate_by_column(
108 const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
109 const AggregateAcrossGenesBuffers<Sum_>& buffers,
110 const AggregateAcrossGenesOptions& options)
114 const auto& subset = *subset_of_interest;
115 const Index_ nsubs = subset.size();
118 const auto num_sets = gene_sets.size();
119 auto remapping = sanisizer::create<std::vector<std::pair<std::vector<Index_>,
const Weight_*> > >(num_sets);
121 const auto sub_mapping = create_subset_mapping(subset);
122 const auto& mapping = sub_mapping.first;
123 const Gene_ offset = sub_mapping.second;
125 for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
126 const auto& set = gene_sets[s];
127 const auto set_size = std::get<0>(set);
128 const auto set_genes = std::get<1>(set);
130 auto& remapped = remapping[s].first;
131 remapped.reserve(set_size);
132 for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
133 remapped.push_back(mapping[set_genes[g] - offset]);
135 remapping[s].second = std::get<2>(set);
145 for (Index_ x = start, end = start + length; x < end; ++x) {
146 const auto ptr = ext->fetch(vbuffer.data());
147 for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
148 const auto& set = remapping[s];
152 for (
decltype(I(set.first.size())) i = 0, send = set.first.size(); i < send; ++i) {
153 value += ptr[set.first[i]] * set.second[i];
156 for (
const auto ix : set.first) {
161 buffers.sum[s][x] = value;
165 }, p.
ncol(), options.num_threads);
168template<
typename Data_,
typename Index_,
typename Gene_,
typename Weight_,
typename Sum_>
169void compute_aggregate_by_row(
171 const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
172 const AggregateAcrossGenesBuffers<Sum_>& buffers,
173 const AggregateAcrossGenesOptions& options)
176 const auto subset = create_subset<Index_>(gene_sets, p.
nrow());
177 const Index_ nsubs = subset.size();
178 const auto sub_oracle = std::make_shared<tatami::FixedViewOracle<Index_> >(subset.data(), nsubs);
180 const auto num_sets = gene_sets.size();
183 const auto sub_mapping = create_subset_mapping(subset);
184 const auto& mapping = sub_mapping.first;
185 const Gene_ offset = sub_mapping.second;
187 for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
188 const auto& set = gene_sets[s];
189 const auto set_size = std::get<0>(set);
190 const auto set_genes = std::get<1>(set);
191 const auto set_weights = std::get<2>(set);
194 for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
195 remapping[mapping[set_genes[g] - offset]].emplace_back(s, set_weights[g]);
198 for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
199 remapping[mapping[set_genes[g] - offset]].emplace_back(s, 1);
206 auto get_sum = [&](Index_ i) -> Sum_* {
return buffers.sum[i]; };
207 tatami_stats::LocalOutputBuffers<Sum_,
decltype(I(get_sum))> local_sums(t, num_sets, start, length, std::move(get_sum));
214 for (Index_ sub = 0; sub < nsubs; ++sub) {
215 const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
217 for (
const auto& sw : remapping[sub]) {
218 const auto outptr = local_sums.data(sw.first);
219 const auto wt = sw.second;
220 for (Index_ c = 0; c < range.number; ++c) {
221 outptr[range.index[c] - start] += range.value[c] * wt;
230 for (Index_ sub = 0; sub < nsubs; ++sub) {
231 const auto ptr = ext->fetch(vbuffer.data());
232 for (
const auto& sw : remapping[sub]) {
233 const auto outptr = local_sums.data(sw.first);
234 const auto wt = sw.second;
235 for (Index_ cell = 0; cell < length; ++cell) {
236 outptr[cell] += ptr[cell] * wt;
242 local_sums.transfer();
243 }, p.
ncol(), options.num_threads);
273template<
typename Data_,
typename Index_,
typename Gene_,
typename Weight_,
typename Sum_>
276 const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
281 aggregate_across_genes_internal::compute_aggregate_by_row(input, gene_sets, buffers, options);
283 aggregate_across_genes_internal::compute_aggregate_by_column(input, gene_sets, buffers, options);
287 const auto nsets = gene_sets.size();
289 const Index_ NC = input.
ncol();
290 for (Index_ s = start, end = start + length; s < end; ++s) {
291 const auto& set = gene_sets[s];
292 const auto set_size = std::get<0>(set);
295 const auto set_weights = std::get<2>(set);
297 denom = std::accumulate(set_weights, set_weights + set_size,
static_cast<Sum_
>(0));
302 const auto current = buffers.
sum[s];
303 for (Index_ c = 0; c < NC; ++c) {
330template<
typename Sum_ =
double,
typename Data_,
typename Index_,
typename Gene_,
typename Weight_>
333 const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
339 const Index_ NC = input.
ncol();
340 const auto nsets = gene_sets.size();
341 sanisizer::resize(output.
sum, nsets);
342 sanisizer::resize(buffers.
sum, nsets);
344 for (
decltype(I(nsets)) s = 0; s < nsets; ++s) {
348#ifdef SCRAN_AGGREGATE_TEST_INIT
349 , SCRAN_AGGREGATE_TEST_INIT
352 buffers.
sum[s] = output.
sum[s].data();
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:20
void aggregate_across_genes(const tatami::Matrix< Data_, Index_ > &input, const std::vector< std::tuple< std::size_t, const Gene_ *, const Weight_ * > > &gene_sets, const AggregateAcrossGenesBuffers< Sum_ > &buffers, const AggregateAcrossGenesOptions &options)
Definition aggregate_across_genes.hpp:274
std::shared_ptr< const std::vector< Index_ > > VectorPtr
void parallelize(Function_ fun, Index_ tasks, int threads)
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto new_extractor(const Matrix< Value_, Index_ > &matrix, bool row, MaybeOracle< oracle_, Index_ > oracle, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Buffers for aggregate_across_genes().
Definition aggregate_across_genes.hpp:45
std::vector< Sum_ * > sum
Definition aggregate_across_genes.hpp:51
Options for aggregate_across_genes().
Definition aggregate_across_genes.hpp:26
bool average
Definition aggregate_across_genes.hpp:37
int num_threads
Definition aggregate_across_genes.hpp:31
Results of aggregate_across_genes().
Definition aggregate_across_genes.hpp:59
std::vector< std::vector< Sum_ > > sum
Definition aggregate_across_genes.hpp:65