1#ifndef SCRAN_AGGREGATE_AGGREGATE_ACROSS_GENES_HPP 
    2#define SCRAN_AGGREGATE_AGGREGATE_ACROSS_GENES_HPP 
    6#include <unordered_set> 
   11#include "tatami_stats/tatami_stats.hpp" 
   12#include "sanisizer/sanisizer.hpp" 
   44template <
typename Sum_>
 
   51    std::vector<Sum_*> 
sum;
 
 
   58template <
typename Sum_>
 
   65    std::vector<std::vector<Sum_> > 
sum;
 
 
   71namespace aggregate_across_genes_internal {
 
   73template<
typename Index_, 
typename Gene_, 
typename Weight_>
 
   74std::vector<Gene_> create_subset(
const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets, 
const Index_ nrow) {
 
   75    std::unordered_set<Gene_> of_interest;
 
   76    for (
const auto& set : gene_sets) {
 
   77        const auto set_size = std::get<0>(set);
 
   78        const auto set_genes = std::get<1>(set);
 
   79        of_interest.insert(set_genes, set_genes + set_size);
 
   82    std::vector<Index_> subset(of_interest.begin(), of_interest.end());
 
   83    if (!subset.empty()) {
 
   84        std::sort(subset.begin(), subset.end());
 
   85        if (subset.front() < 0 || subset.back() >= nrow) {
 
   86            throw std::runtime_error(
"set indices are out of range");
 
   93template<
typename Index_>
 
   94std::pair<std::vector<Index_>, Index_> create_subset_mapping(
const std::vector<Index_>& subset) {
 
   95    const Index_ offset = subset.front();
 
   96    const Index_ span = subset.back() - offset + 1;
 
   98    const auto nsubs = subset.size();
 
   99    for (
decltype(I(nsubs)) i = 0; i < nsubs; ++i) {
 
  100        mapping[subset[i] - offset] = i;
 
  102    return std::make_pair(std::move(mapping), offset);
 
  105template<
typename Data_, 
typename Index_, 
typename Gene_, 
typename Weight_, 
typename Sum_>
 
  106void compute_aggregate_by_column(
 
  108    const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
 
  109    const AggregateAcrossGenesBuffers<Sum_>& buffers,
 
  110    const AggregateAcrossGenesOptions& options)
 
  114    const auto& subset = *subset_of_interest;
 
  115    const Index_ nsubs = subset.size();
 
  118    const auto num_sets = gene_sets.size();
 
  119    auto remapping = sanisizer::create<std::vector<std::pair<std::vector<Index_>, 
const Weight_*> > >(num_sets);
 
  121        const auto sub_mapping = create_subset_mapping(subset);
 
  122        const auto& mapping = sub_mapping.first;
 
  123        const Gene_ offset = sub_mapping.second;
 
  125        for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
 
  126            const auto& set = gene_sets[s];
 
  127            const auto set_size = std::get<0>(set);
 
  128            const auto set_genes = std::get<1>(set);
 
  130            auto& remapped = remapping[s].first;
 
  131            remapped.reserve(set_size);
 
  132            for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
 
  133                remapped.push_back(mapping[set_genes[g] - offset]);
 
  135            remapping[s].second = std::get<2>(set);
 
  145        for (Index_ x = start, end = start + length; x < end; ++x) {
 
  146            const auto ptr = ext->fetch(vbuffer.data());
 
  147            for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
 
  148                const auto& set = remapping[s];
 
  152                    for (
decltype(I(set.first.size())) i = 0, send = set.first.size(); i < send; ++i) {
 
  153                        value += ptr[set.first[i]] * set.second[i];
 
  156                    for (
const auto ix : set.first) {
 
  161                buffers.sum[s][x] = value;
 
  165    }, p.
ncol(), options.num_threads);
 
  168template<
typename Data_, 
typename Index_, 
typename Gene_, 
typename Weight_, 
typename Sum_>
 
  169void compute_aggregate_by_row(
 
  171    const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
 
  172    const AggregateAcrossGenesBuffers<Sum_>& buffers,
 
  173    const AggregateAcrossGenesOptions& options)
 
  176    const auto subset = create_subset<Index_>(gene_sets, p.
nrow());
 
  177    const Index_ nsubs = subset.size();
 
  178    const auto sub_oracle = std::make_shared<tatami::FixedViewOracle<Index_> >(subset.data(), nsubs);
 
  180    const auto num_sets = gene_sets.size();
 
  183        const auto sub_mapping = create_subset_mapping(subset);
 
  184        const auto& mapping = sub_mapping.first;
 
  185        const Gene_ offset = sub_mapping.second;
 
  187        for (
decltype(I(num_sets)) s = 0; s < num_sets; ++s) {
 
  188            const auto& set = gene_sets[s];
 
  189            const auto set_size = std::get<0>(set);
 
  190            const auto set_genes = std::get<1>(set);
 
  191            const auto set_weights = std::get<2>(set);
 
  194                for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
 
  195                    remapping[mapping[set_genes[g] - offset]].emplace_back(s, set_weights[g]); 
 
  198                for (
decltype(I(set_size)) g = 0; g < set_size; ++g) {
 
  199                    remapping[mapping[set_genes[g] - offset]].emplace_back(s, 1);
 
  206        auto get_sum = [&](Index_ i) -> Sum_* { 
return buffers.sum[i]; };
 
  207        tatami_stats::LocalOutputBuffers<Sum_, 
decltype(I(get_sum))> local_sums(t, num_sets, start, length, std::move(get_sum));
 
  214            for (Index_ sub = 0; sub < nsubs; ++sub) {
 
  215                const auto range = ext->fetch(vbuffer.data(), ibuffer.data());
 
  217                for (
const auto& sw : remapping[sub]) {
 
  218                    const auto outptr = local_sums.data(sw.first);
 
  219                    const auto wt = sw.second;
 
  220                    for (Index_ c = 0; c < range.number; ++c) {
 
  221                        outptr[range.index[c] - start] += range.value[c] * wt;
 
  230            for (Index_ sub = 0; sub < nsubs; ++sub) {
 
  231                const auto ptr = ext->fetch(vbuffer.data());
 
  232                for (
const auto& sw : remapping[sub]) {
 
  233                    const auto outptr = local_sums.data(sw.first);
 
  234                    const auto wt = sw.second;
 
  235                    for (Index_ cell = 0; cell < length; ++cell) {
 
  236                        outptr[cell] += ptr[cell] * wt;
 
  242        local_sums.transfer();
 
  243    }, p.
ncol(), options.num_threads);
 
  273template<
typename Data_, 
typename Index_, 
typename Gene_, 
typename Weight_, 
typename Sum_>
 
  276    const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
 
  281        aggregate_across_genes_internal::compute_aggregate_by_row(input, gene_sets, buffers, options);
 
  283        aggregate_across_genes_internal::compute_aggregate_by_column(input, gene_sets, buffers, options);
 
  287        const auto nsets = gene_sets.size();
 
  289            const Index_ NC = input.
ncol();
 
  290            for (Index_ s = start, end = start + length; s < end; ++s) {
 
  291                const auto& set = gene_sets[s];
 
  292                const auto set_size = std::get<0>(set);
 
  295                const auto set_weights = std::get<2>(set);
 
  297                    denom = std::accumulate(set_weights, set_weights + set_size, 
static_cast<Sum_
>(0)); 
 
  302                const auto current = buffers.
sum[s];
 
  303                for (Index_ c = 0; c < NC; ++c) {
 
 
  330template<
typename Sum_ = 
double, 
typename Data_, 
typename Index_, 
typename Gene_, 
typename Weight_>
 
  333    const std::vector<std::tuple<std::size_t, const Gene_*, const Weight_*> >& gene_sets,
 
  339    const Index_ NC = input.
ncol();
 
  340    const auto nsets = gene_sets.size();
 
  341    sanisizer::resize(output.
sum, nsets);
 
  342    sanisizer::resize(buffers.
sum, nsets);
 
  344    for (
decltype(I(nsets)) s = 0; s < nsets; ++s) {
 
  348#ifdef SCRAN_AGGREGATE_TEST_INIT
 
  349            , SCRAN_AGGREGATE_TEST_INIT
 
  352        buffers.
sum[s] = output.
sum[s].data();
 
 
virtual Index_ ncol() const=0
 
virtual Index_ nrow() const=0
 
virtual bool prefer_rows() const=0
 
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
 
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:20
 
void aggregate_across_genes(const tatami::Matrix< Data_, Index_ > &input, const std::vector< std::tuple< std::size_t, const Gene_ *, const Weight_ * > > &gene_sets, const AggregateAcrossGenesBuffers< Sum_ > &buffers, const AggregateAcrossGenesOptions &options)
Definition aggregate_across_genes.hpp:274
 
std::shared_ptr< const std::vector< Index_ > > VectorPtr
 
void parallelize(Function_ fun, Index_ tasks, int threads)
 
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
 
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
 
auto new_extractor(const Matrix< Value_, Index_ > &matrix, bool row, MaybeOracle< oracle_, Index_ > oracle, Args_ &&... args)
 
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
 
Buffers for aggregate_across_genes().
Definition aggregate_across_genes.hpp:45
 
std::vector< Sum_ * > sum
Definition aggregate_across_genes.hpp:51
 
Options for aggregate_across_genes().
Definition aggregate_across_genes.hpp:26
 
bool average
Definition aggregate_across_genes.hpp:37
 
int num_threads
Definition aggregate_across_genes.hpp:31
 
Results of aggregate_across_genes().
Definition aggregate_across_genes.hpp:59
 
std::vector< std::vector< Sum_ > > sum
Definition aggregate_across_genes.hpp:65