scran_aggregate
Aggregate expression values across cells
Loading...
Searching...
No Matches
combine_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_COMBINE_FACTORS_HPP
2#define SCRAN_AGGREGATE_COMBINE_FACTORS_HPP
3
4#include <algorithm>
5#include <vector>
6#include <map>
7#include <unordered_map>
8#include <typeindex>
9
10#include "clean_factor.hpp"
11
17namespace scran_aggregate {
18
39template<typename Factor_, typename Combined_>
40std::vector<std::vector<Factor_> > combine_factors(size_t n, const std::vector<const Factor_*>& factors, Combined_* combined) {
41 size_t nfac = factors.size();
42 std::vector<std::vector<Factor_> > output(nfac);
43
44 // Handling the special cases.
45 if (nfac == 0) {
46 std::fill_n(combined, n, 0);
47 return output;
48 }
49 if (nfac == 1) {
50 output[0] = clean_factor(n, factors.front(), combined);
51 return output;
52 }
53
54 // Creating a hashmap on the combinations of each factor.
55 struct Combination {
56 Combination(size_t i) : index(i) {}
57 size_t index;
58 };
59
60 auto unique = [&]{ // scoping this in an IIFE to release map memory sooner.
61 // Using a map with a custom comparator that uses the index
62 // of first occurrence of each factor as the key. Currently using a map
63 // to (i) avoid issues with collisions of combined hashes and (ii)
64 // avoid having to write more code for sorting a vector of arrays.
65 auto cmp = [&](const Combination& left, const Combination& right) -> bool {
66 for (auto curf : factors) {
67 if (curf[left.index] < curf[right.index]) {
68 return true;
69 } else if (curf[left.index] > curf[right.index]) {
70 return false;
71 }
72 }
73 return false;
74 };
75
76 auto eq = [&](const Combination& left, const Combination& right) -> bool {
77 for (auto curf : factors) {
78 if (curf[left.index] != curf[right.index]) {
79 return false;
80 }
81 }
82 return true;
83 };
84
85 std::map<Combination, Combined_, decltype(cmp)> mapping(std::move(cmp));
86 for (size_t i = 0; i < n; ++i) {
87 Combination current(i);
88 auto mIt = mapping.find(current);
89 if (mIt == mapping.end() || !eq(mIt->first, current)) {
90 Combined_ alt = mapping.size();
91 mapping.insert(mIt, std::make_pair(current, alt));
92 combined[i] = alt;
93 } else {
94 combined[i] = mIt->second;
95 }
96 }
97
98 return std::vector<std::pair<Combination, Combined_> >(mapping.begin(), mapping.end());
99 }();
100
101 // Remapping to a sorted set.
102 size_t nuniq = unique.size();
103 for (auto& ofac : output) {
104 ofac.reserve(nuniq);
105 }
106 std::vector<Combined_> remapping(nuniq);
107 for (size_t u = 0; u < nuniq; ++u) {
108 auto ix = unique[u].first.index;
109 for (size_t f = 0; f < nfac; ++f) {
110 output[f].push_back(factors[f][ix]);
111 }
112 remapping[unique[u].second] = u;
113 }
114
115 // Mapping each cell to its sorted combination.
116 for (size_t i = 0; i < n; ++i) {
117 combined[i] = remapping[combined[i]];
118 }
119
120 return output;
121}
122
145template<typename Factor_, typename Number_, typename Combined_>
146std::vector<std::vector<Factor_> > combine_factors_unused(size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors, Combined_* combined) {
147 size_t nfac = factors.size();
148 std::vector<std::vector<Factor_> > output(nfac);
149
150 // Handling the special cases.
151 if (nfac == 0) {
152 std::fill_n(combined, n, 0);
153 return output;
154 }
155 if (nfac == 1) {
156 output[0].resize(factors[0].second);
157 std::iota(output[0].begin(), output[0].end(), static_cast<Combined_>(0));
158 std::copy_n(factors[0].first, n, combined);
159 return output;
160 }
161
162 // We iterate from back to front, where the first factor is the slowest changing.
163 std::copy_n(factors[nfac - 1].first, n, combined);
164 Combined_ mult = factors[nfac - 1].second;
165 for (size_t f = nfac - 1; f > 0; --f) {
166 const auto& finfo = factors[f - 1];
167 auto ff = finfo.first;
168 for (size_t i = 0; i < n; ++i) {
169 combined[i] += mult * ff[i];
170 }
171 mult *= finfo.second;
172 }
173
174 auto ncombos = mult;
175 Combined_ outer_repeats = mult;
176 Combined_ inner_repeats = 1;
177 for (size_t f = nfac; f > 0; --f) {
178 auto& out = output[f - 1];
179 out.reserve(ncombos);
180
181 const auto& finfo = factors[f - 1];
182 size_t initial_size = inner_repeats * finfo.second;
183 out.resize(initial_size);
184
185 if (inner_repeats == 1) {
186 std::iota(out.begin(), out.end(), static_cast<Combined_>(0));
187 } else {
188 auto oIt = out.begin();
189 for (Number_ l = 0; l < finfo.second; ++l) {
190 std::fill_n(oIt, inner_repeats, l);
191 oIt += inner_repeats;
192 }
193 }
194 inner_repeats = initial_size;
195
196 outer_repeats /= finfo.second;
197 for (Combined_ r = 1; r < outer_repeats; ++r) {
198 out.insert(out.end(), out.begin(), out.begin() + initial_size);
199 }
200 }
201
202 return output;
203}
204
205}
206
207#endif
Clean up a categorical factor.
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:13
std::vector< std::vector< Factor_ > > combine_factors_unused(size_t n, const std::vector< std::pair< const Factor_ *, Number_ > > &factors, Combined_ *combined)
Definition combine_factors.hpp:146
std::vector< Factor_ > clean_factor(size_t n, const Factor_ *factor, Output_ *cleaned)
Definition clean_factor.hpp:33
std::vector< std::vector< Factor_ > > combine_factors(size_t n, const std::vector< const Factor_ * > &factors, Combined_ *combined)
Definition combine_factors.hpp:40