scran_aggregate
Aggregate expression values across cells
Loading...
Searching...
No Matches
combine_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_COMBINE_FACTORS_HPP
2#define SCRAN_AGGREGATE_COMBINE_FACTORS_HPP
3
4#include <algorithm>
5#include <vector>
6#include <map>
7#include <unordered_map>
8#include <cstddef>
9
10#include "clean_factor.hpp"
11
12#include "sanisizer/sanisizer.hpp"
13
19namespace scran_aggregate {
20
41template<typename Factor_, typename Combined_>
42std::vector<std::vector<Factor_> > combine_factors(std::size_t n, const std::vector<const Factor_*>& factors, Combined_* combined) {
43 auto nfac = factors.size();
44 auto output = sanisizer::create<std::vector<std::vector<Factor_> > >(nfac);
45
46 // Handling the special cases.
47 if (nfac == 0) {
48 std::fill_n(combined, n, 0);
49 return output;
50 }
51 if (nfac == 1) {
52 output[0] = clean_factor(n, factors.front(), combined);
53 return output;
54 }
55
56 // Creating a hashmap on the combinations of each factor.
57 struct Combination {
58 Combination(std::size_t i) : index(i) {}
59 std::size_t index;
60 };
61
62 auto unique = [&]{ // scoping this in an IIFE to release map memory sooner.
63 // Using a map with a custom comparator that uses the index
64 // of first occurrence of each factor as the key. Currently using a map
65 // to (i) avoid issues with collisions of combined hashes and (ii)
66 // avoid having to write more code for sorting a vector of arrays.
67 auto cmp = [&](const Combination& left, const Combination& right) -> bool {
68 for (auto curf : factors) {
69 if (curf[left.index] < curf[right.index]) {
70 return true;
71 } else if (curf[left.index] > curf[right.index]) {
72 return false;
73 }
74 }
75 return false;
76 };
77
78 auto eq = [&](const Combination& left, const Combination& right) -> bool {
79 for (auto curf : factors) {
80 if (curf[left.index] != curf[right.index]) {
81 return false;
82 }
83 }
84 return true;
85 };
86
87 std::map<Combination, Combined_, decltype(cmp)> mapping(std::move(cmp));
88 for (decltype(n) i = 0; i < n; ++i) {
89 Combination current(i);
90 auto mIt = mapping.find(current);
91 if (mIt == mapping.end() || !eq(mIt->first, current)) {
92 Combined_ alt = mapping.size();
93 mapping.insert(mIt, std::make_pair(current, alt));
94 combined[i] = alt;
95 } else {
96 combined[i] = mIt->second;
97 }
98 }
99
100 return std::vector<std::pair<Combination, Combined_> >(mapping.begin(), mapping.end());
101 }();
102
103 // Remapping to a sorted set.
104 auto nuniq = unique.size();
105 for (auto& ofac : output) {
106 ofac.reserve(nuniq);
107 }
108 auto remapping = sanisizer::create<std::vector<Combined_> >(nuniq);
109 for (decltype(nuniq) u = 0; u < nuniq; ++u) {
110 auto ix = unique[u].first.index;
111 for (decltype(nfac) f = 0; f < nfac; ++f) {
112 output[f].push_back(factors[f][ix]);
113 }
114 remapping[unique[u].second] = u;
115 }
116
117 // Mapping each cell to its sorted combination.
118 for (decltype(n) i = 0; i < n; ++i) {
119 combined[i] = remapping[combined[i]];
120 }
121
122 return output;
123}
124
147template<typename Factor_, typename Number_, typename Combined_>
148std::vector<std::vector<Factor_> > combine_factors_unused(std::size_t n, const std::vector<std::pair<const Factor_*, Number_> >& factors, Combined_* combined) {
149 auto nfac = factors.size();
150 auto output = sanisizer::create<std::vector<std::vector<Factor_> > >(nfac);
151
152 // Handling the special cases.
153 if (nfac == 0) {
154 std::fill_n(combined, n, 0);
155 return output;
156 }
157 if (nfac == 1) {
158 output[0].resize(factors[0].second);
159 std::iota(output[0].begin(), output[0].end(), static_cast<Combined_>(0));
160 std::copy_n(factors[0].first, n, combined);
161 return output;
162 }
163
164 // We iterate from back to front, where the first factor is the slowest changing.
165 std::copy_n(factors[nfac - 1].first, n, combined);
166 Combined_ ncombos = factors[nfac - 1].second;
167 for (decltype(nfac) f = nfac - 1; f > 0; --f) {
168 const auto& finfo = factors[f - 1];
169 auto next_ncombos = sanisizer::product<Combined_>(ncombos, finfo.second);
170 auto ff = finfo.first;
171 for (decltype(n) i = 0; i < n; ++i) {
172 combined[i] += ncombos * ff[i];
173 }
174 ncombos = next_ncombos;
175 }
176
177 sanisizer::cast<decltype(output[0].size())>(ncombos); // check that we can actually make the output vectors.
178 Combined_ outer_repeats = ncombos;
179 Combined_ inner_repeats = 1;
180 for (decltype(nfac) f = nfac; f > 0; --f) {
181 auto& out = output[f - 1];
182 out.reserve(ncombos);
183
184 const auto& finfo = factors[f - 1];
185 Combined_ initial_size = inner_repeats * finfo.second;
186 out.resize(initial_size);
187
188 if (inner_repeats == 1) {
189 std::iota(out.begin(), out.end(), static_cast<Combined_>(0));
190 } else {
191 auto oIt = out.begin();
192 for (Number_ l = 0; l < finfo.second; ++l) {
193 std::fill_n(oIt, inner_repeats, l);
194 oIt += inner_repeats;
195 }
196 }
197 inner_repeats = initial_size;
198
199 outer_repeats /= finfo.second;
200 for (Combined_ r = 1; r < outer_repeats; ++r) {
201 out.insert(out.end(), out.begin(), out.begin() + initial_size);
202 }
203 }
204
205 return output;
206}
207
208}
209
210#endif
Clean up a categorical factor.
Aggregate single-cell expression values.
Definition aggregate_across_cells.hpp:18
std::vector< std::vector< Factor_ > > combine_factors_unused(std::size_t n, const std::vector< std::pair< const Factor_ *, Number_ > > &factors, Combined_ *combined)
Definition combine_factors.hpp:148
std::vector< std::vector< Factor_ > > combine_factors(std::size_t n, const std::vector< const Factor_ * > &factors, Combined_ *combined)
Definition combine_factors.hpp:42
std::vector< Factor_ > clean_factor(std::size_t n, const Factor_ *factor, Output_ *cleaned)
Definition clean_factor.hpp:36