scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include "tatami_stats/tatami_stats.hpp"
5
6#include <vector>
7#include <numeric>
8#include <algorithm>
9#include <type_traits>
10
12
18namespace scran_norm {
19
/**
 * Strategy used by `center_size_factors_blocked()` to scale size factors
 * when cells are assigned to blocks.
 */
enum class CenterBlockMode : char {
    /** Divide each size factor by the mean of its own block. */
    PER_BLOCK,

    /** Divide every size factor by the smallest non-zero per-block mean. */
    LOWEST
};
24
46 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
47
60 bool ignore_invalid = true;
61};
62
77template<typename SizeFactor_>
78SizeFactor_ center_size_factors_mean(size_t num, const SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
79 static_assert(std::is_floating_point<SizeFactor_>::value);
80 SizeFactor_ mean = 0;
81 size_t denom = 0;
82
83 if (options.ignore_invalid) {
85 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
86 for (size_t i = 0; i < num; ++i) {
87 auto val = size_factors[i];
88 if (!internal::is_invalid(val, diag)) {
89 mean += val;
90 ++denom;
91 }
92 }
93 } else {
94 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
95 denom = num;
96 }
97
98 if (denom) {
99 return mean/denom;
100 } else {
101 return 0;
102 }
103}
104
123template<typename SizeFactor_>
124SizeFactor_ center_size_factors(size_t num, SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
125 auto mean = center_size_factors_mean(num, size_factors, diagnostics, options);
126 if (mean) {
127 for (size_t i = 0; i < num; ++i){
128 size_factors[i] /= mean;
129 }
130 }
131 return mean;
132}
133
152template<typename SizeFactor_, typename Block_>
153std::vector<SizeFactor_> center_size_factors_blocked_mean(size_t num, const SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
154 static_assert(std::is_floating_point<SizeFactor_>::value);
155 size_t ngroups = tatami_stats::total_groups(block, num);
156 std::vector<SizeFactor_> group_mean(ngroups);
157 std::vector<size_t> group_num(ngroups);
158
159 if (options.ignore_invalid) {
160 SizeFactorDiagnostics tmpdiag;
161 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
162 for (size_t i = 0; i < num; ++i) {
163 auto val = size_factors[i];
164 if (!internal::is_invalid(val, diag)) {
165 auto b = block[i];
166 group_mean[b] += val;
167 ++(group_num[b]);
168 }
169 }
170 } else {
171 for (size_t i = 0; i < num; ++i) {
172 auto b = block[i];
173 group_mean[b] += size_factors[i];
174 ++(group_num[b]);
175 }
176 }
177
178 for (size_t g = 0; g < ngroups; ++g) {
179 if (group_num[g]) {
180 group_mean[g] /= group_num[g];
181 }
182 }
183
184 return group_mean;
185}
186
205template<typename SizeFactor_, typename Block_>
206std::vector<SizeFactor_> center_size_factors_blocked(size_t num, SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
207 auto group_mean = center_size_factors_blocked_mean(num, size_factors, block, diagnostics, options);
208
209 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
210 for (size_t i = 0; i < num; ++i) {
211 const auto& div = group_mean[block[i]];
212 if (div) {
213 size_factors[i] /= div;
214 }
215 }
216
217 } else if (options.block_mode == CenterBlockMode::LOWEST) {
218 SizeFactor_ min = 0;
219 bool found = false;
220 for (auto m : group_mean) {
221 // Ignore groups with means of zeros, either because they're full
222 // of zeros themselves or they have no cells associated with them.
223 if (m) {
224 if (!found || m < min) {
225 min = m;
226 found = true;
227 }
228 }
229 }
230
231 if (min > 0) {
232 for (size_t i = 0; i < num; ++i) {
233 size_factors[i] /= min;
234 }
235 }
236 }
237
238 return group_mean;
239}
240
241}
242
243#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:18
SizeFactor_ center_size_factors_mean(size_t num, const SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:78
CenterBlockMode
Definition center_size_factors.hpp:23
SizeFactor_ center_size_factors(size_t num, SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:124
std::vector< SizeFactor_ > center_size_factors_blocked(size_t num, SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:206
std::vector< SizeFactor_ > center_size_factors_blocked_mean(size_t num, const SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:153
Sanitize invalid size factors.
Options for center_size_factors() and center_size_factors_blocked().
Definition center_size_factors.hpp:28
CenterBlockMode block_mode
Definition center_size_factors.hpp:46
bool ignore_invalid
Definition center_size_factors.hpp:60
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:17