scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include "tatami_stats/tatami_stats.hpp"
5
6#include <vector>
7#include <numeric>
8#include <algorithm>
9#include <type_traits>
10#include <cstddef>
11
13
19namespace scran_norm {
20
/**
 * Strategy used by `center_size_factors_blocked()` to rescale size factors
 * once the per-block means are known.
 */
enum class CenterBlockMode : char {
    /** Each size factor is divided by the mean of its own block. */
    PER_BLOCK,

    /** Every size factor is divided by the lowest non-zero block mean. */
    LOWEST
};
25
47 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
48
61 bool ignore_invalid = true;
62};
63
78template<typename SizeFactor_>
79SizeFactor_ center_size_factors_mean(std::size_t num, const SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
80 static_assert(std::is_floating_point<SizeFactor_>::value);
81 SizeFactor_ mean = 0;
82 decltype(num) denom = 0;
83
84 if (options.ignore_invalid) {
86 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
87 for (decltype(num) i = 0; i < num; ++i) {
88 auto val = size_factors[i];
89 if (!internal::is_invalid(val, diag)) {
90 mean += val;
91 ++denom;
92 }
93 }
94 } else {
95 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
96 denom = num;
97 }
98
99 if (denom) {
100 return mean/denom;
101 } else {
102 return 0;
103 }
104}
105
124template<typename SizeFactor_>
125SizeFactor_ center_size_factors(std::size_t num, SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
126 auto mean = center_size_factors_mean(num, size_factors, diagnostics, options);
127 if (mean) {
128 for (decltype(num) i = 0; i < num; ++i){
129 size_factors[i] /= mean;
130 }
131 }
132 return mean;
133}
134
153template<typename SizeFactor_, typename Block_>
154std::vector<SizeFactor_> center_size_factors_blocked_mean(std::size_t num, const SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
155 static_assert(std::is_floating_point<SizeFactor_>::value);
156 auto ngroups = tatami_stats::total_groups(block, num);
157 auto group_mean = sanisizer::create<std::vector<SizeFactor_> >(ngroups);
158 auto group_num = sanisizer::create<std::vector<decltype(num)> >(ngroups);
159
160 if (options.ignore_invalid) {
161 SizeFactorDiagnostics tmpdiag;
162 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
163 for (decltype(num) i = 0; i < num; ++i) {
164 auto val = size_factors[i];
165 if (!internal::is_invalid(val, diag)) {
166 auto b = block[i];
167 group_mean[b] += val;
168 ++(group_num[b]);
169 }
170 }
171 } else {
172 for (decltype(num) i = 0; i < num; ++i) {
173 auto b = block[i];
174 group_mean[b] += size_factors[i];
175 ++(group_num[b]);
176 }
177 }
178
179 for (decltype(ngroups) g = 0; g < ngroups; ++g) {
180 if (group_num[g]) {
181 group_mean[g] /= group_num[g];
182 }
183 }
184
185 return group_mean;
186}
187
206template<typename SizeFactor_, typename Block_>
207std::vector<SizeFactor_> center_size_factors_blocked(std::size_t num, SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
208 auto group_mean = center_size_factors_blocked_mean(num, size_factors, block, diagnostics, options);
209
210 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
211 for (decltype(num) i = 0; i < num; ++i) {
212 const auto& div = group_mean[block[i]];
213 if (div) {
214 size_factors[i] /= div;
215 }
216 }
217
218 } else if (options.block_mode == CenterBlockMode::LOWEST) {
219 SizeFactor_ min = 0;
220 bool found = false;
221 for (auto m : group_mean) {
222 // Ignore groups with means of zeros, either because they're full
223 // of zeros themselves or they have no cells associated with them.
224 if (m) {
225 if (!found || m < min) {
226 min = m;
227 found = true;
228 }
229 }
230 }
231
232 if (min > 0) {
233 for (decltype(num) i = 0; i < num; ++i) {
234 size_factors[i] /= min;
235 }
236 }
237 }
238
239 return group_mean;
240}
241
242}
243
244#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:19
SizeFactor_ center_size_factors(std::size_t num, SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:125
std::vector< SizeFactor_ > center_size_factors_blocked(std::size_t num, SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:207
CenterBlockMode
Definition center_size_factors.hpp:24
std::vector< SizeFactor_ > center_size_factors_blocked_mean(std::size_t num, const SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:154
SizeFactor_ center_size_factors_mean(std::size_t num, const SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:79
Sanitize invalid size factors.
Options for center_size_factors() and center_size_factors_blocked().
Definition center_size_factors.hpp:29
CenterBlockMode block_mode
Definition center_size_factors.hpp:47
bool ignore_invalid
Definition center_size_factors.hpp:61
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:18