scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
sanitize_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SANITIZE_SIZE_FACTORS_HPP
2#define SCRAN_SANITIZE_SIZE_FACTORS_HPP
3
4#include <cmath>
5#include <stdexcept>
6
12namespace scran_norm {
13
/**
 * @brief Diagnostics for the size factors.
 *
 * Each flag records whether at least one size factor of the corresponding invalid category was seen.
 * All flags default to `false`, i.e., "nothing invalid detected".
 */
struct SizeFactorDiagnostics {
    /**
     * Whether a negative size factor was detected.
     */
    bool has_negative = false;

    /**
     * Whether a size factor of zero was detected.
     */
    bool has_zero = false;

    /**
     * Whether a NaN size factor was detected.
     */
    bool has_nan = false;

    /**
     * Whether an infinite size factor was detected.
     */
    bool has_infinite = false;
};
38
42namespace internal {
43
44template<typename SizeFactor_>
45bool is_invalid(SizeFactor_ sf, SizeFactorDiagnostics& output) {
46 if (sf < 0) {
47 output.has_negative = true;
48 return true;
49 }
50
51 if (sf == 0) {
52 output.has_zero = true;
53 return true;
54 }
55
56 if (std::isnan(sf)) {
57 output.has_nan = true;
58 return true;
59 }
60
61 if (std::isinf(sf)) {
62 output.has_infinite = true;
63 return true;
64 }
65
66 return false;
67}
68
/**
 * Find the smallest size factor that is finite and strictly positive.
 *
 * @tparam SizeFactor_ Numeric type of the size factors.
 * @param num Number of entries in `size_factors`.
 * @param[in] size_factors Pointer to an array of length `num`. Not dereferenced when `num` is zero.
 * @return The smallest finite positive value in the array, or 1 if there is none.
 */
template<typename SizeFactor_>
SizeFactor_ find_smallest_valid_factor(size_t num, const SizeFactor_* size_factors) {
    SizeFactor_ smallest = 1; // fallback when no valid size factor exists.
    bool found = false;

    for (size_t i = 0; i < num; ++i) {
        auto s = size_factors[i];
        // std::isfinite() rejects NaN and both infinities; 's > 0' rejects zero and negatives.
        if (std::isfinite(s) && s > 0) {
            // The first valid value always replaces the fallback, even if it is larger than 1.
            if (!found || smallest > s) {
                smallest = s;
                found = true;
            }
        }
    }

    return smallest;
}
86
/**
 * Find the largest size factor that is finite and strictly positive.
 *
 * The result is returned as `SizeFactor_`, matching `find_smallest_valid_factor()`,
 * so that no precision is lost for types wider than `double`.
 *
 * @tparam SizeFactor_ Numeric type of the size factors.
 * @param num Number of entries in `size_factors`.
 * @param[in] size_factors Pointer to an array of length `num`. Not dereferenced when `num` is zero.
 * @return The largest finite positive value in the array, or 1 if there is none.
 */
template<typename SizeFactor_>
SizeFactor_ find_largest_valid_factor(size_t num, const SizeFactor_* size_factors) {
    SizeFactor_ largest = 1; // fallback when no valid size factor exists.
    bool found = false;

    for (size_t i = 0; i < num; ++i) {
        auto s = size_factors[i];
        // std::isfinite() rejects NaN and both infinities; 's > 0' rejects zero and negatives.
        if (std::isfinite(s) && s > 0) {
            // The first valid value always replaces the fallback, even if it is smaller than 1.
            if (!found || largest < s) {
                largest = s;
                found = true;
            }
        }
    }

    return largest;
}
104
105}
121template<typename SizeFactor_>
122SizeFactorDiagnostics check_size_factor_sanity(size_t num, const SizeFactor_* size_factors) {
124 for (size_t i = 0; i < num; ++i) {
125 internal::is_invalid(size_factors[i], output);
126 }
127 return output;
128}
129
/**
 * How `sanitize_size_factors()` should treat one category of invalid size factor.
 *
 * - `IGNORE`: leave the offending size factors untouched.
 * - `ERROR`: throw an exception.
 * - `SANITIZE`: overwrite the offending size factors with a replacement value.
 */
enum class SanitizeAction : char { IGNORE, ERROR, SANITIZE };
138
154 SanitizeAction handle_zero = SanitizeAction::ERROR;
155
161 SanitizeAction handle_negative = SanitizeAction::ERROR;
162
167 SanitizeAction handle_nan = SanitizeAction::ERROR;
168
174 SanitizeAction handle_infinite = SanitizeAction::ERROR;
175};
176
196template<typename SizeFactor_>
197void sanitize_size_factors(size_t num, SizeFactor_* size_factors, const SizeFactorDiagnostics& status, const SanitizeSizeFactorsOptions& options) {
198 SizeFactor_ smallest = -1;
199
200 if (status.has_negative) {
201 if (options.handle_negative == SanitizeAction::ERROR) {
202 throw std::runtime_error("detected negative size factor");
203 } else if (options.handle_negative == SanitizeAction::SANITIZE) {
204 smallest = internal::find_smallest_valid_factor(num, size_factors);
205 for (size_t i = 0; i < num; ++i) {
206 auto& s = size_factors[i];
207 if (s < 0) {
208 s = smallest;
209 }
210 }
211 }
212 }
213
214 if (status.has_zero) {
215 if (options.handle_zero == SanitizeAction::ERROR) {
216 throw std::runtime_error("detected size factor of zero");
217 } else if (options.handle_zero == SanitizeAction::SANITIZE) {
218 if (smallest < 0) {
219 smallest = internal::find_smallest_valid_factor(num, size_factors);
220 }
221 for (size_t i = 0; i < num; ++i) {
222 auto& s = size_factors[i];
223 if (s == 0) {
224 s = smallest;
225 }
226 }
227 }
228 }
229
230 if (status.has_nan) {
231 if (options.handle_nan == SanitizeAction::ERROR) {
232 throw std::runtime_error("detected NaN size factor");
233 } else if (options.handle_nan == SanitizeAction::SANITIZE) {
234 for (size_t i = 0; i < num; ++i) {
235 auto& s = size_factors[i];
236 if (std::isnan(s)) {
237 s = 1;
238 }
239 }
240 }
241 }
242
243 if (status.has_infinite) {
244 if (options.handle_infinite == SanitizeAction::ERROR) {
245 throw std::runtime_error("detected infinite size factor");
246 } else if (options.handle_infinite == SanitizeAction::SANITIZE) {
247 auto largest = internal::find_largest_valid_factor(num, size_factors);
248 for (size_t i = 0; i < num; ++i) {
249 auto& s = size_factors[i];
250 if (std::isinf(s)) {
251 s = largest;
252 }
253 }
254 }
255 }
256}
257
270template<typename SizeFactor_>
271SizeFactorDiagnostics sanitize_size_factors(size_t num, SizeFactor_* size_factors, const SanitizeSizeFactorsOptions& options) {
272 auto output = check_size_factor_sanity(num, size_factors);
273 sanitize_size_factors(num, size_factors, output, options);
274 return output;
275}
276
277}
278
279#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:18
void sanitize_size_factors(size_t num, SizeFactor_ *size_factors, const SizeFactorDiagnostics &status, const SanitizeSizeFactorsOptions &options)
Definition sanitize_size_factors.hpp:197
SizeFactorDiagnostics check_size_factor_sanity(size_t num, const SizeFactor_ *size_factors)
Definition sanitize_size_factors.hpp:122
SanitizeAction
Definition sanitize_size_factors.hpp:137
Options for sanitize_size_factors().
Definition sanitize_size_factors.hpp:142
SanitizeAction handle_zero
Definition sanitize_size_factors.hpp:154
SanitizeAction handle_nan
Definition sanitize_size_factors.hpp:167
SanitizeAction handle_negative
Definition sanitize_size_factors.hpp:161
SanitizeAction handle_infinite
Definition sanitize_size_factors.hpp:174
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:17
bool has_negative
Definition sanitize_size_factors.hpp:21
bool has_infinite
Definition sanitize_size_factors.hpp:36
bool has_zero
Definition sanitize_size_factors.hpp:26
bool has_nan
Definition sanitize_size_factors.hpp:31