scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
sanitize_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SANITIZE_SIZE_FACTORS_HPP
2#define SCRAN_SANITIZE_SIZE_FACTORS_HPP
3
4#include <cmath>
5#include <stdexcept>
6#include <cstddef>
7
8#include "utils.hpp"
9
15namespace scran_norm {
16
/**
 * @brief Diagnostics for the size factors.
 *
 * Each flag records whether at least one size factor of the corresponding
 * invalid category was encountered. All flags default to `false`.
 */
struct SizeFactorDiagnostics {
    /**
     * Whether a negative size factor was detected.
     */
    bool has_negative = false;

    /**
     * Whether a size factor of zero was detected.
     */
    bool has_zero = false;

    /**
     * Whether a NaN size factor was detected.
     */
    bool has_nan = false;

    /**
     * Whether an infinite size factor was detected.
     */
    bool has_infinite = false;
};
41
45namespace internal {
46
47template<typename SizeFactor_>
48bool is_invalid(SizeFactor_ sf, SizeFactorDiagnostics& output) {
49 if (sf < 0) {
50 output.has_negative = true;
51 return true;
52 }
53
54 if (sf == 0) {
55 output.has_zero = true;
56 return true;
57 }
58
59 if (std::isnan(sf)) {
60 output.has_nan = true;
61 return true;
62 }
63
64 if (std::isinf(sf)) {
65 output.has_infinite = true;
66 return true;
67 }
68
69 return false;
70}
71
/**
 * Find the smallest size factor that is finite and strictly positive.
 *
 * @tparam SizeFactor_ Numeric type of the size factors.
 * @param num Number of size factors.
 * @param[in] size_factors Pointer to an array of length `num`.
 * @return The minimum over all finite, positive entries, or 1 if there is no such entry.
 */
template<typename SizeFactor_>
SizeFactor_ find_smallest_valid_factor(const std::size_t num, const SizeFactor_* const size_factors) {
    SizeFactor_ best = 1; // fallback when nothing valid is present
    bool have_candidate = false;

    const SizeFactor_* const stop = size_factors + num;
    for (const SizeFactor_* cursor = size_factors; cursor != stop; ++cursor) {
        const SizeFactor_ value = *cursor;

        // Skip NaN, infinities, zeros and negatives.
        if (!std::isfinite(value) || !(value > 0)) {
            continue;
        }

        // Keep the current minimum unless this is the first valid value or a smaller one.
        if (have_candidate && !(best > value)) {
            continue;
        }

        best = value;
        have_candidate = true;
    }

    return best;
}
89
/**
 * Find the largest size factor that is finite and strictly positive.
 *
 * @tparam SizeFactor_ Numeric type of the size factors.
 * @param num Number of size factors.
 * @param[in] size_factors Pointer to an array of length `num`.
 * @return The maximum over all finite, positive entries, or 1 if there is no such entry.
 */
template<typename SizeFactor_>
SizeFactor_ find_largest_valid_factor(const std::size_t num, const SizeFactor_* const size_factors) {
    SizeFactor_ best = 1; // fallback when nothing valid is present
    bool have_candidate = false;

    const SizeFactor_* const stop = size_factors + num;
    for (const SizeFactor_* cursor = size_factors; cursor != stop; ++cursor) {
        const SizeFactor_ value = *cursor;

        // Skip NaN, infinities, zeros and negatives.
        if (!std::isfinite(value) || !(value > 0)) {
            continue;
        }

        // Keep the current maximum unless this is the first valid value or a larger one.
        if (have_candidate && !(best < value)) {
            continue;
        }

        best = value;
        have_candidate = true;
    }

    return best;
}
107
108}
124template<typename SizeFactor_>
125SizeFactorDiagnostics check_size_factor_sanity(const std::size_t num, const SizeFactor_* const size_factors) {
127 for (decltype(I(num)) i = 0; i < num; ++i) {
128 internal::is_invalid(size_factors[i], output);
129 }
130 return output;
131}
132
/**
 * How `sanitize_size_factors()` should react to one category of invalid size factor.
 */
enum class SanitizeAction : char {
    /** Leave the offending size factors untouched. */
    IGNORE,
    /** Throw a `std::runtime_error`. */
    ERROR,
    /** Overwrite the offending size factors with a replacement value. */
    SANITIZE
};
141
156 SanitizeAction handle_zero = SanitizeAction::ERROR;
157
164 SanitizeAction handle_negative = SanitizeAction::ERROR;
165
171 SanitizeAction handle_nan = SanitizeAction::ERROR;
172
179 SanitizeAction handle_infinite = SanitizeAction::ERROR;
180};
181
200template<typename SizeFactor_>
201void sanitize_size_factors(const std::size_t num, SizeFactor_* const size_factors, const SizeFactorDiagnostics& status, const SanitizeSizeFactorsOptions& options) {
202 SizeFactor_ smallest = -1;
203
204 if (status.has_negative) {
205 if (options.handle_negative == SanitizeAction::ERROR) {
206 throw std::runtime_error("detected negative size factor");
207 } else if (options.handle_negative == SanitizeAction::SANITIZE) {
208 smallest = internal::find_smallest_valid_factor(num, size_factors);
209 for (decltype(I(num)) i = 0; i < num; ++i) {
210 auto& s = size_factors[i];
211 if (s < 0) {
212 s = smallest;
213 }
214 }
215 }
216 }
217
218 if (status.has_zero) {
219 if (options.handle_zero == SanitizeAction::ERROR) {
220 throw std::runtime_error("detected size factor of zero");
221 } else if (options.handle_zero == SanitizeAction::SANITIZE) {
222 if (smallest < 0) {
223 smallest = internal::find_smallest_valid_factor(num, size_factors);
224 }
225 for (decltype(I(num)) i = 0; i < num; ++i) {
226 auto& s = size_factors[i];
227 if (s == 0) {
228 s = smallest;
229 }
230 }
231 }
232 }
233
234 if (status.has_nan) {
235 if (options.handle_nan == SanitizeAction::ERROR) {
236 throw std::runtime_error("detected NaN size factor");
237 } else if (options.handle_nan == SanitizeAction::SANITIZE) {
238 for (decltype(I(num)) i = 0; i < num; ++i) {
239 auto& s = size_factors[i];
240 if (std::isnan(s)) {
241 s = 1;
242 }
243 }
244 }
245 }
246
247 if (status.has_infinite) {
248 if (options.handle_infinite == SanitizeAction::ERROR) {
249 throw std::runtime_error("detected infinite size factor");
250 } else if (options.handle_infinite == SanitizeAction::SANITIZE) {
251 auto largest = internal::find_largest_valid_factor(num, size_factors);
252 for (decltype(I(num)) i = 0; i < num; ++i) {
253 auto& s = size_factors[i];
254 if (std::isinf(s)) {
255 s = largest;
256 }
257 }
258 }
259 }
260}
261
274template<typename SizeFactor_>
275SizeFactorDiagnostics sanitize_size_factors(const std::size_t num, SizeFactor_* const size_factors, const SanitizeSizeFactorsOptions& options) {
276 const auto output = check_size_factor_sanity(num, size_factors);
277 sanitize_size_factors(num, size_factors, output, options);
278 return output;
279}
280
281}
282
283#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
void sanitize_size_factors(const std::size_t num, SizeFactor_ *const size_factors, const SizeFactorDiagnostics &status, const SanitizeSizeFactorsOptions &options)
Definition sanitize_size_factors.hpp:201
SizeFactorDiagnostics check_size_factor_sanity(const std::size_t num, const SizeFactor_ *const size_factors)
Definition sanitize_size_factors.hpp:125
SanitizeAction
Definition sanitize_size_factors.hpp:140
Options for sanitize_size_factors().
Definition sanitize_size_factors.hpp:145
SanitizeAction handle_zero
Definition sanitize_size_factors.hpp:156
SanitizeAction handle_nan
Definition sanitize_size_factors.hpp:171
SanitizeAction handle_negative
Definition sanitize_size_factors.hpp:164
SanitizeAction handle_infinite
Definition sanitize_size_factors.hpp:179
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:20
bool has_negative
Definition sanitize_size_factors.hpp:24
bool has_infinite
Definition sanitize_size_factors.hpp:39
bool has_zero
Definition sanitize_size_factors.hpp:29
bool has_nan
Definition sanitize_size_factors.hpp:34