scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
sanitize_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SANITIZE_SIZE_FACTORS_HPP
2#define SCRAN_SANITIZE_SIZE_FACTORS_HPP
3
4#include <cmath>
5#include <stdexcept>
6#include <cstddef>
7#include <optional>
8
9#include "utils.hpp"
10
16namespace scran_norm {
17
/**
 * @brief Diagnostics for the size factors.
 *
 * Each flag records whether at least one size factor of the corresponding
 * invalid kind was seen. All flags start out as `false`.
 */
struct SizeFactorDiagnostics {
    /**
     * Whether a negative size factor was detected.
     */
    bool has_negative = false;

    /**
     * Whether a size factor of zero was detected.
     */
    bool has_zero = false;

    /**
     * Whether a NaN size factor was detected.
     */
    bool has_nan = false;

    /**
     * Whether an infinite size factor was detected.
     */
    bool has_infinite = false;
};
42
46namespace internal {
47
48template<typename SizeFactor_>
49bool is_invalid(SizeFactor_ sf, SizeFactorDiagnostics& output) {
50 if (sf < 0) {
51 output.has_negative = true;
52 return true;
53 }
54
55 if (sf == 0) {
56 output.has_zero = true;
57 return true;
58 }
59
60 if (std::isnan(sf)) {
61 output.has_nan = true;
62 return true;
63 }
64
65 if (std::isinf(sf)) {
66 output.has_infinite = true;
67 return true;
68 }
69
70 return false;
71}
72
73template<typename SizeFactor_>
74SizeFactor_ find_smallest_valid_factor(const std::size_t num, const SizeFactor_* const size_factors) {
75 SizeFactor_ smallest = 1;
76 bool found = false;
77
78 for (I<decltype(num)> i = 0; i < num; ++i) {
79 const auto s = size_factors[i];
80 if (std::isfinite(s) && s > 0) {
81 if (!found || smallest > s) {
82 smallest = s;
83 found = true;
84 }
85 }
86 }
87
88 return smallest;
89}
90
91template<typename SizeFactor_>
92SizeFactor_ find_largest_valid_factor(const std::size_t num, const SizeFactor_* const size_factors) {
93 SizeFactor_ largest = 1;
94 bool found = false;
95
96 for (I<decltype(num)> i = 0; i < num; ++i) {
97 const auto s = size_factors[i];
98 if (std::isfinite(s) && s > 0) {
99 if (!found || largest < s) {
100 largest = s;
101 found = true;
102 }
103 }
104 }
105
106 return largest;
107}
108
109}
125template<typename SizeFactor_>
126SizeFactorDiagnostics check_size_factor_sanity(const std::size_t num, const SizeFactor_* const size_factors) {
128 for (I<decltype(num)> i = 0; i < num; ++i) {
129 internal::is_invalid(size_factors[i], output);
130 }
131 return output;
132}
133
/**
 * How a particular kind of invalid size factor should be handled.
 *
 * - `IGNORE`: leave the invalid size factors untouched.
 * - `ERROR`: throw an exception if such a size factor is present.
 * - `SANITIZE`: replace the invalid size factors with a valid value.
 */
enum class SanitizeAction : char { IGNORE, ERROR, SANITIZE };

/**
 * @brief Options for `sanitize_size_factors()`.
 *
 * Every kind of invalid size factor is sanitized by default.
 */
struct SanitizeSizeFactorsOptions {
    /**
     * How to handle size factors of zero.
     * If `SANITIZE`, they are replaced with the smallest valid size factor, or 1 if there is none.
     */
    SanitizeAction handle_zero = SanitizeAction::SANITIZE;

    /**
     * How to handle negative size factors.
     * If `SANITIZE`, they are replaced with the smallest valid size factor, or 1 if there is none.
     */
    SanitizeAction handle_negative = SanitizeAction::SANITIZE;

    /**
     * How to handle NaN size factors.
     * If `SANITIZE`, they are replaced with 1.
     */
    SanitizeAction handle_nan = SanitizeAction::SANITIZE;

    /**
     * How to handle infinite size factors.
     * If `SANITIZE`, they are replaced with the largest valid size factor, or 1 if there is none.
     */
    SanitizeAction handle_infinite = SanitizeAction::SANITIZE;
};
182
201template<typename SizeFactor_>
202void sanitize_size_factors(const std::size_t num, SizeFactor_* const size_factors, const SizeFactorDiagnostics& status, const SanitizeSizeFactorsOptions& options) {
203 std::optional<SizeFactor_> smallest;
204
205 if (status.has_negative) {
206 if (options.handle_negative == SanitizeAction::ERROR) {
207 throw std::runtime_error("detected negative size factor");
208 } else if (options.handle_negative == SanitizeAction::SANITIZE) {
209 smallest = internal::find_smallest_valid_factor(num, size_factors);
210 for (I<decltype(num)> i = 0; i < num; ++i) {
211 auto& s = size_factors[i];
212 if (s < 0) {
213 s = *smallest;
214 }
215 }
216 }
217 }
218
219 if (status.has_zero) {
220 if (options.handle_zero == SanitizeAction::ERROR) {
221 throw std::runtime_error("detected size factor of zero");
222 } else if (options.handle_zero == SanitizeAction::SANITIZE) {
223 if (!smallest.has_value()) {
224 smallest = internal::find_smallest_valid_factor(num, size_factors);
225 }
226 for (I<decltype(num)> i = 0; i < num; ++i) {
227 auto& s = size_factors[i];
228 if (s == 0) {
229 s = *smallest;
230 }
231 }
232 }
233 }
234
235 if (status.has_nan) {
236 if (options.handle_nan == SanitizeAction::ERROR) {
237 throw std::runtime_error("detected NaN size factor");
238 } else if (options.handle_nan == SanitizeAction::SANITIZE) {
239 for (I<decltype(num)> i = 0; i < num; ++i) {
240 auto& s = size_factors[i];
241 if (std::isnan(s)) {
242 s = 1;
243 }
244 }
245 }
246 }
247
248 if (status.has_infinite) {
249 if (options.handle_infinite == SanitizeAction::ERROR) {
250 throw std::runtime_error("detected infinite size factor");
251 } else if (options.handle_infinite == SanitizeAction::SANITIZE) {
252 auto largest = internal::find_largest_valid_factor(num, size_factors);
253 for (I<decltype(num)> i = 0; i < num; ++i) {
254 auto& s = size_factors[i];
255 if (std::isinf(s)) {
256 s = largest;
257 }
258 }
259 }
260 }
261}
262
275template<typename SizeFactor_>
276SizeFactorDiagnostics sanitize_size_factors(const std::size_t num, SizeFactor_* const size_factors, const SanitizeSizeFactorsOptions& options) {
277 const auto output = check_size_factor_sanity(num, size_factors);
278 sanitize_size_factors(num, size_factors, output, options);
279 return output;
280}
281
282}
283
284#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
void sanitize_size_factors(const std::size_t num, SizeFactor_ *const size_factors, const SizeFactorDiagnostics &status, const SanitizeSizeFactorsOptions &options)
Definition sanitize_size_factors.hpp:202
SizeFactorDiagnostics check_size_factor_sanity(const std::size_t num, const SizeFactor_ *const size_factors)
Definition sanitize_size_factors.hpp:126
SanitizeAction
Definition sanitize_size_factors.hpp:141
Options for sanitize_size_factors().
Definition sanitize_size_factors.hpp:146
SanitizeAction handle_zero
Definition sanitize_size_factors.hpp:157
SanitizeAction handle_nan
Definition sanitize_size_factors.hpp:172
SanitizeAction handle_negative
Definition sanitize_size_factors.hpp:165
SanitizeAction handle_infinite
Definition sanitize_size_factors.hpp:180
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:21
bool has_negative
Definition sanitize_size_factors.hpp:25
bool has_infinite
Definition sanitize_size_factors.hpp:40
bool has_zero
Definition sanitize_size_factors.hpp:30
bool has_nan
Definition sanitize_size_factors.hpp:35