scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include <vector>
5#include <numeric>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "sanisizer/sanisizer.hpp"
11
13#include "utils.hpp"
14
20namespace scran_norm {
21
43
56template<typename SizeFactor_>
57SizeFactor_ compute_mean_size_factor(const std::size_t num_cells, const SizeFactor_* const size_factors, const ComputeMeanSizeFactorOptions& options) {
58 static_assert(std::is_floating_point<SizeFactor_>::value);
59 SizeFactor_ mean = 0;
60 I<decltype(num_cells)> denom = 0;
61
62 if (options.ignore_invalid) {
64 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
65 const auto val = size_factors[i];
66 if (!internal::is_invalid(val, tmpdiag)) {
67 mean += val;
68 ++denom;
69 }
70 }
71 if (options.diagnostics != NULL) {
72 *(options.diagnostics) = tmpdiag;
73 }
74
75 } else {
76 mean = std::accumulate(size_factors, size_factors + num_cells, static_cast<SizeFactor_>(0));
77 denom = num_cells;
78 }
79
80 if (denom) {
81 return mean / denom;
82 } else {
83 return 0;
84 }
85}
86
103template<typename SizeFactor_, typename Block_>
104std::vector<SizeFactor_> compute_mean_size_factor_blocked(
105 const std::size_t num_cells,
106 const SizeFactor_* const size_factors,
107 const Block_* const block,
108 const std::size_t num_blocks,
109 const ComputeMeanSizeFactorOptions& options
110) {
111 static_assert(std::is_floating_point<SizeFactor_>::value);
112 auto block_mean = sanisizer::create<std::vector<SizeFactor_> >(num_blocks);
113 auto block_num = sanisizer::create<std::vector<I<decltype(num_cells)> > >(num_blocks);
114
115 if (options.ignore_invalid) {
116 SizeFactorDiagnostics tmpdiag;
117 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
118 const auto val = size_factors[i];
119 if (!internal::is_invalid(val, tmpdiag)) {
120 const auto b = block[i];
121 block_mean[b] += val;
122 ++(block_num[b]);
123 }
124 }
125 if (options.diagnostics != NULL) {
126 *(options.diagnostics) = tmpdiag;
127 }
128
129 } else {
130 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
131 const auto b = block[i];
132 block_mean[b] += size_factors[i];
133 ++(block_num[b]);
134 }
135 }
136
137 for (I<decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
138 if (block_num[g]) {
139 block_mean[g] /= block_num[g];
140 }
141 }
142
143 return block_mean;
144}
145
158 bool ignore_invalid = true;
159
165 double center = 1;
166
172
177 bool report_final = false;
178};
179
196template<typename SizeFactor_>
197SizeFactor_ center_size_factors(const std::size_t num_cells, SizeFactor_* const size_factors, const CenterSizeFactorsOptions& options) {
199 copt.ignore_invalid = options.ignore_invalid;
200 copt.diagnostics = options.diagnostics;
201 const auto mean = compute_mean_size_factor(num_cells, size_factors, copt);
202
203 if (mean == 0) {
204 return 0;
205 }
206
207 const SizeFactor_ mult = options.center / mean;
208 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i){
209 size_factors[i] *= mult;
210 }
211
212 if (options.report_final) {
213 return options.center;
214 } else {
215 return mean;
216 }
217}
218
222enum class CenterBlockMode : char { PER_BLOCK, LOWEST, CUSTOM };
223
232 bool ignore_invalid = true;
233
239
263 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
264
269 std::optional<std::vector<double> > custom_centers;
270
275 bool report_final = false;
276};
277
297template<typename SizeFactor_, typename Block_>
298std::vector<SizeFactor_> center_size_factors_blocked(
299 const std::size_t num_cells,
300 SizeFactor_* const size_factors,
301 const Block_* const block,
302 const std::size_t num_blocks,
304) {
306 copt.ignore_invalid = options.ignore_invalid;
307 copt.diagnostics = options.diagnostics;
308 auto block_mean = compute_mean_size_factor_blocked(num_cells, size_factors, block, num_blocks, copt);
309
310 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
311 std::vector<SizeFactor_> fac;
312 fac.reserve(num_blocks);
313 for (I<decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
314 const auto gm = block_mean[g];
315 if (gm) {
316 fac.emplace_back(1 / gm);
317 } else {
318 fac.emplace_back(1); // i.e., no-op.
319 }
320 }
321
322 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
323 size_factors[i] *= fac[block[i]];
324 }
325
326 if (options.report_final) {
327 for (auto& gm : block_mean) {
328 if (gm) {
329 gm = 1;
330 }
331 }
332 }
333
334 return block_mean;
335
336 } else if (options.block_mode == CenterBlockMode::LOWEST) {
337 SizeFactor_ min = 0;
338 bool found = false;
339 for (const auto m : block_mean) {
340 // Ignore blocks with means of zeros, either because they're full
341 // of zeros themselves or they have no cells associated with them.
342 if (m) {
343 if (!found || m < min) {
344 min = m;
345 found = true;
346 }
347 }
348 }
349
350 if (min) {
351 const SizeFactor_ mult = 1 / min;
352 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
353 size_factors[i] *= mult;
354 }
355
356 if (options.report_final) {
357 for (auto& gm : block_mean) {
358 gm /= min;
359 }
360 }
361 }
362
363 return block_mean;
364
365 } else { // i.e., options.block_mode == CenterBlockMode::CUSTOM
366 if (!options.custom_centers.has_value()) {
367 throw std::runtime_error("'custom_centers' should be set for custom block centers");
368 }
369 const auto& custom = *(options.custom_centers);
370 if (custom.size() != num_blocks) {
371 throw std::runtime_error("length of 'custom_centers' should be equal to the number of blocks");
372 }
373
374 std::vector<SizeFactor_> fac;
375 fac.reserve(num_blocks);
376 for (I<decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
377 const auto gm = block_mean[g];
378 if (gm) {
379 fac.emplace_back(custom[g] / gm);
380 } else {
381 fac.emplace_back(1);
382 }
383 }
384
385 for (I<decltype(num_cells)> i = 0; i < num_cells; ++i) {
386 size_factors[i] *= fac[block[i]];
387 }
388
389 if (options.report_final) {
390 for (I<decltype(num_blocks)> g = 0; g < num_blocks; ++g) {
391 auto& gm = block_mean[g];
392 if (gm) {
393 gm = custom[g];
394 }
395 }
396 }
397
398 return block_mean;
399 }
400}
401
402}
403
404#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
SizeFactor_ center_size_factors(const std::size_t num_cells, SizeFactor_ *const size_factors, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:197
std::vector< SizeFactor_ > center_size_factors_blocked(const std::size_t num_cells, SizeFactor_ *const size_factors, const Block_ *const block, const std::size_t num_blocks, const CenterSizeFactorsBlockedOptions &options)
Definition center_size_factors.hpp:298
CenterBlockMode
Definition center_size_factors.hpp:222
SizeFactor_ compute_mean_size_factor(const std::size_t num_cells, const SizeFactor_ *const size_factors, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:57
std::vector< SizeFactor_ > compute_mean_size_factor_blocked(const std::size_t num_cells, const SizeFactor_ *const size_factors, const Block_ *const block, const std::size_t num_blocks, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:104
Sanitize invalid size factors.
Options for center_size_factors_blocked().
Definition center_size_factors.hpp:227
std::optional< std::vector< double > > custom_centers
Definition center_size_factors.hpp:269
CenterBlockMode block_mode
Definition center_size_factors.hpp:263
bool ignore_invalid
Definition center_size_factors.hpp:232
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:238
bool report_final
Definition center_size_factors.hpp:275
Options for center_size_factors().
Definition center_size_factors.hpp:149
bool ignore_invalid
Definition center_size_factors.hpp:158
bool report_final
Definition center_size_factors.hpp:177
double center
Definition center_size_factors.hpp:165
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:171
Options for compute_mean_size_factor() and compute_mean_size_factor_blocked().
Definition center_size_factors.hpp:25
bool ignore_invalid
Definition center_size_factors.hpp:32
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:41
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:21