scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include <vector>
5#include <numeric>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami_stats/tatami_stats.hpp"
11
13#include "utils.hpp"
14
20namespace scran_norm {
21
43
55template<typename SizeFactor_>
56SizeFactor_ compute_mean_size_factor(const std::size_t num, const SizeFactor_* const size_factors, const ComputeMeanSizeFactorOptions& options) {
57 static_assert(std::is_floating_point<SizeFactor_>::value);
58 SizeFactor_ mean = 0;
59 I<decltype(num)> denom = 0;
60
61 if (options.ignore_invalid) {
63 auto& diag = (options.diagnostics == NULL ? tmpdiag : *(options.diagnostics));
64 for (I<decltype(num)> i = 0; i < num; ++i) {
65 const auto val = size_factors[i];
66 if (!internal::is_invalid(val, diag)) {
67 mean += val;
68 ++denom;
69 }
70 }
71 } else {
72 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
73 denom = num;
74 }
75
76 if (denom) {
77 return mean/denom;
78 } else {
79 return 0;
80 }
81}
82
97template<typename SizeFactor_, typename Block_>
98std::vector<SizeFactor_> compute_mean_size_factor_blocked(
99 const std::size_t num,
100 const SizeFactor_* const size_factors,
101 const Block_* const block,
102 const ComputeMeanSizeFactorOptions& options
103) {
104 static_assert(std::is_floating_point<SizeFactor_>::value);
105 const auto ngroups = tatami_stats::total_groups(block, num);
106 auto group_mean = sanisizer::create<std::vector<SizeFactor_> >(ngroups);
107 auto group_num = sanisizer::create<std::vector<I<decltype(num)>> >(ngroups);
108
109 if (options.ignore_invalid) {
110 SizeFactorDiagnostics tmpdiag;
111 auto& diag = (options.diagnostics == NULL ? tmpdiag : *(options.diagnostics));
112 for (I<decltype(num)> i = 0; i < num; ++i) {
113 const auto val = size_factors[i];
114 if (!internal::is_invalid(val, diag)) {
115 const auto b = block[i];
116 group_mean[b] += val;
117 ++(group_num[b]);
118 }
119 }
120 } else {
121 for (I<decltype(num)> i = 0; i < num; ++i) {
122 const auto b = block[i];
123 group_mean[b] += size_factors[i];
124 ++(group_num[b]);
125 }
126 }
127
128 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
129 if (group_num[g]) {
130 group_mean[g] /= group_num[g];
131 }
132 }
133
134 return group_mean;
135}
136
149 bool ignore_invalid = true;
150
156 double center = 1;
157
163
168 bool report_final = false;
169};
170
187template<typename SizeFactor_>
188SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_* const size_factors, const CenterSizeFactorsOptions& options) {
190 copt.ignore_invalid = options.ignore_invalid;
191 copt.diagnostics = options.diagnostics;
192 const auto mean = compute_mean_size_factor(num, size_factors, copt);
193
194 if (mean == 0) {
195 return 0;
196 }
197
198 const SizeFactor_ mult = options.center / mean;
199 for (I<decltype(num)> i = 0; i < num; ++i){
200 size_factors[i] *= mult;
201 }
202
203 if (options.report_final) {
204 return options.center;
205 } else {
206 return mean;
207 }
208}
209
/**
 * Strategy used by `center_size_factors_blocked()` to choose the scaling for each block.
 *
 * - `PER_BLOCK`: each block is scaled by the reciprocal of its own mean.
 * - `LOWEST`: all blocks are scaled by the reciprocal of the smallest non-zero block mean.
 * - `CUSTOM`: each block is scaled so that its mean matches the corresponding caller-supplied center.
 */
enum class CenterBlockMode : char { PER_BLOCK, LOWEST, CUSTOM };
214
224 bool ignore_invalid = true;
225
231
255 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
256
262 std::optional<std::vector<double> > custom_centers;
263
268 bool report_final = false;
269};
270
287template<typename SizeFactor_, typename Block_>
288std::vector<SizeFactor_> center_size_factors_blocked(
289 const std::size_t num,
290 SizeFactor_* const size_factors,
291 const Block_* const block,
293) {
295 copt.ignore_invalid = options.ignore_invalid;
296 copt.diagnostics = options.diagnostics;
297 auto group_mean = compute_mean_size_factor_blocked(num, size_factors, block, copt);
298 const auto ngroups = group_mean.size();
299
300 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
301 std::vector<SizeFactor_> fac;
302 fac.reserve(ngroups);
303 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
304 const auto gm = group_mean[g];
305 if (gm) {
306 fac.emplace_back(1 / gm);
307 } else {
308 fac.emplace_back(1); // i.e., no-op.
309 }
310 }
311
312 for (I<decltype(num)> i = 0; i < num; ++i) {
313 size_factors[i] *= fac[block[i]];
314 }
315
316 if (options.report_final) {
317 for (auto& gm : group_mean) {
318 if (gm) {
319 gm = 1;
320 }
321 }
322 }
323 return group_mean;
324
325 } else if (options.block_mode == CenterBlockMode::LOWEST) {
326 SizeFactor_ min = 0;
327 bool found = false;
328 for (const auto m : group_mean) {
329 // Ignore groups with means of zeros, either because they're full
330 // of zeros themselves or they have no cells associated with them.
331 if (m) {
332 if (!found || m < min) {
333 min = m;
334 found = true;
335 }
336 }
337 }
338
339 if (min) {
340 const SizeFactor_ mult = 1 / min;
341 for (I<decltype(num)> i = 0; i < num; ++i) {
342 size_factors[i] *= mult;
343 }
344
345 if (options.report_final) {
346 for (auto& gm : group_mean) {
347 gm /= min;
348 }
349 }
350 }
351
352 return group_mean;
353
354 } else { // i.e., options.block_mode == CenterBlockMode::CUSTOM
355 if (!options.custom_centers.has_value()) {
356 throw std::runtime_error("'custom_centers' should be set for custom block centers");
357 }
358 const auto& custom = *(options.custom_centers);
359 const auto ngroups = group_mean.size();
360 if (custom.size() != ngroups) {
361 throw std::runtime_error("length of 'custom_centers' should be equal to the number of groups");
362 }
363
364 std::vector<SizeFactor_> fac;
365 fac.reserve(ngroups);
366 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
367 const auto gm = group_mean[g];
368 if (gm) {
369 fac.emplace_back(custom[g] / gm);
370 } else {
371 fac.emplace_back(1);
372 }
373 }
374
375 for (I<decltype(num)> i = 0; i < num; ++i) {
376 size_factors[i] *= fac[block[i]];
377 }
378
379 if (options.report_final) {
380 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
381 auto& gm = group_mean[g];
382 if (gm) {
383 gm = custom[g];
384 }
385 }
386 }
387 return group_mean;
388 }
389}
390
391}
392
393#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
std::vector< SizeFactor_ > compute_mean_size_factor_blocked(const std::size_t num, const SizeFactor_ *const size_factors, const Block_ *const block, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:98
SizeFactor_ compute_mean_size_factor(const std::size_t num, const SizeFactor_ *const size_factors, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:56
std::vector< SizeFactor_ > center_size_factors_blocked(const std::size_t num, SizeFactor_ *const size_factors, const Block_ *const block, const CenterSizeFactorsBlockedOptions &options)
Definition center_size_factors.hpp:288
CenterBlockMode
Definition center_size_factors.hpp:213
SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_ *const size_factors, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:188
Sanitize invalid size factors.
Options for center_size_factors() and center_size_factors_blocked().
Definition center_size_factors.hpp:218
std::optional< std::vector< double > > custom_centers
Definition center_size_factors.hpp:262
CenterBlockMode block_mode
Definition center_size_factors.hpp:255
bool ignore_invalid
Definition center_size_factors.hpp:224
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:230
bool report_final
Definition center_size_factors.hpp:268
Options for center_size_factors().
Definition center_size_factors.hpp:140
bool ignore_invalid
Definition center_size_factors.hpp:149
bool report_final
Definition center_size_factors.hpp:168
double center
Definition center_size_factors.hpp:156
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:162
Options for compute_mean_size_factor() and compute_mean_size_factor_blocked().
Definition center_size_factors.hpp:25
bool ignore_invalid
Definition center_size_factors.hpp:32
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:41
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:20