scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include <vector>
5#include <numeric>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami_stats/tatami_stats.hpp"
11
13#include "utils.hpp"
14
20namespace scran_norm {
21
43
55template<typename SizeFactor_>
56SizeFactor_ compute_mean_size_factor(const std::size_t num, const SizeFactor_* const size_factors, const ComputeMeanSizeFactorOptions& options) {
57 static_assert(std::is_floating_point<SizeFactor_>::value);
58 SizeFactor_ mean = 0;
59 I<decltype(num)> denom = 0;
60
61 if (options.ignore_invalid) {
63 for (I<decltype(num)> i = 0; i < num; ++i) {
64 const auto val = size_factors[i];
65 if (!internal::is_invalid(val, tmpdiag)) {
66 mean += val;
67 ++denom;
68 }
69 }
70 if (options.diagnostics != NULL) {
71 *(options.diagnostics) = tmpdiag;
72 }
73
74 } else {
75 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
76 denom = num;
77 }
78
79 if (denom) {
80 return mean / denom;
81 } else {
82 return 0;
83 }
84}
85
100template<typename SizeFactor_, typename Block_>
101std::vector<SizeFactor_> compute_mean_size_factor_blocked(
102 const std::size_t num,
103 const SizeFactor_* const size_factors,
104 const Block_* const block,
105 const ComputeMeanSizeFactorOptions& options
106) {
107 static_assert(std::is_floating_point<SizeFactor_>::value);
108 const auto ngroups = tatami_stats::total_groups(block, num);
109 auto group_mean = sanisizer::create<std::vector<SizeFactor_> >(ngroups);
110 auto group_num = sanisizer::create<std::vector<I<decltype(num)>> >(ngroups);
111
112 if (options.ignore_invalid) {
113 SizeFactorDiagnostics tmpdiag;
114 for (I<decltype(num)> i = 0; i < num; ++i) {
115 const auto val = size_factors[i];
116 if (!internal::is_invalid(val, tmpdiag)) {
117 const auto b = block[i];
118 group_mean[b] += val;
119 ++(group_num[b]);
120 }
121 }
122 if (options.diagnostics != NULL) {
123 *(options.diagnostics) = tmpdiag;
124 }
125
126 } else {
127 for (I<decltype(num)> i = 0; i < num; ++i) {
128 const auto b = block[i];
129 group_mean[b] += size_factors[i];
130 ++(group_num[b]);
131 }
132 }
133
134 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
135 if (group_num[g]) {
136 group_mean[g] /= group_num[g];
137 }
138 }
139
140 return group_mean;
141}
142
155 bool ignore_invalid = true;
156
162 double center = 1;
163
169
174 bool report_final = false;
175};
176
193template<typename SizeFactor_>
194SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_* const size_factors, const CenterSizeFactorsOptions& options) {
196 copt.ignore_invalid = options.ignore_invalid;
197 copt.diagnostics = options.diagnostics;
198 const auto mean = compute_mean_size_factor(num, size_factors, copt);
199
200 if (mean == 0) {
201 return 0;
202 }
203
204 const SizeFactor_ mult = options.center / mean;
205 for (I<decltype(num)> i = 0; i < num; ++i){
206 size_factors[i] *= mult;
207 }
208
209 if (options.report_final) {
210 return options.center;
211 } else {
212 return mean;
213 }
214}
215
/**
 * Strategy used by center_size_factors_blocked() to choose the scaling applied to each block.
 *
 * - `PER_BLOCK`: each block is scaled by the reciprocal of its own mean size factor.
 * - `LOWEST`: all size factors are scaled by the reciprocal of the smallest non-zero block mean.
 * - `CUSTOM`: each block is scaled so that its mean equals the matching entry of `custom_centers`.
 */
enum class CenterBlockMode : char {
    PER_BLOCK,
    LOWEST,
    CUSTOM
};
220
229 bool ignore_invalid = true;
230
236
260 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
261
266 std::optional<std::vector<double> > custom_centers;
267
272 bool report_final = false;
273};
274
293template<typename SizeFactor_, typename Block_>
294std::vector<SizeFactor_> center_size_factors_blocked(
295 const std::size_t num,
296 SizeFactor_* const size_factors,
297 const Block_* const block,
299) {
301 copt.ignore_invalid = options.ignore_invalid;
302 copt.diagnostics = options.diagnostics;
303 auto group_mean = compute_mean_size_factor_blocked(num, size_factors, block, copt);
304 const auto ngroups = group_mean.size();
305
306 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
307 std::vector<SizeFactor_> fac;
308 fac.reserve(ngroups);
309 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
310 const auto gm = group_mean[g];
311 if (gm) {
312 fac.emplace_back(1 / gm);
313 } else {
314 fac.emplace_back(1); // i.e., no-op.
315 }
316 }
317
318 for (I<decltype(num)> i = 0; i < num; ++i) {
319 size_factors[i] *= fac[block[i]];
320 }
321
322 if (options.report_final) {
323 for (auto& gm : group_mean) {
324 if (gm) {
325 gm = 1;
326 }
327 }
328 }
329
330 return group_mean;
331
332 } else if (options.block_mode == CenterBlockMode::LOWEST) {
333 SizeFactor_ min = 0;
334 bool found = false;
335 for (const auto m : group_mean) {
336 // Ignore groups with means of zeros, either because they're full
337 // of zeros themselves or they have no cells associated with them.
338 if (m) {
339 if (!found || m < min) {
340 min = m;
341 found = true;
342 }
343 }
344 }
345
346 if (min) {
347 const SizeFactor_ mult = 1 / min;
348 for (I<decltype(num)> i = 0; i < num; ++i) {
349 size_factors[i] *= mult;
350 }
351
352 if (options.report_final) {
353 for (auto& gm : group_mean) {
354 gm /= min;
355 }
356 }
357 }
358
359 return group_mean;
360
361 } else { // i.e., options.block_mode == CenterBlockMode::CUSTOM
362 if (!options.custom_centers.has_value()) {
363 throw std::runtime_error("'custom_centers' should be set for custom block centers");
364 }
365 const auto& custom = *(options.custom_centers);
366 const auto ngroups = group_mean.size();
367 if (custom.size() != ngroups) {
368 throw std::runtime_error("length of 'custom_centers' should be equal to the number of groups");
369 }
370
371 std::vector<SizeFactor_> fac;
372 fac.reserve(ngroups);
373 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
374 const auto gm = group_mean[g];
375 if (gm) {
376 fac.emplace_back(custom[g] / gm);
377 } else {
378 fac.emplace_back(1);
379 }
380 }
381
382 for (I<decltype(num)> i = 0; i < num; ++i) {
383 size_factors[i] *= fac[block[i]];
384 }
385
386 if (options.report_final) {
387 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
388 auto& gm = group_mean[g];
389 if (gm) {
390 gm = custom[g];
391 }
392 }
393 }
394
395 return group_mean;
396 }
397}
398
399}
400
401#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
std::vector< SizeFactor_ > compute_mean_size_factor_blocked(const std::size_t num, const SizeFactor_ *const size_factors, const Block_ *const block, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:101
SizeFactor_ compute_mean_size_factor(const std::size_t num, const SizeFactor_ *const size_factors, const ComputeMeanSizeFactorOptions &options)
Definition center_size_factors.hpp:56
std::vector< SizeFactor_ > center_size_factors_blocked(const std::size_t num, SizeFactor_ *const size_factors, const Block_ *const block, const CenterSizeFactorsBlockedOptions &options)
Definition center_size_factors.hpp:294
CenterBlockMode
Definition center_size_factors.hpp:219
SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_ *const size_factors, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:194
Sanitize invalid size factors.
Options for center_size_factors_blocked().
Definition center_size_factors.hpp:224
std::optional< std::vector< double > > custom_centers
Definition center_size_factors.hpp:266
CenterBlockMode block_mode
Definition center_size_factors.hpp:260
bool ignore_invalid
Definition center_size_factors.hpp:229
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:235
bool report_final
Definition center_size_factors.hpp:272
Options for center_size_factors().
Definition center_size_factors.hpp:146
bool ignore_invalid
Definition center_size_factors.hpp:155
bool report_final
Definition center_size_factors.hpp:174
double center
Definition center_size_factors.hpp:162
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:168
Options for compute_mean_size_factor() and compute_mean_size_factor_blocked().
Definition center_size_factors.hpp:25
bool ignore_invalid
Definition center_size_factors.hpp:32
SizeFactorDiagnostics * diagnostics
Definition center_size_factors.hpp:41
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:20