scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include "tatami_stats/tatami_stats.hpp"
5
6#include <vector>
7#include <numeric>
8#include <algorithm>
9#include <type_traits>
10
12
18namespace scran_norm {
19
/**
 * Strategy used by `center_size_factors_blocked()` to scale size factors when cells are split into blocks.
 */
enum class CenterBlockMode : char {
    /** Each size factor is divided by the mean of its own block, provided that mean is non-zero. */
    PER_BLOCK,

    /** Every size factor is divided by the lowest non-zero block mean, provided that value is positive. */
    LOWEST
};

/**
 * @brief Options for `center_size_factors()` and `center_size_factors_blocked()`.
 */
struct CenterSizeFactorsOptions {
    /**
     * Scaling strategy across blocks.
     * In this file it is only read by `center_size_factors_blocked()`.
     */
    CenterBlockMode block_mode = CenterBlockMode::LOWEST;

    /**
     * Whether size factors flagged by `internal::is_invalid()` are left out of the mean calculations.
     * If false, every size factor contributes to the mean.
     */
    bool ignore_invalid = true;
};
58
73template<typename SizeFactor_>
74SizeFactor_ center_size_factors_mean(size_t num, const SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
75 static_assert(std::is_floating_point<SizeFactor_>::value);
76 SizeFactor_ mean = 0;
77 size_t denom = 0;
78
79 if (options.ignore_invalid) {
81 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
82 for (size_t i = 0; i < num; ++i) {
83 auto val = size_factors[i];
84 if (!internal::is_invalid(val, diag)) {
85 mean += val;
86 ++denom;
87 }
88 }
89 } else {
90 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
91 denom = num;
92 }
93
94 if (denom) {
95 return mean/denom;
96 } else {
97 return 0;
98 }
99}
100
119template<typename SizeFactor_>
120SizeFactor_ center_size_factors(size_t num, SizeFactor_* size_factors, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
121 auto mean = center_size_factors_mean(num, size_factors, diagnostics, options);
122 if (mean) {
123 for (size_t i = 0; i < num; ++i){
124 size_factors[i] /= mean;
125 }
126 }
127 return mean;
128}
129
148template<typename SizeFactor_, typename Block_>
149std::vector<SizeFactor_> center_size_factors_blocked_mean(size_t num, const SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
150 static_assert(std::is_floating_point<SizeFactor_>::value);
151 size_t ngroups = tatami_stats::total_groups(block, num);
152 std::vector<SizeFactor_> group_mean(ngroups);
153 std::vector<size_t> group_num(ngroups);
154
155 if (options.ignore_invalid) {
156 SizeFactorDiagnostics tmpdiag;
157 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
158 for (size_t i = 0; i < num; ++i) {
159 auto val = size_factors[i];
160 if (!internal::is_invalid(val, diag)) {
161 auto b = block[i];
162 group_mean[b] += val;
163 ++(group_num[b]);
164 }
165 }
166 } else {
167 for (size_t i = 0; i < num; ++i) {
168 auto b = block[i];
169 group_mean[b] += size_factors[i];
170 ++(group_num[b]);
171 }
172 }
173
174 for (size_t g = 0; g < ngroups; ++g) {
175 if (group_num[g]) {
176 group_mean[g] /= group_num[g];
177 }
178 }
179
180 return group_mean;
181}
182
201template<typename SizeFactor_, typename Block_>
202std::vector<SizeFactor_> center_size_factors_blocked(size_t num, SizeFactor_* size_factors, const Block_* block, SizeFactorDiagnostics* diagnostics, const CenterSizeFactorsOptions& options) {
203 auto group_mean = center_size_factors_blocked_mean(num, size_factors, block, diagnostics, options);
204
205 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
206 for (size_t i = 0; i < num; ++i) {
207 const auto& div = group_mean[block[i]];
208 if (div) {
209 size_factors[i] /= div;
210 }
211 }
212
213 } else if (options.block_mode == CenterBlockMode::LOWEST) {
214 SizeFactor_ min = 0;
215 bool found = false;
216 for (auto m : group_mean) {
217 // Ignore groups with means of zeros, either because they're full
218 // of zeros themselves or they have no cells associated with them.
219 if (m) {
220 if (!found || m < min) {
221 min = m;
222 found = true;
223 }
224 }
225 }
226
227 if (min > 0) {
228 for (size_t i = 0; i < num; ++i) {
229 size_factors[i] /= min;
230 }
231 }
232 }
233
234 return group_mean;
235}
236
237}
238
239#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:18
SizeFactor_ center_size_factors_mean(size_t num, const SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:74
CenterBlockMode
Definition center_size_factors.hpp:23
SizeFactor_ center_size_factors(size_t num, SizeFactor_ *size_factors, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:120
std::vector< SizeFactor_ > center_size_factors_blocked(size_t num, SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:202
std::vector< SizeFactor_ > center_size_factors_blocked_mean(size_t num, const SizeFactor_ *size_factors, const Block_ *block, SizeFactorDiagnostics *diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:149
Sanitize invalid size factors.
Options for center_size_factors() and center_size_factors_blocked().
Definition center_size_factors.hpp:28
CenterBlockMode block_mode
Definition center_size_factors.hpp:42
bool ignore_invalid
Definition center_size_factors.hpp:56
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:17