scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
center_size_factors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
2#define SCRAN_NORM_CENTER_SIZE_FACTORS_HPP
3
4#include <vector>
5#include <numeric>
6#include <algorithm>
7#include <type_traits>
8#include <cstddef>
9
10#include "tatami_stats/tatami_stats.hpp"
11
13#include "utils.hpp"
14
20namespace scran_norm {
21
/**
 * Strategy used by center_size_factors_blocked() to rescale size factors
 * once the per-block means are known.
 */
enum class CenterBlockMode : char {
    /** Each size factor is divided by the mean of its own block. */
    PER_BLOCK,
    /** Every size factor is divided by the smallest non-zero block mean. */
    LOWEST
};
26
48 CenterBlockMode block_mode = CenterBlockMode::LOWEST;
49
62 bool ignore_invalid = true;
63};
64
79template<typename SizeFactor_>
80SizeFactor_ center_size_factors_mean(const std::size_t num, const SizeFactor_* const size_factors, SizeFactorDiagnostics* const diagnostics, const CenterSizeFactorsOptions& options) {
81 static_assert(std::is_floating_point<SizeFactor_>::value);
82 SizeFactor_ mean = 0;
83 decltype(I(num)) denom = 0;
84
85 if (options.ignore_invalid) {
87 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
88 for (decltype(I(num)) i = 0; i < num; ++i) {
89 const auto val = size_factors[i];
90 if (!internal::is_invalid(val, diag)) {
91 mean += val;
92 ++denom;
93 }
94 }
95 } else {
96 mean = std::accumulate(size_factors, size_factors + num, static_cast<SizeFactor_>(0));
97 denom = num;
98 }
99
100 if (denom) {
101 return mean/denom;
102 } else {
103 return 0;
104 }
105}
106
125template<typename SizeFactor_>
126SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_* const size_factors, SizeFactorDiagnostics* const diagnostics, const CenterSizeFactorsOptions& options) {
127 const auto mean = center_size_factors_mean(num, size_factors, diagnostics, options);
128 if (mean) {
129 for (decltype(I(num)) i = 0; i < num; ++i){
130 size_factors[i] /= mean;
131 }
132 }
133 return mean;
134}
135
155template<typename SizeFactor_, typename Block_>
156std::vector<SizeFactor_> center_size_factors_blocked_mean(
157 const std::size_t num,
158 const SizeFactor_* const size_factors,
159 const Block_* const block,
160 SizeFactorDiagnostics* const diagnostics,
161 const CenterSizeFactorsOptions& options)
162{
163 static_assert(std::is_floating_point<SizeFactor_>::value);
164 const auto ngroups = tatami_stats::total_groups(block, num);
165 auto group_mean = sanisizer::create<std::vector<SizeFactor_> >(ngroups);
166 auto group_num = sanisizer::create<std::vector<decltype(I(num))> >(ngroups);
167
168 if (options.ignore_invalid) {
169 SizeFactorDiagnostics tmpdiag;
170 auto& diag = (diagnostics == NULL ? tmpdiag : *diagnostics);
171 for (decltype(I(num)) i = 0; i < num; ++i) {
172 const auto val = size_factors[i];
173 if (!internal::is_invalid(val, diag)) {
174 const auto b = block[i];
175 group_mean[b] += val;
176 ++(group_num[b]);
177 }
178 }
179 } else {
180 for (decltype(I(num)) i = 0; i < num; ++i) {
181 const auto b = block[i];
182 group_mean[b] += size_factors[i];
183 ++(group_num[b]);
184 }
185 }
186
187 for (decltype(I(ngroups)) g = 0; g < ngroups; ++g) {
188 if (group_num[g]) {
189 group_mean[g] /= group_num[g];
190 }
191 }
192
193 return group_mean;
194}
195
215template<typename SizeFactor_, typename Block_>
216std::vector<SizeFactor_> center_size_factors_blocked(
217 const std::size_t num,
218 SizeFactor_* const size_factors,
219 const Block_* const block,
220 SizeFactorDiagnostics* const diagnostics,
221 const CenterSizeFactorsOptions& options)
222{
223 const auto group_mean = center_size_factors_blocked_mean(num, size_factors, block, diagnostics, options);
224
225 if (options.block_mode == CenterBlockMode::PER_BLOCK) {
226 for (decltype(I(num)) i = 0; i < num; ++i) {
227 const auto& div = group_mean[block[i]];
228 if (div) {
229 size_factors[i] /= div;
230 }
231 }
232
233 } else if (options.block_mode == CenterBlockMode::LOWEST) {
234 SizeFactor_ min = 0;
235 bool found = false;
236 for (const auto m : group_mean) {
237 // Ignore groups with means of zeros, either because they're full
238 // of zeros themselves or they have no cells associated with them.
239 if (m) {
240 if (!found || m < min) {
241 min = m;
242 found = true;
243 }
244 }
245 }
246
247 if (min > 0) {
248 for (decltype(I(num)) i = 0; i < num; ++i) {
249 size_factors[i] /= min;
250 }
251 }
252 }
253
254 return group_mean;
255}
256
257}
258
259#endif
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
std::vector< SizeFactor_ > center_size_factors_blocked(const std::size_t num, SizeFactor_ *const size_factors, const Block_ *const block, SizeFactorDiagnostics *const diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:216
CenterBlockMode
Definition center_size_factors.hpp:25
SizeFactor_ center_size_factors_mean(const std::size_t num, const SizeFactor_ *const size_factors, SizeFactorDiagnostics *const diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:80
std::vector< SizeFactor_ > center_size_factors_blocked_mean(const std::size_t num, const SizeFactor_ *const size_factors, const Block_ *const block, SizeFactorDiagnostics *const diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:156
SizeFactor_ center_size_factors(const std::size_t num, SizeFactor_ *const size_factors, SizeFactorDiagnostics *const diagnostics, const CenterSizeFactorsOptions &options)
Definition center_size_factors.hpp:126
Sanitize invalid size factors.
Options for center_size_factors() and center_size_factors_blocked().
Definition center_size_factors.hpp:30
CenterBlockMode block_mode
Definition center_size_factors.hpp:48
bool ignore_invalid
Definition center_size_factors.hpp:62
Diagnostics for the size factors.
Definition sanitize_size_factors.hpp:20