scran_norm
Scaling normalization of single-cell data
Loading...
Searching...
No Matches
normalize_counts.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NORM_NORMALIZE_COUNTS_HPP
2#define SCRAN_NORM_NORMALIZE_COUNTS_HPP
3
4#include <type_traits>
5#include <vector>
6#include <memory>
7#include <cassert>
8
9#include "tatami/tatami.hpp"
10
16namespace scran_norm {
17
32template<typename OutputValue_, typename InputValue_, typename Index_, typename ReciprocalSizeFactors_>
33class DelayedLogNormalizeHelper final : public tatami::DelayedUnaryIsometricOperationHelper<OutputValue_, InputValue_, Index_> {
34public:
44 DelayedLogNormalizeHelper(ReciprocalSizeFactors_ reciprocal_size_factors, OutputValue_ log_base, OutputValue_ pseudo_count) :
45 my_reciprocal_sf(std::move(reciprocal_size_factors)),
46 my_reciprocal_denom(1.0 / std::log(log_base)),
47 my_pseudo(pseudo_count)
48 {
49 sanisizer::cast<Index_>(my_reciprocal_sf.size()); // check that cast is safe in ncol().
50 if (my_pseudo != 1) {
51 my_sparse = false;
52 }
53 for (const auto x : my_reciprocal_sf) {
54 if (!std::isfinite(x)) {
55 my_sparse = false;
56 my_has_weird_sf = true;
57 break;
58 }
59 }
60 }
61
62private:
63 ReciprocalSizeFactors_ my_reciprocal_sf;
64 OutputValue_ my_reciprocal_denom, my_pseudo;
65 bool my_has_weird_sf = false, my_sparse = true;
66
67public:
68 std::optional<Index_> nrow() const {
69 return std::nullopt;
70 }
71
72 std::optional<Index_> ncol() const {
73 return my_reciprocal_sf.size();
74 }
75
76public:
77 bool zero_depends_on_row() const {
78 return false;
79 }
80
81 bool zero_depends_on_column() const {
82 return my_has_weird_sf;
83 }
84
85 bool non_zero_depends_on_row() const {
86 return false;
87 }
88
89 bool non_zero_depends_on_column() const {
90 return true;
91 }
92
93private:
94 void log_normalize(const Index_ idx, const Index_ length, const InputValue_* input, OutputValue_* const output) const {
95 const auto current_rsf = my_reciprocal_sf[idx];
96 for (Index_ i = 0; i < length; ++i) {
97 output[i] = std::log(input[i] * current_rsf + my_pseudo) * my_reciprocal_denom;
98 }
99 }
100
101public:
102 void dense(const bool row, const Index_ idx, const Index_ start, const Index_ length, const InputValue_* input, OutputValue_* const output) const {
103 if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
104 input = output; // basically an assertion to the compiler to skip aliasing protection.
105 }
106
107 if (row) {
108 for (Index_ i = 0; i < length; ++i) {
109 output[i] = std::log(input[i] * my_reciprocal_sf[i + start] + my_pseudo) * my_reciprocal_denom;
110 }
111 } else {
112 log_normalize(idx, length, input, output);
113 }
114 }
115
116 void dense(const bool row, const Index_ idx, const std::vector<Index_>& indices, const InputValue_* input, OutputValue_* const output) const {
117 if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
118 input = output; // basically an assertion to the compiler to skip aliasing protection.
119 }
120 const Index_ length = indices.size();
121
122 if (row) {
123 for (Index_ i = 0; i < length; ++i) {
124 output[i] = std::log(input[i] * my_reciprocal_sf[indices[i]] + my_pseudo) * my_reciprocal_denom;
125 }
126 } else {
127 log_normalize(idx, length, input, output);
128 }
129 }
130
131public:
132 bool is_sparse() const {
133 return my_sparse;
134 }
135
136 void sparse(
137 const bool row,
138 const Index_ idx,
139 const Index_ number,
140 const InputValue_* input_value,
141 const Index_* const index,
142 OutputValue_* const output_value
143 ) const {
144 if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
145 input_value = output_value; // basically an assertion to the compiler to skip aliasing protection.
146 }
147
148 if (row) {
149 for (Index_ i = 0; i < number; ++i) {
150 output_value[i] = std::log(input_value[i] * my_reciprocal_sf[index[i]] + my_pseudo) * my_reciprocal_denom;
151 }
152 } else {
153 log_normalize(idx, number, input_value, output_value);
154 }
155 }
156
157 OutputValue_ fill(const bool row, const Index_ idx) const {
158 if (row) {
159 // This should never be called in the presence of size factors of zero, as these will lead to NaNs.
160 assert(!my_has_weird_sf);
161 return std::log(my_pseudo) * my_reciprocal_denom;
162 } else {
163 return std::log(static_cast<InputValue_>(0) * my_reciprocal_sf[idx] + my_pseudo) * my_reciprocal_denom;
164 }
165 }
166};
167
179 double pseudo_count = 1;
180
188 bool preserve_sparsity = false;
189
193 bool log = true;
194
199 double log_base = 2;
200};
201
233template<typename OutputValue_ = double, typename InputValue_, typename Index_, class SizeFactors_>
234std::shared_ptr<tatami::Matrix<OutputValue_, Index_> > normalize_counts(
235 std::shared_ptr<const tatami::Matrix<InputValue_, Index_> > counts,
236 SizeFactors_ size_factors,
237 const NormalizeCountsOptions& options)
238{
239 auto current_pseudo = options.pseudo_count;
240 if (options.preserve_sparsity && current_pseudo != 1 && options.log) {
241 for (auto& x : size_factors) {
242 x *= current_pseudo;
243 }
244 current_pseudo = 1;
245 }
246
247 static_assert(std::is_floating_point<OutputValue_>::value);
248 if (static_cast<size_t>(size_factors.size()) != static_cast<size_t>(counts->ncol())) {
249 throw std::runtime_error("length of 'size_factors' should be equal to the number of columns of 'counts'");
250 }
251
252 for (auto& x : size_factors) {
253 x = static_cast<OutputValue_>(1.0) / x;
254 }
255
256 if (!options.log) {
257 return std::make_shared<tatami::DelayedUnaryIsometricOperation<OutputValue_, InputValue_, Index_> >(
258 std::move(counts),
259 std::make_shared<tatami::DelayedUnaryIsometricMultiplyVectorHelper<OutputValue_, InputValue_, Index_, SizeFactors_> >(std::move(size_factors), false)
260 );
261 } else {
262 return std::make_shared<tatami::DelayedUnaryIsometricOperation<OutputValue_, InputValue_, Index_> >(
263 std::move(counts),
264 std::make_shared<DelayedLogNormalizeHelper<OutputValue_, InputValue_, Index_, SizeFactors_> >(std::move(size_factors), options.log_base, current_pseudo)
265 );
266 }
267};
268
272// Overload for template deduction.
273template<typename OutputValue_ = double, typename InputValue_, typename Index_, class SizeFactors_>
274std::shared_ptr<tatami::Matrix<OutputValue_, Index_> > normalize_counts(
275 std::shared_ptr<tatami::Matrix<InputValue_, Index_> > counts,
276 SizeFactors_ size_factors,
277 const NormalizeCountsOptions& options)
278{
279 return normalize_counts(std::shared_ptr<const tatami::Matrix<InputValue_, Index_> >(std::move(counts)), std::move(size_factors), options);
280}
285}
286
287#endif
Helper for delayed log-normalization.
Definition normalize_counts.hpp:33
DelayedLogNormalizeHelper(ReciprocalSizeFactors_ reciprocal_size_factors, OutputValue_ log_base, OutputValue_ pseudo_count)
Definition normalize_counts.hpp:44
Scaling normalization of single-cell data.
Definition center_size_factors.hpp:20
std::shared_ptr< tatami::Matrix< OutputValue_, Index_ > > normalize_counts(std::shared_ptr< const tatami::Matrix< InputValue_, Index_ > > counts, SizeFactors_ size_factors, const NormalizeCountsOptions &options)
Definition normalize_counts.hpp:234
Options for normalize_counts().
Definition normalize_counts.hpp:171
double log_base
Definition normalize_counts.hpp:199
double pseudo_count
Definition normalize_counts.hpp:179
bool log
Definition normalize_counts.hpp:193
bool preserve_sparsity
Definition normalize_counts.hpp:188