25template<
typename Stat_>
// Small type helper: accepts `x` by value and has a return type equal to
// `Input_` with any reference and const/volatile qualifiers removed.
// In this excerpt it is only used inside `decltype(I(...))` to name the
// unqualified type of an expression (e.g. `output.size()` or `top`).
// NOTE(review): the body is not visible here; presumably it just returns `x` - confirm.
64template<
typename Input_>
65std::remove_cv_t<std::remove_reference_t<Input_> > I(Input_ x) {
// Scans all `n` statistics and evaluates `cmp(statistic[i], threshold)` for each one.
//
// Template parameters:
//   keep_index_ - compile-time switch selecting how the result is stored in `output`.
//   Index_      - integer type of the gene counter.
//   Stat_       - type of the per-gene statistic and of the threshold.
//   Output_     - type of the result container/buffer.
//   Cmp_        - callable invoked as `cmp(statistic_value, threshold)`; its result is used as a bool.
//
// NOTE(review): the bodies of the two `if constexpr` branches are not visible in this
// excerpt. Presumably indices of passing genes are appended to `output` when
// `keep_index_` is true and `output[i]` is set to `ok` otherwise - confirm against the full source.
69template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class Cmp_>
70void filter_genes_by_threshold(
const Index_ n,
const Stat_* statistic, Output_& output,
const Cmp_ cmp,
const Stat_ threshold) {
72 for (Index_ i = 0; i < n; ++i) {
// Whether gene `i` satisfies the comparison against the threshold.
73 const bool ok = cmp(statistic[i], threshold);
74 if constexpr(keep_index_) {
// Walks entries of `semi_sorted` (gene indices) and keeps those whose statistic
// satisfies `cmp(statistic[pos], threshold)`.
//
// Template parameters mirror filter_genes_by_threshold: `keep_index_` selects the
// output representation at compile time, and `Cmp_` is called as `cmp(value, threshold)`.
//
// NOTE(review): the loop header that defines `counter` is not visible in this excerpt;
// presumably it runs over the first `top` entries of `semi_sorted` - confirm.
// The non-index branch is likewise not visible here.
84template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class Cmp_>
85void select_top_genes_by_threshold(
const Index_ top,
const Stat_* statistic, Output_& output,
const Cmp_ cmp,
const Stat_ threshold,
const std::vector<Index_>& semi_sorted) {
// `pos` is the gene index stored at the current position of the partially ordered list.
89 const auto pos = semi_sorted[counter];
90 if (cmp(statistic[pos], threshold)) {
91 if constexpr(keep_index_) {
// Index mode: record the gene's index.
92 output.push_back(pos);
// Core routine that chooses up to `top` genes out of `n` based on `statistic`.
//
// Template parameters:
//   keep_index_  - true: `output` is used as a container of gene indices
//                  (resize/reserve/push_back/insert/sort are called on it);
//                  false: `output` is used with `std::fill_n` and `output[i] = ...`,
//                  i.e. as a per-gene flag buffer of length `n`.
//   CmpNotEqual_ - comparator used when the bound is open (`options.open_bound`).
//   CmpEqual_    - comparator used when the bound is closed, and for tie handling.
//
// Options read here: `check_nan`, `bound` (optional), `open_bound`, `keep_ties`.
//
// NOTE(review): many lines of this function (else-branches, returns, closing braces,
// the declaration of `num_nan`, the comparator body) are not visible in this excerpt.
// The comments below describe only the statements that are visible.
// The callers visible in this excerpt pass strict (`>`/`<`) and non-strict (`>=`/`<=`)
// lambdas, presumably as `cmpne` and `cmpeq` respectively - confirm.
100template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class CmpNotEqual_,
class CmpEqual_>
101void pick_top_genes(
const Index_ n,
const Stat_* statistic,
const Index_ top, Output_& output,
const CmpNotEqual_ cmpne,
const CmpEqual_ cmpeq,
const PickTopGenesOptions<Stat_>& options) {
// Initial state of the output; the flag representation starts with all `n` entries false.
// NOTE(review): the index-mode statement between these two lines is not visible.
103 if constexpr(keep_index_) {
106 std::fill_n(output, n,
false);
// Count NaN statistics, but only when the statistic type can represent a quiet NaN
// and the caller asked for the check.
112 if constexpr(std::numeric_limits<Stat_>::has_quiet_NaN) {
113 if (options.check_nan) {
114 for (Index_ i = 0; i < n; ++i) {
115 num_nan += std::isnan(statistic[i]);
// Case 1: `top` is at least the number of non-NaN genes, so no partial ordering
// is performed in this branch; genes are selected by the bound and/or NaN status only.
120 if (top >= n - num_nan) {
121 if (options.bound.has_value()) {
// With a bound, keep genes that pass the comparison against it
// (`cmpne` when the bound is open; the `cmpeq` call follows for the other case).
122 if (options.open_bound) {
123 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpne, *(options.bound));
125 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, *(options.bound));
127 }
else if (num_nan == 0) {
// No bound and no NaNs: every gene is selected (indices 0..n-1, or all flags true).
128 if constexpr(keep_index_) {
129 sanisizer::resize(output, n);
130 std::iota(output.begin(), output.end(),
static_cast<Index_
>(0));
132 std::fill_n(output, n,
true);
// No bound but NaNs present: select exactly the genes whose statistic is not NaN.
135 if constexpr(keep_index_) {
136 output.reserve(n - num_nan);
137 for (Index_ i = 0; i < n; ++i) {
138 if (!std::isnan(statistic[i])) {
143 for (Index_ i = 0; i < n; ++i) {
144 output[i] = !std::isnan(statistic[i]);
// Case 2: fewer than all candidates are wanted. Build the list of candidate gene
// indices: either all of 0..n-1, or only those with a non-NaN statistic.
// NOTE(review): the condition choosing between the two fills is not visible here.
151 std::vector<Index_> semi_sorted;
153 sanisizer::resize(semi_sorted, n);
154 std::iota(semi_sorted.begin(), semi_sorted.end(),
static_cast<Index_
>(0));
156 semi_sorted.reserve(n - num_nan);
157 for (Index_ i = 0; i < n; ++i) {
158 if (!std::isnan(statistic[i])) {
159 semi_sorted.push_back(i);
// Partially order the candidates so that position `top - 1` holds the element that
// would be there under a full sort with the comparator below.
// NOTE(review): `cBegin + top - 1` requires top >= 1; confirm that top == 0 is
// handled before this point (no such guard is visible in this excerpt).
164 const auto cBegin = semi_sorted.begin(), cMid = cBegin + top - 1, cEnd = semi_sorted.end();
165 std::nth_element(cBegin, cMid, cEnd, [&](
const Index_ l,
const Index_ r) ->
bool {
// The comparator looks up the statistics of the two gene indices being compared;
// the rest of its body is not visible here.
166 const auto L = statistic[l], R = statistic[r];
// Statistic of the gene at position `top - 1` after the partial ordering.
173 const Stat_ threshold = statistic[*cMid];
// Tie-keeping mode: selection is done by re-filtering all `n` genes against a cutoff
// value rather than by taking the first `top` entries of `semi_sorted`.
175 if (options.keep_ties) {
176 if (options.bound.has_value()) {
177 const auto bound = *(options.bound);
// If the threshold itself fails the comparison against the bound, the bound is
// used as the cutoff in place of the threshold.
179 if (options.open_bound) {
180 if (!cmpne(threshold, bound)) {
181 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpne, *(options.bound));
185 if (!cmpeq(threshold, bound)) {
186 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, *(options.bound));
// Filter against the threshold with the non-strict comparator, so genes whose
// statistic equals the threshold are kept as well.
192 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, threshold);
// Prepare the output for the remaining selection code: reserve room for `top`
// indices (cast to the container's size type), or reset all flags to false.
196 if constexpr(keep_index_) {
197 output.reserve(sanisizer::cast<
decltype(I(output.size()))>(top));
199 std::fill_n(output, n,
false);
// With a bound: pick from `semi_sorted` only the genes that also pass the bound.
202 if (options.bound.has_value()) {
204 if (options.open_bound) {
205 select_top_genes_by_threshold<keep_index_>(
static_cast<Index_
>(top), statistic, output, cmpne, *(options.bound), semi_sorted);
207 select_top_genes_by_threshold<keep_index_>(
static_cast<Index_
>(top), statistic, output, cmpeq, *(options.bound), semi_sorted);
// Without a bound: take the first `top` entries of `semi_sorted` as they are.
210 if constexpr(keep_index_) {
211 output.insert(output.end(), semi_sorted.begin(), semi_sorted.begin() + top);
213 for (
decltype(I(top)) i = 0; i < top; ++i) {
214 output[semi_sorted[i]] =
true;
// Index mode: sort the collected gene indices in ascending order.
219 if constexpr(keep_index_) {
220 std::sort(output.begin(), output.end());
242template<
typename Stat_,
typename Bool_>
245 internal::pick_top_genes<false>(
250 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
251 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
255 internal::pick_top_genes<false>(
260 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
261 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },
280template<
typename Bool_,
typename Stat_>
282 auto output = sanisizer::create<std::vector<Bool_> >(n
283#ifdef SCRAN_VARIANCES_TEST_INIT
284 , SCRAN_VARIANCES_TEST_INIT
287 pick_top_genes(n, statistic, top, larger, output.data(), options);
305template<
typename Index_,
typename Stat_>
307 std::vector<Index_> output;
309 internal::pick_top_genes<true>(
314 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
315 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
319 internal::pick_top_genes<true>(
324 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
325 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },