25template<
typename Stat_>
// Shorthand for Input_ with any reference and top-level const/volatile
// qualifiers stripped, i.e. the plain value type behind an expression's
// declared type. Typically used as I<decltype(expr)> to obtain a mutable
// integer type suitable for loop counters or size casts.
//
// The reference is removed first so that cv-qualifiers on the referred-to
// type become top-level and are stripped too (e.g. `const int&` -> `int`).
// Qualifiers that are not top-level are preserved (e.g. `const int* const&`
// -> `const int*`).
template<typename Input_>
using I = std::remove_cv_t<std::remove_reference_t<Input_> >;
// Scans all `n` statistics and tests each one against a fixed `threshold`
// using the caller-supplied comparator.
//
// Template parameters:
//   keep_index_ - compile-time switch that selects how results are written
//                 to `output` (see the `if constexpr` below).
//   Index_      - integer type of the gene counter.
//   Stat_       - type of the per-gene statistic and of the threshold.
//   Output_     - type of the result container/pointer, taken by reference.
//   Cmp_        - callable invoked as cmp(statistic[i], threshold).
//
// NOTE(review): the rest of the loop body (how `ok` is stored into `output`
// for each keep_index_ mode) is not visible in this excerpt - confirm
// against the full source before relying on this description.
67template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class Cmp_>
68void filter_genes_by_threshold(
const Index_ n,
const Stat_* statistic, Output_& output,
const Cmp_ cmp,
const Stat_ threshold) {
70 for (Index_ i = 0; i < n; ++i) {
// `ok` is true when gene i's statistic satisfies the comparator relative
// to the threshold.
71 const bool ok = cmp(statistic[i], threshold);
// Compile-time branch on the output representation.
72 if constexpr(keep_index_) {
// Walks entries of `semi_sorted` (a list of gene indices) and records those
// whose statistic satisfies cmp(statistic[pos], threshold).
//
// Parameters:
//   top         - requested number of top genes.
//   statistic   - array of per-gene statistics, indexed by gene.
//   output      - result container; with keep_index_ == true, passing gene
//                 indices are appended via push_back().
//   cmp         - callable invoked as cmp(statistic[pos], threshold).
//   threshold   - value each candidate's statistic is compared against.
//   semi_sorted - gene indices to examine.
//
// NOTE(review): the loop header and the declaration of `counter` are not
// visible in this excerpt; presumably `counter` runs over the first `top`
// entries of `semi_sorted` - confirm against the full source. The
// keep_index_ == false branch is likewise not visible here.
82template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class Cmp_>
83void select_top_genes_by_threshold(
const Index_ top,
const Stat_* statistic, Output_& output,
const Cmp_ cmp,
const Stat_ threshold,
const std::vector<Index_>& semi_sorted) {
// `pos` is the gene index stored at the current position of `semi_sorted`.
87 const auto pos = semi_sorted[counter];
// Only genes whose statistic passes the threshold comparison are reported.
88 if (cmp(statistic[pos], threshold)) {
89 if constexpr(keep_index_) {
90 output.push_back(pos);
// Core implementation for choosing the `top` genes out of `n` according to
// `statistic`, shared by the public wrappers.
//
// Template parameters:
//   keep_index_  - true: `output` is used as a container of gene indices
//                  (reserve/push_back/insert/sort are called on it).
//                  false: `output` is used as a per-gene boolean array
//                  (written through std::fill_n and output[i]).
//   CmpNotEqual_ - strict comparator type; the visible call sites pass
//                  `l > r` or `l < r`.
//   CmpEqual_    - non-strict comparator type; the visible call sites pass
//                  `l >= r` or `l <= r`.
//
// Options read here: check_nan, bound, open_bound, keep_ties.
//
// NOTE(review): this excerpt omits many lines of the original body (else
// branches, closing braces, the declaration of `num_nan`, the body of the
// nth_element comparator). The comments below only describe the statements
// that are visible; branch nesting is inferred from the fused listing line
// numbers and should be confirmed against the full source.
98template<
bool keep_index_,
typename Index_,
typename Stat_,
class Output_,
class CmpNotEqual_,
class CmpEqual_>
99void pick_top_genes(
const Index_ n,
const Stat_* statistic,
const Index_ top, Output_& output,
const CmpNotEqual_ cmpne,
const CmpEqual_ cmpeq,
const PickTopGenesOptions<Stat_>& options) {
101 if constexpr(keep_index_) {
// Boolean-array mode starts from "nothing selected".
// NOTE(review): presumably this sits in the else arm of the branch above.
104 std::fill_n(output, n,
false);
// NaN counting is only compiled in for statistic types that can represent
// a quiet NaN, and only performed when the caller asks for it.
110 if constexpr(std::numeric_limits<Stat_>::has_quiet_NaN) {
111 if (options.check_nan) {
112 for (Index_ i = 0; i < n; ++i) {
113 num_nan += std::isnan(statistic[i]);
// The request covers every non-NaN gene, so no partial sort is needed in
// this branch; only the optional bound and NaN exclusion apply.
118 if (top >= n - num_nan) {
119 if (options.bound.has_value()) {
// An open bound uses the strict comparator (cmpne); the other visible call
// uses the non-strict comparator (cmpeq).
120 if (options.open_bound) {
121 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpne, *(options.bound));
123 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, *(options.bound));
125 }
else if (num_nan == 0) {
// No bound and no NaNs: every gene is selected, either as the index
// sequence 0..n-1 or as an all-true array.
126 if constexpr(keep_index_) {
127 sanisizer::resize(output, n);
128 std::iota(output.begin(), output.end(),
static_cast<Index_
>(0));
130 std::fill_n(output, n,
true);
// Remaining visible case: select exactly the genes whose statistic is not
// NaN.
133 if constexpr(keep_index_) {
134 output.reserve(n - num_nan);
135 for (Index_ i = 0; i < n; ++i) {
136 if (!std::isnan(statistic[i])) {
141 for (Index_ i = 0; i < n; ++i) {
142 output[i] = !std::isnan(statistic[i]);
// Working list of candidate gene indices to be partially ordered.
149 std::vector<Index_> semi_sorted;
// Two visible ways of filling it: all indices 0..n-1 ...
151 sanisizer::resize(semi_sorted, n);
152 std::iota(semi_sorted.begin(), semi_sorted.end(),
static_cast<Index_
>(0));
// ... or only the indices of non-NaN statistics.
// NOTE(review): the condition choosing between the two is not visible.
154 semi_sorted.reserve(n - num_nan);
155 for (Index_ i = 0; i < n; ++i) {
156 if (!std::isnan(statistic[i])) {
157 semi_sorted.push_back(i);
// cMid addresses position top - 1, i.e. the top-th candidate; nth_element
// places the element belonging at that rank there, according to the
// comparator over the referenced statistics.
162 const auto cBegin = semi_sorted.begin(), cMid = cBegin + top - 1, cEnd = semi_sorted.end();
163 std::nth_element(cBegin, cMid, cEnd, [&](
const Index_ l,
const Index_ r) ->
bool {
164 const auto L = statistic[l], R = statistic[r];
// Statistic of the gene that landed at the top-th position.
171 const Stat_ threshold = statistic[*cMid];
173 if (options.keep_ties) {
174 if (options.bound.has_value()) {
175 const auto bound = *(options.bound);
// When the top-th statistic itself fails the bound, the visible code
// filters all genes against the bound instead of the threshold.
177 if (options.open_bound) {
178 if (!cmpne(threshold, bound)) {
179 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpne, *(options.bound));
183 if (!cmpeq(threshold, bound)) {
184 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, *(options.bound));
// Filters all genes with the non-strict comparator against the top-th
// statistic, so genes equal to the threshold pass as well.
190 filter_genes_by_threshold<keep_index_>(n, statistic, output, cmpeq, threshold);
// Prepare the output: capacity for `top` indices, or an all-false array.
194 if constexpr(keep_index_) {
195 output.reserve(sanisizer::cast<I<
decltype(output.size())> >(top));
197 std::fill_n(output, n,
false);
// With a bound, candidates from semi_sorted are additionally checked
// against it (strict comparator for an open bound, non-strict otherwise).
200 if (options.bound.has_value()) {
202 if (options.open_bound) {
203 select_top_genes_by_threshold<keep_index_>(
static_cast<Index_
>(top), statistic, output, cmpne, *(options.bound), semi_sorted);
205 select_top_genes_by_threshold<keep_index_>(
static_cast<Index_
>(top), statistic, output, cmpeq, *(options.bound), semi_sorted);
// Without a bound, the first `top` entries of semi_sorted are taken as-is.
208 if constexpr(keep_index_) {
209 output.insert(output.end(), semi_sorted.begin(), semi_sorted.begin() + top);
211 for (I<
decltype(top)> i = 0; i < top; ++i) {
212 output[semi_sorted[i]] =
true;
// Index output is sorted into ascending order; nth_element only partially
// orders semi_sorted, so the collected indices are not otherwise ordered.
217 if constexpr(keep_index_) {
218 std::sort(output.begin(), output.end());
// Public entry point fragment that forwards to the boolean-array flavour of
// the internal implementation (internal::pick_top_genes<false>).
//
// Two calls are visible, differing only in their comparator pairs:
//   - strict `l > r` with non-strict `l >= r`
//   - strict `l < r` with non-strict `l <= r`
//
// NOTE(review): the function signature and the condition selecting between
// the two calls are not visible in this excerpt; presumably a flag chooses
// whether larger or smaller statistics are preferred - confirm against the
// full source.
240template<
typename Stat_,
typename Bool_>
243 internal::pick_top_genes<false>(
// First comparator pair: "greater than" / "greater than or equal".
248 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
249 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
253 internal::pick_top_genes<false>(
// Mirrored comparator pair: "less than" / "less than or equal".
258 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
259 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },
// Convenience fragment that allocates a std::vector<Bool_> of length `n`
// and fills it by calling pick_top_genes() on the vector's underlying
// buffer.
//
// NOTE(review): the function signature, the closing of the create() call
// (including the matching #endif) and the return statement are not visible
// in this excerpt.
278template<
typename Bool_,
typename Stat_>
280 auto output = sanisizer::create<std::vector<Bool_> >(n
// When SCRAN_VARIANCES_TEST_INIT is defined, its value is passed as an extra
// argument to create().
// NOTE(review): presumably an initial fill value used by tests - confirm.
281#ifdef SCRAN_VARIANCES_TEST_INIT
282 , SCRAN_VARIANCES_TEST_INIT
// Delegate to the overload that writes through a raw pointer.
285 pick_top_genes(n, statistic, top, larger, output.data(), options);
// Public entry point fragment that collects results into a
// std::vector<Index_> by forwarding to the index flavour of the internal
// implementation (internal::pick_top_genes<true>).
//
// Two calls are visible, differing only in their comparator pairs:
//   - strict `l > r` with non-strict `l >= r`
//   - strict `l < r` with non-strict `l <= r`
//
// NOTE(review): the function signature, the condition selecting between the
// two calls, and the return statement are not visible in this excerpt -
// confirm against the full source.
303template<
typename Index_,
typename Stat_>
// Starts empty; the internal implementation appends to it.
305 std::vector<Index_> output;
307 internal::pick_top_genes<true>(
// First comparator pair: "greater than" / "greater than or equal".
312 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
313 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
317 internal::pick_top_genes<true>(
// Mirrored comparator pair: "less than" / "less than or equal".
322 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
323 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },