// Default for the `bound` setting: a (flag, value) pair initialised to (false, 0).
// Elsewhere in this file `.first` is tested before the bound is applied and
// `.second` is read as the bound value.
37 std::pair<bool, double>
bound = std::make_pair<bool, double>(
false, 0);
// Builds a vector of the indices 0..n-1 and partially orders it with
// std::nth_element around position `top - 1`, comparing indices through their
// entries in `statistic`.
// NOTE(review): the embedded line numbers in this listing skip, so only part of
// this definition is visible; the comments describe the visible statements only.
51template<
typename Index_,
typename Stat_,
class Cmp_>
52std::vector<Index_> create_semisorted_indices(
size_t n,
const Stat_* statistic, Cmp_ cmp,
size_t top) {
// One slot per statistic, filled with 0, 1, ..., n-1.
53 std::vector<Index_> collected(n);
54 std::iota(collected.begin(), collected.end(),
static_cast<Index_
>(0));
// cMid addresses position top-1, so `top` must be at least 1 here. The visible
// callers test options.top == 0 and options.top >= n before calling this
// function -- presumably returning early in those cases; TODO confirm.
55 auto cBegin = collected.begin(), cMid = cBegin + top - 1, cEnd = collected.end();
// The comparator receives two indices and looks up their statistics; how L and R
// are then compared is not visible in this listing.
56 std::nth_element(cBegin, cMid, cEnd, [&](Index_ l, Index_ r) ->
bool {
57 auto L = statistic[l], R = statistic[r];
// Writes one boolean flag per entry of `statistic` into `output`, selecting
// entries according to options.top, options.bound and options.keep_ties.
//   n         - number of entries in `statistic` and `output`
//   statistic - values the selection is based on
//   output    - destination for the per-entry flags
//   cmp       - comparator used against the bound and the threshold
//   cmpeq     - comparator used against the threshold when ties are kept
// NOTE(review): the embedded line numbers skip, so several statements (else
// branches, returns, closing braces) are not visible in this listing.
67template<
typename Stat_,
class Output_,
class Cmp_,
class CmpEqual_>
68void choose_highly_variable_genes(
size_t n,
const Stat_* statistic, Output_* output, Cmp_ cmp, CmpEqual_ cmpeq,
const ChooseHighlyVariableGenesOptions& options) {
// top == 0: every flag is set to false.
69 if (options.top == 0) {
70 std::fill_n(output, n,
false);
// Bound value converted to the statistic type.
74 Stat_ bound = options.bound.second;
// top >= n: with the bound enabled, each entry is flagged by cmp(statistic[i], bound).
75 if (options.top >= n) {
76 if (options.bound.first) {
77 for (
size_t i = 0; i < n; ++i) {
78 output[i] = cmp(statistic[i], bound);
// Presumably the branch without a bound (the `else` is not visible): all flags true.
81 std::fill_n(output, n,
true);
// threshold is the statistic of the index placed at position top-1.
86 auto collected = create_semisorted_indices<size_t>(n, statistic, cmp, options.top);
87 Stat_ threshold = statistic[collected[options.top - 1]];
89 if (options.keep_ties) {
// When the bound is enabled and cmp(threshold, bound) is false, entries are
// flagged against the bound rather than the threshold.
90 if (options.bound.first && !cmp(threshold, bound)) {
91 for (
size_t i = 0; i < n; ++i) {
92 output[i] = cmp(statistic[i], bound);
// Presumably the alternative branch: flag entries by cmpeq against the threshold.
95 for (
size_t i = 0; i < n; ++i) {
96 output[i] = cmpeq(statistic[i], threshold);
// Start from all-false, then mark the first `counter` indices of `collected`.
102 std::fill_n(output, n,
false);
103 size_t counter = options.top;
// The statements that modify `counter` inside this branch are not visible in this
// listing; the loop checks statistic[collected[counter]] against the bound.
104 if (options.bound.first && !cmp(threshold, bound)) {
106 while (counter > 0) {
108 if (cmp(statistic[collected[counter]], bound)) {
115 for (
size_t i = 0; i < counter; ++i) {
116 output[collected[i]] =
true;
// Index-returning selection: collects indices into a std::vector<Index_> named
// `output`, using options.top, options.bound and options.keep_ties.
// NOTE(review): the signature line is not visible in this listing; this is
// presumably internal::choose_highly_variable_genes_index, the name used by the
// calls further down -- TODO confirm. Several statements are also elided.
120template<
typename Index_,
typename Stat_,
class Cmp_,
class CmpEqual_>
122 std::vector<Index_> output;
123 if (options.top == 0) {
// Bound value converted to the statistic type.
127 Stat_ bound = options.bound.second;
128 if (options.top >= n) {
129 if (options.bound.first) {
130 for (
size_t i = 0; i < n; ++i) {
// NOTE(review): options.bound.first is tested again here although this loop is
// already inside the `if (options.bound.first)` above.
131 if (options.bound.first && cmp(statistic[i], bound)) {
// Fills `output` with consecutive indices starting at 0; the statement that sizes
// `output` beforehand is not visible here.
137 std::iota(output.begin(), output.end(),
static_cast<Index_
>(0));
// threshold is the statistic of the index placed at position top-1.
142 output = create_semisorted_indices<Index_>(n, statistic, cmp, options.top);
143 Stat_ threshold = statistic[output[options.top - 1]];
145 if (options.keep_ties) {
// When the bound is enabled and cmp(threshold, bound) is false, entries are tested
// against the bound; the other visible loop tests cmpeq against the threshold.
// What happens for a matching entry is not visible in this listing.
147 if (options.bound.first && !cmp(threshold, bound)) {
148 for (
size_t i = 0; i < n; ++i) {
149 if (cmp(statistic[i], bound)) {
154 for (
size_t i = 0; i < n; ++i) {
155 if (cmpeq(statistic[i], threshold)) {
163 size_t counter = options.top;
// The statements that modify `counter` inside this branch are not visible in this
// listing; the loop checks statistic[output[counter]] against the bound.
164 if (options.bound.first && !cmp(threshold, bound)) {
166 while (counter > 0) {
168 if (cmp(statistic[output[counter]], bound)) {
// Keep the first `counter` indices and put them in ascending order.
175 output.resize(counter);
176 std::sort(output.begin(), output.end());
// Wrapper that forwards to internal::choose_highly_variable_genes with one of two
// comparator pairs: (l > r, l >= r) or the mirrored (l < r, l <= r).
// NOTE(review): the signature, the condition choosing between the two calls and
// the other call arguments are not visible in this listing. The lambdas are
// presumably passed as the `cmp` and `cmpeq` parameters, in that order -- TODO confirm.
195template<
typename Stat_,
typename Bool_>
198 internal::choose_highly_variable_genes(
// Strict / non-strict "greater" pair.
202 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
203 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
207 internal::choose_highly_variable_genes(
// Strict / non-strict "less" pair.
211 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
212 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },
// Variant that allocates its own result vector of `n` value-initialised Bool_
// elements. NOTE(review): the signature and the rest of the body are not visible
// in this listing.
228template<
typename Bool_ = u
int8_t,
typename Stat_>
230 std::vector<Bool_> output(n);
// Wrapper that returns the result of internal::choose_highly_variable_genes_index<Index_>
// from one of two call sites. NOTE(review): the signature, the condition choosing
// between the calls and the call arguments are not visible in this listing.
246template<
typename Index_,
typename Stat_>
249 return internal::choose_highly_variable_genes_index<Index_>(
257 return internal::choose_highly_variable_genes_index<Index_>(