// Creates a vector of n indices (0, 1, ..., n-1) and partially orders it with
// std::nth_element around position `top - 1`. The ordering predicate is a
// lambda that looks up statistic[l] and statistic[r] for the two indices.
// NOTE(review): this listing is fragmentary. The rest of the lambda (how L and
// R are compared, and where `cmp` is applied) and the function's closing lines
// are not visible here; confirm them against the full file.
60template<
typename Index_,
typename Stat_,
class Cmp_>
61std::vector<Index_> create_semisorted_indices(
size_t n,
const Stat_* statistic, Cmp_ cmp,
size_t top) {
// One slot per statistic, filled below with consecutive values starting at 0.
62 std::vector<Index_> collected(n);
63 std::iota(collected.begin(), collected.end(),
static_cast<Index_
>(0));
// cMid is `top - 1` positions past the start.
// NOTE(review): assumes 1 <= top <= n so that cMid stays inside the range; the
// visible callers test top == 0 and top >= n before calling, presumably
// returning early (TODO confirm).
64 auto cBegin = collected.begin(), cMid = cBegin + top - 1, cEnd = collected.end();
65 std::nth_element(cBegin, cMid, cEnd, [&](Index_ l, Index_ r) ->
bool {
// Indices are compared through the statistics they refer to.
66 auto L = statistic[l], R = statistic[r];
// Writes a per-element selection flag into output[0..n) based on `statistic`.
// `cmp` is used as cmp(value, bound) and cmp(threshold, bound); `cmpeq` is used
// as cmpeq(value, threshold). `options` supplies top, bound, use_bound and
// keep_ties.
// NOTE(review): this listing skips source lines (for example the return/else
// lines between the numbered fragments), so the branch structure described in
// the comments below is partly inferred; confirm against the full file.
76template<
typename Stat_,
class Output_,
class Cmp_,
class CmpEqual_>
77void choose_highly_variable_genes(
size_t n,
const Stat_* statistic, Output_* output, Cmp_ cmp, CmpEqual_ cmpeq,
const ChooseHighlyVariableGenesOptions& options) {
// top == 0: every flag is written as false.
78 if (options.top == 0) {
79 std::fill_n(output, n,
false);
// The bound is converted to the statistic's type once for the comparisons below.
83 Stat_ bound = options.bound;
// top is at least n: with a bound enabled, each flag is cmp(value, bound).
84 if (options.top >= n) {
85 if (options.use_bound) {
86 for (
size_t i = 0; i < n; ++i) {
87 output[i] = cmp(statistic[i], bound);
// Presumably the branch without a bound: every flag becomes true (TODO confirm).
90 std::fill_n(output, n,
true);
// Partially ordered indices; the statistic found at position top - 1 is used as
// the threshold.
95 auto collected = create_semisorted_indices<size_t>(n, statistic, cmp, options.top);
96 Stat_ threshold = statistic[collected[options.top - 1]];
// keep_ties: flags are assigned by comparing every value, not by rank position.
98 if (options.keep_ties) {
// The threshold itself does not satisfy cmp against the bound, so each value
// is compared with the bound instead.
99 if (options.use_bound && !cmp(threshold, bound)) {
100 for (
size_t i = 0; i < n; ++i) {
101 output[i] = cmp(statistic[i], bound);
// Presumably the alternative branch: each value is compared with the threshold
// through cmpeq (TODO confirm).
104 for (
size_t i = 0; i < n; ++i) {
105 output[i] = cmpeq(statistic[i], threshold);
// Clear all flags; further down only the first `counter` entries of `collected`
// are flagged.
111 std::fill_n(output, n,
false);
112 size_t counter = options.top;
113 if (options.use_bound && !cmp(threshold, bound)) {
// NOTE(review): the statements that modify `counter` around this loop are not
// part of this listing, so the exact scan cannot be verified from here.
115 while (counter > 0) {
117 if (cmp(statistic[collected[counter]], bound)) {
// Flag the elements referenced by the first `counter` entries of `collected`.
124 for (
size_t i = 0; i < counter; ++i) {
125 output[collected[i]] =
true;
// Index-based variant: the result is collected in a std::vector<Index_> named
// `output` instead of a flag array.
// NOTE(review): the signature line is missing from this listing. The calls to
// internal::choose_highly_variable_genes_index<Index_> elsewhere in the file
// presumably target this template (TODO confirm). The bodies of the
// conditionals below (what happens when a test passes) are also not visible.
129template<
typename Index_,
typename Stat_,
class Cmp_,
class CmpEqual_>
131 std::vector<Index_> output;
132 if (options.top == 0) {
// The bound is converted to the statistic's type once for the comparisons below.
136 Stat_ bound = options.bound;
137 if (options.top >= n) {
138 if (options.use_bound) {
139 for (
size_t i = 0; i < n; ++i) {
// NOTE(review): options.use_bound was already tested by the enclosing `if`, so
// repeating it in this condition looks redundant.
140 if (options.use_bound && cmp(statistic[i], bound)) {
// Fills `output` with consecutive indices starting at 0; how `output` is sized
// before this call is not visible in this listing.
146 std::iota(output.begin(), output.end(),
static_cast<Index_
>(0));
// `output` receives the partially ordered indices; the statistic found at
// position top - 1 is used as the threshold.
151 output = create_semisorted_indices<Index_>(n, statistic, cmp, options.top);
152 Stat_ threshold = statistic[output[options.top - 1]];
154 if (options.keep_ties) {
// The threshold itself does not satisfy cmp against the bound, so values are
// tested against the bound.
156 if (options.use_bound && !cmp(threshold, bound)) {
157 for (
size_t i = 0; i < n; ++i) {
158 if (cmp(statistic[i], bound)) {
// Presumably the alternative branch: values are tested against the threshold
// through cmpeq (TODO confirm).
163 for (
size_t i = 0; i < n; ++i) {
164 if (cmpeq(statistic[i], threshold)) {
172 size_t counter = options.top;
173 if (options.use_bound && !cmp(threshold, bound)) {
// NOTE(review): the statements that modify `counter` around this loop are not
// part of this listing, so the exact scan cannot be verified from here.
175 while (counter > 0) {
177 if (cmp(statistic[output[counter]], bound)) {
// Keep only the first `counter` indices, then put them in ascending order.
184 output.resize(counter);
185 std::sort(output.begin(), output.end());
// Contains two calls to internal::choose_highly_variable_genes. The visible
// difference between them is the pair of comparison lambdas: (l > r, l >= r)
// for the first call and (l < r, l <= r) for the second.
// NOTE(review): the function signature, the condition that selects between the
// two calls, and the remaining call arguments are not part of this listing.
204template<
typename Stat_,
typename Bool_>
207 internal::choose_highly_variable_genes(
211 [](Stat_ l, Stat_ r) ->
bool {
return l > r; },
212 [](Stat_ l, Stat_ r) ->
bool {
return l >= r; },
216 internal::choose_highly_variable_genes(
220 [](Stat_ l, Stat_ r) ->
bool {
return l < r; },
221 [](Stat_ l, Stat_ r) ->
bool {
return l <= r; },
237template<
typename Bool_ = u
int8_t,
typename Stat_>
239 std::vector<Bool_> output(n);
255template<
typename Index_,
typename Stat_>
258 return internal::choose_highly_variable_genes_index<Index_>(
266 return internal::choose_highly_variable_genes_index<Index_>(