topicks
Pick top genes for downstream analyses
Loading...
Searching...
No Matches
TopQueue.hpp
Go to the documentation of this file.
1#ifndef TOPICKS_TOP_QUEUE_HPP
2#define TOPICKS_TOP_QUEUE_HPP
3
4#include <vector>
5#include <algorithm>
6#include <optional>
7#include <limits>
8#include <cmath>
9
10#include "sanisizer/sanisizer.hpp"
11
17namespace topicks {
18
23template<typename Stat_>
36 std::optional<Stat_> bound;
37
42 bool open_bound = true;
43
48 bool keep_ties = true;
49
54 bool check_nan = false;
55};
56
66template<typename Stat_, typename Index_>
67class TopQueue {
68private:
69 typedef std::pair<Stat_, Index_> Element;
70 std::vector<Element> my_queue, my_ties;
71
72 struct SortGreater {
73 bool operator()(const Element& left, const Element& right) const {
74 if (left.first == right.first) {
75 return left.second < right.second;
76 } else {
77 return left.first > right.first;
78 }
79 }
80 };
81
82 struct SortLess {
83 bool operator()(const Element& left, const Element& right) const {
84 if (left.first == right.first) {
85 return left.second < right.second;
86 } else {
87 return left.first < right.first;
88 }
89 }
90 };
91
92 struct SortTies {
93 bool operator()(const Element& left, const Element& right) const {
94 return left.second < right.second;
95 }
96 };
97
98 Index_ my_top;
99 bool my_larger;
100 std::optional<Stat_> my_bound;
101 bool my_open_bound;
102 bool my_keep_ties;
103 bool my_check_nan;
104
105private:
106 template<class Compare_, class Skip_>
107 void push_internal(std::pair<Stat_, Index_> gene, const Compare_ cmp, const Skip_ skip) {
108 if (my_bound.has_value()) {
109 if (skip(*my_bound, gene.first)) {
110 return;
111 } else if (my_open_bound && *my_bound == gene.first) {
112 return;
113 }
114 }
115
116 if constexpr(std::numeric_limits<Stat_>::has_quiet_NaN) {
117 if (my_check_nan && std::isnan(gene.first)) {
118 return;
119 }
120 }
121
122 if (sanisizer::is_less_than(my_queue.size(), my_top)) {
123 my_queue.push_back(std::move(gene));
124 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
125 return;
126 }
127
128 if (my_top == 0) {
129 return;
130 }
131 const auto& top = my_queue.front();
132 if (skip(top.first, gene.first)) {
133 return;
134 }
135
136 if (!my_keep_ties) {
137 if (top.first != gene.first || gene.second < top.second) {
138 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
139 my_queue.back() = std::move(gene);
140 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
141 }
142 return;
143 }
144
145 if (top.first == gene.first) {
146 // Always making sure that all elements in my_ties are of equal or lower priority than the tied element in my_queue.
147 if (gene.second >= top.second) {
148 my_ties.push_back(std::move(gene));
149 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
150 } else {
151 auto retop = top;
152 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
153 my_queue.back() = std::move(gene);
154 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
155 my_ties.push_back(std::move(retop));
156 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
157 }
158 return;
159 }
160
161 if (my_top == 1) {
162 my_queue.front() = std::move(gene);
163 return;
164 }
165
166 // The idea is to check whether the top 2 elements of the queue are tied,
167 // and if so, shift the top value into the tie queue.
168 auto retop = top;
169 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
170 if (retop.first == my_queue.front().first) {
171 my_ties.push_back(std::move(retop));
172 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
173 } else {
174 my_ties.clear();
175 }
176 my_queue.back() = std::move(gene);
177 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
178 }
179
180 template<class Compare_>
181 void pop_internal(Compare_ cmp) {
182 if (my_ties.size()) {
183 std::pop_heap(my_ties.begin(), my_ties.end(), SortTies());
184 my_ties.pop_back();
185 } else {
186 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
187 my_queue.pop_back();
188 }
189 }
190
191public:
198 TopQueue(const Index_ top, const bool larger, const TopQueueOptions<Stat_>& options) :
199 my_top(top),
200 my_larger(larger),
201 my_bound(options.bound),
202 my_open_bound(options.open_bound),
203 my_keep_ties(options.keep_ties),
204 my_check_nan(options.check_nan)
205 {}
206
211 void push(std::pair<Stat_, Index_> gene) {
212 if (my_larger) {
213 push_internal(
214 std::move(gene),
215 SortGreater(),
216 [](const Stat_ existing, const Stat_ latest) -> bool { return existing > latest; }
217 );
218 } else {
219 push_internal(
220 std::move(gene),
221 SortLess(),
222 [](const Stat_ existing, const Stat_ latest) -> bool { return existing < latest; }
223 );
224 }
225 }
226
233 void emplace(Stat_ statistic, Index_ index) {
234 push({ statistic, index });
235 }
236
241 Index_ size() const {
242 return my_queue.size() + my_ties.size();
243 }
244
248 bool empty() const {
249 return size() == 0;
250 }
251
258 const std::pair<Stat_, Index_>& top() const {
259 if (my_ties.size()) {
260 return my_ties.front();
261 } else {
262 return my_queue.front();
263 }
264 }
265
270 void pop() {
271 if (my_larger) {
272 pop_internal(SortGreater());
273 } else {
274 pop_internal(SortLess());
275 }
276 }
277};
278
279}
280
281#endif
Priority queue for retaining top genes.
Definition TopQueue.hpp:67
void push(std::pair< Stat_, Index_ > gene)
Definition TopQueue.hpp:211
void pop()
Definition TopQueue.hpp:270
void emplace(Stat_ statistic, Index_ index)
Definition TopQueue.hpp:233
const std::pair< Stat_, Index_ > & top() const
Definition TopQueue.hpp:258
TopQueue(const Index_ top, const bool larger, const TopQueueOptions< Stat_ > &options)
Definition TopQueue.hpp:198
bool empty() const
Definition TopQueue.hpp:248
Index_ size() const
Definition TopQueue.hpp:241
Pick top genes from their statistics.
Definition pick_top_genes.hpp:19
Options for TopQueue.
Definition TopQueue.hpp:24
std::optional< Stat_ > bound
Definition TopQueue.hpp:36
bool check_nan
Definition TopQueue.hpp:54
bool open_bound
Definition TopQueue.hpp:42
bool keep_ties
Definition TopQueue.hpp:48