topicks
Pick top genes for downstream analyses
Loading...
Searching...
No Matches
TopQueue.hpp
Go to the documentation of this file.
1#ifndef TOPICKS_TOP_QUEUE_HPP
2#define TOPICKS_TOP_QUEUE_HPP
3
4#include <vector>
5#include <algorithm>
6#include <optional>
7#include <limits>
8#include <cmath>
9#include <cstddef>
10
11#include "sanisizer/sanisizer.hpp"
12
18namespace topicks {
19
24template<typename Stat_>
37 std::optional<Stat_> bound;
38
43 bool open_bound = true;
44
49 bool keep_ties = true;
50
55 bool check_nan = false;
56
62 std::optional<std::size_t> reserve;
63};
64
74template<typename Stat_, typename Index_>
75class TopQueue {
76private:
77 typedef std::pair<Stat_, Index_> Element;
78 std::vector<Element> my_queue, my_ties;
79
80 struct SortGreater {
81 bool operator()(const Element& left, const Element& right) const {
82 if (left.first == right.first) {
83 return left.second < right.second;
84 } else {
85 return left.first > right.first;
86 }
87 }
88 };
89
90 struct SortLess {
91 bool operator()(const Element& left, const Element& right) const {
92 if (left.first == right.first) {
93 return left.second < right.second;
94 } else {
95 return left.first < right.first;
96 }
97 }
98 };
99
100 struct SortTies {
101 bool operator()(const Element& left, const Element& right) const {
102 return left.second < right.second;
103 }
104 };
105
106 Index_ my_top;
107 bool my_larger;
108 std::optional<Stat_> my_bound;
109 bool my_open_bound;
110 bool my_keep_ties;
111 bool my_check_nan;
112
113private:
114 template<class Compare_, class Skip_>
115 void push_internal(std::pair<Stat_, Index_> gene, const Compare_ cmp, const Skip_ skip) {
116 if (my_bound.has_value()) {
117 if (skip(*my_bound, gene.first)) {
118 return;
119 } else if (my_open_bound && *my_bound == gene.first) {
120 return;
121 }
122 }
123
124 if constexpr(std::numeric_limits<Stat_>::has_quiet_NaN) {
125 if (my_check_nan && std::isnan(gene.first)) {
126 return;
127 }
128 }
129
130 if (sanisizer::is_less_than(my_queue.size(), my_top)) {
131 my_queue.push_back(std::move(gene));
132 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
133 return;
134 }
135
136 if (my_top == 0) {
137 return;
138 }
139 const auto& top = my_queue.front();
140 if (skip(top.first, gene.first)) {
141 return;
142 }
143
144 if (!my_keep_ties) {
145 if (top.first != gene.first || gene.second < top.second) {
146 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
147 my_queue.back() = std::move(gene);
148 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
149 }
150 return;
151 }
152
153 if (top.first == gene.first) {
154 // Always making sure that all elements in my_ties are of equal or lower priority than the tied element in my_queue.
155 if (gene.second >= top.second) {
156 my_ties.push_back(std::move(gene));
157 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
158 } else {
159 auto retop = top;
160 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
161 my_queue.back() = std::move(gene);
162 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
163 my_ties.push_back(std::move(retop));
164 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
165 }
166 return;
167 }
168
169 if (my_top == 1) {
170 my_queue.front() = std::move(gene);
171 return;
172 }
173
174 // The idea is to check whether the top 2 elements of the queue are tied,
175 // and if so, shift the top value into the tie queue.
176 auto retop = top;
177 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
178 if (retop.first == my_queue.front().first) {
179 my_ties.push_back(std::move(retop));
180 std::push_heap(my_ties.begin(), my_ties.end(), SortTies());
181 } else {
182 my_ties.clear();
183 }
184 my_queue.back() = std::move(gene);
185 std::push_heap(my_queue.begin(), my_queue.end(), cmp);
186 }
187
188 template<class Compare_>
189 void pop_internal(Compare_ cmp) {
190 if (my_ties.size()) {
191 std::pop_heap(my_ties.begin(), my_ties.end(), SortTies());
192 my_ties.pop_back();
193 } else {
194 std::pop_heap(my_queue.begin(), my_queue.end(), cmp);
195 my_queue.pop_back();
196 }
197 }
198
199public:
206 TopQueue(const Index_ top, const bool larger, const TopQueueOptions<Stat_>& options) :
207 my_top(top),
208 my_larger(larger),
209 my_bound(options.bound),
210 my_open_bound(options.open_bound),
211 my_keep_ties(options.keep_ties),
212 my_check_nan(options.check_nan)
213 {
214 if (options.reserve.has_value()) {
215 my_queue.reserve(*(options.reserve));
216 } else {
217 my_queue.reserve(my_top);
218 }
219 }
220
225 void push(std::pair<Stat_, Index_> gene) {
226 if (my_larger) {
227 push_internal(
228 std::move(gene),
229 SortGreater(),
230 [](const Stat_ existing, const Stat_ latest) -> bool { return existing > latest; }
231 );
232 } else {
233 push_internal(
234 std::move(gene),
235 SortLess(),
236 [](const Stat_ existing, const Stat_ latest) -> bool { return existing < latest; }
237 );
238 }
239 }
240
247 void emplace(Stat_ statistic, Index_ index) {
248 push({ statistic, index });
249 }
250
255 Index_ size() const {
256 return my_queue.size() + my_ties.size();
257 }
258
262 bool empty() const {
263 return size() == 0;
264 }
265
272 const std::pair<Stat_, Index_>& top() const {
273 if (my_ties.size()) {
274 return my_ties.front();
275 } else {
276 return my_queue.front();
277 }
278 }
279
284 void pop() {
285 if (my_larger) {
286 pop_internal(SortGreater());
287 } else {
288 pop_internal(SortLess());
289 }
290 }
291};
292
293}
294
295#endif
Priority queue for retaining top genes.
Definition TopQueue.hpp:75
void push(std::pair< Stat_, Index_ > gene)
Definition TopQueue.hpp:225
void pop()
Definition TopQueue.hpp:284
void emplace(Stat_ statistic, Index_ index)
Definition TopQueue.hpp:247
const std::pair< Stat_, Index_ > & top() const
Definition TopQueue.hpp:272
TopQueue(const Index_ top, const bool larger, const TopQueueOptions< Stat_ > &options)
Definition TopQueue.hpp:206
bool empty() const
Definition TopQueue.hpp:262
Index_ size() const
Definition TopQueue.hpp:255
Pick top genes from their statistics.
Definition pick_top_genes.hpp:19
Options for TopQueue.
Definition TopQueue.hpp:25
std::optional< std::size_t > reserve
Definition TopQueue.hpp:62
std::optional< Stat_ > bound
Definition TopQueue.hpp:37
bool check_nan
Definition TopQueue.hpp:55
bool open_bound
Definition TopQueue.hpp:43
bool keep_ties
Definition TopQueue.hpp:49