scran_graph_cluster
Graph-based clustering of cells
Loading...
Searching...
No Matches
build_snn_graph.hpp
Go to the documentation of this file.
1#ifndef SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
2#define SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
3
4#include <vector>
5#include <algorithm>
6#include <memory>
7#include <cstddef>
8
10#include "sanisizer/sanisizer.hpp"
11
12#if __has_include("igraph.h")
13#include "raiigraph/raiigraph.hpp"
14#include "edges_to_graph.hpp"
15#endif
16
23
45enum class SnnWeightScheme : char { RANKED, NUMBER, JACCARD };
46
56 int num_neighbors = 10;
57
61 SnnWeightScheme weighting_scheme = SnnWeightScheme::RANKED;
62
67 int num_threads = 1;
68};
69
76template<typename Node_, typename Weight_>
81 Node_ num_cells;
82
87 std::vector<Node_> edges;
88
93 std::vector<Weight_> weights;
94};
95
96#if __has_include("igraph.h")
97typedef igraph_integer_t DefaultNode;
98typedef igraph_real_t DefaultWeight;
99#else
104typedef int DefaultNode;
105
110typedef double DefaultWeight;
111#endif
112
116template<typename Input_>
117std::remove_cv_t<std::remove_reference_t<Input_> > I(Input_ x) {
118 return x;
119}
150template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, class GetNeighbors_, class GetIndex_>
151void build_snn_graph(const Index_ num_cells, const GetNeighbors_ get_neighbors, const GetIndex_ get_index, const BuildSnnGraphOptions& options, BuildSnnGraphResults<Node_, Weight_>& output) {
152 // Reverse mapping is not parallel-frendly, so we don't construct this with the neighbor search.
153 std::vector<std::vector<Node_> > simple_hosts;
154 std::vector<std::vector<std::pair<Node_, Weight_> > > ranked_hosts;
155
156 // Check that all implicit casts from Index_ to Node_ will be safe.
157 sanisizer::cast<Node_>(num_cells);
158
159 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
160 sanisizer::resize(ranked_hosts, num_cells);
161 for (Index_ i = 0; i < num_cells; ++i) {
162 ranked_hosts[i].emplace_back(i, 0); // each point is its own 0-th nearest neighbor
163 const auto& current = get_neighbors(i);
164 Weight_ rank = 1;
165 for (const auto& x : current) {
166 ranked_hosts[get_index(x)].emplace_back(i, rank);
167 ++rank;
168 }
169 }
170 } else {
171 sanisizer::resize(simple_hosts, num_cells);
172 for (Index_ i = 0; i < num_cells; ++i) {
173 simple_hosts[i].emplace_back(i); // each point is its own 0-th nearest neighbor
174 const auto& current = get_neighbors(i);
175 for (const auto& x : current) {
176 simple_hosts[get_index(x)].emplace_back(i);
177 }
178 }
179 }
180
181 // Constructing the shared neighbor graph.
182 auto edge_stores = sanisizer::create<std::vector<std::vector<Node_> > >(num_cells);
183 auto weight_stores = sanisizer::create<std::vector<std::vector<Weight_> > >(num_cells);
184
185 knncolle::parallelize(options.num_threads, num_cells, [&](const int, const Index_ start, const Index_ length) -> void {
186 auto current_score = sanisizer::create<std::vector<Weight_> >(num_cells);
187 std::vector<Node_> current_added;
188 current_added.reserve(num_cells);
189
190 for (Index_ j = start, end = start + length; j < end; ++j) {
191 const auto& current_neighbors = get_neighbors(j);
192 const auto nneighbors = current_neighbors.size();
193
194 for (decltype(I(nneighbors)) i = 0; i <= nneighbors; ++i) {
195 // First iteration treats 'j' as the zero-th neighbor.
196 // Remaining iterations go through the neighbors of 'j'.
197 const Node_ cur_neighbor = (i == 0 ? j : get_index(current_neighbors[i-1]));
198
199 // Going through all observations 'h' for which 'cur_neighbor'
200 // is a nearest neighbor, a.k.a., 'cur_neighbor' is a shared
201 // neighbor of both 'h' and 'j'.
202 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
203 for (const auto& h : ranked_hosts[cur_neighbor]) {
204 const auto othernode = h.first;
205 if (othernode < static_cast<Node_>(j)) { // avoid duplicates from symmetry in the SNN calculations.
206 auto& existing_other = current_score[othernode];
207
208 // Recording the lowest combined rank per neighbor.
209 const Weight_ currank = h.second + static_cast<Weight_>(i);
210 if (existing_other == 0) {
211 existing_other = currank;
212 current_added.push_back(othernode);
213 } else if (existing_other > currank) {
214 existing_other = currank;
215 }
216 }
217 }
218
219 } else {
220 for (const auto& othernode : simple_hosts[cur_neighbor]) {
221 if (othernode < static_cast<Node_>(j)) { // avoid duplicates from symmetry in the SNN calculations.
222 auto& existing_other = current_score[othernode];
223
224 // Recording the number of shared neighbors.
225 if (existing_other == 0) {
226 current_added.push_back(othernode);
227 }
228 ++existing_other;
229 }
230 }
231 }
232 }
233
234 // Converting to edges.
235 auto& current_edges = edge_stores[j];
236 current_edges.reserve(current_added.size() * 2);
237 auto& current_weights = weight_stores[j];
238 current_weights.reserve(current_added.size());
239
240 for (const auto othernode : current_added) {
241 current_edges.push_back(j);
242 current_edges.push_back(othernode);
243
244 Weight_& otherscore = current_score[othernode];
245 current_weights.push_back([&]{
246 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
247 const Weight_ preliminary = static_cast<Weight_>(nneighbors) - otherscore / 2;
248 return std::max(preliminary, static_cast<Weight_>(1e-6)); // Ensuring that an edge with a positive weight is always reported.
249 } else if (options.weighting_scheme == SnnWeightScheme::JACCARD) {
250 return otherscore / (2 * (static_cast<Weight_>(nneighbors) + 1) - otherscore);
251 } else {
252 return otherscore;
253 }
254 }());
255
256 // Resetting all those added to zero.
257 otherscore = 0;
258 }
259 current_added.clear();
260 }
261 });
262
263 // Collating the total number of edges.
264 std::size_t nedges = 0;
265 for (const auto& w : weight_stores) {
266 nedges = sanisizer::sum<std::size_t>(nedges, w.size());
267 }
268
269 output.num_cells = num_cells;
270 output.weights.clear();
271 output.weights.reserve(nedges);
272 for (const auto& w : weight_stores) {
273 output.weights.insert(output.weights.end(), w.begin(), w.end());
274 }
275 weight_stores.clear();
276 weight_stores.shrink_to_fit(); // forcibly release memory so that we have some more space for edges.
277
278 output.edges.clear();
279 output.edges.reserve(nedges * 2);
280 for (const auto& e : edge_stores) {
281 output.edges.insert(output.edges.end(), e.begin(), e.end());
282 }
283
284 return;
285}
286
305template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Distance_>
309 sanisizer::cast<Index_>(neighbors.size()),
310 [&](const Index_ i) -> const std::vector<std::pair<Index_, Distance_> >& { return neighbors[i]; },
311 [](const std::pair<Index_, Distance_>& x) -> Index_ { return x.first; },
312 options,
313 output
314 );
315 return output;
316}
317
332template<typename Node_ = int, typename Weight_ = double, typename Index_>
333BuildSnnGraphResults<Node_, Weight_> build_snn_graph(const std::vector<std::vector<Index_> >& neighbors, const BuildSnnGraphOptions& options) {
336 sanisizer::cast<Index_>(neighbors.size()),
337 [&](const Index_ i) -> const std::vector<Index_>& { return neighbors[i]; },
338 [](const Index_ x) -> Index_ { return x; },
339 options,
340 output
341 );
342 return output;
343}
344
360template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Input_, typename Distance_>
362 const auto neighbors = knncolle::find_nearest_neighbors_index_only(prebuilt, options.num_neighbors, options.num_threads);
363 return build_snn_graph<Node_, Weight_>(neighbors, options);
364}
365
385template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
387 const std::size_t num_dims,
388 const Index_ num_cells,
389 const Input_* data,
391 const BuildSnnGraphOptions& options)
392{
393 const auto prebuilt = knn_method.build_unique(knncolle::SimpleMatrix(num_dims, num_cells, data));
394 return build_snn_graph<Node_, Weight_>(*prebuilt, options);
395}
396
397#if __has_include("igraph.h")
408template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight>
409raiigraph::Graph convert_to_graph(const BuildSnnGraphResults<Node_, Weight_>& result) {
410 return edges_to_graph(result.edges.size(), result.edges.data(), result.num_cells, IGRAPH_UNDIRECTED);
411}
412#endif
413
414}
415
416#endif
std::unique_ptr< Prebuilt< Index_, Data_, Distance_ > > build_unique(const Matrix_ &data) const
Convert a list of edges to a graph.
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
std::vector< std::vector< Index_ > > find_nearest_neighbors_index_only(const Prebuilt< Index_, Data_, Distance_ > &index, int k, int num_threads=1)
std::vector< std::vector< std::pair< Index_, Distance_ > > > NeighborList
Graph-based clustering of single-cell data.
Definition build_snn_graph.hpp:22
void build_snn_graph(const Index_ num_cells, const GetNeighbors_ get_neighbors, const GetIndex_ get_index, const BuildSnnGraphOptions &options, BuildSnnGraphResults< Node_, Weight_ > &output)
Definition build_snn_graph.hpp:151
double DefaultWeight
Definition build_snn_graph.hpp:110
int DefaultNode
Definition build_snn_graph.hpp:104
SnnWeightScheme
Definition build_snn_graph.hpp:45
Options for SNN graph construction.
Definition build_snn_graph.hpp:50
int num_threads
Definition build_snn_graph.hpp:67
int num_neighbors
Definition build_snn_graph.hpp:56
SnnWeightScheme weighting_scheme
Definition build_snn_graph.hpp:61
Results of SNN graph construction.
Definition build_snn_graph.hpp:77
Node_ num_cells
Definition build_snn_graph.hpp:81
std::vector< Node_ > edges
Definition build_snn_graph.hpp:87
std::vector< Weight_ > weights
Definition build_snn_graph.hpp:93