scran_graph_cluster
Graph-based clustering of cells
Loading...
Searching...
No Matches
build_snn_graph.hpp
Go to the documentation of this file.
1#ifndef SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
2#define SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
3
4#include <vector>
5#include <algorithm>
6#include <memory>
7
9
10#if __has_include("igraph.h")
11#include "raiigraph/raiigraph.hpp"
12#include "edges_to_graph.hpp"
13#endif
14
21
41enum class SnnWeightScheme : char { RANKED, NUMBER, JACCARD };
42
52 int num_neighbors = 10;
53
57 SnnWeightScheme weighting_scheme = SnnWeightScheme::RANKED;
58
63 int num_threads = 1;
64};
65
72template<typename Node_, typename Weight_>
77 size_t num_cells;
78
83 std::vector<Node_> edges;
84
89 std::vector<Weight_> weights;
90};
91
92#if __has_include("igraph.h")
93typedef igraph_integer_t DefaultNode;
94typedef igraph_real_t DefaultWeight;
95#else
100typedef int DefaultNode;
101
106typedef double DefaultWeight;
107#endif
108
132template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, class GetNeighbors_, class GetIndex_>
133void build_snn_graph(size_t num_cells, GetNeighbors_ get_neighbors, GetIndex_ get_index, const BuildSnnGraphOptions& options, BuildSnnGraphResults<Node_, Weight_>& output) {
134 // Reverse mapping is not parallel-frendly, so we don't construct this with the neighbor search.
135 std::vector<std::vector<Node_> > simple_hosts;
136 std::vector<std::vector<std::pair<Node_, Weight_> > > ranked_hosts;
137
138 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
139 ranked_hosts.resize(num_cells);
140 for (size_t i = 0; i < num_cells; ++i) {
141 ranked_hosts[i].emplace_back(i, 0); // each point is its own 0-th nearest neighbor
142 const auto& current = get_neighbors(i);
143 Weight_ rank = 1;
144 for (auto x : current) {
145 ranked_hosts[get_index(x)].emplace_back(i, rank);
146 ++rank;
147 }
148 }
149 } else {
150 simple_hosts.resize(num_cells);
151 for (size_t i = 0; i < num_cells; ++i) {
152 simple_hosts[i].emplace_back(i); // each point is its own 0-th nearest neighbor
153 const auto& current = get_neighbors(i);
154 for (auto x : current) {
155 simple_hosts[get_index(x)].emplace_back(i);
156 }
157 }
158 }
159
160 // Constructing the shared neighbor graph.
161 std::vector<std::vector<Node_> > edge_stores(num_cells);
162 std::vector<std::vector<Weight_> > weight_stores(num_cells);
163
164 knncolle::parallelize(options.num_threads, num_cells, [&](int, size_t start, size_t length) -> void {
165 std::vector<Weight_> current_score(num_cells);
166 std::vector<Node_> current_added;
167 current_added.reserve(num_cells);
168
169 for (size_t j = start, end = start + length; j < end; ++j) {
170 const Node_ j_cast = j;
171
172 const auto& current_neighbors = get_neighbors(j);
173 const size_t nneighbors = current_neighbors.size();
174
175 for (size_t i = 0; i <= nneighbors; ++i) {
176 // First iteration treats 'j' as the zero-th neighbor.
177 // Remaining iterations go through the neighbors of 'j'.
178 const Node_ cur_neighbor = (i == 0 ? j : get_index(current_neighbors[i-1]));
179
180 // Going through all observations 'h' for which 'cur_neighbor'
181 // is a nearest neighbor, a.k.a., 'cur_neighbor' is a shared
182 // neighbor of both 'h' and 'j'.
183 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
184 Weight_ i_cast = i;
185 for (const auto& h : ranked_hosts[cur_neighbor]) {
186 auto othernode = h.first;
187 if (othernode < j_cast) { // avoid duplicates from symmetry in the SNN calculations.
188 auto& existing_other = current_score[othernode];
189
190 // Recording the lowest combined rank per neighbor (casting to avoid overflow on Node_).
191 Weight_ currank = h.second + i_cast;
192 if (existing_other == 0) {
193 existing_other = currank;
194 current_added.push_back(othernode);
195 } else if (existing_other > currank) {
196 existing_other = currank;
197 }
198 }
199 }
200
201 } else {
202 for (const auto& othernode : simple_hosts[cur_neighbor]) {
203 if (othernode < j_cast) { // avoid duplicates from symmetry in the SNN calculations.
204 auto& existing_other = current_score[othernode];
205
206 // Recording the number of shared neighbors.
207 if (existing_other == 0) {
208 current_added.push_back(othernode);
209 }
210 ++existing_other;
211 }
212 }
213 }
214 }
215
216 // Converting to edges.
217 auto& current_edges = edge_stores[j];
218 current_edges.reserve(current_added.size() * 2);
219 auto& current_weights = weight_stores[j];
220 current_weights.reserve(current_added.size());
221
222 for (auto othernode : current_added) {
223 Weight_& otherscore = current_score[othernode];
224 Weight_ finalscore;
225 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
226 finalscore = static_cast<Weight_>(nneighbors) - 0.5 * static_cast<Weight_>(otherscore);
227 } else {
228 finalscore = otherscore;
229 if (options.weighting_scheme == SnnWeightScheme::JACCARD) {
230 finalscore = finalscore / (2 * (nneighbors + 1) - finalscore);
231 }
232 }
233
234 current_edges.push_back(j);
235 current_edges.push_back(othernode);
236 current_weights.push_back(std::max(finalscore, 1e-6)); // Ensuring that an edge with a positive weight is always reported.
237
238 // Resetting all those added to zero.
239 otherscore = 0;
240 }
241 current_added.clear();
242 }
243 });
244
245 // Collating the total number of edges.
246 size_t nedges = 0;
247 for (const auto& w : weight_stores) {
248 nedges += w.size();
249 }
250
251 output.num_cells = num_cells;
252 output.weights.clear();
253 output.weights.reserve(nedges);
254 for (const auto& w : weight_stores) {
255 output.weights.insert(output.weights.end(), w.begin(), w.end());
256 }
257 weight_stores.clear();
258 weight_stores.shrink_to_fit(); // forcibly release memory so that we have some more space for edges.
259
260 output.edges.clear();
261 output.edges.reserve(nedges * 2);
262 for (const auto& e : edge_stores) {
263 output.edges.insert(output.edges.end(), e.begin(), e.end());
264 }
265
266 return;
267}
268
287template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Distance_>
291 neighbors.size(),
292 [&](size_t i) -> const auto& { return neighbors[i]; },
293 [](const auto& x) -> Node_ { return x.first; },
294 options,
295 output
296 );
297 return output;
298}
299
314template<typename Node_ = int, typename Weight_ = double, typename Index_>
315BuildSnnGraphResults<Node_, Weight_> build_snn_graph(const std::vector<std::vector<Index_> >& neighbors, const BuildSnnGraphOptions& options) {
318 neighbors.size(),
319 [&](size_t i) -> const std::vector<Index_>& { return neighbors[i]; },
320 [](Index_ x) -> Node_ { return x; },
321 options,
322 output
323 );
324 return output;
325}
326
341template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Dim_, typename Index_, typename Float_>
343 auto neighbors = knncolle::find_nearest_neighbors_index_only(prebuilt, options.num_neighbors, options.num_threads);
344 return build_snn_graph<Node_, Weight_>(neighbors, options);
345}
346
365template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Dim_, typename Index_, typename Value_, typename Float_>
367 Dim_ num_dims,
368 Index_ num_cells,
369 const Value_* data,
371 const BuildSnnGraphOptions& options)
372{
373 auto prebuilt = knn_method.build_unique(knncolle::SimpleMatrix<Dim_, Index_, Value_>(num_dims, num_cells, data));
374 return build_snn_graph<Node_, Weight_>(*prebuilt, options);
375}
376
377#if __has_include("igraph.h")
388template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight>
389raiigraph::Graph convert_to_graph(const BuildSnnGraphResults<Node_, Weight_>& result) {
390 return edges_to_graph(result.edges.size(), result.edges.data(), result.num_cells, IGRAPH_UNDIRECTED);
391}
392#endif
393
394}
395
396#endif
Convert a list of edges to a graph.
std::vector< std::vector< Index_ > > find_nearest_neighbors_index_only(const Prebuilt< Dim_, Index_, Float_ > &index, int k, int num_threads=1)
std::vector< std::vector< std::pair< Index_, Float_ > > > NeighborList
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Graph-based clustering of single-cell data.
Definition build_snn_graph.hpp:20
double DefaultWeight
Definition build_snn_graph.hpp:106
void build_snn_graph(size_t num_cells, GetNeighbors_ get_neighbors, GetIndex_ get_index, const BuildSnnGraphOptions &options, BuildSnnGraphResults< Node_, Weight_ > &output)
Definition build_snn_graph.hpp:133
int DefaultNode
Definition build_snn_graph.hpp:100
SnnWeightScheme
Definition build_snn_graph.hpp:41
Options for SNN graph construction.
Definition build_snn_graph.hpp:46
int num_threads
Definition build_snn_graph.hpp:63
int num_neighbors
Definition build_snn_graph.hpp:52
SnnWeightScheme weighting_scheme
Definition build_snn_graph.hpp:57
Results of SNN graph construction.
Definition build_snn_graph.hpp:73
std::vector< Node_ > edges
Definition build_snn_graph.hpp:83
std::vector< Weight_ > weights
Definition build_snn_graph.hpp:89
size_t num_cells
Definition build_snn_graph.hpp:77