scran_graph_cluster
Graph-based clustering of cells
Loading...
Searching...
No Matches
build_snn_graph.hpp
Go to the documentation of this file.
1#ifndef SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
2#define SCRAN_GRAPH_CLUSTER_BUILD_SNN_GRAPH_HPP
3
4#include <vector>
5#include <algorithm>
6#include <memory>
7#include <cstddef>
8
10
11#if __has_include("igraph.h")
12#include "raiigraph/raiigraph.hpp"
13#include "edges_to_graph.hpp"
14#endif
15
22
42enum class SnnWeightScheme : char { RANKED, NUMBER, JACCARD };
43
53 int num_neighbors = 10;
54
58 SnnWeightScheme weighting_scheme = SnnWeightScheme::RANKED;
59
64 int num_threads = 1;
65};
66
73template<typename Node_, typename Weight_>
78 Node_ num_cells;
79
84 std::vector<Node_> edges;
85
90 std::vector<Weight_> weights;
91};
92
93#if __has_include("igraph.h")
94typedef igraph_integer_t DefaultNode;
95typedef igraph_real_t DefaultWeight;
96#else
101typedef int DefaultNode;
102
107typedef double DefaultWeight;
108#endif
109
134template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, class GetNeighbors_, class GetIndex_>
135void build_snn_graph(Index_ num_cells, GetNeighbors_ get_neighbors, GetIndex_ get_index, const BuildSnnGraphOptions& options, BuildSnnGraphResults<Node_, Weight_>& output) {
136 // Reverse mapping is not parallel-frendly, so we don't construct this with the neighbor search.
137 std::vector<std::vector<Node_> > simple_hosts;
138 std::vector<std::vector<std::pair<Node_, Weight_> > > ranked_hosts;
139
140 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
141 ranked_hosts.resize(num_cells);
142 for (Index_ i = 0; i < num_cells; ++i) {
143 ranked_hosts[i].emplace_back(i, 0); // each point is its own 0-th nearest neighbor
144 const auto& current = get_neighbors(i);
145 Weight_ rank = 1;
146 for (auto x : current) {
147 ranked_hosts[get_index(x)].emplace_back(i, rank);
148 ++rank;
149 }
150 }
151 } else {
152 simple_hosts.resize(num_cells);
153 for (Index_ i = 0; i < num_cells; ++i) {
154 simple_hosts[i].emplace_back(i); // each point is its own 0-th nearest neighbor
155 const auto& current = get_neighbors(i);
156 for (auto x : current) {
157 simple_hosts[get_index(x)].emplace_back(i);
158 }
159 }
160 }
161
162 // Constructing the shared neighbor graph.
163 std::vector<std::vector<Node_> > edge_stores(num_cells);
164 std::vector<std::vector<Weight_> > weight_stores(num_cells);
165
166 knncolle::parallelize(options.num_threads, num_cells, [&](int, Index_ start, Index_ length) -> void {
167 std::vector<Weight_> current_score(num_cells);
168 std::vector<Node_> current_added;
169 current_added.reserve(num_cells);
170
171 for (Index_ j = start, end = start + length; j < end; ++j) {
172 const Node_ j_cast = j;
173
174 const auto& current_neighbors = get_neighbors(j);
175 auto nneighbors = current_neighbors.size();
176
177 for (decltype(nneighbors) i = 0; i <= nneighbors; ++i) {
178 // First iteration treats 'j' as the zero-th neighbor.
179 // Remaining iterations go through the neighbors of 'j'.
180 const Node_ cur_neighbor = (i == 0 ? j : get_index(current_neighbors[i-1]));
181
182 // Going through all observations 'h' for which 'cur_neighbor'
183 // is a nearest neighbor, a.k.a., 'cur_neighbor' is a shared
184 // neighbor of both 'h' and 'j'.
185 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
186 Weight_ i_cast = i;
187 for (const auto& h : ranked_hosts[cur_neighbor]) {
188 auto othernode = h.first;
189 if (othernode < j_cast) { // avoid duplicates from symmetry in the SNN calculations.
190 auto& existing_other = current_score[othernode];
191
192 // Recording the lowest combined rank per neighbor (casting to avoid overflow on Node_).
193 Weight_ currank = h.second + i_cast;
194 if (existing_other == 0) {
195 existing_other = currank;
196 current_added.push_back(othernode);
197 } else if (existing_other > currank) {
198 existing_other = currank;
199 }
200 }
201 }
202
203 } else {
204 for (const auto& othernode : simple_hosts[cur_neighbor]) {
205 if (othernode < j_cast) { // avoid duplicates from symmetry in the SNN calculations.
206 auto& existing_other = current_score[othernode];
207
208 // Recording the number of shared neighbors.
209 if (existing_other == 0) {
210 current_added.push_back(othernode);
211 }
212 ++existing_other;
213 }
214 }
215 }
216 }
217
218 // Converting to edges.
219 auto& current_edges = edge_stores[j];
220 current_edges.reserve(current_added.size() * 2);
221 auto& current_weights = weight_stores[j];
222 current_weights.reserve(current_added.size());
223
224 for (auto othernode : current_added) {
225 Weight_& otherscore = current_score[othernode];
226 Weight_ finalscore;
227 if (options.weighting_scheme == SnnWeightScheme::RANKED) {
228 finalscore = static_cast<Weight_>(nneighbors) - 0.5 * static_cast<Weight_>(otherscore);
229 } else {
230 finalscore = otherscore;
231 if (options.weighting_scheme == SnnWeightScheme::JACCARD) {
232 finalscore = finalscore / (2 * (nneighbors + 1) - finalscore);
233 }
234 }
235
236 current_edges.push_back(j);
237 current_edges.push_back(othernode);
238 current_weights.push_back(std::max(finalscore, 1e-6)); // Ensuring that an edge with a positive weight is always reported.
239
240 // Resetting all those added to zero.
241 otherscore = 0;
242 }
243 current_added.clear();
244 }
245 });
246
247 // Collating the total number of edges.
248 std::size_t nedges = 0;
249 for (const auto& w : weight_stores) {
250 nedges += w.size();
251 }
252
253 output.num_cells = num_cells;
254 output.weights.clear();
255 output.weights.reserve(nedges);
256 for (const auto& w : weight_stores) {
257 output.weights.insert(output.weights.end(), w.begin(), w.end());
258 }
259 weight_stores.clear();
260 weight_stores.shrink_to_fit(); // forcibly release memory so that we have some more space for edges.
261
262 output.edges.clear();
263 output.edges.reserve(nedges * 2);
264 for (const auto& e : edge_stores) {
265 output.edges.insert(output.edges.end(), e.begin(), e.end());
266 }
267
268 return;
269}
270
289template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Distance_>
293 static_cast<Index_>(neighbors.size()),
294 [&](Index_ i) -> const std::vector<std::pair<Index_, Distance_> >& { return neighbors[i]; },
295 [](const std::pair<Index_, Distance_>& x) -> Index_ { return x.first; },
296 options,
297 output
298 );
299 return output;
300}
301
316template<typename Node_ = int, typename Weight_ = double, typename Index_>
317BuildSnnGraphResults<Node_, Weight_> build_snn_graph(const std::vector<std::vector<Index_> >& neighbors, const BuildSnnGraphOptions& options) {
320 static_cast<Index_>(neighbors.size()),
321 [&](Index_ i) -> const std::vector<Index_>& { return neighbors[i]; },
322 [](Index_ x) -> Index_ { return x; },
323 options,
324 output
325 );
326 return output;
327}
328
344template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Input_, typename Distance_>
346 auto neighbors = knncolle::find_nearest_neighbors_index_only(prebuilt, options.num_neighbors, options.num_threads);
347 return build_snn_graph<Node_, Weight_>(neighbors, options);
348}
349
369template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight, typename Index_, typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
371 std::size_t num_dims,
372 Index_ num_cells,
373 const Input_* data,
375 const BuildSnnGraphOptions& options)
376{
377 auto prebuilt = knn_method.build_unique(knncolle::SimpleMatrix(num_dims, num_cells, data));
378 return build_snn_graph<Node_, Weight_>(*prebuilt, options);
379}
380
381#if __has_include("igraph.h")
392template<typename Node_ = DefaultNode, typename Weight_ = DefaultWeight>
393raiigraph::Graph convert_to_graph(const BuildSnnGraphResults<Node_, Weight_>& result) {
394 return edges_to_graph(result.edges.size(), result.edges.data(), result.num_cells, IGRAPH_UNDIRECTED);
395}
396#endif
397
398}
399
400#endif
std::unique_ptr< Prebuilt< Index_, Data_, Distance_ > > build_unique(const Matrix_ &data) const
Convert a list of edges to a graph.
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
std::vector< std::vector< Index_ > > find_nearest_neighbors_index_only(const Prebuilt< Index_, Data_, Distance_ > &index, int k, int num_threads=1)
std::vector< std::vector< std::pair< Index_, Distance_ > > > NeighborList
Graph-based clustering of single-cell data.
Definition build_snn_graph.hpp:21
double DefaultWeight
Definition build_snn_graph.hpp:107
int DefaultNode
Definition build_snn_graph.hpp:101
void build_snn_graph(Index_ num_cells, GetNeighbors_ get_neighbors, GetIndex_ get_index, const BuildSnnGraphOptions &options, BuildSnnGraphResults< Node_, Weight_ > &output)
Definition build_snn_graph.hpp:135
SnnWeightScheme
Definition build_snn_graph.hpp:42
Options for SNN graph construction.
Definition build_snn_graph.hpp:47
int num_threads
Definition build_snn_graph.hpp:64
int num_neighbors
Definition build_snn_graph.hpp:53
SnnWeightScheme weighting_scheme
Definition build_snn_graph.hpp:58
Results of SNN graph construction.
Definition build_snn_graph.hpp:74
Node_ num_cells
Definition build_snn_graph.hpp:78
std::vector< Node_ > edges
Definition build_snn_graph.hpp:84
std::vector< Weight_ > weights
Definition build_snn_graph.hpp:90