mumosa
Multi-modal analyses of single-cell data
Loading...
Searching...
No Matches
blocked.hpp
Go to the documentation of this file.
1#ifndef MUMOSA_BLOCKED_HPP
2#define MUMOSA_BLOCKED_HPP
3
4#include <vector>
5#include <algorithm>
6#include <cstddef>
7
9#include "sanisizer/sanisizer.hpp"
11
12#include "simple.hpp"
13#include "utils.hpp"
14
20namespace mumosa {
21
50
58template<typename Distance_>
63 std::vector<Distance_> weights;
64 Distance_ total_weight;
65
66 std::vector<Distance_> distance_buffer;
70};
71
81template<typename Distance_, typename Index_>
82BlockedWorkspace<Distance_> create_workspace(const std::vector<Index_>& block_sizes, const BlockedOptions& options) {
85 output.total_weight = std::accumulate(output.weights.begin(), output.weights.end(), static_cast<Distance_>(0));
86
87 Index_ max_size = 0;
88 if (block_sizes.size()) {
89 max_size = *std::max_element(block_sizes.begin(), block_sizes.end());
90 }
91 sanisizer::resize(output.distance_buffer, max_size);
92
93 return output;
94}
95
138template<typename Index_, typename Input_, typename Distance_>
139std::pair<Distance_, Distance_> compute_distance_blocked(
140 const std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > >& prebuilts,
142 const BlockedOptions& options
143) {
144 Options simple_opt;
145 simple_opt.num_neighbors = options.num_neighbors;
146 simple_opt.num_threads = options.num_threads;
147
148 std::pair<Distance_, Distance_> output(0, 0);
149
150 const auto nblocks = prebuilts.size();
151 for (I<decltype(nblocks)> b = 0; b < nblocks; ++b) {
152 const auto curweight = workspace.weights[b];
153 const auto& pbptr = prebuilts[b];
154 if (curweight && pbptr && pbptr->num_observations()) {
155 const auto curdist = compute_distance(*pbptr, workspace.distance_buffer.data(), simple_opt);
156 output.first += curdist.first * curweight;
157 output.second += curdist.second * curweight;
158 }
159 }
160
161 if (workspace.total_weight) {
162 output.first /= workspace.total_weight;
163 output.second /= workspace.total_weight;
164 }
165
166 return output;
167}
168
191template<typename Index_, typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
192std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > build_blocked_indices(
193 const std::size_t num_dim,
194 const std::vector<Index_> block_sizes,
195 const Input_* const data,
197) {
198 const auto num_blocks = block_sizes.size();
199 auto prebuilts = sanisizer::create<std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > >(num_blocks);
200
201 Index_ sofar = 0;
202 for (I<decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
203 const auto cursize = block_sizes[b];
204 if (cursize) {
205 prebuilts[b] = builder.build_shared(knncolle::SimpleMatrix(num_dim, cursize, data + sanisizer::product_unsafe<std::size_t>(sofar, num_dim)));
206 }
207 sofar += cursize;
208 }
209
210 return prebuilts;
211}
212
234template<typename Index_, typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
235std::pair<Distance_, Distance_> compute_distance_blocked(
236 const std::size_t num_dim,
237 const std::vector<Index_>& block_sizes,
238 const Input_* const data,
240 const BlockedOptions& options
241) {
242 const auto prebuilts = build_blocked_indices(num_dim, block_sizes, data, builder);
243 auto workspace = create_workspace<Distance_>(block_sizes, options);
244 return compute_distance_blocked(prebuilts, workspace, options);
245}
246
256template<typename Index_, typename Block_>
258private:
259 Index_ my_num_cells;
260 const Block_* my_block;
261 Block_ my_num_blocks = 0;
262 std::vector<Index_> my_block_sizes;
263
264 std::vector<std::pair<Index_, Index_> > my_contigs;
265 Index_ my_non_contig_total = 0;
266 std::vector<Index_> my_non_contig_offsets;
267
268public:
276 const Index_ num_cells,
277 const Block_* block
278 ) :
279 my_num_cells(num_cells),
280 my_block(block)
281 {
282 if (num_cells) {
283 my_num_blocks = sanisizer::sum<Block_>(1, *std::max_element(block, block + num_cells));
284 }
285
286 sanisizer::resize(my_block_sizes, my_num_blocks);
287 auto block_non_contig = sanisizer::create<std::vector<char> >(my_num_blocks);
288 auto& block_ends = my_non_contig_offsets; // repurposing the offset vector to store the end of each contiguous block.
289 sanisizer::resize(block_ends, my_num_blocks);
290
291 for (Index_ c = 0; c < my_num_cells; ++c) {
292 const auto curb = my_block[c];
293 my_block_sizes[curb] += 1;
294
295 auto& nc = block_non_contig[curb];
296 if (!nc) {
297 auto& be = block_ends[curb];
298 if (be == 0) {
299 be = c + 1;
300 } else if (be == c) {
301 ++be;
302 } else {
303 nc = true;
304 }
305 }
306 }
307
308 sanisizer::resize(my_contigs, my_num_blocks);
309
310 for (Block_ b = 0; b < my_num_blocks; ++b) {
311 const auto length = my_block_sizes[b];
312 if (block_non_contig[b]) {
313 my_non_contig_offsets[b] = my_non_contig_total;
314 my_non_contig_total += length;
315 } else if (length) {
316 const auto start = block_ends[b] - length;
317 my_contigs[b] = std::make_pair(start, length);
318 }
319 }
320 }
321
322public:
327 const std::vector<Index_>& sizes() const {
328 return my_block_sizes;
329 }
330
335 template<typename Input_>
336 struct Buffers {
340 std::vector<Index_> tmp_offsets;
341 std::vector<Input_> tmp_buffer;
345 };
346
350 template<typename Input_>
352 return Buffers<Input_>();
353 }
354
355public:
372 template<typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
373 void build(
374 const std::size_t num_dim,
375 const Input_* const data,
377 std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > >& output,
378 Buffers<Input_>& work
379 ) const {
380 output.clear();
381 sanisizer::resize(output, my_num_blocks);
382
383 for (Block_ b = 0; b < my_num_blocks; ++b) {
384 const auto& con = my_contigs[b];
385 if (con.second) {
386 const auto ptr = data + sanisizer::product_unsafe<std::size_t>(con.first, num_dim);
387 output[b] = builder.build_shared(knncolle::SimpleMatrix(num_dim, con.second, ptr));
388 }
389 }
390
391 if (my_non_contig_total) {
392 work.tmp_buffer.resize(sanisizer::product<I<decltype(work.tmp_buffer.size())> >(my_non_contig_total, num_dim));
393 work.tmp_offsets.clear();
394 work.tmp_offsets.insert(work.tmp_offsets.end(), my_non_contig_offsets.begin(), my_non_contig_offsets.end());
395
396 Index_ c = 0;
397 while (c < my_num_cells) {
398 const auto curb = my_block[c];
399 const auto& con = my_contigs[curb];
400 if (con.second) {
401 c += con.second; // skip past the contiguous stretch of observations.
402 } else {
403 auto& curoff = work.tmp_offsets[curb];
404 std::copy_n(
405 data + sanisizer::product_unsafe<std::size_t>(c, num_dim),
406 num_dim,
407 work.tmp_buffer.data() + sanisizer::product_unsafe<std::size_t>(curoff, num_dim)
408 );
409 ++curoff;
410 ++c;
411 }
412 }
413
414 for (Block_ b = 0; b < my_num_blocks; ++b) {
415 if (my_contigs[b].second == 0) {
416 const auto length = my_block_sizes[b];
417 const auto ptr = work.tmp_buffer.data() + sanisizer::product_unsafe<std::size_t>(my_non_contig_offsets[b], num_dim);
418 output[b] = builder.build_shared(knncolle::SimpleMatrix(num_dim, length, ptr));
419 }
420 }
421 }
422 }
423
441 template<typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
442 std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > build(
443 const std::size_t num_dim,
444 const Input_* const data,
446 ) const {
447 std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > prebuilts;
448 auto bufs = create_buffers<Input_>();
449 build(num_dim, data, builder, prebuilts, bufs);
450 return prebuilts;
451 }
452};
453
478template<typename Index_, typename Input_, typename Block_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
479std::pair<Distance_, Distance_> compute_distance_blocked(
480 const std::size_t num_dim,
481 const Index_ num_cells,
482 const Input_* const data,
483 const Block_* const block,
485 const BlockedOptions& options
486) {
487 BlockedIndicesFactory<Index_, Block_> blocked_factory(num_cells, block);
488 const auto prebuilts = blocked_factory.build(num_dim, data, builder);
489 auto workspace = create_workspace<Distance_>(blocked_factory.sizes(), options);
490 return compute_distance_blocked(prebuilts, workspace, options);
491}
492
493}
494
495#endif
std::shared_ptr< Prebuilt< Index_, Data_, Distance_ > > build_shared(const Matrix_ &data) const
Factory for creating nearest-neighbor search indices for each block.
Definition blocked.hpp:257
BlockedIndicesFactory(const Index_ num_cells, const Block_ *block)
Definition blocked.hpp:275
void build(const std::size_t num_dim, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder, std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > &output, Buffers< Input_ > &work) const
Definition blocked.hpp:373
const std::vector< Index_ > & sizes() const
Definition blocked.hpp:327
Buffers< Input_ > create_buffers() const
Definition blocked.hpp:351
std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > build(const std::size_t num_dim, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder) const
Definition blocked.hpp:442
Scale multi-modal embeddings to adjust for differences in variance.
Definition blocked.hpp:20
std::pair< Distance_, Distance_ > compute_distance_blocked(const std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > &prebuilts, BlockedWorkspace< Distance_ > &workspace, const BlockedOptions &options)
Definition blocked.hpp:139
std::pair< Distance_, Distance_ > compute_distance(const Index_ num_cells, Distance_ *const distances)
Definition simple.hpp:59
std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > build_blocked_indices(const std::size_t num_dim, const std::vector< Index_ > block_sizes, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder)
Definition blocked.hpp:192
BlockedWorkspace< Distance_ > create_workspace(const std::vector< Index_ > &block_sizes, const BlockedOptions &options)
Definition blocked.hpp:82
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Compute distances to nearest neighbors.
Temporary buffers for build().
Definition blocked.hpp:336
Options for compute_distance_blocked().
Definition blocked.hpp:25
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition blocked.hpp:42
scran_blocks::WeightPolicy block_weight_policy
Definition blocked.hpp:36
int num_threads
Definition blocked.hpp:48
int num_neighbors
Definition blocked.hpp:31
Workspace for compute_distance_blocked().
Definition blocked.hpp:59
Options for compute_distance().
Definition simple.hpp:28
int num_threads
Definition simple.hpp:40
int num_neighbors
Definition simple.hpp:34