1#ifndef MUMOSA_BLOCKED_HPP
2#define MUMOSA_BLOCKED_HPP
9#include "sanisizer/sanisizer.hpp"
58template<
typename Distance_>
63 std::vector<Distance_> weights;
64 Distance_ total_weight;
66 std::vector<Distance_> distance_buffer;
81template<
typename Distance_,
typename Index_>
85 output.total_weight = std::accumulate(output.weights.begin(), output.weights.end(),
static_cast<Distance_
>(0));
88 if (block_sizes.size()) {
89 max_size = *std::max_element(block_sizes.begin(), block_sizes.end());
91 sanisizer::resize(output.distance_buffer, max_size);
138template<
typename Index_,
typename Input_,
typename Distance_>
148 std::pair<Distance_, Distance_> output(0, 0);
150 const auto nblocks = prebuilts.size();
151 for (I<
decltype(nblocks)> b = 0; b < nblocks; ++b) {
152 const auto curweight = workspace.weights[b];
153 const auto& pbptr = prebuilts[b];
154 if (curweight && pbptr && pbptr->num_observations()) {
155 const auto curdist =
compute_distance(*pbptr, workspace.distance_buffer.data(), simple_opt);
156 output.first += curdist.first * curweight;
157 output.second += curdist.second * curweight;
161 if (workspace.total_weight) {
162 output.first /= workspace.total_weight;
163 output.second /= workspace.total_weight;
191template<
typename Index_,
typename Input_,
typename Distance_,
class Matrix_ = knncolle::Matrix<Index_, Input_> >
193 const std::size_t num_dim,
194 const std::vector<Index_> block_sizes,
195 const Input_*
const data,
198 const auto num_blocks = block_sizes.size();
199 auto prebuilts = sanisizer::create<std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > >(num_blocks);
202 for (I<
decltype(num_blocks)> b = 0; b < num_blocks; ++b) {
203 const auto cursize = block_sizes[b];
234template<
typename Index_,
typename Input_,
typename Distance_,
class Matrix_ = knncolle::Matrix<Index_, Input_> >
236 const std::size_t num_dim,
237 const std::vector<Index_>& block_sizes,
238 const Input_*
const data,
256template<
typename Index_,
typename Block_>
260 const Block_* my_block;
261 Block_ my_num_blocks = 0;
262 std::vector<Index_> my_block_sizes;
264 std::vector<std::pair<Index_, Index_> > my_contigs;
265 Index_ my_non_contig_total = 0;
266 std::vector<Index_> my_non_contig_offsets;
276 const Index_ num_cells,
279 my_num_cells(num_cells),
283 my_num_blocks = sanisizer::sum<Block_>(1, *std::max_element(block, block + num_cells));
286 sanisizer::resize(my_block_sizes, my_num_blocks);
287 auto block_non_contig = sanisizer::create<std::vector<char> >(my_num_blocks);
288 auto& block_ends = my_non_contig_offsets;
289 sanisizer::resize(block_ends, my_num_blocks);
291 for (Index_ c = 0; c < my_num_cells; ++c) {
292 const auto curb = my_block[c];
293 my_block_sizes[curb] += 1;
295 auto& nc = block_non_contig[curb];
297 auto& be = block_ends[curb];
300 }
else if (be == c) {
308 sanisizer::resize(my_contigs, my_num_blocks);
310 for (Block_ b = 0; b < my_num_blocks; ++b) {
311 const auto length = my_block_sizes[b];
312 if (block_non_contig[b]) {
313 my_non_contig_offsets[b] = my_non_contig_total;
314 my_non_contig_total += length;
316 const auto start = block_ends[b] - length;
317 my_contigs[b] = std::make_pair(start, length);
327 const std::vector<Index_>&
sizes()
const {
328 return my_block_sizes;
335 template<
typename Input_>
340 std::vector<Index_> tmp_offsets;
341 std::vector<Input_> tmp_buffer;
350 template<
typename Input_>
372 template<
typename Input_,
typename Distance_,
class Matrix_ = knncolle::Matrix<Index_, Input_> >
374 const std::size_t num_dim,
375 const Input_*
const data,
381 sanisizer::resize(output, my_num_blocks);
383 for (Block_ b = 0; b < my_num_blocks; ++b) {
384 const auto& con = my_contigs[b];
386 const auto ptr = data + sanisizer::product_unsafe<std::size_t>(con.first, num_dim);
391 if (my_non_contig_total) {
392 work.tmp_buffer.resize(sanisizer::product<I<
decltype(work.tmp_buffer.size())> >(my_non_contig_total, num_dim));
393 work.tmp_offsets.clear();
394 work.tmp_offsets.insert(work.tmp_offsets.end(), my_non_contig_offsets.begin(), my_non_contig_offsets.end());
397 while (c < my_num_cells) {
398 const auto curb = my_block[c];
399 const auto& con = my_contigs[curb];
403 auto& curoff = work.tmp_offsets[curb];
405 data + sanisizer::product_unsafe<std::size_t>(c, num_dim),
407 work.tmp_buffer.data() + sanisizer::product_unsafe<std::size_t>(curoff, num_dim)
414 for (Block_ b = 0; b < my_num_blocks; ++b) {
415 if (my_contigs[b].second == 0) {
416 const auto length = my_block_sizes[b];
417 const auto ptr = work.tmp_buffer.data() + sanisizer::product_unsafe<std::size_t>(my_non_contig_offsets[b], num_dim);
441 template<
typename Input_,
typename Distance_,
class Matrix_ = knncolle::Matrix<Index_, Input_> >
442 std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > >
build(
443 const std::size_t num_dim,
444 const Input_*
const data,
447 std::vector<std::shared_ptr<const knncolle::Prebuilt<Index_, Input_, Distance_> > > prebuilts;
449 build(num_dim, data, builder, prebuilts, bufs);
478template<
typename Index_,
typename Input_,
typename Block_,
typename Distance_,
class Matrix_ = knncolle::Matrix<Index_, Input_> >
480 const std::size_t num_dim,
481 const Index_ num_cells,
482 const Input_*
const data,
483 const Block_*
const block,
488 const auto prebuilts = blocked_factory.
build(num_dim, data, builder);
std::shared_ptr< Prebuilt< Index_, Data_, Distance_ > > build_shared(const Matrix_ &data) const
Factory for creating nearest-neighbor search indices for each block.
Definition blocked.hpp:257
BlockedIndicesFactory(const Index_ num_cells, const Block_ *block)
Definition blocked.hpp:275
void build(const std::size_t num_dim, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder, std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > &output, Buffers< Input_ > &work) const
Definition blocked.hpp:373
const std::vector< Index_ > & sizes() const
Definition blocked.hpp:327
Buffers< Input_ > create_buffers() const
Definition blocked.hpp:351
std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > build(const std::size_t num_dim, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder) const
Definition blocked.hpp:442
Scale multi-modal embeddings to adjust for differences in variance.
Definition blocked.hpp:20
std::pair< Distance_, Distance_ > compute_distance_blocked(const std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > &prebuilts, BlockedWorkspace< Distance_ > &workspace, const BlockedOptions &options)
Definition blocked.hpp:139
std::pair< Distance_, Distance_ > compute_distance(const Index_ num_cells, Distance_ *const distances)
Definition simple.hpp:59
std::vector< std::shared_ptr< const knncolle::Prebuilt< Index_, Input_, Distance_ > > > build_blocked_indices(const std::size_t num_dim, const std::vector< Index_ > block_sizes, const Input_ *const data, const knncolle::Builder< Index_, Input_, Distance_, Matrix_ > &builder)
Definition blocked.hpp:192
BlockedWorkspace< Distance_ > create_workspace(const std::vector< Index_ > &block_sizes, const BlockedOptions &options)
Definition blocked.hpp:82
void compute_weights(const std::size_t num_blocks, const Size_ *const sizes, const WeightPolicy policy, const VariableWeightParameters &variable, Weight_ *const weights)
Compute distances to nearest neighbors.
Temporary buffers for build().
Definition blocked.hpp:336
Options for compute_distance_blocked().
Definition blocked.hpp:25
scran_blocks::VariableWeightParameters variable_block_weight_parameters
Definition blocked.hpp:42
scran_blocks::WeightPolicy block_weight_policy
Definition blocked.hpp:36
int num_threads
Definition blocked.hpp:48
int num_neighbors
Definition blocked.hpp:31
Workspace for compute_distance_blocked().
Definition blocked.hpp:59
Options for compute_distance().
Definition simple.hpp:28
int num_threads
Definition simple.hpp:40
int num_neighbors
Definition simple.hpp:34