mumosa
Multi-modal analyses of single-cell data
Loading...
Searching...
No Matches
simple.hpp
Go to the documentation of this file.
1#ifndef MUMOSA_SIMPLE_HPP
2#define MUMOSA_SIMPLE_HPP
3
4#include <vector>
5#include <stdexcept>
6#include <cmath>
7#include <algorithm>
8#include <limits>
9#include <cstddef>
10#include <type_traits>
11
12#include "knncolle/knncolle.hpp"
13#include "tatami_stats/tatami_stats.hpp"
14#include "sanisizer/sanisizer.hpp"
15
16#include "utils.hpp"
17
23namespace mumosa {
24
/**
 * @brief Options for compute_distance().
 */
struct Options {
    /**
     * Number of neighbors requested for each cell.
     * compute_distance() passes this through knncolle::cap_k() before searching
     * and records the distance to the last neighbor returned by the search.
     */
    int num_neighbors = 20;

    /**
     * Number of workers handed to knncolle::parallelize() for the neighbor search.
     */
    int num_threads = 1;
};
42
58template<typename Index_, typename Distance_>
59std::pair<Distance_, Distance_> compute_distance(const Index_ num_cells, Distance_* const distances) {
60 const Distance_ med = tatami_stats::medians::direct(distances, num_cells, /* skip_nan = */ false);
61 Distance_ rmsd = 0;
62 for (Index_ i = 0; i < num_cells; ++i) {
63 const auto d = distances[i];
64 rmsd += d * d;
65 }
66 rmsd = std::sqrt(rmsd);
67 return std::make_pair(med, rmsd);
68}
69
// For every observation in a prebuilt neighbor-search index, record the distance to the
// last neighbor returned by a search for `capped_k` neighbors, then summarize those
// distances with the two-argument compute_distance() overload.
//
// NOTE(review): `prebuilt` is used throughout the body but is not declared in the visible
// parameter list. The listing numbering jumps from 89 to 91, so the line declaring it
// appears to have been dropped from this rendering - restore it from the original header.
//
// distances: caller-supplied buffer indexed by observation; it is written at every index
//            in [0, prebuilt.num_observations()), except where a search returns nothing.
// options:   supplies `num_neighbors` (the requested k) and `num_threads` (worker count).
// Returns the result of compute_distance(nobs, distances).
88template<typename Index_, typename Input_, typename Distance_>
89std::pair<Distance_, Distance_> compute_distance(
91 Distance_* const distances,
92 const Options& options
93) {
94 const Index_ nobs = prebuilt.num_observations();
// Presumably limits the requested k to what the dataset can provide - confirm in the knncolle docs.
95 const auto capped_k = knncolle::cap_k(options.num_neighbors, nobs);
96
// Each invocation of the lambda handles the observations in [start, start + length).
97 knncolle::parallelize(options.num_threads, nobs, [&](const int, const Index_ start, const Index_ length) -> void {
// A searcher and a result buffer are created once per invocation and reused for every observation in the range.
98 const auto searcher = prebuilt.initialize();
99 std::vector<Distance_> cur_distances;
100 for (Index_ i = start, end = start + length; i < end; ++i) {
// Only distances are requested; the third argument (presumably the neighbor-index output) is null.
// NOTE(review): `nullptr` is the idiomatic spelling of a null pointer in modern C++.
101 searcher->search(i, capped_k, NULL, &cur_distances);
// When the search returns no neighbors, distances[i] is left untouched, so the summary
// below uses whatever value the caller placed there.
// NOTE(review): confirm callers pre-initialize the buffer.
102 if (cur_distances.size()) {
103 distances[i] = cur_distances.back();
104 }
105 }
106 });
107
108 return compute_distance(nobs, distances);
109}
110
// Convenience overload: builds a neighbor-search index from a raw data array, allocates a
// temporary distance buffer, and forwards both to the overload that takes a prebuilt index.
//
// NOTE(review): `builder` is used in the body but is not declared in the visible parameter
// list. The listing numbering jumps from 135 to 137, so the line declaring it appears to
// have been dropped from this rendering - restore it from the original header.
//
// num_dim:   number of dimensions, passed to knncolle::SimpleMatrix.
// num_cells: number of cells; also the length of the temporary distance buffer.
// data:      pointer passed to knncolle::SimpleMatrix together with num_dim and num_cells;
//            the expected memory layout is whatever that class requires - TODO confirm.
// options:   forwarded unchanged to the prebuilt-index overload.
// Returns the result of that overload.
131template<typename Index_, typename Input_, typename Distance_, class Matrix_ = knncolle::Matrix<Index_, Input_> >
132std::pair<Distance_, Distance_> compute_distance(
133 const std::size_t num_dim,
134 const Index_ num_cells,
135 const Input_* const data,
137 const Options& options
138) {
// Temporary buffer with one entry per cell; it is local to this call and not returned to the caller.
// NOTE(review): presumably sanisizer::create() also checks that num_cells fits the vector's size type - confirm.
139 auto dist = sanisizer::create<std::vector<Distance_> >(num_cells);
140 const auto prebuilt = builder.build_unique(knncolle::SimpleMatrix(num_dim, num_cells, data));
141 return compute_distance(*prebuilt, dist.data(), options);
142}
143
144}
145
146#endif
std::unique_ptr< Prebuilt< Index_, Data_, Distance_ > > build_unique(const Matrix_ &data) const
virtual Index_ num_observations() const=0
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
int cap_k(int k, Index_ num_observations)
Scale multi-modal embeddings to adjust for differences in variance.
Definition blocked.hpp:20
std::pair< Distance_, Distance_ > compute_distance(const Index_ num_cells, Distance_ *const distances)
Definition simple.hpp:59
Options for compute_distance().
Definition simple.hpp:28
int num_threads
Definition simple.hpp:40
int num_neighbors
Definition simple.hpp:34