// Summarizes the `num_cells` values in `distances` as a pair whose first
// element is their median and whose second element is `rmsd`.
// NOTE(review): this listing is an excerpt; the declaration of `rmsd`, the
// loop body that accumulates into it, and the closing braces are not visible
// here, so the exact accumulation should be confirmed against the full file.
70std::pair<Distance_, Distance_>
compute_distance(
const Index_ num_cells, Distance_*
const distances) {
// Median over the first `num_cells` entries. The trailing `false` is
// presumably the "skip NaNs" flag of tatami_stats::medians::direct, and the
// non-const pointer suggests the routine may reorder `distances` in place;
// TODO confirm both against the tatami_stats documentation.
71 const Distance_ med = tatami_stats::medians::direct(distances, num_cells,
false);
// Visit every distance once; `d` is the current value.
73 for (Index_ i = 0; i < num_cells; ++i) {
74 const auto d = distances[i];
// The accumulated value is square-rooted before being returned.
77 rmsd = std::sqrt(rmsd);
// Result order is (median, rmsd).
78 return std::make_pair(med, rmsd);
// NOTE(review): interior fragment of a function whose signature is not
// visible in this excerpt; `nobs`, `prebuilt`, `start`, `length` and
// `capped_k` are defined outside the lines shown.
// Storage for one distance per observation (created from `nobs`).
98 auto dist = sanisizer::create<std::vector<Distance_> >(nobs);
// A searcher is obtained from the prebuilt index, and `distances` is a
// buffer that is reused across iterations of the loop below.
101 const auto searcher = prebuilt.initialize();
102 std::vector<Distance_> distances;
// Process observations in the half-open range [start, start + length).
103 for (Index_ i = start, end = start + length; i < end; ++i) {
// Query `capped_k` neighbours of observation `i`. NULL is passed in the third
// argument, presumably because the neighbour indices are not needed; confirm
// against the searcher API.
104 searcher->search(i, capped_k, NULL, &distances);
// Only write a result when at least one distance was reported; the last
// entry is kept (presumably the furthest neighbour if results are sorted by
// increasing distance; TODO confirm).
105 if (distances.size()) {
106 dist[i] = distances.back();
// Returns the ratio of a summary value in `ref` to the matching value in
// `target`. Each pair carries two values (`first`, `second`); presumably
// these are the (median, rmsd) pairs produced by compute_distance(); confirm
// against callers.
// NOTE(review): this listing is an excerpt; the body of the
// `ref.second == 0` branch and the closing braces are not visible here.
165Distance_
compute_scale(
const std::pair<Distance_, Distance_>& ref,
const std::pair<Distance_, Distance_>& target) {
// If either `first` value is zero, the ratio of `first` values is not used
// and the `second` values are examined instead.
166 if (target.first == 0 || ref.first == 0) {
167 if (target.second == 0) {
// A zero `target.second` yields an infinite result.
168 return std::numeric_limits<Distance_>::infinity();
169 }
else if (ref.second == 0) {
// Fallback: ratio of the `second` values.
172 return ref.second / target.second;
// Usual case (both `first` values non-zero): ratio of the `first` values.
175 return ref.first / target.first;
// Computes one scaling value per entry of `distances`, each expressed
// relative to a single reference entry.
// NOTE(review): this listing is an excerpt; the body of the reference-search
// loop and whatever happens when no reference is found are not visible here.
193std::vector<Distance_>
compute_scale(
const std::vector<std::pair<Distance_, Distance_> >& distances) {
194 const auto ndist = distances.size();
// Output vector created from `ndist`, i.e. one slot per input entry.
195 auto output = sanisizer::create<std::vector<Distance_> >(ndist);
// Reference selection: `ref` starts at index 0 and `found_ref` at false; the
// loop tests whether each entry's `second` value is non-zero.
198 bool found_ref =
false;
199 decltype(I(ndist)) ref = 0;
200 for (
decltype(I(ndist)) e = 0; e < ndist; ++e) {
201 if (distances[e].second) {
// Fill the output: the reference entry itself gets exactly 1, and every other
// entry is passed to the pairwise compute_scale() overload with `dref` as the
// reference.
210 const auto& dref = distances[ref];
211 for (
decltype(I(ndist)) e = 0; e < ndist; ++e) {
212 output[e] = (e == ref ?
static_cast<Distance_
>(1) :
compute_scale(dref, distances[e]));
// Writes several embeddings into one `output` array, multiplying the values
// of each embedding by its own entry in `scaling`.
//
// `num_dims`, `embeddings` and `scaling` must all have the same length,
// otherwise std::runtime_error is thrown. Each cell occupies `ntotal` (the
// sum of `num_dims`) consecutive slots in the offset computation, and the
// embeddings are placed one after another within those slots.
// NOTE(review): this listing is an excerpt; the condition that chooses
// between the zero-fill loop and the scaled-copy loop, plus the closing
// braces, are not visible here.
241void combine_scaled_embeddings(
const std::vector<std::size_t>& num_dims,
const Index_ num_cells,
const std::vector<Input_*>& embeddings,
const std::vector<Scale_>& scaling, Output_*
const output) {
242 const auto nembed = num_dims.size();
// NOTE(review): the message names 'scale' although the parameter is called
// `scaling`; left unchanged here because it is a runtime string.
243 if (embeddings.size() != nembed || scaling.size() != nembed) {
244 throw std::runtime_error(
"'num_dims', 'embeddings' and 'scale' should have the same length");
// Total number of output dimensions per cell.
247 const std::size_t ntotal = std::accumulate(num_dims.begin(), num_dims.end(),
static_cast<std::size_t
>(0));
// Running offset of the current embedding's first dimension within a cell.
248 std::size_t starting_dim = 0;
250 for (
decltype(I(nembed)) e = 0; e < nembed; ++e) {
251 const auto curdim = num_dims[e];
252 const auto inptr = embeddings[e];
253 const auto s = scaling[e];
// Zero-fill variant: for every cell, set this embedding's `curdim` output
// slots to 0. The guarding condition is not visible; presumably it handles a
// non-finite scaling value; TODO confirm.
258 for (Index_ c = 0; c < num_cells; ++c) {
259 const auto out_offset = sanisizer::nd_offset<std::size_t>(starting_dim, ntotal, c);
260 std::fill_n(output + out_offset, curdim, 0);
// Scaled-copy variant: the input is indexed with `curdim` values per cell and
// the output with `ntotal` values per cell; each input value is multiplied by
// `s` on the way out.
263 for (Index_ c = 0; c < num_cells; ++c) {
264 for (
decltype(I(curdim)) d = 0; d < curdim; ++d) {
265 const auto out_offset = sanisizer::nd_offset<std::size_t>(starting_dim + d, ntotal, c);
266 const auto in_offset = sanisizer::nd_offset<std::size_t>(d, curdim, c);
267 output[out_offset] = inptr[in_offset] * s;
// Advance to where the next embedding starts within each cell.
272 starting_dim += curdim;
Distance_ compute_scale(const std::pair< Distance_, Distance_ > &ref, const std::pair< Distance_, Distance_ > &target)
Definition mumosa.hpp:165
void combine_scaled_embeddings(const std::vector< std::size_t > &num_dims, const Index_ num_cells, const std::vector< Input_ * > &embeddings, const std::vector< Scale_ > &scaling, Output_ *const output)
Definition mumosa.hpp:241