mnncorrect/compute_8hpp_source.html

#ifndef MNNCORRECT_COMPUTE_HPP

#define MNNCORRECT_COMPUTE_HPP


#include <algorithm>

#include <vector>

#include <numeric>

#include <stdexcept>

#include <cstddef>


#include "knncolle/knncolle.hpp"


#include "AutomaticOrder.hpp"

#include "CustomOrder.hpp"

#include "Options.hpp"

#include "restore_order.hpp"

#include "utils.hpp"


namespace mnncorrect {


/**
 * @brief Correction details from `compute()`.
 *
 * Instances are assembled by the `compute()` overloads from the values
 * reported by the ordering runner that performed the correction.
 */
struct Details {
    /**
     * Default constructor; both vectors start out empty.
     */
    Details() = default;

    /**
     * @param order Values to be moved into `merge_order`.
     * @param pairs Values to be moved into `num_pairs`.
     */
    Details(std::vector<BatchIndex> order, std::vector<unsigned long long> pairs) :
        merge_order(std::move(order)),
        num_pairs(std::move(pairs))
    {}

    /**
     * Batch indices as reported by the runner's `get_order()`, or 0, 1, 2, ...
     * when the batches were processed in their supplied order.
     * NOTE(review): presumably the order in which batches were merged - confirm against the runner classes.
     */
    std::vector<BatchIndex> merge_order;

    /**
     * Counts as reported by the runner's `get_num_pairs()`.
     * NOTE(review): presumably the number of MNN pairs found at each merge step - confirm against the runner classes.
     */
    std::vector<unsigned long long> num_pairs;
};


namespace internal {


/**
 * Chooses the batch ordering strategy requested in `options` and runs the correction with it.
 *
 * The strategy is picked with the following priority:
 *
 * 1. If `options.order` is non-empty, a `CustomOrder` runner is used with that order.
 * 2. Otherwise, if `options.automatic_order` is true, an `AutomaticOrder` runner is used
 *    with `options.reference_policy`.
 * 3. Otherwise, a `CustomOrder` runner is used with the trivial order 0, 1, ..., `num_obs.size() - 1`.
 *
 * This function does not call `restore_order()`; that is left to the caller.
 *
 * @tparam Index_ Integer type for the observation counts.
 * @tparam Float_ Floating-point type for the data.
 * @tparam Matrix_ Matrix class accepted by the neighbor search builder.
 *
 * @param num_dim Number of dimensions.
 * @param num_obs Number of observations in each batch.
 * @param batches Pointers to the data of each batch.
 * @param[out] output Pointer to the array that is handed to the runner to receive the results.
 * @param options Further options. If `options.builder` is null, a VP-tree builder with Euclidean distances is used.
 *
 * @return The order and number of pairs reported by the runner.
 */
template<typename Index_, typename Float_, class Matrix_>
Details compute(std::size_t num_dim, const std::vector<Index_>& num_obs, const std::vector<const Float_*>& batches, Float_* output, const Options<Index_, Float_, Matrix_>& options) {
    // Work on a copy of the shared pointer so that a default can be
    // installed locally without modifying the caller's options.
    auto builder = options.builder;
    if (!builder) {
        using Euclidean = knncolle::EuclideanDistance<Float_, Float_>;
        using DefaultBuilder = knncolle::VptreeBuilder<Index_, Float_, Float_, Matrix_, Euclidean>;
        builder = std::make_shared<DefaultBuilder>(std::make_shared<Euclidean>());
    }

    // Every runner is executed with the same three tuning parameters.
    const auto execute = [&options](auto& runner) -> void {
        runner.run(options.num_mads, options.robust_iterations, options.robust_trim);
    };

    if (!options.order.empty()) {
        CustomOrder<Index_, Float_, Matrix_> runner(num_dim, num_obs, batches, output, *builder, options.num_neighbors, options.order, options.mass_cap, options.num_threads);
        execute(runner);
        return Details(runner.get_order(), runner.get_num_pairs());
    }

    if (options.automatic_order) {
        AutomaticOrder<Index_, Float_, Matrix_> runner(num_dim, num_obs, batches, output, *builder, options.num_neighbors, options.reference_policy, options.mass_cap, options.num_threads);
        execute(runner);
        return Details(runner.get_order(), runner.get_num_pairs());
    }

    // No explicit or automatic ordering requested, so process the batches in the order they were supplied.
    std::vector<BatchIndex> trivial_order(num_obs.size());
    std::iota(trivial_order.begin(), trivial_order.end(), static_cast<BatchIndex>(0));
    CustomOrder<Index_, Float_, Matrix_> runner(num_dim, num_obs, batches, output, *builder, options.num_neighbors, trivial_order, options.mass_cap, options.num_threads);
    execute(runner);
    return Details(std::move(trivial_order), runner.get_num_pairs());
}


}

/**
 * Runs the correction on batches that are supplied as separate arrays.
 *
 * The work is delegated to `internal::compute()`, after which `internal::restore_order()`
 * is applied to `output` using the reported merge order.
 * NOTE(review): presumably this puts the corrected observations back into the order of the supplied batches - confirm in `restore_order.hpp`.
 *
 * @tparam Index_ Integer type for the observation counts.
 * @tparam Float_ Floating-point type for the data.
 * @tparam Matrix_ Matrix class accepted by the neighbor search builder.
 *
 * @param num_dim Number of dimensions.
 * @param num_obs Number of observations in each batch.
 * @param batches Pointers to the data of each batch; expected to have one entry per element of `num_obs`.
 * @param[out] output Pointer to the array in which the results are stored.
 * @param options Further options.
 *
 * @return Details of the correction, as returned by `internal::compute()`.
 */
template<typename Index_, typename Float_, class Matrix_>
Details compute(std::size_t num_dim, const std::vector<Index_>& num_obs, const std::vector<const Float_*>& batches, Float_* output, const Options<Index_, Float_, Matrix_>& options) {
    Details report = internal::compute(num_dim, num_obs, batches, output, options);
    internal::restore_order(num_dim, report.merge_order, num_obs, output);
    return report;
}


/**
 * Runs the correction on batches that are stored back-to-back in a single array.
 *
 * The array is split into one pointer per batch, where batch `i` occupies
 * `num_obs[i] * num_dim` consecutive values immediately after batch `i - 1`.
 * These pointers are then forwarded to the `compute()` overload that accepts a vector of batch pointers.
 *
 * @tparam Index_ Integer type for the observation counts.
 * @tparam Float_ Floating-point type for the data.
 * @tparam Matrix_ Matrix class accepted by the neighbor search builder.
 *
 * @param num_dim Number of dimensions.
 * @param num_obs Number of observations in each batch.
 * @param input Pointer to the start of the first batch.
 * @param[out] output Pointer to the array in which the results are stored.
 * @param options Further options.
 *
 * @return Details of the correction.
 */
template<typename Index_, typename Float_, class Matrix_>
Details compute(std::size_t num_dim, const std::vector<Index_>& num_obs, const Float_* input, Float_* output, const Options<Index_, Float_, Matrix_>& options) {
    std::vector<const Float_*> batches;
    batches.reserve(num_obs.size());

    const Float_* cursor = input;
    for (const auto count : num_obs) {
        batches.push_back(cursor);
        // Promote the count to size_t before multiplying so that the product cannot overflow Index_.
        cursor += static_cast<std::size_t>(count) * num_dim;
    }

    return compute(num_dim, num_obs, batches, output, options);
}


/**
 * Runs the correction on a single array in which each observation is assigned to a batch by `batch`.
 *
 * The number of batches is taken to be one more than the largest value in `batch`
 * (or zero if there are no observations). If the values of `batch` are already
 * non-decreasing, `input` is used directly by the contiguous-input `compute()` overload.
 * Otherwise, the observations are first copied into a temporary buffer where each
 * batch is contiguous, and `internal::restore_order()` is called with `batch` afterwards.
 * NOTE(review): presumably that call puts the results back into the original observation order - confirm in `restore_order.hpp`.
 *
 * @tparam Index_ Integer type for the observation counts.
 * @tparam Float_ Floating-point type for the data.
 * @tparam Batch_ Type of the batch identifiers; these are used to index into vectors.
 * @tparam Matrix_ Matrix class accepted by the neighbor search builder.
 *
 * @param num_dim Number of dimensions.
 * @param num_obs Total number of observations.
 * @param input Pointer to the data, where observation `o` occupies `num_dim` consecutive values starting at `input + o * num_dim`.
 * @param batch Pointer to an array of length `num_obs` holding the batch identifier of each observation.
 * @param[out] output Pointer to the array in which the results are stored.
 * @param options Further options.
 *
 * @return Details of the correction.
 */
template<typename Index_, typename Float_, typename Batch_, class Matrix_>
Details compute(std::size_t num_dim, Index_ num_obs, const Float_* input, const Batch_* batch, Float_* output, const Options<Index_, Float_, Matrix_>& options) {
    BatchIndex nbatches = 0;
    if (num_obs) {
        nbatches = static_cast<BatchIndex>(*std::max_element(batch, batch + num_obs)) + 1;
    }

    // Tally the number of observations assigned to each batch.
    std::vector<Index_> sizes(nbatches);
    for (Index_ i = 0; i < num_obs; ++i) {
        ++sizes[batch[i]];
    }

    // If the batch identifiers never decrease, each batch is already contiguous
    // in 'input', so there is no need to allocate a temporary buffer.
    if (std::is_sorted(batch, batch + num_obs)) {
        return compute(num_dim, sizes, input, output, options);
    }

    // Otherwise, gather the observations of each batch into a contiguous stretch of a temporary buffer.
    std::vector<Float_> buffer(num_dim * static_cast<std::size_t>(num_obs)); // cast to size_t to avoid overflow.
    Float_* const buffer_start = buffer.data();

    // 'cursors[b]' is the index of the next free observation slot for batch 'b',
    // while 'batch_starts[b]' points to the first value of batch 'b' in the buffer.
    // Everything is held as size_t so the multiplications below cannot overflow Index_.
    std::vector<std::size_t> cursors(nbatches);
    std::vector<const Float_*> batch_starts(nbatches);
    std::size_t running = 0;
    for (BatchIndex b = 0; b < nbatches; ++b) {
        cursors[b] = running;
        batch_starts[b] = buffer_start + running * num_dim;
        running += sizes[b];
    }

    const Float_* source = input;
    for (Index_ i = 0; i < num_obs; ++i, source += num_dim) {
        std::size_t& slot = cursors[batch[i]];
        std::copy(source, source + num_dim, buffer_start + num_dim * slot);
        ++slot;
    }

    Details report = internal::compute(num_dim, sizes, batch_starts, output, options);
    internal::restore_order(num_dim, report.merge_order, sizes, batch, output);
    return report;
}


}


#endif

Options.hpp
Options for MNN correction.

knncolle::EuclideanDistance

knncolle::VptreeBuilder

knncolle.hpp

mnncorrect
Batch correction with mutual nearest neighbors.
Definition compute.hpp:24

mnncorrect::BatchIndex
std::size_t BatchIndex
Definition utils.hpp:20

mnncorrect::compute
Details compute(std::size_t num_dim, const std::vector< Index_ > &num_obs, const std::vector< const Float_ * > &batches, Float_ *output, const Options< Index_, Float_, Matrix_ > &options)
Definition compute.hpp:134

mnncorrect::Details
Correction details from compute().
Definition compute.hpp:29

mnncorrect::Details::num_pairs
std::vector< unsigned long long > num_pairs
Definition compute.hpp:51

mnncorrect::Details::merge_order
std::vector< BatchIndex > merge_order
Definition compute.hpp:45

mnncorrect::Options
Options for compute().
Definition Options.hpp:23

mnncorrect::Options::mass_cap
Index_ mass_cap
Definition Options.hpp:96

mnncorrect::Options::robust_iterations
int robust_iterations
Definition Options.hpp:77

mnncorrect::Options::robust_trim
double robust_trim
Definition Options.hpp:83

mnncorrect::Options::builder
std::shared_ptr< knncolle::Builder< Index_, Float_, Float_, Matrix_ > > builder
Definition Options.hpp:44

mnncorrect::Options::num_mads
double num_mads
Definition Options.hpp:38

mnncorrect::Options::num_threads
int num_threads
Definition Options.hpp:102

mnncorrect::Options::reference_policy
ReferencePolicy reference_policy
Definition Options.hpp:88

mnncorrect::Options::num_neighbors
int num_neighbors
Definition Options.hpp:32

mnncorrect::Options::order
std::vector< BatchIndex > order
Definition Options.hpp:58

mnncorrect::Options::automatic_order
bool automatic_order
Definition Options.hpp:71

utils.hpp
Utilities for MNN correction.