qdtsne
Quick and dirty t-SNE in C++
Loading...
Searching...
No Matches
utils.hpp
Go to the documentation of this file.
1#ifndef QDTSNE_UTILS_HPP
2#define QDTSNE_UTILS_HPP
3
10#include <random>
11#include <cmath>
12#include <vector>
13#include <cstddef>
14#include <type_traits>
15
16#include "aarand/aarand.hpp"
17#include "knncolle/knncolle.hpp"
18#include "sanisizer/sanisizer.hpp"
19
20#ifndef QDTSNE_CUSTOM_PARALLEL
21#include "subpar/subpar.hpp"
22#endif
23
24namespace qdtsne {
25
36template<typename Index_, typename Float_>
38
47template<typename Index_ = int>
48Index_ perplexity_to_k(const double perplexity) {
49 return sanisizer::from_float<Index_>(std::ceil(perplexity * 3));
50}
51
/**
 * Pseudo-random number generator used by the functions in this file;
 * an alias for the standard library's 64-bit Mersenne Twister.
 */
using RngEngine = std::mt19937_64;
56
70template<std::size_t num_dim_, typename Float_ = double>
71void initialize_random(Float_* const Y, const std::size_t num_points, const typename RngEngine::result_type seed = 42) {
72 RngEngine rng(seed);
73
74 // Presumably a size_t can store the product in order to allocate Y in the first place.
75 std::size_t num_total = sanisizer::product_unsafe<std::size_t>(num_points, num_dim_);
76 const bool odd = num_total % 2;
77 if (odd) {
78 --num_total;
79 }
80
81 // Box-Muller gives us two random values at a time.
82 for (std::size_t i = 0; i < num_total; i += 2) {
83 const auto paired = aarand::standard_normal<Float_>(rng);
84 Y[i] = paired.first;
85 Y[i + 1] = paired.second;
86 }
87
88 if (odd) {
89 // Adding the poor extra for odd total lengths.
90 const auto paired = aarand::standard_normal<Float_>(rng);
91 Y[num_total] = paired.first;
92 }
93
94 return;
95}
96
108template<std::size_t num_dim_, typename Float_ = double>
109std::vector<Float_> initialize_random(const std::size_t num_points, const typename RngEngine::result_type seed = 42) {
110 std::vector<Float_> Y(sanisizer::product<typename std::vector<Float_>::size_type>(num_points, num_dim_));
111 initialize_random<num_dim_>(Y.data(), num_points, seed);
112 return Y;
113}
114
127template<typename Task_, class Run_>
128void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range) {
129#ifndef QDTSNE_CUSTOM_PARALLEL
130 // Don't make this nothrow_ = true, there's too many allocations and the
131 // derived methods for the nearest neighbors search could do anything...
132 subpar::parallelize(num_workers, num_tasks, std::move(run_task_range));
133#else
134 QDTSNE_CUSTOM_PARALLEL(num_workers, num_tasks, run_task_range);
135#endif
136}
137
/**
 * Strips any reference and then any top-level `const`/`volatile` qualifiers from `Input_`.
 *
 * @tparam Input_ Type to be stripped.
 */
template<typename Input_>
using I = std::remove_cv_t<std::remove_reference_t<Input_> >;
147}
148
149#endif
std::vector< std::vector< std::pair< Index_, Distance_ > > > NeighborList
Quick and dirty t-SNE.
knncolle::NeighborList< Index_, Float_ > NeighborList
Lists of neighbors for each observation.
Definition utils.hpp:37
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition utils.hpp:128
std::mt19937_64 RngEngine
Definition utils.hpp:55
Index_ perplexity_to_k(const double perplexity)
Definition utils.hpp:48
void initialize_random(Float_ *const Y, const std::size_t num_points, const typename RngEngine::result_type seed=42)
Definition utils.hpp:71