qdtsne
Quick and dirty t-SNE in C++
Loading...
Searching...
No Matches
utils.hpp
Go to the documentation of this file.
1#ifndef QDTSNE_UTILS_HPP
2#define QDTSNE_UTILS_HPP
3
10#include <random>
11#include <cmath>
12#include <vector>
13#include <cstddef>
14#include <type_traits>
15
16#include "aarand/aarand.hpp"
17#include "knncolle/knncolle.hpp"
18#include "sanisizer/sanisizer.hpp"
19
20#ifndef QDTSNE_CUSTOM_PARALLEL
21#include "subpar/subpar.hpp"
22#endif
23
24namespace qdtsne {
25
36template<typename Index_, typename Float_>
38
47template<typename Index_ = int>
48Index_ perplexity_to_k(const double perplexity) {
49 return sanisizer::from_float<Index_>(std::ceil(perplexity * 3));
50}
51
65template<std::size_t num_dim_, typename Float_ = double>
66void initialize_random(Float_* const Y, const std::size_t num_points, const unsigned long long seed = 42) {
67 // The constructor accepts an unsigned type, so any overflow should just wrap around harmlessly.
68 std::mt19937_64 rng(seed);
69
70 // Presumably a size_t can store the product in order to allocate Y in the first place.
71 std::size_t num_total = sanisizer::product_unsafe<std::size_t>(num_points, num_dim_);
72 const bool odd = num_total % 2;
73 if (odd) {
74 --num_total;
75 }
76
77 // Box-Muller gives us two random values at a time.
78 for (std::size_t i = 0; i < num_total; i += 2) {
79 auto paired = aarand::standard_normal<Float_>(rng);
80 Y[i] = paired.first;
81 Y[i + 1] = paired.second;
82 }
83
84 if (odd) {
85 // Adding the poor extra for odd total lengths.
86 auto paired = aarand::standard_normal<Float_>(rng);
87 Y[num_total] = paired.first;
88 }
89
90 return;
91}
92
104template<std::size_t num_dim_, typename Float_ = double>
105std::vector<Float_> initialize_random(const std::size_t num_points, const unsigned long long seed = 42) {
106 std::vector<Float_> Y(sanisizer::product<typename std::vector<Float_>::size_type>(num_points, num_dim_));
107 initialize_random<num_dim_>(Y.data(), num_points, seed);
108 return Y;
109}
110
123template<typename Task_, class Run_>
124void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range) {
125#ifndef QDTSNE_CUSTOM_PARALLEL
126 // Don't make this nothrow_ = true, there's too many allocations and the
127 // derived methods for the nearest neighbors search could do anything...
128 subpar::parallelize(num_workers, num_tasks, std::move(run_task_range));
129#else
130 QDTSNE_CUSTOM_PARALLEL(num_workers, num_tasks, run_task_range);
131#endif
132}
133
/**
 * @brief Return a copy of a value, with const/volatile qualifiers and references stripped from its type.
 *
 * Presumably used to bring the result of an expression back to its plain value type; confirm against callers.
 *
 * @tparam Input_ Type of the input value.
 * @param x Value to copy.
 * @return Copy of `x`, of type `Input_` without cv-qualifiers or reference.
 */
template<typename Input_>
std::remove_cv_t<std::remove_reference_t<Input_> > I(const Input_ x) {
    return x;
}
145}
146
147#endif
std::vector< std::vector< std::pair< Index_, Distance_ > > > NeighborList
Quick and dirty t-SNE.
knncolle::NeighborList< Index_, Float_ > NeighborList
Lists of neighbors for each observation.
Definition utils.hpp:37
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition utils.hpp:124
Index_ perplexity_to_k(const double perplexity)
Definition utils.hpp:48
void initialize_random(Float_ *const Y, const std::size_t num_points, const unsigned long long seed=42)
Definition utils.hpp:66