qdtsne
A quick and dirty t-SNE C++ library
Loading...
Searching...
No Matches
Status.hpp
Go to the documentation of this file.
1/*
2 *
3 * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the Delft University of Technology.
16 * 4. Neither the name of the Delft University of Technology nor the names of
17 * its contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
21 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
23 * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
29 * OF SUCH DAMAGE.
30 *
31 */
32
33#ifndef QDTSNE_STATUS_HPP
34#define QDTSNE_STATUS_HPP
35
36#include <vector>
37#include <cmath>
38#include <algorithm>
39#include <type_traits>
40#include <limits>
41#include <cstddef>
42
43#include "SPTree.hpp"
44#include "Options.hpp"
45#include "utils.hpp"
46
52namespace qdtsne {
53
64template<std::size_t num_dim_, typename Index_, typename Float_>
65class Status {
66public:
70 Status(NeighborList<Index_, Float_> neighbors, Options options) :
71 my_neighbors(std::move(neighbors)),
72 my_tree(my_neighbors.size(), options.max_depth),
73 my_options(std::move(options))
74 {
75 std::size_t full_size = static_cast<std::size_t>(my_neighbors.size()) * num_dim_; // cast to avoid overflow.
76 my_dY.resize(full_size);
77 my_uY.resize(full_size);
78 my_gains.resize(full_size, static_cast<Float_>(1));
79 my_pos_f.resize(full_size);
80 my_neg_f.resize(full_size);
81
82 if (options.num_threads > 1) {
83 my_parallel_buffer.resize(my_neighbors.size());
84 }
85 }
90private:
91 NeighborList<Index_, Float_> my_neighbors;
92 std::vector<Float_> my_dY, my_uY, my_gains, my_pos_f, my_neg_f;
93
94 internal::SPTree<num_dim_, Float_> my_tree;
95 std::vector<Float_> my_parallel_buffer; // Buffer to hold parallel-computed results prior to reduction.
96
97 Options my_options;
98 int my_iter = 0;
99
100 typename decltype(my_tree)::LeafApproxWorkspace my_leaf_workspace;
101
102public:
106 int iteration() const {
107 return my_iter;
108 }
109
114 int max_iterations() const {
115 return my_options.max_iterations;
116 }
117
121 std::size_t num_observations() const {
122 return my_neighbors.size();
123 }
124
125#ifndef NDEBUG
129 const auto& get_neighbors() const {
130 return my_neighbors;
131 }
135#endif
136
137public:
149 void run(Float_* Y, int limit) {
150 Float_ multiplier = (my_iter < my_options.stop_lying_iter ? my_options.exaggeration_factor : 1);
151 Float_ momentum = (my_iter < my_options.mom_switch_iter ? my_options.start_momentum : my_options.final_momentum);
152
153 for(; my_iter < limit; ++my_iter) {
154 // Stop lying about the P-values after a while, and switch momentum
155 if (my_iter == my_options.stop_lying_iter) {
156 multiplier = 1;
157 }
158 if (my_iter == my_options.mom_switch_iter) {
159 momentum = my_options.final_momentum;
160 }
161
162 iterate(Y, multiplier, momentum);
163 }
164 }
165
175 void run(Float_* Y) {
176 run(Y, my_options.max_iterations);
177 }
178
179private:
180 static Float_ sign(Float_ x) {
181 constexpr Float_ zero = 0;
182 constexpr Float_ one = 1;
183 return (x == zero ? zero : (x < zero ? -one : one));
184 }
185
186 void iterate(Float_* Y, Float_ multiplier, Float_ momentum) {
187 compute_gradient(Y, multiplier);
188
189 // Update gains
190 auto ngains = my_gains.size();
191 for (decltype(ngains) i = 0; i < ngains; ++i) {
192 Float_& g = my_gains[i];
193 constexpr Float_ lower_bound = 0.01;
194 constexpr Float_ to_add = 0.2;
195 constexpr Float_ to_mult = 0.8;
196 g = std::max(lower_bound, sign(my_dY[i]) != sign(my_uY[i]) ? (g + to_add) : (g * to_mult));
197 }
198
199 // Perform gradient update (with momentum and gains)
200 for (decltype(ngains) i = 0; i < ngains; ++i) {
201 my_uY[i] = momentum * my_uY[i] - my_options.eta * my_gains[i] * my_dY[i];
202 Y[i] += my_uY[i];
203 }
204
205 // Make solution zero-mean for each dimension
206 std::array<Float_, num_dim_> means{};
207 std::size_t num_obs = num_observations();
208 for (std::size_t i = 0; i < num_obs; ++i) {
209 for (std::size_t d = 0; d < num_dim_; ++d) {
210 means[d] += Y[d + i * num_dim_]; // all of these are already size_t to avoid overflow.
211 }
212 }
213 for (std::size_t d = 0; d < num_dim_; ++d) {
214 means[d] /= num_obs;
215 }
216 for (std::size_t i = 0; i < num_obs; ++i) {
217 for (std::size_t d = 0; d < num_dim_; ++d) {
218 Y[d + i * num_dim_] -= means[d]; // again, everything here is already a size_t.
219 }
220 }
221
222 return;
223 }
224
225private:
226 void compute_gradient(const Float_* Y, Float_ multiplier) {
227 my_tree.set(Y);
228 compute_edge_forces(Y, multiplier);
229
230 std::size_t num_obs = num_observations();
231 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
232 Float_ sum_Q = compute_non_edge_forces();
233
234 // Compute final t-SNE gradient
235 std::size_t ntotal = num_obs * num_dim_; // already size_t's to avoid overflow.
236 for (std::size_t i = 0; i < ntotal; ++i) {
237 my_dY[i] = my_pos_f[i] - (my_neg_f[i] / sum_Q);
238 }
239 }
240
241 void compute_edge_forces(const Float_* Y, Float_ multiplier) {
242 std::fill(my_pos_f.begin(), my_pos_f.end(), 0);
243 std::size_t num_obs = num_observations();
244
245 parallelize(my_options.num_threads, num_obs, [&](int, std::size_t start, std::size_t length) -> void {
246 for (std::size_t i = start, end = start + length; i < end; ++i) {
247 const auto& current = my_neighbors[i];
248 size_t offset = i * num_dim_; // already size_t's to avoid overflow.
249 const Float_* self = Y + offset;
250 Float_* pos_out = my_pos_f.data() + offset;
251
252 for (const auto& x : current) {
253 Float_ sqdist = 0;
254 const Float_* neighbor = Y + static_cast<std::size_t>(x.first) * num_dim_; // cast to avoid overflow.
255 for (std::size_t d = 0; d < num_dim_; ++d) {
256 Float_ delta = self[d] - neighbor[d];
257 sqdist += delta * delta;
258 }
259
260 const Float_ mult = multiplier * x.second / (static_cast<Float_>(1) + sqdist);
261 for (std::size_t d = 0; d < num_dim_; ++d) {
262 pos_out[d] += mult * (self[d] - neighbor[d]);
263 }
264 }
265 }
266 });
267
268 return;
269 }
270
271 Float_ compute_non_edge_forces() {
272 if (my_options.leaf_approximation) {
273 my_tree.compute_non_edge_forces_for_leaves(my_options.theta, my_leaf_workspace, my_options.num_threads);
274 }
275
276 std::size_t num_obs = num_observations();
277 if (my_options.num_threads > 1) {
278 // Don't use reduction methods, otherwise we get numeric imprecision
279 // issues (and stochastic results) based on the order of summation.
280 parallelize(my_options.num_threads, num_obs, [&](int, std::size_t start, std::size_t length) -> void {
281 for (std::size_t i = start, end = start + length; i < end; ++i) {
282 auto neg_ptr = my_neg_f.data() + i * num_dim_; // already size_t's to avoid overflow.
283 if (my_options.leaf_approximation) {
284 my_parallel_buffer[i] = my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
285 } else {
286 my_parallel_buffer[i] = my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
287 }
288 }
289 });
290 return std::accumulate(my_parallel_buffer.begin(), my_parallel_buffer.end(), static_cast<Float_>(0));
291 }
292
293 Float_ sum_Q = 0;
294 for (std::size_t i = 0; i < num_obs; ++i) {
295 auto neg_ptr = my_neg_f.data() + i * num_dim_; // already size_ts to avoid overflow.
296 if (my_options.leaf_approximation) {
297 sum_Q += my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
298 } else {
299 sum_Q += my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
300 }
301 }
302 return sum_Q;
303 }
304};
305
306}
307
308#endif
Options for the t-SNE algorithm.
Status of the t-SNE iterations.
Definition Status.hpp:65
int iteration() const
Definition Status.hpp:106
void run(Float_ *Y, int limit)
Definition Status.hpp:149
void run(Float_ *Y)
Definition Status.hpp:175
std::size_t num_observations() const
Definition Status.hpp:121
int max_iterations() const
Definition Status.hpp:114
Quick and dirty t-SNE.
knncolle::NeighborList< Index_, Float_ > NeighborList
Lists of neighbors for each observation.
Definition utils.hpp:36
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition utils.hpp:119
Options for initialize().
Definition Options.hpp:14
int mom_switch_iter
Definition Options.hpp:63
int max_depth
Definition Options.hpp:95
int max_iterations
Definition Options.hpp:44
bool leaf_approximation
Definition Options.hpp:102
double eta
Definition Options.hpp:79
double final_momentum
Definition Options.hpp:73
double exaggeration_factor
Definition Options.hpp:84
double start_momentum
Definition Options.hpp:68
int num_threads
Definition Options.hpp:109
double theta
Definition Options.hpp:39
int stop_lying_iter
Definition Options.hpp:53
Utilities for running t-SNE.