qdtsne
Quick and dirty t-SNE in C++
Loading...
Searching...
No Matches
Status.hpp
Go to the documentation of this file.
1/*
2 *
3 * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the Delft University of Technology.
16 * 4. Neither the name of the Delft University of Technology nor the names of
17 * its contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
21 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
23 * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
29 * OF SUCH DAMAGE.
30 *
31 */
32
33#ifndef QDTSNE_STATUS_HPP
34#define QDTSNE_STATUS_HPP
35
#include <vector>
#include <array>
#include <algorithm>
#include <numeric>
#include <cmath>
#include <limits>
#include <cstddef>
40
41#include "sanisizer/sanisizer.hpp"
42
43#include "SPTree.hpp"
44#include "Options.hpp"
45#include "utils.hpp"
46
52namespace qdtsne {
53
64template<std::size_t num_dim_, typename Index_, typename Float_>
65class Status {
66public:
70 Status(NeighborList<Index_, Float_> neighbors, Options options) :
71 my_neighbors(std::move(neighbors)),
72 my_tree(sanisizer::cast<internal::SPTreeIndex>(my_neighbors.size()), options.max_depth),
73 my_options(std::move(options))
74 {
75 const auto nobs = sanisizer::cast<Index_>(my_neighbors.size()); // use Index_ to check safety of cast in num_observations().
76 const std::size_t buffer_size = sanisizer::product<std::size_t>(nobs, num_dim_);
77
78 sanisizer::resize(my_dY, buffer_size);
79 sanisizer::resize(my_uY, buffer_size);
80 sanisizer::resize(my_gains, buffer_size, static_cast<Float_>(1));
81 sanisizer::resize(my_pos_f, buffer_size);
82 sanisizer::resize(my_neg_f, buffer_size);
83
84 if (options.num_threads > 1) {
85 sanisizer::resize(my_parallel_buffer, nobs);
86 }
87 }
92private:
93 NeighborList<Index_, Float_> my_neighbors;
94 std::vector<Float_> my_dY, my_uY, my_gains, my_pos_f, my_neg_f;
95 Float_ my_non_edge_sum = 0;
96
97 internal::SPTree<num_dim_, Float_> my_tree;
98 std::vector<Float_> my_parallel_buffer; // Buffer to hold parallel-computed results prior to reduction.
99
100 Options my_options;
101 int my_iter = 0;
102
103 typename decltype(I(my_tree))::LeafApproxWorkspace my_leaf_workspace;
104
105public:
109 int iteration() const {
110 return my_iter;
111 }
112
116 int max_iterations() const {
117 return my_options.max_iterations;
118 }
119
123 Index_ num_observations() const {
124 return my_neighbors.size(); // safety of the cast to Index_ is already checked in the constructor.
125 }
126
127#ifndef NDEBUG
131 const auto& get_neighbors() const {
132 return my_neighbors;
133 }
137#endif
138
139public:
151 void run(Float_* const Y, const int limit) {
152 Float_ multiplier = (my_iter < my_options.early_exaggeration_iterations ? my_options.exaggeration_factor : 1);
153 Float_ momentum = (my_iter < my_options.momentum_switch_iterations ? my_options.start_momentum : my_options.final_momentum);
154
155 for(; my_iter < limit; ++my_iter) {
156 if (my_iter == my_options.early_exaggeration_iterations) {
157 multiplier = 1;
158 }
159 if (my_iter == my_options.momentum_switch_iterations) {
160 momentum = my_options.final_momentum;
161 }
162 iterate(Y, multiplier, momentum);
163 }
164 }
165
175 void run(Float_* const Y) {
176 run(Y, my_options.max_iterations);
177 }
178
179private:
180 static Float_ sign(const Float_ x) {
181 constexpr Float_ zero = 0;
182 constexpr Float_ one = 1;
183 return (x == zero ? zero : (x < zero ? -one : one));
184 }
185
186 void iterate(Float_* const Y, const Float_ multiplier, const Float_ momentum) {
187 compute_gradient(Y, multiplier);
188
189 // Update gains
190 const auto buffer_size = my_gains.size();
191 for (decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
192 Float_& g = my_gains[i];
193 constexpr Float_ lower_bound = 0.01;
194 constexpr Float_ to_add = 0.2;
195 constexpr Float_ to_mult = 0.8;
196 g = std::max(lower_bound, sign(my_dY[i]) != sign(my_uY[i]) ? (g + to_add) : (g * to_mult));
197 }
198
199 // Perform gradient update (with momentum and gains)
200 for (decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
201 my_uY[i] = momentum * my_uY[i] - my_options.eta * my_gains[i] * my_dY[i];
202 Y[i] += my_uY[i];
203 }
204
205 // Make solution zero-mean for each dimension
206 std::array<Float_, num_dim_> means{};
207 const Index_ num_obs = num_observations();
208 for (Index_ i = 0; i < num_obs; ++i) {
209 for (std::size_t d = 0; d < num_dim_; ++d) {
210 means[d] += Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)];
211 }
212 }
213 for (std::size_t d = 0; d < num_dim_; ++d) {
214 means[d] /= num_obs;
215 }
216 for (Index_ i = 0; i < num_obs; ++i) {
217 for (std::size_t d = 0; d < num_dim_; ++d) {
218 Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)] -= means[d];
219 }
220 }
221
222 return;
223 }
224
225private:
226 void compute_gradient(const Float_* const Y, const Float_ multiplier) {
227 my_tree.set(Y);
228 compute_edge_forces(Y, multiplier);
229
230 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
231 compute_non_edge_forces();
232
233 const auto buffer_size = my_dY.size();
234 for (decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
235 my_dY[i] = my_pos_f[i] - (my_neg_f[i] / my_non_edge_sum);
236 }
237 }
238
239 void compute_edge_forces(const Float_* const Y, Float_ multiplier) {
240 std::fill(my_pos_f.begin(), my_pos_f.end(), 0);
241 const Index_ num_obs = num_observations();
242
243 parallelize(my_options.num_threads, num_obs, [&](const int, const Index_ start, const Index_ length) -> void {
244 for (Index_ i = start, end = start + length; i < end; ++i) {
245 const auto& current = my_neighbors[i];
246 const auto offset = sanisizer::product_unsafe<std::size_t>(i, num_dim_);
247 const auto self = Y + offset;
248 const auto pos_out = my_pos_f.data() + offset;
249
250 for (const auto& x : current) {
251 Float_ sqdist = 0;
252 const auto neighbor = Y + sanisizer::product_unsafe<std::size_t>(x.first, num_dim_);
253 for (std::size_t d = 0; d < num_dim_; ++d) {
254 const Float_ delta = self[d] - neighbor[d];
255 sqdist += delta * delta;
256 }
257
258 const Float_ mult = multiplier * x.second / (static_cast<Float_>(1) + sqdist);
259 for (std::size_t d = 0; d < num_dim_; ++d) {
260 pos_out[d] += mult * (self[d] - neighbor[d]);
261 }
262 }
263 }
264 });
265
266 return;
267 }
268
269 void compute_non_edge_forces() {
270 if (my_options.leaf_approximation) {
271 my_tree.compute_non_edge_forces_for_leaves(my_options.theta, my_leaf_workspace, my_options.num_threads);
272 }
273
274 const Index_ num_obs = num_observations();
275 if (my_options.num_threads > 1) {
276 // Don't use reduction methods, otherwise we get numeric imprecision
277 // issues (and stochastic results) based on the order of summation.
278 parallelize(my_options.num_threads, num_obs, [&](const int, const Index_ start, const Index_ length) -> void {
279 for (Index_ i = start, end = start + length; i < end; ++i) {
280 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
281 if (my_options.leaf_approximation) {
282 my_parallel_buffer[i] = my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
283 } else {
284 my_parallel_buffer[i] = my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
285 }
286 }
287 });
288
289 my_non_edge_sum = std::accumulate(my_parallel_buffer.begin(), my_parallel_buffer.end(), static_cast<Float_>(0));
290 return;
291 }
292
293 my_non_edge_sum = 0;
294 for (Index_ i = 0; i < num_obs; ++i) {
295 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
296 if (my_options.leaf_approximation) {
297 my_non_edge_sum += my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
298 } else {
299 my_non_edge_sum += my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
300 }
301 }
302 }
303
304public:
317 Float_ cost(const Float_* const Y) {
318 my_tree.set(Y);
319 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
320 compute_non_edge_forces();
321
322 const Index_ num_obs = num_observations();
323 Float_ total = 0;
324 for (Index_ i = 0; i < num_obs; ++i) {
325 const auto& cur_neighbors = my_neighbors[i];
326 const auto self = Y + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
327
328 for (const auto& x : cur_neighbors) {
329 const auto neighbor = Y + sanisizer::product_unsafe<std::size_t>(x.first, num_dim_);
330 Float_ sqdist = 0;
331 for (std::size_t d = 0; d < num_dim_; ++d) {
332 const Float_ delta = self[d] - neighbor[d];
333 sqdist += delta * delta;
334 }
335
336 const Float_ qprob = (static_cast<Float_>(1) / (static_cast<Float_>(1) + sqdist)) / my_non_edge_sum;
337 constexpr Float_ lim = std::numeric_limits<Float_>::min();
338 total += x.second * std::log(std::max(lim, x.second) / std::max(lim, qprob));
339 }
340 }
341
342 return total;
343 }
344};
345
346}
347
348#endif
Options for the t-SNE algorithm.
Status of the t-SNE iterations.
Definition Status.hpp:65
void run(Float_ *const Y, const int limit)
Definition Status.hpp:151
int iteration() const
Definition Status.hpp:109
Float_ cost(const Float_ *const Y)
Definition Status.hpp:317
Index_ num_observations() const
Definition Status.hpp:123
void run(Float_ *const Y)
Definition Status.hpp:175
int max_iterations() const
Definition Status.hpp:116
Quick and dirty t-SNE.
knncolle::NeighborList< Index_, Float_ > NeighborList
Lists of neighbors for each observation.
Definition utils.hpp:37
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition utils.hpp:124
Options for initialize().
Definition Options.hpp:14
int max_depth
Definition Options.hpp:105
int momentum_switch_iterations
Definition Options.hpp:73
int early_exaggeration_iterations
Definition Options.hpp:58
int max_iterations
Definition Options.hpp:50
bool leaf_approximation
Definition Options.hpp:117
double eta
Definition Options.hpp:91
double final_momentum
Definition Options.hpp:85
double exaggeration_factor
Definition Options.hpp:64
int num_threads
Definition Options.hpp:124
double theta
Definition Options.hpp:44
Utilities for running t-SNE.