71 my_neighbors(std::move(neighbors)),
72 my_tree(sanisizer::cast<internal::SPTreeIndex>(my_neighbors.size()), options.
max_depth),
73 my_options(std::move(options))
75 const auto nobs = sanisizer::cast<Index_>(my_neighbors.size());
76 const std::size_t buffer_size = sanisizer::product<std::size_t>(nobs, num_dim_);
78 sanisizer::resize(my_dY, buffer_size);
79 sanisizer::resize(my_uY, buffer_size);
80 sanisizer::resize(my_gains, buffer_size,
static_cast<Float_
>(1));
81 sanisizer::resize(my_pos_f, buffer_size);
82 sanisizer::resize(my_neg_f, buffer_size);
85 sanisizer::resize(my_parallel_buffer, nobs);
94 std::vector<Float_> my_dY, my_uY, my_gains, my_pos_f, my_neg_f;
96 internal::SPTree<num_dim_, Float_> my_tree;
97 std::vector<Float_> my_parallel_buffer;
102 typename decltype(I(my_tree))::LeafApproxWorkspace my_leaf_workspace;
123 return my_neighbors.size();
130 const auto& get_neighbors()
const {
150 void run(Float_*
const Y,
const int limit) {
154 for(; my_iter < limit; ++my_iter) {
161 iterate(Y, multiplier, momentum);
174 void run(Float_*
const Y) {
179 static Float_ sign(
const Float_ x) {
180 constexpr Float_ zero = 0;
181 constexpr Float_ one = 1;
182 return (x == zero ? zero : (x < zero ? -one : one));
185 void iterate(Float_*
const Y,
const Float_ multiplier,
const Float_ momentum) {
186 compute_gradient(Y, multiplier);
189 const auto buffer_size = my_gains.size();
190 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
191 Float_& g = my_gains[i];
192 constexpr Float_ lower_bound = 0.01;
193 constexpr Float_ to_add = 0.2;
194 constexpr Float_ to_mult = 0.8;
195 g = std::max(lower_bound, sign(my_dY[i]) != sign(my_uY[i]) ? (g + to_add) : (g * to_mult));
199 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
200 my_uY[i] = momentum * my_uY[i] - my_options.
eta * my_gains[i] * my_dY[i];
205 std::array<Float_, num_dim_> means{};
207 for (Index_ i = 0; i < num_obs; ++i) {
208 for (std::size_t d = 0; d < num_dim_; ++d) {
209 means[d] += Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)];
212 for (std::size_t d = 0; d < num_dim_; ++d) {
215 for (Index_ i = 0; i < num_obs; ++i) {
216 for (std::size_t d = 0; d < num_dim_; ++d) {
217 Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)] -= means[d];
225 void compute_gradient(
const Float_*
const Y,
const Float_ multiplier) {
227 compute_edge_forces(Y, multiplier);
229 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
230 const Float_ sum_Q = compute_non_edge_forces();
232 const auto buffer_size = my_dY.size();
233 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
234 my_dY[i] = my_pos_f[i] - (my_neg_f[i] / sum_Q);
238 void compute_edge_forces(
const Float_*
const Y, Float_ multiplier) {
239 std::fill(my_pos_f.begin(), my_pos_f.end(), 0);
242 parallelize(my_options.
num_threads, num_obs, [&](
const int,
const Index_ start,
const Index_ length) ->
void {
243 for (Index_ i = start, end = start + length; i < end; ++i) {
244 const auto& current = my_neighbors[i];
245 const auto offset = sanisizer::product_unsafe<std::size_t>(i, num_dim_);
246 const auto self = Y + offset;
247 const auto pos_out = my_pos_f.data() + offset;
249 for (const auto& x : current) {
251 const auto neighbor = Y + sanisizer::product_unsafe<std::size_t>(x.first, num_dim_);
252 for (std::size_t d = 0; d < num_dim_; ++d) {
253 Float_ delta = self[d] - neighbor[d];
254 sqdist += delta * delta;
257 const Float_ mult = multiplier * x.second / (static_cast<Float_>(1) + sqdist);
258 for (std::size_t d = 0; d < num_dim_; ++d) {
259 pos_out[d] += mult * (self[d] - neighbor[d]);
268 Float_ compute_non_edge_forces() {
270 my_tree.compute_non_edge_forces_for_leaves(my_options.
theta, my_leaf_workspace, my_options.
num_threads);
273 const Index_ num_obs = num_observations();
277 parallelize(my_options.
num_threads, num_obs, [&](
const int,
const Index_ start,
const Index_ length) ->
void {
278 for (Index_ i = start, end = start + length; i < end; ++i) {
279 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
280 if (my_options.leaf_approximation) {
281 my_parallel_buffer[i] = my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
283 my_parallel_buffer[i] = my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
287 return std::accumulate(my_parallel_buffer.begin(), my_parallel_buffer.end(),
static_cast<Float_
>(0));
291 for (Index_ i = 0; i < num_obs; ++i) {
292 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
294 sum_Q += my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
296 sum_Q += my_tree.compute_non_edge_forces(i, my_options.
theta, neg_ptr);