71 my_neighbors(std::move(neighbors)),
72 my_tree(sanisizer::cast<internal::SPTreeIndex>(my_neighbors.size()), options.
max_depth),
73 my_options(std::move(options))
75 const auto nobs = sanisizer::cast<Index_>(my_neighbors.size());
76 const std::size_t buffer_size = sanisizer::product<std::size_t>(nobs, num_dim_);
78 sanisizer::resize(my_dY, buffer_size);
79 sanisizer::resize(my_uY, buffer_size);
80 sanisizer::resize(my_gains, buffer_size,
static_cast<Float_
>(1));
81 sanisizer::resize(my_pos_f, buffer_size);
82 sanisizer::resize(my_neg_f, buffer_size);
85 sanisizer::resize(my_parallel_buffer, nobs);
94 std::vector<Float_> my_dY, my_uY, my_gains, my_pos_f, my_neg_f;
95 Float_ my_non_edge_sum = 0;
97 internal::SPTree<num_dim_, Float_> my_tree;
98 std::vector<Float_> my_parallel_buffer;
103 typename decltype(I(my_tree))::LeafApproxWorkspace my_leaf_workspace;
124 return my_neighbors.size();
131 const auto& get_neighbors()
const {
151 void run(Float_*
const Y,
const int limit) {
155 for(; my_iter < limit; ++my_iter) {
162 iterate(Y, multiplier, momentum);
175 void run(Float_*
const Y) {
180 static Float_ sign(
const Float_ x) {
181 constexpr Float_ zero = 0;
182 constexpr Float_ one = 1;
183 return (x == zero ? zero : (x < zero ? -one : one));
186 void iterate(Float_*
const Y,
const Float_ multiplier,
const Float_ momentum) {
187 compute_gradient(Y, multiplier);
190 const auto buffer_size = my_gains.size();
191 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
192 Float_& g = my_gains[i];
193 constexpr Float_ lower_bound = 0.01;
194 constexpr Float_ to_add = 0.2;
195 constexpr Float_ to_mult = 0.8;
196 g = std::max(lower_bound, sign(my_dY[i]) != sign(my_uY[i]) ? (g + to_add) : (g * to_mult));
200 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
201 my_uY[i] = momentum * my_uY[i] - my_options.
eta * my_gains[i] * my_dY[i];
206 std::array<Float_, num_dim_> means{};
208 for (Index_ i = 0; i < num_obs; ++i) {
209 for (std::size_t d = 0; d < num_dim_; ++d) {
210 means[d] += Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)];
213 for (std::size_t d = 0; d < num_dim_; ++d) {
216 for (Index_ i = 0; i < num_obs; ++i) {
217 for (std::size_t d = 0; d < num_dim_; ++d) {
218 Y[sanisizer::nd_offset<std::size_t>(d, num_dim_, i)] -= means[d];
226 void compute_gradient(
const Float_*
const Y,
const Float_ multiplier) {
228 compute_edge_forces(Y, multiplier);
230 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
231 compute_non_edge_forces();
233 const auto buffer_size = my_dY.size();
234 for (
decltype(I(buffer_size)) i = 0; i < buffer_size; ++i) {
235 my_dY[i] = my_pos_f[i] - (my_neg_f[i] / my_non_edge_sum);
239 void compute_edge_forces(
const Float_*
const Y, Float_ multiplier) {
240 std::fill(my_pos_f.begin(), my_pos_f.end(), 0);
243 parallelize(my_options.
num_threads, num_obs, [&](
const int,
const Index_ start,
const Index_ length) ->
void {
244 for (Index_ i = start, end = start + length; i < end; ++i) {
245 const auto& current = my_neighbors[i];
246 const auto offset = sanisizer::product_unsafe<std::size_t>(i, num_dim_);
247 const auto self = Y + offset;
248 const auto pos_out = my_pos_f.data() + offset;
250 for (const auto& x : current) {
252 const auto neighbor = Y + sanisizer::product_unsafe<std::size_t>(x.first, num_dim_);
253 for (std::size_t d = 0; d < num_dim_; ++d) {
254 const Float_ delta = self[d] - neighbor[d];
255 sqdist += delta * delta;
258 const Float_ mult = multiplier * x.second / (static_cast<Float_>(1) + sqdist);
259 for (std::size_t d = 0; d < num_dim_; ++d) {
260 pos_out[d] += mult * (self[d] - neighbor[d]);
269 void compute_non_edge_forces() {
271 my_tree.compute_non_edge_forces_for_leaves(my_options.
theta, my_leaf_workspace, my_options.
num_threads);
274 const Index_ num_obs = num_observations();
278 parallelize(my_options.
num_threads, num_obs, [&](
const int,
const Index_ start,
const Index_ length) ->
void {
279 for (Index_ i = start, end = start + length; i < end; ++i) {
280 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
281 if (my_options.leaf_approximation) {
282 my_parallel_buffer[i] = my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
284 my_parallel_buffer[i] = my_tree.compute_non_edge_forces(i, my_options.theta, neg_ptr);
289 my_non_edge_sum = std::accumulate(my_parallel_buffer.begin(), my_parallel_buffer.end(),
static_cast<Float_
>(0));
294 for (Index_ i = 0; i < num_obs; ++i) {
295 const auto neg_ptr = my_neg_f.data() + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
297 my_non_edge_sum += my_tree.compute_non_edge_forces_from_leaves(i, neg_ptr, my_leaf_workspace);
299 my_non_edge_sum += my_tree.compute_non_edge_forces(i, my_options.
theta, neg_ptr);
317 Float_
cost(
const Float_*
const Y) {
319 std::fill(my_neg_f.begin(), my_neg_f.end(), 0);
320 compute_non_edge_forces();
322 const Index_ num_obs = num_observations();
324 for (Index_ i = 0; i < num_obs; ++i) {
325 const auto& cur_neighbors = my_neighbors[i];
326 const auto self = Y + sanisizer::product_unsafe<std::size_t>(i, num_dim_);
328 for (
const auto& x : cur_neighbors) {
329 const auto neighbor = Y + sanisizer::product_unsafe<std::size_t>(x.first, num_dim_);
331 for (std::size_t d = 0; d < num_dim_; ++d) {
332 const Float_ delta = self[d] - neighbor[d];
333 sqdist += delta * delta;
336 const Float_ qprob = (
static_cast<Float_
>(1) / (
static_cast<Float_
>(1) + sqdist)) / my_non_edge_sum;
337 constexpr Float_ lim = std::numeric_limits<Float_>::min();
338 total += x.second * std::log(std::max(lim, x.second) / std::max(lim, qprob));