1#ifndef WEIGHTEDLOWESS_WINDOW_HPP
2#define WEIGHTEDLOWESS_WINDOW_HPP
11#include "sanisizer/sanisizer.hpp"
45template<
typename Data_>
46Data_ derive_delta(
const std::size_t num_anchors,
const std::size_t num_points,
const Data_*
const x) {
47 assert(num_points > 0);
49 const auto points_m1 = num_points - 1;
50 auto diffs = sanisizer::create<std::vector<Data_> >(points_m1);
51 for (I<
decltype(points_m1)> i = 0; i < points_m1; ++i) {
52 diffs[i] = x[i + 1] - x[i];
55 std::sort(diffs.begin(), diffs.end());
56 for (I<
decltype(points_m1)> i = 1; i < points_m1; ++i) {
57 diffs[i] += diffs[i-1];
60 Data_ lowest_delta = diffs.back();
61 if (num_anchors > 1) {
62 auto max_skips = sanisizer::min(num_anchors - 1, points_m1);
63 for (I<
decltype(max_skips)> nskips = 0; nskips < max_skips; ++nskips) {
64 Data_ candidate_delta = diffs[points_m1 - nskips - 1] / (num_anchors - nskips);
65 lowest_delta = std::min(candidate_delta, lowest_delta);
81template<
typename Data_>
82void find_anchors(
const std::size_t num_points,
const Data_* x, Data_ delta, std::vector<std::size_t>& anchors) {
83 assert(num_points > 0);
87 std::size_t last_pt = 0;
88 const std::size_t points_m1 = num_points - 1;
89 for (I<
decltype(points_m1)> pt = 1; pt < points_m1; ++pt) {
90 if (x[pt] - x[last_pt] > delta) {
91 anchors.push_back(pt);
96 anchors.push_back(points_m1);
100template<
typename Data_>
102 std::size_t left, right;
116template<
typename Data_>
117std::vector<Window<Data_> > find_limits(
118 const std::vector<std::size_t>& anchors,
119 const Data_ span_weight,
120 const std::size_t num_points,
121 const Data_*
const x,
122 const Data_*
const weights,
123 const Data_ min_width,
126 const auto nanchors = anchors.size();
127 auto limits = sanisizer::create<std::vector<Window<Data_> > >(nanchors);
128 const auto half_min_width = min_width / 2;
130 assert(num_points > 0);
131 const auto points_m1 = num_points - 1;
133 parallelize(nthreads, nanchors, [&](
const int,
const I<
decltype(nanchors)> start,
const I<
decltype(nanchors)> length) {
134 for (I<
decltype(start)> s = start, end = start + length; s < end; ++s) {
135 const auto curpt = anchors[s];
136 const auto curx = x[curpt];
137 auto left = curpt, right = curpt;
138 Data_ curw = (weights == NULL ? 1 : weights[curpt]);
142 if (curpt > 0 && curpt < points_m1) {
143 auto next_ldist = curx - x[left - 1];
144 auto next_rdist = x[right + 1] - curx;
146 while (curw < span_weight) {
147 if (next_ldist < next_rdist) {
149 curw += (weights == NULL ? 1 : weights[left]);
153 next_ldist = curx - x[left - 1];
155 }
else if (next_ldist > next_rdist) {
157 curw += (weights == NULL ? 1 : weights[right]);
158 if (right == points_m1) {
161 next_rdist = x[right + 1] - curx;
169 curw += (weights == NULL ? 2 : weights[left] + weights[right]);
170 if (left == 0 || right == points_m1) {
173 next_ldist = curx - x[left - 1];
174 next_rdist = x[right + 1] - curx;
180 while (left > 0 && curw < span_weight) {
182 curw += (weights == NULL ? 1 : weights[left]);
185 while (right < points_m1 && curw < span_weight) {
187 curw += (weights == NULL ? 1 : weights[right]);
191 while (left > 0 && x[left] == x[left - 1]) {
195 while (right < points_m1 && x[right] == x[right + 1]) {
202 auto mdist = std::max(curx - x[left], x[right] - curx);
203 if (mdist < half_min_width) {
204 left = std::lower_bound(x, x + left, curx - half_min_width) - x;
213 right = std::upper_bound(x + right + 1, x + num_points, curx + half_min_width) - x;
216 mdist = std::max(curx - x[left], x[right] - curx);
219 limits[s].left = left;
220 limits[s].right = right;
221 limits[s].distance = mdist;
240template<
typename Data_>
245 std::vector<std::size_t> anchors;
246 const Data_* freq_weights = NULL;
247 Data_ total_weight = 0;
248 std::vector<internal::Window<Data_> > limits;
273template<
typename Data_>
276 if (num_points == 0) {
280 if (!std::is_sorted(x, x + num_points)) {
281 throw std::runtime_error(
"'x' should be sorted");
285 std::optional<Data_> delta = opt.
delta;
286 if (delta.has_value() && *delta < 0) {
291 auto& anchors = output.anchors;
292 if (delta.has_value()) {
294 sanisizer::resize(anchors, num_points);
295 std::iota(anchors.begin(), anchors.end(),
static_cast<std::size_t
>(0));
297 internal::find_anchors(num_points, x, *delta, anchors);
300 if (opt.
anchors >= num_points) {
301 sanisizer::resize(anchors, num_points);
302 std::iota(anchors.begin(), anchors.end(),
static_cast<std::size_t
>(0));
304 Data_ eff_delta = internal::derive_delta(opt.
anchors, num_points, x);
305 internal::find_anchors(num_points, x, eff_delta, anchors);
311 output.total_weight = (output.freq_weights != NULL ? std::accumulate(output.freq_weights, output.freq_weights + num_points,
static_cast<Data_
>(0)) : num_points);
313 output.limits = internal::find_limits(anchors, span_weight, num_points, x, output.freq_weights, opt.
minimum_width, opt.
num_threads);
Namespace for LOWESS functions.
Definition compute.hpp:18
PrecomputedWindows< Data_ > define_windows(const std::size_t num_points, const Data_ *const x, const Options< Data_ > &opt)
Definition window.hpp:274
int parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:32
Definitions for parallelization.
Options for compute().
Definition Options.hpp:20
const Data_ * weights
Definition Options.hpp:83
bool frequency_weights
Definition Options.hpp:90
std::size_t anchors
Definition Options.hpp:60
Data_ span
Definition Options.hpp:31
Data_ minimum_width
Definition Options.hpp:45
bool span_as_proportion
Definition Options.hpp:38
std::optional< Data_ > delta
Definition Options.hpp:76
int num_threads
Definition Options.hpp:97
Precomputed windows for LOWESS smoothing.
Definition window.hpp:241