70void compute(
const Index_ num_obs,
const Partition_* partition,
const Index_ target,
const Options& options, std::vector<Index_>& output) {
71 if (target >= num_obs) {
72 sanisizer::resize(output, num_obs);
73 std::iota(output.begin(), output.end(),
static_cast<Index_
>(0));
78 const Partition_ num_partitions = sanisizer::sum<Partition_>(*std::max_element(partition, partition + num_obs), 1);
80 auto partition_count = sanisizer::create<std::vector<Index_> >(num_partitions);
81 for (Index_ o = 0; o < num_obs; ++o) {
82 const auto part = partition[o];
83 partition_count[part] += 1;
86 std::mt19937_64 rng(options.
seed);
96 auto to_sample = sanisizer::create<std::vector<Index_> >(num_partitions);
98 std::vector<std::pair<double, Partition_> > probabilities;
99 probabilities.reserve(num_partitions);
101 const double ratio =
static_cast<double>(target) /
static_cast<double>(num_obs);
102 for (Partition_ p = 0; p < num_partitions; ++p) {
103 const double expected =
static_cast<double>(partition_count[p]) * ratio;
110 const double minimum = std::floor(expected);
111 to_sample[p] = minimum;
112 if (expected > minimum) {
113 probabilities.emplace_back(expected - minimum, p);
118 const Index_ already_used = std::accumulate(to_sample.begin(), to_sample.end(),
static_cast<Index_
>(0));
120 if (already_used < target) {
131 for (
auto& prob : probabilities) {
132 prob.first = - std::log(aarand::standard_uniform(rng)) / prob.first;
135 const Index_ leftovers = target - already_used;
136 std::nth_element(probabilities.begin(), probabilities.begin() + leftovers, probabilities.end());
138 for (Index_ i = 0; i < leftovers; ++i) {
139 ++to_sample[probabilities[i].second];
146 for (Index_ i = 0; i < num_obs; ++i) {
147 const auto part = partition[i];
148 auto& needed = to_sample[part];
153 auto& available = partition_count[part];
154 if (available <= needed || aarand::standard_uniform(rng) *
static_cast<double>(available) <=
static_cast<double>(needed)) {
void compute(const Index_ num_obs, const Partition_ *partition, const Index_ target, const Options &options, std::vector< Index_ > &output)
Definition partisub.hpp:70