factorize
Create factors from categorical variables
Loading...
Searching...
No Matches
create_factor.hpp
Go to the documentation of this file.
1#ifndef FACTORIZE_CLEAN_FACTOR_HPP
2#define FACTORIZE_CLEAN_FACTOR_HPP
3
4#include <unordered_map>
5#include <vector>
6#include <algorithm>
7#include <cstddef>
8
9#include "sanisizer/sanisizer.hpp"
10
11#include "utils.hpp"
12
18namespace factorize {
19
38template<typename Input_, typename Code_>
39std::vector<Input_> create_factor(const std::size_t n, const Input_* const input, Code_* const codes) {
40 auto unique = [&]{ // scoping this in an IIFE to release map memory sooner.
41 std::unordered_map<Input_, Code_> mapping;
42 for (I<decltype(n)> i = 0; i < n; ++i) {
43 const auto current = input[i];
44 const auto mIt = mapping.find(current);
45 if (mIt != mapping.end()) {
46 codes[i] = mIt->second;
47 } else {
48 Code_ alt = mapping.size();
49 mapping[current] = alt;
50 codes[i] = alt;
51 }
52 }
53 return std::vector<std::pair<Input_, Code_> >(mapping.begin(), mapping.end());
54 }();
55
56 // Remapping to a sorted set.
57 std::sort(unique.begin(), unique.end());
58 const auto nuniq = unique.size();
59 auto remapping = sanisizer::create<std::vector<Code_> >(nuniq);
60 auto output = sanisizer::create<std::vector<Input_> >(nuniq);
61 for (I<decltype(nuniq)> u = 0; u < nuniq; ++u) {
62 remapping[unique[u].second] = u;
63 output[u] = unique[u].first;
64 }
65
66 // Mapping each cell to its sorted factor.
67 for (I<decltype(n)> i = 0; i < n; ++i) {
68 codes[i] = remapping[codes[i]];
69 }
70
71 return output;
72}
73
74}
75
76#endif
Create factors from categorical variables.
Definition combine_to_factor.hpp:20
std::vector< Input_ > create_factor(const std::size_t n, const Input_ *const input, Code_ *const codes)
Definition create_factor.hpp:39