summaryrefslogtreecommitdiff
path: root/kernel/hashlib.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/hashlib.h')
-rw-r--r--kernel/hashlib.h774
1 files changed, 774 insertions, 0 deletions
diff --git a/kernel/hashlib.h b/kernel/hashlib.h
new file mode 100644
index 00000000..a8d32086
--- /dev/null
+++ b/kernel/hashlib.h
@@ -0,0 +1,774 @@
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+
+// -------------------------------------------------------
+// Written by Clifford Wolf <clifford@clifford.at> in 2014
+// -------------------------------------------------------
+
+#ifndef HASHLIB_H
+
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace hashlib {
+
+#define HASHLIB_SIZE_FACTOR 3
+
+// The XOR version of DJB2
+// (traditionally 5381 is used as starting value for the djb2 hash)
+inline unsigned int mkhash(unsigned int a, unsigned int b) {
+ return ((a << 5) + a) ^ b;
+}
+
+// The ADD version of DJB2
+// (use this version for cache locality in b)
+inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
+ return ((a << 5) + a) + b;
+}
+
+template<typename T> struct hash_ops {
+ bool cmp(const T &a, const T &b) const {
+ return a == b;
+ }
+ unsigned int hash(const T &a) const {
+ return a.hash();
+ }
+};
+
+template<> struct hash_ops<int> {
+ template<typename T>
+ bool cmp(T a, T b) const {
+ return a == b;
+ }
+ unsigned int hash(unsigned int a) const {
+ return a;
+ }
+};
+
+template<> struct hash_ops<std::string> {
+ bool cmp(const std::string &a, const std::string &b) const {
+ return a == b;
+ }
+ unsigned int hash(const std::string &a) const {
+ unsigned int v = 0;
+ for (auto c : a)
+ v = mkhash(v, c);
+ return v;
+ }
+};
+
+struct hash_cstr_ops {
+ bool cmp(const char *a, const char *b) const {
+ for (int i = 0; a[i] || b[i]; i++)
+ if (a[i] != b[i])
+ return false;
+ return true;
+ }
+ unsigned int hash(const char *a) const {
+ unsigned int hash = 5381;
+ while (*a)
+ hash = mkhash(hash, *(a++));
+ return hash;
+ }
+};
+
+struct hash_ptr_ops {
+ bool cmp(const void *a, const void *b) const {
+ return a == b;
+ }
+ unsigned int hash(const void *a) const {
+ return (unsigned long)a;
+ }
+};
+
+struct hash_obj_ops {
+ bool cmp(const void *a, const void *b) const {
+ return a == b;
+ }
+ template<typename T>
+ unsigned int hash(const T *a) const {
+ return a->hash();
+ }
+};
+
+inline int hashtable_size(int old_size)
+{
+ // prime numbers, approx. in powers of two
+ if (old_size < 53) return 53;
+ if (old_size < 113) return 113;
+ if (old_size < 251) return 251;
+ if (old_size < 503) return 503;
+ if (old_size < 1129) return 1129;
+ if (old_size < 2503) return 2503;
+ if (old_size < 5023) return 5023;
+ if (old_size < 11299) return 11299;
+ if (old_size < 25097) return 25097;
+ if (old_size < 50291) return 50291;
+ if (old_size < 112997) return 112997;
+ if (old_size < 251003) return 251003;
+ if (old_size < 503003) return 503003;
+ if (old_size < 1129991) return 1129991;
+ if (old_size < 2509993) return 2509993;
+ if (old_size < 5029991) return 5029991;
+ if (old_size < 11299997) return 11299997;
+ if (old_size < 25099999) return 25099999;
+ if (old_size < 50299999) return 50299999;
+ if (old_size < 113000009) return 113000009;
+ if (old_size < 250999999) return 250999999;
+ if (old_size < 503000009) return 503000009;
+ if (old_size < 1129999999) return 1129999999;
+ throw std::length_error("hash table exceeded maximum size");
+}
+
+template<typename K, typename T, typename OPS = hash_ops<K>>
+class dict
+{
+ struct entry_t
+ {
+ int link;
+ std::pair<K, T> udata;
+
+ entry_t() : link(-1) { }
+ entry_t(const std::pair<K, T> &udata) : link(1), udata(udata) { }
+
+ bool is_free() const { return link < 0; }
+ int get_next() const { return (link > 0 ? link : -link) - 2; }
+ bool get_last() const { return get_next() == -1; }
+ void set_next_used(int next) { link = next + 2; }
+ void set_next_free(int next) { link = -(next + 2); }
+ };
+
+ std::vector<int> hashtable;
+ std::vector<entry_t> entries;
+ int free_list, counter;
+ OPS ops;
+
+ void init()
+ {
+ free_list = -1;
+ counter = 0;
+ }
+
+ void init_from(const dict<K, T, OPS> &other)
+ {
+ hashtable.clear();
+ entries.clear();
+
+ counter = other.size();
+ int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * counter);
+ hashtable.resize(new_size);
+ new_size = new_size / HASHLIB_SIZE_FACTOR + 1;
+ entries.reserve(new_size);
+
+ for (auto &it : other)
+ entries.push_back(entry_t(it));
+ entries.resize(new_size);
+ rehash();
+ }
+
+ int mkhash(const K &key) const
+ {
+ unsigned int hash = 0;
+ if (!hashtable.empty())
+ hash = ops.hash(key) % (unsigned int)(hashtable.size());
+ return hash;
+ }
+
+ void rehash()
+ {
+ free_list = -1;
+
+ for (auto &h : hashtable)
+ h = -1;
+
+ for (int i = 0; i < int(entries.size()); i++)
+ if (entries[i].is_free()) {
+ entries[i].set_next_free(free_list);
+ free_list = i;
+ } else {
+ int hash = mkhash(entries[i].udata.first);
+ entries[i].set_next_used(hashtable[hash]);
+ hashtable[hash] = i;
+ }
+ }
+
+ void do_erase(const K &key, int hash)
+ {
+ int last_index = -1;
+ int index = hashtable.empty() ? -1 : hashtable[hash];
+ while (1) {
+ if (index < 0)
+ return;
+ if (ops.cmp(entries[index].udata.first, key)) {
+ if (last_index < 0)
+ hashtable[hash] = entries[index].get_next();
+ else
+ entries[last_index].set_next_used(entries[index].get_next());
+ entries[index].udata = std::pair<K, T>();
+ entries[index].set_next_free(free_list);
+ free_list = index;
+ if (--counter == 0)
+ init();
+ return;
+ }
+ last_index = index;
+ index = entries[index].get_next();
+ }
+ }
+
+ int lookup_index(const K &key, int hash) const
+ {
+ int index = hashtable.empty() ? -1 : hashtable[hash];
+ while (1) {
+ if (index < 0)
+ return -1;
+ if (ops.cmp(entries[index].udata.first, key))
+ return index;
+ index = entries[index].get_next();
+ }
+ }
+
+ int insert_at(const std::pair<K, T> &value, int hash)
+ {
+ if (free_list < 0)
+ {
+ int i = entries.size();
+ int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * entries.size());
+ hashtable.resize(new_size);
+ entries.resize(new_size / HASHLIB_SIZE_FACTOR + 1);
+ entries[i].udata = value;
+ entries[i].set_next_used(0);
+ counter++;
+ rehash();
+ return i;
+ }
+
+ int i = free_list;
+ free_list = entries[i].get_next();
+ entries[i].udata = value;
+ entries[i].set_next_used(hashtable[hash]);
+ hashtable[hash] = i;
+ counter++;
+ return i;
+ }
+
+public:
+ class iterator
+ {
+ dict<K, T, OPS> *ptr;
+ int index;
+ public:
+ iterator() { }
+ iterator(dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+ iterator operator++() { do index++; while (index != int(ptr->entries.size()) && ptr->entries[index].is_free()); return *this; }
+ iterator operator--() { do index--; while (index != 0 && ptr->entries[index].is_free()); return *this; }
+ bool operator==(const iterator &other) const { return index == other.index; }
+ bool operator!=(const iterator &other) const { return index != other.index; }
+ std::pair<K, T> &operator*() { return ptr->entries[index].udata; }
+ std::pair<K, T> *operator->() { return &ptr->entries[index].udata; }
+ const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
+ const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
+ };
+
+ class const_iterator
+ {
+ const dict<K, T, OPS> *ptr;
+ int index;
+ public:
+ const_iterator() { }
+ const_iterator(const dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+ const_iterator operator++() { do index++; while (index != int(ptr->entries.size()) && ptr->entries[index].is_free()); return *this; }
+ const_iterator operator--() { do index--; while (index != 0 && ptr->entries[index].is_free()); return *this; }
+ bool operator==(const const_iterator &other) const { return index == other.index; }
+ bool operator!=(const const_iterator &other) const { return index != other.index; }
+ const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
+ const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
+ };
+
+ dict()
+ {
+ init();
+ }
+
+ dict(const dict<K, T, OPS> &other)
+ {
+ init_from(other);
+ }
+
+ dict(dict<K, T, OPS> &&other)
+ {
+ free_list = -1;
+ counter = 0;
+ swap(other);
+ }
+
+ dict<K, T, OPS> &operator=(const dict<K, T, OPS> &other) {
+ clear();
+ init_from(other);
+ return *this;
+ }
+
+ dict<K, T, OPS> &operator=(dict<K, T, OPS> &&other) {
+ clear();
+ swap(other);
+ return *this;
+ }
+
+ dict(const std::initializer_list<std::pair<K, T>> &list)
+ {
+ init();
+ for (auto &it : list)
+ insert(it);
+ }
+
+ template<class InputIterator>
+ dict(InputIterator first, InputIterator last)
+ {
+ init();
+ insert(first, last);
+ }
+
+ template<class InputIterator>
+ void insert(InputIterator first, InputIterator last)
+ {
+ for (; first != last; ++first)
+ insert(*first);
+ }
+
+ std::pair<iterator, bool> insert(const std::pair<K, T> &value)
+ {
+ int hash = mkhash(value.first);
+ int i = lookup_index(value.first, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = insert_at(value, hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ void erase(const K &key)
+ {
+ int hash = mkhash(key);
+ do_erase(key, hash);
+ }
+
+ void erase(const iterator it)
+ {
+ int hash = mkhash(it->first);
+ do_erase(it->first, hash);
+ }
+
+ int count(const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ return i < 0 ? 0 : 1;
+ }
+
+ iterator find(const K &key)
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ return end();
+ return iterator(this, i);
+ }
+
+ const_iterator find(const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ return end();
+ return const_iterator(this, i);
+ }
+
+ T& at(const K &key)
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ throw std::out_of_range("dict::at()");
+ return entries[i].udata.second;
+ }
+
+ const T& at(const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ throw std::out_of_range("dict::at()");
+ return entries[i].udata.second;
+ }
+
+ T& operator[](const K &key)
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ i = insert_at(std::pair<K, T>(key, T()), hash);
+ return entries[i].udata.second;
+ }
+
+ void swap(dict<K, T, OPS> &other)
+ {
+ hashtable.swap(other.hashtable);
+ entries.swap(other.entries);
+ std::swap(free_list, other.free_list);
+ std::swap(counter, other.counter);
+ }
+
+ bool operator==(const dict<K, T, OPS> &other) const {
+ if (counter != other.counter)
+ return false;
+ if (counter == 0)
+ return true;
+ if (entries.size() < other.entries.size())
+ for (auto &it : *this) {
+ auto oit = other.find(it.first);
+ if (oit == other.end() || oit->second != it.second)
+ return false;
+ }
+ else
+ for (auto &oit : other) {
+ auto it = find(oit.first);
+ if (it == end() || it->second != oit.second)
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const dict<K, T, OPS> &other) const {
+ return !(*this == other);
+ }
+
+ size_t size() const { return counter; }
+ bool empty() const { return counter == 0; }
+ void clear() { hashtable.clear(); entries.clear(); init(); }
+
+ iterator begin() { int index = 0; while (index != int(entries.size()) && entries[index].is_free()) index++; return iterator(this, index); }
+ iterator end() { return iterator(this, entries.size()); }
+
+ const_iterator begin() const { int index = 0; while (index != int(entries.size()) && entries[index].is_free()) index++; return const_iterator(this, index); }
+ const_iterator end() const { return const_iterator(this, entries.size()); }
+};
+
+template<typename K, typename OPS = hash_ops<K>>
+class pool
+{
+ struct entry_t
+ {
+ int link;
+ K key;
+
+ entry_t() : link(-1) { }
+ entry_t(const K &key) : link(1), key(key) { }
+
+ bool is_free() const { return link < 0; }
+ int get_next() const { return (link > 0 ? link : -link) - 2; }
+ bool get_last() const { return get_next() == -1; }
+ void set_next_used(int next) { link = next + 2; }
+ void set_next_free(int next) { link = -(next + 2); }
+ };
+
+ std::vector<int> hashtable;
+ std::vector<entry_t> entries;
+ int free_list, counter;
+ OPS ops;
+
+ void init()
+ {
+ free_list = -1;
+ counter = 0;
+ }
+
+ void init_from(const pool<K, OPS> &other)
+ {
+ hashtable.clear();
+ entries.clear();
+
+ counter = other.size();
+ int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * counter);
+ hashtable.resize(new_size);
+ new_size = new_size / HASHLIB_SIZE_FACTOR + 1;
+ entries.reserve(new_size);
+
+ for (auto &it : other)
+ entries.push_back(entry_t(it));
+ entries.resize(new_size);
+ rehash();
+ }
+
+ int mkhash(const K &key) const
+ {
+ unsigned int hash = 0;
+ if (!hashtable.empty())
+ hash = ops.hash(key) % (unsigned int)(hashtable.size());
+ return hash;
+ }
+
+ void rehash()
+ {
+ free_list = -1;
+
+ for (auto &h : hashtable)
+ h = -1;
+
+ for (int i = 0; i < int(entries.size()); i++)
+ if (entries[i].is_free()) {
+ entries[i].set_next_free(free_list);
+ free_list = i;
+ } else {
+ int hash = mkhash(entries[i].key);
+ entries[i].set_next_used(hashtable[hash]);
+ hashtable[hash] = i;
+ }
+ }
+
+ void do_erase(const K &key, int hash)
+ {
+ int last_index = -1;
+ int index = hashtable.empty() ? -1 : hashtable[hash];
+ while (1) {
+ if (index < 0)
+ return;
+ if (ops.cmp(entries[index].key, key)) {
+ if (last_index < 0)
+ hashtable[hash] = entries[index].get_next();
+ else
+ entries[last_index].set_next_used(entries[index].get_next());
+ entries[index].key = K();
+ entries[index].set_next_free(free_list);
+ free_list = index;
+ if (--counter == 0)
+ init();
+ return;
+ }
+ last_index = index;
+ index = entries[index].get_next();
+ }
+ }
+
+ int lookup_index(const K &key, int hash) const
+ {
+ int index = hashtable.empty() ? -1 : hashtable[hash];
+ while (1) {
+ if (index < 0)
+ return -1;
+ if (ops.cmp(entries[index].key, key))
+ return index;
+ index = entries[index].get_next();
+ }
+ }
+
+ int insert_at(const K &key, int hash)
+ {
+ if (free_list < 0)
+ {
+ int i = entries.size();
+ int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * entries.size());
+ hashtable.resize(new_size);
+ entries.resize(new_size / HASHLIB_SIZE_FACTOR + 1);
+ entries[i].key = key;
+ entries[i].set_next_used(0);
+ counter++;
+ rehash();
+ return i;
+ }
+
+ int i = free_list;
+ free_list = entries[i].get_next();
+ entries[i].key = key;
+ entries[i].set_next_used(hashtable[hash]);
+ hashtable[hash] = i;
+ counter++;
+ return i;
+ }
+
+public:
+ class iterator
+ {
+ pool<K, OPS> *ptr;
+ int index;
+ public:
+ iterator() { }
+ iterator(pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+ iterator operator++() { do index++; while (index != int(ptr->entries.size()) && ptr->entries[index].is_free()); return *this; }
+ iterator operator--() { do index--; while (index != 0 && ptr->entries[index].is_free()); return *this; }
+ bool operator==(const iterator &other) const { return index == other.index; }
+ bool operator!=(const iterator &other) const { return index != other.index; }
+ K &operator*() { return ptr->entries[index].key; }
+ K *operator->() { return &ptr->entries[index].key; }
+ const K &operator*() const { return ptr->entries[index].key; }
+ const K *operator->() const { return &ptr->entries[index].key; }
+ };
+
+ class const_iterator
+ {
+ const pool<K, OPS> *ptr;
+ int index;
+ public:
+ const_iterator() { }
+ const_iterator(const pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+ const_iterator operator++() { do index++; while (index != int(ptr->entries.size()) && ptr->entries[index].is_free()); return *this; }
+ const_iterator operator--() { do index--; while (index != 0 && ptr->entries[index].is_free()); return *this; }
+ bool operator==(const const_iterator &other) const { return index == other.index; }
+ bool operator!=(const const_iterator &other) const { return index != other.index; }
+ const K &operator*() const { return ptr->entries[index].key; }
+ const K *operator->() const { return &ptr->entries[index].key; }
+ };
+
+ pool()
+ {
+ init();
+ }
+
+ pool(const pool<K, OPS> &other)
+ {
+ init_from(other);
+ }
+
+ pool(pool<K, OPS> &&other)
+ {
+ free_list = -1;
+ counter = 0;
+ swap(other);
+ }
+
+ pool<K, OPS> &operator=(const pool<K, OPS> &other) {
+ clear();
+ init_from(other);
+ return *this;
+ }
+
+ pool<K, OPS> &operator=(pool<K, OPS> &&other) {
+ clear();
+ swap(other);
+ return *this;
+ }
+
+ pool(const std::initializer_list<K> &list)
+ {
+ init();
+ for (auto &it : list)
+ insert(it);
+ }
+
+ template<class InputIterator>
+ pool(InputIterator first, InputIterator last)
+ {
+ init();
+ insert(first, last);
+ }
+
+ template<class InputIterator>
+ void insert(InputIterator first, InputIterator last)
+ {
+ for (; first != last; ++first)
+ insert(*first);
+ }
+
+ std::pair<iterator, bool> insert(const K &key)
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = insert_at(key, hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ void erase(const K &key)
+ {
+ int hash = mkhash(key);
+ do_erase(key, hash);
+ }
+
+ void erase(const iterator it)
+ {
+ int hash = mkhash(*it);
+ do_erase(*it, hash);
+ }
+
+ int count(const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ return i < 0 ? 0 : 1;
+ }
+
+ iterator find(const K &key)
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ return end();
+ return iterator(this, i);
+ }
+
+ const_iterator find(const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ if (i < 0)
+ return end();
+ return const_iterator(this, i);
+ }
+
+ bool operator[](const K &key) const
+ {
+ int hash = mkhash(key);
+ int i = lookup_index(key, hash);
+ return i >= 0;
+ }
+
+ void swap(pool<K, OPS> &other)
+ {
+ hashtable.swap(other.hashtable);
+ entries.swap(other.entries);
+ std::swap(free_list, other.free_list);
+ std::swap(counter, other.counter);
+ }
+
+ bool operator==(const pool<K, OPS> &other) const {
+ if (counter != other.counter)
+ return false;
+ if (counter == 0)
+ return true;
+ if (entries.size() < other.entries.size())
+ for (auto &it : *this) {
+ auto oit = other.find(it.first);
+ if (oit == other.end() || oit->second != it.second)
+ return false;
+ }
+ else
+ for (auto &oit : other) {
+ auto it = find(oit.first);
+ if (it == end() || it->second != oit.second)
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const pool<K, OPS> &other) const {
+ return !(*this == other);
+ }
+
+ size_t size() const { return counter; }
+ bool empty() const { return counter == 0; }
+ void clear() { hashtable.clear(); entries.clear(); init(); }
+
+ iterator begin() { int index = 0; while (index != int(entries.size()) && entries[index].is_free()) index++; return iterator(this, index); }
+ iterator end() { return iterator(this, entries.size()); }
+
+ const_iterator begin() const { int index = 0; while (index != int(entries.size()) && entries[index].is_free()) index++; return const_iterator(this, index); }
+ const_iterator end() const { return const_iterator(this, entries.size()); }
+};
+
+} /* namespace hashlib */
+
+#endif