Based on this implementation it supports multiple hashes for better positive hit ratio. Can be initializes with size in bits and number of hashes to perform, like this:
bloom_filter bloom(128, 5);
As always, complete implementation on GitHub: bloom.hpp.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#pragma once #include <vector> #include <random> #include <functional> #include <cstddef> #include <cassert> namespace { template<typename Key, typename Hash = std::hash<Key>> class mixer { public: mixer(std::size_t size, const Key& key) : m_size(size), m_random(Hash{}(key)) { assert(size > 0); } std::size_t operator()() { return m_random() % m_size; } private: std::size_t m_size; std::mt19937 m_random; }; } template <typename Key, typename Hash = std::hash<Key>> class bloom_filter { public: bloom_filter(std::size_t size, std::size_t hashes) : m_size(size), m_hashes(hashes), m_bits(size) { assert(size > 0); assert(hashes > 0); } void add(const Key& key) { mixer<Key, Hash> m(m_size, key); for(std::size_t i = 0; i < m_hashes; ++i) m_bits[m()] = true; } bool contains(const Key& key) const { mixer<Key, Hash> m(m_size, key); for(std::size_t i = 0; i < m_hashes; ++i) if(!m_bits[m()]) return false; return true; } private: std::size_t m_size; std::size_t m_hashes; std::vector<bool> m_bits; }; |
One Reply to “Better bloom filter”