I’ve been playing around
The GCC implementation is more aggressive in terms of container growth; it creates more buckets than Apple’s CLANG and LLVM’s implementation which had identical results.
The takeaway here is that when using unordered containers it is good to either reserve the appropriate element count, or rehash after inserting the elements to reduce the bucket count and optimize memory use.
****************************************
Program output.
* GCC -Ofast -march=native -lstdc++ *
****************************************
unordered_map
initial
size = 10000000
bucket count = 15345007
load factor = 0.651678
reserved
size = 10000000
bucket count = 10352717
load factor = 0.96593
re-hashed
size = 10000000
bucket count = 1056323
load factor = 9.4668
unordered_set
initial
size = 10000000
bucket count = 15345007
load factor = 0.651678
reserved
size = 10000000
bucket count = 10352717
load factor = 0.96593
re-hashed
size = 10000000
bucket count = 1056323
load factor = 9.4668
**********************************************
* Apple CLANG -Ofast -march=native -lc++ *
**********************************************
unordered_map
initial
size = 10000000
bucket count = 13169977
load factor = 0.759303
reserved
size = 10000000
bucket count = 10000019
load factor = 0.999998
re-hashed
size = 10000000
bucket count = 1000003
load factor = 9.99997
unordered_set
initial
size = 10000000
bucket count = 13169977
load factor = 0.759303
reserved
size = 10000000
bucket count = 10000019
load factor = 0.999998
re-hashed
size = 10000000
bucket count = 1000003
load factor = 9.99997
Complete listing:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
#include <unordered_map> #include <unordered_set> #include "trace.h" using namespace std; const int COUNT = 10'000'000; int main(int argc, char** argv) { unordered_map<int, int> m; for(int i = 0; i < COUNT; ++i) m[i] = i; trace("unordered_map"); trace("initial"); trace("size = ", m.size()); trace("bucket count = ", m.bucket_count()); trace("load factor = ", m.load_factor(), "\n"); m.clear(); m.reserve(COUNT); for(int i = 0; i < COUNT; ++i) m[i] = i; trace("reserved"); trace("size = ", m.size()); trace("bucket count = ", m.bucket_count()); trace("load factor = ", m.load_factor(), "\n"); m.max_load_factor(10); m.rehash(COUNT / 10); trace("re-hashed"); trace("size = ", m.size()); trace("bucket count = ", m.bucket_count()); trace("load factor = ", m.load_factor(), "\n"); unordered_set<int> s; for(int i = 0; i < COUNT; ++i) s.insert(i); trace("unordered_set"); trace("initial"); trace("size = ", s.size()); trace("bucket count = ", s.bucket_count()); trace("load factor = ", s.load_factor(), "\n"); s.clear(); s.reserve(COUNT); for(int i = 0; i < COUNT; ++i) s.insert(i); trace("reserved"); trace("size = ", s.size()); trace("bucket count = ", s.bucket_count()); trace("load factor = ", s.load_factor(), "\n"); s.max_load_factor(10); s.rehash(COUNT / 10); trace("re-hashed"); trace("size = ", s.size()); trace("bucket count = ", s.bucket_count()); trace("load factor = ", s.load_factor(), "\n"); return 1; } |
trace.h:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
#pragma once #include <iostream> #include <mutex> #include "mutex.h" namespace { static inline fast_mutex kStdOutLock; } template<typename... Ts> inline void trace(Ts&&... args) { std::scoped_lock lock(kStdOutLock); (std::cout << ... << args) << std::endl; } |
I’ve never used fast_mutex. Why did you pick it over a plain mutex?
it’s an implementation from my previous post; check it out.
thank you