Line data Source code
1 : // Author(s): Maurice Laveaux
2 : // Copyright: see the accompanying file COPYING or copy at
3 : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4 : //
5 : // Distributed under the Boost Software License, Version 1.0.
6 : // (See accompanying file LICENSE_1_0.txt or copy at
7 : // http://www.boost.org/LICENSE_1_0.txt)
8 :
9 : #ifndef MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
10 : #define MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
11 : #pragma once
12 :
13 : #define MCRL2_UNORDERED_SET_TEMPLATES template<typename Key, typename Hash, typename Equals, typename Allocator, bool ThreadSafe, bool Resize>
14 : #define MCRL2_UNORDERED_SET_CLASS unordered_set<Key, Hash, Equals, Allocator, ThreadSafe, Resize>
15 :
16 : #include "mcrl2/utilities/unordered_set.h"
17 :
18 : #include "mcrl2/utilities/power_of_two.h"
19 :
20 : #include <algorithm>
21 :
22 : namespace mcrl2::utilities
23 : {
24 :
25 : static constexpr std::size_t minimum_size = 4UL;
26 :
27 : inline float bytes_to_megabytes(std::size_t bytes)
28 : {
29 : return static_cast<float>(bytes) / (1024.0f * 1024.0f);
30 : }
31 :
32 : MCRL2_UNORDERED_SET_TEMPLATES
33 916 : MCRL2_UNORDERED_SET_CLASS::unordered_set(const unordered_set& set)
34 : {
35 916 : reserve(set.size());
36 :
37 1883 : for (auto& element : set)
38 : {
39 51 : emplace(element);
40 : }
41 916 : }
42 :
43 : MCRL2_UNORDERED_SET_TEMPLATES
44 0 : MCRL2_UNORDERED_SET_CLASS& MCRL2_UNORDERED_SET_CLASS::operator=(const unordered_set& set)
45 : {
46 0 : clear();
47 0 : reserve(set.size());
48 :
49 0 : for (auto& element : set)
50 : {
51 0 : emplace(element);
52 : }
53 :
54 0 : return *this;
55 : }
56 :
57 : MCRL2_UNORDERED_SET_TEMPLATES
58 23441 : MCRL2_UNORDERED_SET_CLASS::~unordered_set()
59 : {
60 : // This unordered_set is not moved-from.
61 23441 : if (m_buckets.size() > 0)
62 : {
63 23440 : clear();
64 : }
65 23441 : }
66 :
67 : MCRL2_UNORDERED_SET_TEMPLATES
68 23441 : void MCRL2_UNORDERED_SET_CLASS::clear()
69 : {
70 23441 : assert(m_buckets.size() != 0);
71 :
72 : // A naive implementation of erasing all elements.
73 54726 : for (auto it = begin(); it != end(); )
74 : {
75 31285 : it = erase(it);
76 : }
77 :
78 145989 : assert(std::all_of(m_buckets.begin(), m_buckets.end(), [](const bucket_type& bucket) { return bucket.empty(); }));
79 23441 : assert(m_number_of_elements == 0);
80 23441 : assert(m_buckets.size() != 0);
81 23441 : }
82 :
83 : MCRL2_UNORDERED_SET_TEMPLATES
84 : template<typename ...Args>
85 180136082 : auto MCRL2_UNORDERED_SET_CLASS::emplace(Args&&... args) -> std::pair<iterator, bool>
86 : {
87 104311 : if constexpr (Resize) { rehash_if_needed(); }
88 :
89 : size_type bucket_index;
90 180136082 : iterator it;
91 :
92 : // Searching the current bucket list is safe and can be performed without locking.
93 : if constexpr (allow_transparent)
94 : {
95 : // Compute the hash and corresponding bucket.
96 180036054 : bucket_index = find_bucket_index(std::forward<Args>(args)...);
97 180036054 : it = find_impl(bucket_index, std::forward<Args>(args)...);
98 : }
99 : else
100 : {
101 : // Compute the hash and corresponding bucket.
102 100028 : Key object(std::forward<Args>(args)...);
103 100028 : bucket_index = find_bucket_index(object);
104 100028 : it = find_impl(bucket_index, object);
105 1 : }
106 :
107 180136082 : if (it != end())
108 : {
109 178848536 : return std::make_pair(it, false);
110 : }
111 : else
112 : {
113 1287546 : return emplace_impl(bucket_index, std::forward<Args>(args)...);
114 : }
115 : }
116 :
117 : MCRL2_UNORDERED_SET_TEMPLATES
118 355717 : auto MCRL2_UNORDERED_SET_CLASS::erase(typename MCRL2_UNORDERED_SET_CLASS::const_iterator it) -> iterator
119 : {
120 : // Find the bucket that is pointed to and remove the key after the before iterator.
121 355717 : bucket_type& bucket = const_cast<bucket_type&>(*it.get_bucket_it());
122 :
123 : // An element was removed from the hash table.
124 355717 : --m_number_of_elements;
125 :
126 : // Remove the key that is after the before iterator.
127 355717 : iterator result_it(it.get_bucket_it(), m_buckets.end(), it.key_before_it(), bucket.erase_after(m_allocator, it.key_before_it()));
128 :
129 : // We must guarantee that the iterator points the a valid key (and this might not be the case after removal).
130 355717 : result_it.goto_next_bucket();
131 355717 : return result_it;
132 : }
133 :
134 : MCRL2_UNORDERED_SET_TEMPLATES
135 : template<typename ...Args>
136 3 : std::size_t MCRL2_UNORDERED_SET_CLASS::count(const Args&... args) const
137 : {
138 : if constexpr (allow_transparent)
139 : {
140 : return find(args...) != end();
141 : }
142 : else
143 : {
144 3 : return find(Key(args...)) != end();
145 : }
146 : }
147 :
148 : MCRL2_UNORDERED_SET_TEMPLATES
149 : template<typename ...Args>
150 : void MCRL2_UNORDERED_SET_CLASS::erase(const Args&... args)
151 : {
152 : if constexpr (allow_transparent)
153 : {
154 : erase_impl(args...);
155 : }
156 : else
157 : {
158 : erase_impl(Key(args...));
159 : }
160 : }
161 :
162 : MCRL2_UNORDERED_SET_TEMPLATES
163 : template<typename ...Args>
164 29087 : auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) const -> const_iterator
165 : {
166 : if constexpr (allow_transparent)
167 : {
168 29084 : return find_impl(find_bucket_index(args...), args...);
169 : }
170 : else
171 : {
172 3 : Key element(args...);
173 6 : return find_impl(find_bucket_index(element), element);
174 : }
175 : }
176 :
177 : MCRL2_UNORDERED_SET_TEMPLATES
178 : template<typename ...Args>
179 1841100 : auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) -> iterator
180 : {
181 : if constexpr (allow_transparent)
182 : {
183 1831094 : return find_impl(find_bucket_index(args...), args...);
184 : }
185 : else
186 : {
187 10006 : Key element(args...);
188 20012 : return find_impl(find_bucket_index(element), element);
189 : }
190 : }
191 :
192 : MCRL2_UNORDERED_SET_TEMPLATES
193 25859 : void MCRL2_UNORDERED_SET_CLASS::rehash(std::size_t number_of_buckets)
194 : {
195 : // Ensure that the number of buckets is a power of two greater than the minimum size.
196 25859 : number_of_buckets = std::max(utilities::round_up_to_power_of_two(number_of_buckets), minimum_size);
197 :
198 : // If n is greater than the current number of buckets in the container (bucket_count), a rehash is forced.
199 : // The new bucket count can either be equal or greater than n. Otherwise, it has no effect.
200 25859 : if (number_of_buckets <= bucket_count())
201 : {
202 0 : return;
203 : }
204 :
205 : // Update the number of mutexes.
206 : if constexpr (!EnableLockfreeInsertion)
207 : {
208 : m_bucket_mutexes = std::vector<std::mutex>(std::max(number_of_buckets / BucketsPerMutex, 1ul));
209 : }
210 :
211 : // Create one bucket list for all elements in the hashtable.
212 25859 : bucket_type old_keys;
213 315867 : for (auto&& bucket : m_buckets)
214 : {
215 290008 : old_keys.splice_after(old_keys.before_begin(), bucket);
216 : }
217 :
218 25859 : assert(std::distance(old_keys.begin(), old_keys.end()) == static_cast<long>(m_number_of_elements));
219 :
220 : // Recreate the hash table, but don't move or copy the old elements.
221 : {
222 : // clear() doesn't actually free the memory used and still results in an 3n peak.
223 25859 : std::vector<bucket_type>().swap(m_buckets);
224 : }
225 25859 : m_buckets.resize(number_of_buckets);
226 25859 : m_buckets_mask = m_buckets.size() - 1;
227 :
228 : // Fill the set with all elements that are stored in old_keys.
229 418992 : while (!old_keys.empty())
230 : {
231 : // Move the current element to this bucket.
232 393133 : bucket_type& bucket = m_buckets[find_bucket_index(old_keys.front())];
233 393133 : bucket.splice_front(bucket.before_begin(), old_keys);
234 : }
235 : }
236 :
237 : template<typename T>
238 : void print_performance_statistics(const T& unordered_set)
239 : {
240 : // Calculate a histogram of the bucket lengths.
241 : std::vector<std::size_t> histogram;
242 :
243 : for (std::size_t index = 0; index < unordered_set.bucket_count(); ++index)
244 : {
245 : // Ensure that the current bucket fits within the histogram.
246 : std::size_t bucket_length = unordered_set.bucket_size(index);
247 : histogram.resize(std::max(histogram.size(), bucket_length + 1));
248 : ++histogram[bucket_length];
249 : }
250 :
251 : mCRL2log(mcrl2::log::info) << "Table stores " << unordered_set.size() << " keys in " << unordered_set.bucket_count() << " buckets.\n";
252 :
253 : for (std::size_t i = 0; i < histogram.size(); ++i)
254 : {
255 : mCRL2log(mcrl2::log::debug) << "There are " << histogram[i] << " buckets that store " << i << " keys.\n";
256 : }
257 : }
258 :
259 : /// Private functions
260 :
261 : MCRL2_UNORDERED_SET_TEMPLATES
262 : template<typename ...Args>
263 1287547 : auto MCRL2_UNORDERED_SET_CLASS::emplace_impl(size_type bucket_index, Args&&... args) -> std::pair<iterator, bool>
264 : {
265 1287547 : bucket_type& bucket = m_buckets[bucket_index];
266 :
267 : if constexpr (ThreadSafe)
268 : {
269 : if constexpr (EnableLockfreeInsertion)
270 : {
271 : // Construct a new node and put it at the front of the bucket list.
272 1173829 : auto [key_it, added] = bucket.emplace_front_unique(m_allocator, m_equals, std::forward<Args>(args)...);
273 1173829 : iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), key_it);
274 :
275 1173829 : if (added)
276 : {
277 1173829 : m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
278 : }
279 :
280 2347658 : return std::make_pair(it, added);
281 : }
282 : else
283 : {
284 : // Obtain exclusive access to this bucket.
285 : std::lock_guard g(m_bucket_mutexes[bucket_index % m_bucket_mutexes.size()]);
286 :
287 : iterator it = find_impl(bucket_index, std::forward<Args>(args)...);
288 : if (it != end())
289 : {
290 : // This element has been inserted in the mean time.
291 : return std::make_pair(it, false);
292 : }
293 : else
294 : {
295 : // Construct a new node and put it at the front of the bucket list.
296 : bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
297 :
298 : // Return the iterator.
299 : m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
300 : iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
301 : return std::make_pair(it, true);
302 : }
303 : }
304 :
305 : }
306 : else
307 : {
308 : // Construct a new node and put it at the front of the bucket list.
309 113718 : bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
310 113718 : ++m_number_of_elements;
311 :
312 : // Return the iterator.
313 113718 : iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
314 113718 : return std::make_pair(it, true);
315 : }
316 :
317 : }
318 :
319 : MCRL2_UNORDERED_SET_TEMPLATES
320 : template<typename ...Args>
321 : void MCRL2_UNORDERED_SET_CLASS::erase_impl(const Args&... args)
322 : {
323 : bucket_type& bucket = m_buckets[find_bucket_index(args...)];
324 :
325 : // Loop over the elements in the bucket until the key was found.
326 : typename bucket_type::const_iterator before_it = bucket.before_begin();
327 : for (typename bucket_type::iterator it = bucket.begin(); it != bucket.end();)
328 : {
329 : if (m_equals(*it, args...))
330 : {
331 : // Erase the current element and stop iterating.
332 : --m_number_of_elements;
333 : it = bucket.erase_after(m_allocator, before_it);
334 : break;
335 : }
336 : else
337 : {
338 : // Both iterators move one place forward.
339 : ++before_it;
340 : ++it;
341 : }
342 : }
343 : }
344 :
345 :
346 : MCRL2_UNORDERED_SET_TEMPLATES
347 : template<typename ...Args>
348 182399404 : std::size_t MCRL2_UNORDERED_SET_CLASS::find_bucket_index(const Args&... args) const
349 : {
350 182399404 : std::size_t hash = m_hash(args...);
351 : /// n mod 2^i is equal to n & (2^i - 1).
352 182399404 : assert(m_buckets_mask == m_buckets.size() - 1);
353 182399404 : std::size_t index = hash & m_buckets_mask;
354 182399404 : assert(index < m_buckets.size());
355 182399404 : return index;
356 : }
357 :
358 : MCRL2_UNORDERED_SET_TEMPLATES
359 : template<typename ...Args>
360 182006271 : auto MCRL2_UNORDERED_SET_CLASS::find_impl(size_type bucket_index, const Args&... args) const -> const_iterator
361 : {
362 182006271 : const auto& bucket = m_buckets[bucket_index];
363 :
364 : // Search through the bucket to find an element that is equivalent.
365 182006271 : auto before_it = bucket.before_begin();
366 283745914 : for(auto it = bucket.begin(); it != bucket.end(); ++it)
367 : {
368 281934000 : auto& key = *it;
369 281934000 : if (m_equals(key, args...))
370 : {
371 180194357 : return const_iterator(m_buckets.begin() + bucket_index, m_buckets.end(), before_it, it);
372 : }
373 :
374 101739643 : before_it = it;
375 : }
376 :
377 1811914 : return end();
378 : }
379 :
380 : MCRL2_UNORDERED_SET_TEMPLATES
381 104731 : void MCRL2_UNORDERED_SET_CLASS::rehash_if_needed()
382 : {
383 104731 : if (load_factor() >= max_load_factor())
384 : {
385 299 : rehash(m_buckets.size() * 2);
386 : }
387 104731 : }
388 :
389 : #undef MCRL2_UNORDERED_SET_CLASS
390 : #undef MCRL2_UNORDERED_SET_TEMPLATES
391 :
392 : } // namespace mcrl2::utilities
393 :
394 : #endif // MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
|