mCRL2
Loading...
Searching...
No Matches
unordered_set_implementation.h
Go to the documentation of this file.
1// Author(s): Maurice Laveaux
2// Copyright: see the accompanying file COPYING or copy at
3// https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4//
5// Distributed under the Boost Software License, Version 1.0.
6// (See accompanying file LICENSE_1_0.txt or copy at
7// http://www.boost.org/LICENSE_1_0.txt)
8
9#ifndef MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
10#define MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
11#pragma once
12
13#define MCRL2_UNORDERED_SET_TEMPLATES template<typename Key, typename Hash, typename Equals, typename Allocator, bool ThreadSafe, bool Resize>
14#define MCRL2_UNORDERED_SET_CLASS unordered_set<Key, Hash, Equals, Allocator, ThreadSafe, Resize>
15
17
19
20#include <algorithm>
21
22namespace mcrl2::utilities
23{
24
25static constexpr std::size_t minimum_size = 4UL;
26
27inline float bytes_to_megabytes(std::size_t bytes)
28{
29 return static_cast<float>(bytes) / (1024.0f * 1024.0f);
30}
31
33MCRL2_UNORDERED_SET_CLASS::unordered_set(const unordered_set& set)
34{
35 reserve(set.size());
36
37 for (auto& element : set)
38 {
39 emplace(element);
40 }
41}
42
44MCRL2_UNORDERED_SET_CLASS& MCRL2_UNORDERED_SET_CLASS::operator=(const unordered_set& set)
45{
46 clear();
47 reserve(set.size());
48
49 for (auto& element : set)
50 {
51 emplace(element);
52 }
53
54 return *this;
55}
56
58MCRL2_UNORDERED_SET_CLASS::~unordered_set()
59{
60 // This unordered_set is not moved-from.
61 if (m_buckets.size() > 0)
62 {
63 clear();
64 }
65}
66
68void MCRL2_UNORDERED_SET_CLASS::clear()
69{
70 assert(m_buckets.size() != 0);
71
72 // A naive implementation of erasing all elements.
73 for (auto it = begin(); it != end(); )
74 {
75 it = erase(it);
76 }
77
78 assert(std::all_of(m_buckets.begin(), m_buckets.end(), [](const bucket_type& bucket) { return bucket.empty(); }));
79 assert(m_number_of_elements == 0);
80 assert(m_buckets.size() != 0);
81}
82
84template<typename ...Args>
85auto MCRL2_UNORDERED_SET_CLASS::emplace(Args&&... args) -> std::pair<iterator, bool>
86{
87 if constexpr (Resize) { rehash_if_needed(); }
88
89 size_type bucket_index;
90 iterator it;
91
92 // Searching the current bucket list is safe and can be performed without locking.
93 if constexpr (allow_transparent)
94 {
95 // Compute the hash and corresponding bucket.
96 bucket_index = find_bucket_index(std::forward<Args>(args)...);
97 it = find_impl(bucket_index, std::forward<Args>(args)...);
98 }
99 else
100 {
101 // Compute the hash and corresponding bucket.
102 Key object(std::forward<Args>(args)...);
103 bucket_index = find_bucket_index(object);
104 it = find_impl(bucket_index, object);
105 }
106
107 if (it != end())
108 {
109 return std::make_pair(it, false);
110 }
111 else
112 {
113 return emplace_impl(bucket_index, std::forward<Args>(args)...);
114 }
115}
116
118auto MCRL2_UNORDERED_SET_CLASS::erase(typename MCRL2_UNORDERED_SET_CLASS::const_iterator it) -> iterator
119{
120 // Find the bucket that is pointed to and remove the key after the before iterator.
121 bucket_type& bucket = const_cast<bucket_type&>(*it.get_bucket_it());
122
123 // An element was removed from the hash table.
124 --m_number_of_elements;
125
126 // Remove the key that is after the before iterator.
127 iterator result_it(it.get_bucket_it(), m_buckets.end(), it.key_before_it(), bucket.erase_after(m_allocator, it.key_before_it()));
128
129 // We must guarantee that the iterator points the a valid key (and this might not be the case after removal).
130 result_it.goto_next_bucket();
131 return result_it;
132}
133
135template<typename ...Args>
136std::size_t MCRL2_UNORDERED_SET_CLASS::count(const Args&... args) const
137{
138 if constexpr (allow_transparent)
139 {
140 return find(args...) != end();
141 }
142 else
143 {
144 return find(Key(args...)) != end();
145 }
146}
147
149template<typename ...Args>
150void MCRL2_UNORDERED_SET_CLASS::erase(const Args&... args)
151{
152 if constexpr (allow_transparent)
153 {
154 erase_impl(args...);
155 }
156 else
157 {
158 erase_impl(Key(args...));
159 }
160}
161
163template<typename ...Args>
164auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) const -> const_iterator
165{
166 if constexpr (allow_transparent)
167 {
168 return find_impl(find_bucket_index(args...), args...);
169 }
170 else
171 {
172 Key element(args...);
173 return find_impl(find_bucket_index(element), element);
174 }
175}
176
178template<typename ...Args>
179auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) -> iterator
180{
181 if constexpr (allow_transparent)
182 {
183 return find_impl(find_bucket_index(args...), args...);
184 }
185 else
186 {
187 Key element(args...);
188 return find_impl(find_bucket_index(element), element);
189 }
190}
191
193void MCRL2_UNORDERED_SET_CLASS::rehash(std::size_t number_of_buckets)
194{
195 // Ensure that the number of buckets is a power of two greater than the minimum size.
196 number_of_buckets = std::max(utilities::round_up_to_power_of_two(number_of_buckets), minimum_size);
197
198 // If n is greater than the current number of buckets in the container (bucket_count), a rehash is forced.
199 // The new bucket count can either be equal or greater than n. Otherwise, it has no effect.
200 if (number_of_buckets <= bucket_count())
201 {
202 return;
203 }
204
205 // Update the number of mutexes.
206 if constexpr (!EnableLockfreeInsertion)
207 {
208 m_bucket_mutexes = std::vector<std::mutex>(std::max(number_of_buckets / BucketsPerMutex, 1ul));
209 }
210
211 // Create one bucket list for all elements in the hashtable.
212 bucket_type old_keys;
213 for (auto&& bucket : m_buckets)
214 {
215 old_keys.splice_after(old_keys.before_begin(), bucket);
216 }
217
218 assert(std::distance(old_keys.begin(), old_keys.end()) == static_cast<long>(m_number_of_elements));
219
220 // Recreate the hash table, but don't move or copy the old elements.
221 {
222 // clear() doesn't actually free the memory used and still results in an 3n peak.
223 std::vector<bucket_type>().swap(m_buckets);
224 }
225 m_buckets.resize(number_of_buckets);
226 m_buckets_mask = m_buckets.size() - 1;
227
228 // Fill the set with all elements that are stored in old_keys.
229 while (!old_keys.empty())
230 {
231 // Move the current element to this bucket.
232 bucket_type& bucket = m_buckets[find_bucket_index(old_keys.front())];
233 bucket.splice_front(bucket.before_begin(), old_keys);
234 }
235}
236
237template<typename T>
239{
240 // Calculate a histogram of the bucket lengths.
241 std::vector<std::size_t> histogram;
242
243 for (std::size_t index = 0; index < unordered_set.bucket_count(); ++index)
244 {
245 // Ensure that the current bucket fits within the histogram.
246 std::size_t bucket_length = unordered_set.bucket_size(index);
247 histogram.resize(std::max(histogram.size(), bucket_length + 1));
248 ++histogram[bucket_length];
249 }
250
251 mCRL2log(mcrl2::log::info) << "Table stores " << unordered_set.size() << " keys in " << unordered_set.bucket_count() << " buckets.\n";
252
253 for (std::size_t i = 0; i < histogram.size(); ++i)
254 {
255 mCRL2log(mcrl2::log::debug) << "There are " << histogram[i] << " buckets that store " << i << " keys.\n";
256 }
257}
258
260
262template<typename ...Args>
263auto MCRL2_UNORDERED_SET_CLASS::emplace_impl(size_type bucket_index, Args&&... args) -> std::pair<iterator, bool>
264{
265 bucket_type& bucket = m_buckets[bucket_index];
266
267 if constexpr (ThreadSafe)
268 {
269 if constexpr (EnableLockfreeInsertion)
270 {
271 // Construct a new node and put it at the front of the bucket list.
272 auto [key_it, added] = bucket.emplace_front_unique(m_allocator, m_equals, std::forward<Args>(args)...);
273 iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), key_it);
274
275 if (added)
276 {
277 m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
278 }
279
280 return std::make_pair(it, added);
281 }
282 else
283 {
284 // Obtain exclusive access to this bucket.
285 std::lock_guard g(m_bucket_mutexes[bucket_index % m_bucket_mutexes.size()]);
286
287 iterator it = find_impl(bucket_index, std::forward<Args>(args)...);
288 if (it != end())
289 {
290 // This element has been inserted in the mean time.
291 return std::make_pair(it, false);
292 }
293 else
294 {
295 // Construct a new node and put it at the front of the bucket list.
296 bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
297
298 // Return the iterator.
299 m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
300 iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
301 return std::make_pair(it, true);
302 }
303 }
304
305 }
306 else
307 {
308 // Construct a new node and put it at the front of the bucket list.
309 bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
310 ++m_number_of_elements;
311
312 // Return the iterator.
313 iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
314 return std::make_pair(it, true);
315 }
316
317}
318
320template<typename ...Args>
321void MCRL2_UNORDERED_SET_CLASS::erase_impl(const Args&... args)
322{
323 bucket_type& bucket = m_buckets[find_bucket_index(args...)];
324
325 // Loop over the elements in the bucket until the key was found.
326 typename bucket_type::const_iterator before_it = bucket.before_begin();
327 for (typename bucket_type::iterator it = bucket.begin(); it != bucket.end();)
328 {
329 if (m_equals(*it, args...))
330 {
331 // Erase the current element and stop iterating.
332 --m_number_of_elements;
333 it = bucket.erase_after(m_allocator, before_it);
334 break;
335 }
336 else
337 {
338 // Both iterators move one place forward.
339 ++before_it;
340 ++it;
341 }
342 }
343}
344
345
347template<typename ...Args>
348std::size_t MCRL2_UNORDERED_SET_CLASS::find_bucket_index(const Args&... args) const
349{
350 std::size_t hash = m_hash(args...);
352 assert(m_buckets_mask == m_buckets.size() - 1);
353 std::size_t index = hash & m_buckets_mask;
354 assert(index < m_buckets.size());
355 return index;
356}
357
359template<typename ...Args>
360auto MCRL2_UNORDERED_SET_CLASS::find_impl(size_type bucket_index, const Args&... args) const -> const_iterator
361{
362 const auto& bucket = m_buckets[bucket_index];
363
364 // Search through the bucket to find an element that is equivalent.
365 auto before_it = bucket.before_begin();
366 for(auto it = bucket.begin(); it != bucket.end(); ++it)
367 {
368 auto& key = *it;
369 if (m_equals(key, args...))
370 {
371 return const_iterator(m_buckets.begin() + bucket_index, m_buckets.end(), before_it, it);
372 }
373
374 before_it = it;
375 }
376
377 return end();
378}
379
381void MCRL2_UNORDERED_SET_CLASS::rehash_if_needed()
382{
383 if (load_factor() >= max_load_factor())
384 {
385 rehash(m_buckets.size() * 2);
386 }
387}
388
389#undef MCRL2_UNORDERED_SET_CLASS
390#undef MCRL2_UNORDERED_SET_TEMPLATES
391
392} // namespace mcrl2::utilities
393
394#endif // MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
A unordered_set with a subset of the interface of std::unordered_set that only stores a single pointe...
size_type bucket_count() const noexcept
size_type size() const noexcept
size_type bucket_size(size_type n) const noexcept
#define mCRL2log(LEVEL)
mCRL2log(LEVEL) provides the stream used to log.
Definition logger.h:391
float bytes_to_megabytes(std::size_t bytes)
static constexpr std::size_t minimum_size
static constexpr long BucketsPerMutex
Number of buckets per mutex.
static T round_up_to_power_of_two(T value)
static constexpr bool EnableLockfreeInsertion
Enables lockfree implementation of emplace.
void print_performance_statistics(const T &unordered_set)
Prints various information for unordered_set like data structures.
#define hash(l, r, m)
Definition tree_set.cpp:23
#define MCRL2_UNORDERED_SET_CLASS
#define MCRL2_UNORDERED_SET_TEMPLATES