Line data Source code
1 : // Author(s): Maurice Laveaux
2 : // Copyright: see the accompanying file COPYING or copy at
3 : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4 : //
5 : // Distributed under the Boost Software License, Version 1.0.
6 : // (See accompanying file LICENSE_1_0.txt or copy at
7 : // http://www.boost.org/LICENSE_1_0.txt)
8 :
9 : #ifndef MCRL2_UTILITIES_UNORDERED_SET_H
10 : #define MCRL2_UTILITIES_UNORDERED_SET_H
11 :
12 : #include "mcrl2/utilities/block_allocator.h"
13 : #include "mcrl2/utilities/logger.h"
14 : #include "mcrl2/utilities/detail/bucket_list.h"
15 :
16 : #include <cmath>
17 : #include <mutex>
18 : #include <vector>
19 :
20 : namespace mcrl2::utilities
21 : {
22 :
23 : /// \brief Enables lockfree implementation of emplace.
24 : constexpr static bool EnableLockfreeInsertion = true;
25 :
26 : /// \brief Number of buckets per mutex.
27 : constexpr static long BucketsPerMutex = 256;
28 :
29 : /// \brief Prints various information for unordered_set like data structures.
30 : template<typename T>
31 : void print_performance_statistics(const T& unordered_set);
32 :
33 : // Forward declaration of an unordered_map.
34 : template<typename Key,
35 : typename T,
36 : typename Hash = std::hash<Key>,
37 : typename KeyEqual = std::equal_to<Key>,
38 : typename Allocator = std::allocator<Key>,
39 : bool ThreadSafe = false,
40 : bool Resize = true>
41 : class unordered_map;
42 :
43 : namespace detail
44 : {
45 : // Check for the existence of the is_transparent type.
46 : template <typename...> using void_t = void;
47 :
48 : template <typename X, typename = void> struct is_transparent : std::false_type
49 : {};
50 :
51 : template <typename X>
52 : struct is_transparent<X, void_t<typename X::is_transparent>> : std::true_type
53 : {};
54 : } // namespace detail
55 :
56 :
57 : /// \brief A unordered_set with a subset of the interface of std::unordered_set that only stores a single pointer for each element.
58 : /// \details Only supports input iterators (not bidirectional) compared to std::unordered_set. Furthermore, iterating over all elements
59 : /// in the set is O(n + m), where n is the number of elements in the set and m the number of empty buckets. Also incrementing the
60 : /// iterator is O(m) as opposed to O(1) as the standard mandates.
61 : ///
62 : /// Additionally, the unordered_set supports allocators that have a specialized allocate_args(args...) to vary the allocation size based
63 : /// on the arguments used. This is required to store _aterm_appl classes with the function symbol arity determined at runtime.
64 : ///
65 : /// Threadsafe enables concurrent emplace calls, and Resize enables automatically resizing if needed.
66 : ///
67 : /// \todo Does not implement std::unordered_map equal_range and swap.
68 : template<typename Key,
69 : typename Hash = std::hash<Key>,
70 : typename Equals = std::equal_to<Key>,
71 : typename Allocator = std::allocator<Key>,
72 : bool ThreadSafe = false,
73 : bool Resize = true>
74 : class unordered_set
75 : {
76 : static_assert (!(ThreadSafe && Resize), "ThreadSafe cannot be enabled together with automatic resizing.");
77 :
78 : private:
79 : /// \brief Combine the bucket list and a lock that locks modifications to the bucket list.
80 : using bucket_type = detail::bucket_list<Key, Allocator>;
81 : using bucket_iterator = typename std::vector<bucket_type>::iterator;
82 : using const_bucket_iterator = typename std::vector<bucket_type>::const_iterator;
83 : using mutex_type = std::mutex;
84 :
85 : template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
86 : friend class unordered_map;
87 :
88 : public:
89 : /// \brief An iterator over all elements in the unordered set.
90 : template<typename Bucket, bool Constant>
91 : class unordered_set_iterator : std::iterator_traits<Key>
92 : {
93 : private:
94 : friend class unordered_set;
95 :
96 : template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
97 : friend class unordered_map;
98 :
99 : using bucket_it = typename std::vector<Bucket>::const_iterator;
100 : using key_it_type = typename Bucket::const_iterator;
101 :
102 : public:
103 : using value_type = Key;
104 : using reference = typename std::conditional<Constant, const Key&, Key&>::type;
105 : using pointer = typename std::conditional<Constant, const Key*, Key*>::type;
106 : using difference_type = std::ptrdiff_t;
107 : using iterator_category = std::forward_iterator_tag;
108 801 :
109 180133812 : unordered_set_iterator() = default;
110 :
111 93801 : operator unordered_set_iterator<Bucket, true>() const
112 : {
113 93801 : return unordered_set_iterator<Bucket, true>(m_bucket_it, m_bucket_end, m_key_before_it, m_key_it);
114 : }
115 0 :
116 1722379 : unordered_set_iterator& operator++()
117 0 : {
118 1722379 : ++m_key_before_it;
119 1722379 : ++m_key_it;
120 1722379 : goto_next_bucket();
121 1722379 : return *this;
122 : }
123 :
124 : unordered_set_iterator operator++(int)
125 : {
126 : unordered_set_iterator copy(*this);
127 : ++(*this);
128 : return *copy;
129 : }
130 801 :
131 183103999 : reference operator*() const
132 801 : {
133 183103999 : return const_cast<reference>(*m_key_it);
134 : }
135 :
136 455766 : pointer operator->() const
137 : {
138 455766 : return const_cast<pointer>(&(*m_key_it));
139 : }
140 801 :
141 184053591 : bool operator!=(const unordered_set_iterator& other) const
142 801 : {
143 184053591 : return m_key_it != other.m_key_it || m_bucket_it != other.m_bucket_it;
144 : }
145 :
146 527673 : bool operator==(const unordered_set_iterator& other) const
147 : {
148 527673 : return !(*this != other);
149 : }
150 :
151 : private:
152 801 : /// \brief Construct an iterator over all keys passed in this bucket and all remaining buckets.
153 183581014 : unordered_set_iterator(bucket_it it, bucket_it end, key_it_type before_it, key_it_type key) :
154 183581014 : m_bucket_it(it), m_bucket_end(end), m_key_before_it(before_it), m_key_it(key)
155 183580213 : {}
156 :
157 0 : /// \brief Construct the begin iterator (over all elements).
158 57822 : unordered_set_iterator(bucket_it it, bucket_it end) :
159 57822 : m_bucket_it(it), m_bucket_end(end), m_key_before_it((*it).before_begin()), m_key_it((*it).begin())
160 0 : {
161 57822 : goto_next_bucket();
162 57822 : }
163 :
164 1118 : /// \brief Construct the end iterator
165 184181907 : explicit unordered_set_iterator(bucket_it it) :
166 184181907 : m_bucket_it(it)
167 184180789 : {}
168 :
169 1651061 : operator unordered_set_iterator<Bucket, false>() const
170 : {
171 1651061 : return unordered_set_iterator<Bucket, false>(m_bucket_it, m_bucket_end, m_key_before_it, m_key_it);
172 : }
173 :
174 0 : /// \returns A reference to the before key iterator.
175 711434 : key_it_type& key_before_it() { return m_key_before_it; }
176 :
177 : /// \returns A reference to the key iterator.
178 : key_it_type& key_it() { return m_key_it; }
179 :
180 0 : /// \returns A reference to the bucket iterator.
181 711434 : bucket_it& get_bucket_it() { return m_bucket_it; }
182 :
183 0 : /// \brief Iterate to the next non-empty bucket.
184 2135918 : void goto_next_bucket()
185 : {
186 0 : // Find the first bucket that is not empty.
187 492102047 : while(!(m_key_it != detail::EndIterator))
188 : {
189 0 : // Take the next bucket and reset the key iterator.
190 490044099 : ++m_bucket_it;
191 0 :
192 490044099 : if (m_bucket_it != m_bucket_end)
193 0 : {
194 489966129 : m_key_it = (*m_bucket_it).begin();
195 489966129 : m_key_before_it = (*m_bucket_it).before_begin();
196 : }
197 : else
198 : {
199 0 : // Reached the end of the buckets.
200 77970 : break;
201 : }
202 : }
203 :
204 0 : // The current bucket contains elements or we are at the end.
205 2135918 : assert(m_bucket_it == m_bucket_end || m_key_it != detail::EndIterator);
206 2135918 : }
207 :
208 : bucket_it m_bucket_it;
209 : bucket_it m_bucket_end;
210 : key_it_type m_key_before_it;
211 : key_it_type m_key_it; // Invariant: m_key_it != EndIterator.
212 : };
213 :
214 : using key_type = Key;
215 : using value_type = Key;
216 : using hasher = Hash;
217 : using key_equal = Equals;
218 : using allocator_type = typename bucket_type::NodeAllocator;
219 :
220 : using reference = value_type&;
221 : using const_reference = const value_type&;
222 : using pointer = typename std::allocator_traits<Allocator>::pointer;
223 : using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
224 :
225 : using const_local_iterator = typename bucket_type::const_iterator;
226 : using local_iterator = typename bucket_type::const_iterator;
227 : using const_iterator = unordered_set_iterator<bucket_type, true>;
228 : using iterator = const_iterator;
229 :
230 : using size_type = std::size_t;
231 : using difference_type = std::ptrdiff_t;
232 :
233 151 : unordered_set() { rehash(0); }
234 :
235 : /// \brief Constructs an unordered_set that contains bucket_count number of buckets.
236 23946 : explicit unordered_set(size_type bucket_count,
237 : const hasher& hash = hasher(),
238 : const key_equal& equals = key_equal())
239 : : m_hash(hash),
240 23946 : m_equals(equals)
241 : {
242 23946 : rehash(bucket_count);
243 23946 : }
244 :
245 : // Copy operators.
246 : unordered_set(const unordered_set& set);
247 : unordered_set& operator=(const unordered_set& set);
248 :
249 : // Default move operators.
250 1 : unordered_set(unordered_set&& other) = default;
251 0 : unordered_set& operator=(unordered_set&& other) = default;
252 :
253 : ~unordered_set();
254 :
255 : /// \returns A reference to the local node allocator.
256 0 : const allocator_type& get_allocator() const noexcept { return m_allocator; }
257 10150 : allocator_type& get_allocator() noexcept { return m_allocator; }
258 :
259 0 : /// \returns An iterator over all keys.
260 54879 : iterator begin() { return iterator(m_buckets.begin(), m_buckets.end()); }
261 182336354 : iterator end() { return iterator(m_buckets.end()); }
262 :
263 : /// \returns A const iterator over all keys.
264 4061 : const_iterator begin() const { return const_iterator(m_buckets.begin(), m_buckets.end()); }
265 1844435 : const_iterator end() const { return const_iterator(m_buckets.end()); }
266 :
267 : /// \returns A const iterator over all keys.
268 : const_iterator cbegin() const { return const_iterator(m_buckets.begin(), m_buckets.end()); }
269 : const_iterator cend() const { return const_iterator(m_buckets.end()); }
270 :
271 : /// \returns True iff the set is empty.
272 88650 : bool empty() const noexcept { return size() == 0; }
273 :
274 0 : /// \returns The amount of elements stored in this set.
275 321764 : size_type size() const noexcept { return m_number_of_elements; }
276 : size_type max_size() const noexcept { return m_buckets.max_size(); }
277 :
278 : /// \brief Removes all elements from the set.
279 : /// \details Does not free the vector of buckets itself.
280 : void clear();
281 :
282 : /// \brief Inserts an element Key(args...) into the set if it did not already exist.
283 : /// \returns A pair of the iterator pointing to the element and a boolean that is true iff
284 : /// a new element was inserted (as opposed to it already existing in the set).
285 : /// \threadsafe
286 : template<typename ...Args>
287 : std::pair<iterator, bool> emplace(Args&&... args);
288 :
289 : /// \brief Erases the given key_type(args...) from the unordered set.
290 : template<typename...Args>
291 : void erase(const Args&... args);
292 :
293 : /// \brief Erases the element pointed to by the iterator.
294 : /// \returns An iterator to the next key.
295 : iterator erase(const_iterator it);
296 :
297 : /// \brief Counts the number of occurrences of the given key (1 when it exists and 0 otherwise).
298 : template<typename ...Args>
299 : size_type count(const Args&... args) const;
300 :
301 : /// \brief Searches whether an object key_type(args...) occurs in the set.
302 : /// \returns An iterator to the matching element or the end when this object does not exist.
303 : template<typename...Args>
304 : const_iterator find(const Args&... args) const;
305 :
306 : template<typename...Args>
307 : iterator find(const Args&... args);
308 :
309 : /// \returns An iterator to the elements in the given bucket with index n.
310 : local_iterator begin(size_type n) { return m_buckets[n].begin(); }
311 : local_iterator end(size_type n) { return m_buckets[n].end(); }
312 :
313 : const_local_iterator begin(size_type n) const { return m_buckets[n].begin(); }
314 : const_local_iterator end(size_type n) const { return m_buckets[n].end(); }
315 :
316 : const_local_iterator cbegin(size_type n) const { return m_buckets[n].begin(); }
317 : const_local_iterator cend(size_type n) const { return m_buckets[n].end(); }
318 :
319 10 : /// \returns The number of buckets.
320 337596 : size_type bucket_count() const noexcept { return m_buckets_mask + 1; }
321 : size_type max_bucket_count() const noexcept { return m_buckets.max_size(); }
322 :
323 : size_type bucket_size(size_type n) const noexcept { return std::distance(m_buckets[n].begin(), m_buckets[n].end()); }
324 : size_type bucket(const key_type& key) const noexcept { return std::distance(m_buckets.begin(), find_bucket_index(key)); }
325 0 :
326 104731 : float load_factor() const { return static_cast<float>(size()) / bucket_count(); }
327 209155 : float max_load_factor() const { return m_max_load_factor; }
328 : void max_load_factor(float factor) { m_max_load_factor = factor; }
329 :
330 : /// \brief Resize the number buckets to at least number_of_buckets.
331 : void rehash(size_type number_of_buckets);
332 :
333 : /// \brief Resizes the set to the given number of elements.
334 916 : void reserve(size_type count) { rehash(static_cast<std::size_t>(std::ceil(static_cast<float>(count) / max_load_factor()))); }
335 :
336 : hasher hash_function() const { return m_hash; }
337 : key_equal key_eq() const { return m_equals; }
338 :
339 : /// \returns The number of elements that can be present in the set before resizing.
340 11 : /// \details Not standard.
341 1981 : size_type capacity() const noexcept { return m_buckets.size(); }
342 :
343 : /// \brief Resizes the hash table if necessary.
344 : /// \details Not standard.
345 : void rehash_if_needed();
346 :
347 : private:
348 : template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
349 : friend class unordered_map;
350 :
351 : /// \brief Inserts T(args...) into the given bucket, assumes that it did not exists before.
352 : /// \threadsafe
353 : template<typename ...Args>
354 : std::pair<iterator, bool> emplace_impl(size_type bucket_index, Args&&... args);
355 :
356 : /// \brief Removes T(args...) from the set.
357 : template<typename ...Args>
358 : void erase_impl(const Args&... args);
359 :
360 : /// \returns The index of the bucket that might contain the element constructed by the given arguments.
361 : template<typename ...Args>
362 : size_type find_bucket_index(const Args&... args) const;
363 :
364 : /// \brief Searches for the element in the given bucket.
365 : template<typename ...Args>
366 : const_iterator find_impl(size_type bucket_index, const Args&... args) const;
367 :
368 : /// \brief True iff the hash and equals functions allow transparent lookup,
369 : static constexpr bool allow_transparent = detail::is_transparent<Hash>() && detail::is_transparent<Equals>();
370 :
371 : /// \brief The number of elements stored in this set.
372 : std::conditional_t<ThreadSafe, std::atomic<size_type>, size_type> m_number_of_elements = 0;
373 :
374 : /// \brief Always equal to m_buckets.size() - 1.
375 : std::conditional_t<ThreadSafe, std::atomic<size_type>, size_type> m_buckets_mask = 0;
376 :
377 : std::vector<bucket_type> m_buckets;
378 : std::vector<std::mutex> m_bucket_mutexes;
379 :
380 : float m_max_load_factor = 1.0f;
381 :
382 : hasher m_hash = hasher();
383 : key_equal m_equals = key_equal();
384 : allocator_type m_allocator;
385 : };
386 :
387 : /// \brief A specialization for large unordered sets that uses the block_allocator internally by default.
388 : template<typename Key,
389 : typename Hash = std::hash<Key>,
390 : typename Equals = std::equal_to<Key>,
391 : typename Allocator = mcrl2::utilities::block_allocator<Key>,
392 : bool ThreadSafe = false>
393 : using unordered_set_large = unordered_set<Key, Hash, Equals, Allocator, ThreadSafe>;
394 :
395 : } // namespace mcrl2::utilities
396 :
397 : #include "mcrl2/utilities/detail/unordered_set_implementation.h"
398 :
399 : #endif // MCRL2_UTILITIES_UNORDERED_SET_H
|