LCOV - code coverage report
Current view: top level - utilities/include/mcrl2/utilities - unordered_set.h (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 66 84 78.6 %
Date: 2024-04-21 03:44:01 Functions: 441 508 86.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Maurice Laveaux
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : 
       9             : #ifndef MCRL2_UTILITIES_UNORDERED_SET_H
      10             : #define MCRL2_UTILITIES_UNORDERED_SET_H
      11             : 
      12             : #include "mcrl2/utilities/block_allocator.h"
      13             : #include "mcrl2/utilities/logger.h"
      14             : #include "mcrl2/utilities/detail/bucket_list.h"
      15             : 
      16             : #include <cmath>
      17             : #include <mutex>
      18             : #include <vector>
      19             : 
      20             : namespace mcrl2::utilities
      21             : {
      22             : 
      23             : /// \brief Enables lockfree implementation of emplace.
      24             : constexpr static bool EnableLockfreeInsertion = true;
      25             : 
      26             : /// \brief Number of buckets per mutex.
      27             : constexpr static long BucketsPerMutex = 256;
      28             : 
      29             : /// \brief Prints various information for unordered_set like data structures.
      30             : template<typename T>
      31             : void print_performance_statistics(const T& unordered_set);
      32             : 
      33             : // Forward declaration of an unordered_map.
      34             : template<typename Key,
      35             :          typename T,
      36             :          typename Hash = std::hash<Key>,
      37             :          typename KeyEqual = std::equal_to<Key>,
      38             :          typename Allocator = std::allocator<Key>,
      39             :          bool ThreadSafe = false,
      40             :          bool Resize = true>
      41             : class unordered_map;
      42             : 
      43             : namespace detail
      44             : {
      45             :   // Check for the existence of the is_transparent type.
      46             :   template <typename...> using void_t = void;
      47             : 
      48             :   template <typename X, typename = void> struct is_transparent : std::false_type
      49             :   {};
      50             : 
      51             :   template <typename X>
      52             :   struct is_transparent<X, void_t<typename X::is_transparent>> : std::true_type
      53             :   {};
      54             : } // namespace detail
      55             : 
      56             : 
      57             : /// \brief A unordered_set with a subset of the interface of std::unordered_set that only stores a single pointer for each element.
      58             : /// \details Only supports input iterators (not bidirectional) compared to std::unordered_set. Furthermore, iterating over all elements
      59             : ///          in the set is O(n + m), where n is the number of elements in the set and m the number of empty buckets. Also incrementing the
      60             : ///          iterator is O(m) as opposed to O(1) as the standard mandates.
      61             : ///
      62             : ///          Additionally, the unordered_set supports allocators that have a specialized allocate_args(args...) to vary the allocation size based
      63             : ///          on the arguments used. This is required to store _aterm_appl classes with the function symbol arity determined at runtime.
      64             : ///
      65             : ///          Threadsafe enables concurrent emplace calls, and Resize enables automatically resizing if needed.
      66             : ///
      67             : /// \todo Does not implement std::unordered_map equal_range and swap.
      68             : template<typename Key,
      69             :          typename Hash = std::hash<Key>,
      70             :          typename Equals = std::equal_to<Key>,
      71             :          typename Allocator = std::allocator<Key>,
      72             :          bool ThreadSafe = false,
      73             :          bool Resize = true>
      74             : class unordered_set
      75             : {
      76             :   static_assert (!(ThreadSafe && Resize), "ThreadSafe cannot be enabled together with automatic resizing.");
      77             : 
      78             : private:
      79             :   /// \brief Combine the bucket list and a lock that locks modifications to the bucket list.
      80             :   using bucket_type = detail::bucket_list<Key, Allocator>;
      81             :   using bucket_iterator = typename std::vector<bucket_type>::iterator;
      82             :   using const_bucket_iterator = typename std::vector<bucket_type>::const_iterator;
      83             :   using mutex_type = std::mutex;
      84             : 
      85             :   template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
      86             :   friend class unordered_map;
      87             : 
      88             : public:
      89             :   /// \brief An iterator over all elements in the unordered set.
      90             :   template<typename Bucket, bool Constant>
      91             :   class unordered_set_iterator : std::iterator_traits<Key>
      92             :   {
      93             :   private:
      94             :     friend class unordered_set;
      95             : 
      96             :     template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
      97             :     friend class unordered_map;
      98             : 
      99             :     using bucket_it = typename std::vector<Bucket>::const_iterator;
     100             :     using key_it_type = typename Bucket::const_iterator;
     101             : 
     102             :   public:
     103             :     using value_type = Key;
     104             :     using reference = typename std::conditional<Constant, const Key&, Key&>::type;
     105             :     using pointer = typename std::conditional<Constant, const Key*, Key*>::type;
     106             :     using difference_type = std::ptrdiff_t;
     107             :     using iterator_category = std::forward_iterator_tag;
     108         801 : 
     109   180133812 :     unordered_set_iterator() = default;
     110             : 
     111       93801 :     operator unordered_set_iterator<Bucket, true>() const
     112             :     {
     113       93801 :       return unordered_set_iterator<Bucket, true>(m_bucket_it, m_bucket_end, m_key_before_it, m_key_it);
     114             :     }
     115           0 : 
     116     1722379 :     unordered_set_iterator& operator++()
     117           0 :     {
     118     1722379 :       ++m_key_before_it;
     119     1722379 :       ++m_key_it;
     120     1722379 :       goto_next_bucket();
     121     1722379 :       return *this;
     122             :     }
     123             : 
     124             :     unordered_set_iterator operator++(int)
     125             :     {
     126             :       unordered_set_iterator copy(*this);
     127             :       ++(*this);
     128             :       return *copy;
     129             :     }
     130         801 : 
     131   183103999 :     reference operator*() const
     132         801 :     {
     133   183103999 :       return const_cast<reference>(*m_key_it);
     134             :     }
     135             : 
     136      455766 :     pointer operator->() const
     137             :     {
     138      455766 :       return const_cast<pointer>(&(*m_key_it));
     139             :     }
     140         801 : 
     141   184053591 :     bool operator!=(const unordered_set_iterator& other) const
     142         801 :     {
     143   184053591 :       return m_key_it != other.m_key_it || m_bucket_it != other.m_bucket_it;
     144             :     }
     145             : 
     146      527673 :     bool operator==(const unordered_set_iterator& other) const
     147             :     {
     148      527673 :       return !(*this != other);
     149             :     }
     150             : 
     151             :   private:
     152         801 :     /// \brief Construct an iterator over all keys passed in this bucket and all remaining buckets.
     153   183581014 :     unordered_set_iterator(bucket_it it, bucket_it end, key_it_type before_it, key_it_type key) :
     154   183581014 :       m_bucket_it(it), m_bucket_end(end), m_key_before_it(before_it), m_key_it(key)
     155   183580213 :     {}
     156             : 
     157           0 :     /// \brief Construct the begin iterator (over all elements).
     158       57822 :     unordered_set_iterator(bucket_it it, bucket_it end) :
     159       57822 :       m_bucket_it(it), m_bucket_end(end), m_key_before_it((*it).before_begin()), m_key_it((*it).begin())
     160           0 :     {
     161       57822 :       goto_next_bucket();
     162       57822 :     }
     163             : 
     164        1118 :     /// \brief Construct the end iterator
     165   184181907 :     explicit unordered_set_iterator(bucket_it it) :
     166   184181907 :       m_bucket_it(it)
     167   184180789 :     {}
     168             : 
     169     1651061 :     operator unordered_set_iterator<Bucket, false>() const
     170             :     {
     171     1651061 :       return unordered_set_iterator<Bucket, false>(m_bucket_it, m_bucket_end, m_key_before_it, m_key_it);
     172             :     }
     173             : 
     174           0 :     /// \returns A reference to the before key iterator.
     175      711434 :     key_it_type& key_before_it() { return m_key_before_it; }
     176             : 
     177             :     /// \returns A reference to the key iterator.
     178             :     key_it_type& key_it() { return m_key_it; }
     179             : 
     180           0 :     /// \returns A reference to the bucket iterator.
     181      711434 :     bucket_it& get_bucket_it() { return m_bucket_it; }
     182             : 
     183           0 :     /// \brief Iterate to the next non-empty bucket.
     184     2135918 :     void goto_next_bucket()
     185             :     {
     186           0 :       // Find the first bucket that is not empty.
     187   492102047 :       while(!(m_key_it != detail::EndIterator))
     188             :       {
     189           0 :         // Take the next bucket and reset the key iterator.
     190   490044099 :         ++m_bucket_it;
     191           0 : 
     192   490044099 :         if (m_bucket_it != m_bucket_end)
     193           0 :         {
     194   489966129 :           m_key_it = (*m_bucket_it).begin();
     195   489966129 :           m_key_before_it = (*m_bucket_it).before_begin();
     196             :         }
     197             :         else
     198             :         {
     199           0 :           // Reached the end of the buckets.
     200       77970 :           break;
     201             :         }
     202             :       }
     203             : 
     204           0 :       // The current bucket contains elements or we are at the end.
     205     2135918 :       assert(m_bucket_it == m_bucket_end || m_key_it != detail::EndIterator);
     206     2135918 :     }
     207             : 
     208             :     bucket_it m_bucket_it;
     209             :     bucket_it m_bucket_end;
     210             :     key_it_type m_key_before_it;
     211             :     key_it_type m_key_it; // Invariant: m_key_it != EndIterator.
     212             :   };
     213             : 
     214             :   using key_type = Key;
     215             :   using value_type = Key;
     216             :   using hasher = Hash;
     217             :   using key_equal = Equals;
     218             :   using allocator_type = typename bucket_type::NodeAllocator;
     219             : 
     220             :   using reference = value_type&;
     221             :   using const_reference = const value_type&;
     222             :   using pointer = typename std::allocator_traits<Allocator>::pointer;
     223             :   using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
     224             : 
     225             :   using const_local_iterator = typename bucket_type::const_iterator;
     226             :   using local_iterator = typename bucket_type::const_iterator;
     227             :   using const_iterator = unordered_set_iterator<bucket_type, true>;
     228             :   using iterator = const_iterator;
     229             : 
     230             :   using size_type = std::size_t;
     231             :   using difference_type = std::ptrdiff_t;
     232             : 
     233         151 :   unordered_set() { rehash(0); }
     234             : 
     235             :   /// \brief Constructs an unordered_set that contains bucket_count number of buckets.
     236       23946 :   explicit unordered_set(size_type bucket_count,
     237             :     const hasher& hash = hasher(),
     238             :     const key_equal& equals = key_equal())
     239             :     : m_hash(hash),
     240       23946 :       m_equals(equals)
     241             :   {
     242       23946 :     rehash(bucket_count);
     243       23946 :   }
     244             : 
     245             :   // Copy operators.
     246             :   unordered_set(const unordered_set& set);
     247             :   unordered_set& operator=(const unordered_set& set);
     248             : 
     249             :   // Default move operators.
     250           1 :   unordered_set(unordered_set&& other) = default;
     251           0 :   unordered_set& operator=(unordered_set&& other) = default;
     252             : 
     253             :   ~unordered_set();
     254             : 
     255             :   /// \returns A reference to the local node allocator.
     256           0 :   const allocator_type& get_allocator() const noexcept { return m_allocator; }
     257       10150 :   allocator_type& get_allocator() noexcept { return m_allocator; }
     258             : 
     259           0 :   /// \returns An iterator over all keys.
     260       54879 :   iterator begin() { return iterator(m_buckets.begin(), m_buckets.end()); }
     261   182336354 :   iterator end() { return iterator(m_buckets.end()); }
     262             : 
     263             :   /// \returns A const iterator over all keys.
     264        4061 :   const_iterator begin() const { return const_iterator(m_buckets.begin(), m_buckets.end()); }
     265     1844435 :   const_iterator end() const { return const_iterator(m_buckets.end()); }
     266             : 
     267             :   /// \returns A const iterator over all keys.
     268             :   const_iterator cbegin() const { return const_iterator(m_buckets.begin(), m_buckets.end()); }
     269             :   const_iterator cend() const { return const_iterator(m_buckets.end()); }
     270             : 
     271             :   /// \returns True iff the set is empty.
     272       88650 :   bool empty() const noexcept { return size() == 0; }
     273             : 
     274           0 :   /// \returns The amount of elements stored in this set.
     275      321764 :   size_type size() const noexcept { return m_number_of_elements; }
     276             :   size_type max_size() const noexcept { return m_buckets.max_size(); }
     277             : 
     278             :   /// \brief Removes all elements from the set.
     279             :   /// \details Does not free the vector of buckets itself.
     280             :   void clear();
     281             : 
     282             :   /// \brief Inserts an element Key(args...) into the set if it did not already exist.
     283             :   /// \returns A pair of the iterator pointing to the element and a boolean that is true iff
     284             :   ///         a new element was inserted (as opposed to it already existing in the set).
     285             :   /// \threadsafe
     286             :   template<typename ...Args>
     287             :   std::pair<iterator, bool> emplace(Args&&... args);
     288             : 
     289             :   /// \brief Erases the given key_type(args...) from the unordered set.
     290             :   template<typename...Args>
     291             :   void erase(const Args&... args);
     292             : 
     293             :   /// \brief Erases the element pointed to by the iterator.
     294             :   /// \returns An iterator to the next key.
     295             :   iterator erase(const_iterator it);
     296             : 
     297             :   /// \brief Counts the number of occurrences of the given key (1 when it exists and 0 otherwise).
     298             :   template<typename ...Args>
     299             :   size_type count(const Args&... args) const;
     300             : 
     301             :   /// \brief Searches whether an object key_type(args...) occurs in the set.
     302             :   /// \returns An iterator to the matching element or the end when this object does not exist.
     303             :   template<typename...Args>
     304             :   const_iterator find(const Args&... args) const;
     305             : 
     306             :   template<typename...Args>
     307             :   iterator find(const Args&... args);
     308             : 
     309             :   /// \returns An iterator to the elements in the given bucket with index n.
     310             :   local_iterator begin(size_type n) { return m_buckets[n].begin(); }
     311             :   local_iterator end(size_type n) { return m_buckets[n].end(); }
     312             : 
     313             :   const_local_iterator begin(size_type n) const { return m_buckets[n].begin(); }
     314             :   const_local_iterator end(size_type n) const { return m_buckets[n].end(); }
     315             : 
     316             :   const_local_iterator cbegin(size_type n) const { return m_buckets[n].begin(); }
     317             :   const_local_iterator cend(size_type n) const { return m_buckets[n].end(); }
     318             : 
     319          10 :   /// \returns The number of buckets.
     320      337596 :   size_type bucket_count() const noexcept { return m_buckets_mask + 1; }
     321             :   size_type max_bucket_count() const noexcept { return m_buckets.max_size(); }
     322             : 
     323             :   size_type bucket_size(size_type n) const noexcept { return std::distance(m_buckets[n].begin(), m_buckets[n].end()); }
     324             :   size_type bucket(const key_type& key) const noexcept { return std::distance(m_buckets.begin(), find_bucket_index(key)); }
     325           0 : 
     326      104731 :   float load_factor() const { return static_cast<float>(size()) / bucket_count(); }
     327      209155 :   float max_load_factor() const { return m_max_load_factor; }
     328             :   void max_load_factor(float factor) { m_max_load_factor = factor; }
     329             : 
     330             :   /// \brief Resize the number buckets to at least number_of_buckets.
     331             :   void rehash(size_type number_of_buckets);
     332             : 
     333             :   /// \brief Resizes the set to the given number of elements.
     334         916 :   void reserve(size_type count) { rehash(static_cast<std::size_t>(std::ceil(static_cast<float>(count) / max_load_factor()))); }
     335             : 
     336             :   hasher hash_function() const { return m_hash; }
     337             :   key_equal key_eq() const { return m_equals; }
     338             : 
     339             :   /// \returns The number of elements that can be present in the set before resizing.
     340          11 :   /// \details Not standard.
     341        1981 :   size_type capacity() const noexcept { return m_buckets.size(); }
     342             : 
     343             :   /// \brief Resizes the hash table if necessary.
     344             :   /// \details Not standard.
     345             :   void rehash_if_needed();
     346             : 
     347             : private:
     348             :   template<typename Key_, typename T, typename Hash_, typename KeyEqual, typename Allocator_, bool ThreadSafe_, bool Resize_>
     349             :   friend class unordered_map;
     350             : 
     351             :   /// \brief Inserts T(args...) into the given bucket, assumes that it did not exists before.
     352             :   /// \threadsafe
     353             :   template<typename ...Args>
     354             :   std::pair<iterator, bool> emplace_impl(size_type bucket_index, Args&&... args);
     355             : 
     356             :   /// \brief Removes T(args...) from the set.
     357             :   template<typename ...Args>
     358             :   void erase_impl(const Args&... args);
     359             : 
     360             :   /// \returns The index of the bucket that might contain the element constructed by the given arguments.
     361             :   template<typename ...Args>
     362             :   size_type find_bucket_index(const Args&... args) const;
     363             : 
     364             :   /// \brief Searches for the element in the given bucket.
     365             :   template<typename ...Args>
     366             :   const_iterator find_impl(size_type bucket_index, const Args&... args) const;
     367             : 
     368             :   /// \brief True iff the hash and equals functions allow transparent lookup,
     369             :   static constexpr bool allow_transparent = detail::is_transparent<Hash>() && detail::is_transparent<Equals>();
     370             : 
     371             :   /// \brief The number of elements stored in this set.
     372             :   std::conditional_t<ThreadSafe, std::atomic<size_type>, size_type> m_number_of_elements = 0;
     373             : 
     374             :   /// \brief Always equal to m_buckets.size() - 1.
     375             :   std::conditional_t<ThreadSafe, std::atomic<size_type>, size_type> m_buckets_mask = 0;
     376             : 
     377             :   std::vector<bucket_type> m_buckets;
     378             :   std::vector<std::mutex> m_bucket_mutexes;
     379             : 
     380             :   float m_max_load_factor = 1.0f;
     381             : 
     382             :   hasher m_hash = hasher();
     383             :   key_equal m_equals = key_equal();
     384             :   allocator_type m_allocator;
     385             : };
     386             : 
     387             : /// \brief A specialization for large unordered sets that uses the block_allocator internally by default.
     388             : template<typename Key,
     389             :          typename Hash = std::hash<Key>,
     390             :          typename Equals = std::equal_to<Key>,
     391             :          typename Allocator = mcrl2::utilities::block_allocator<Key>,
     392             :          bool ThreadSafe = false>
     393             : using unordered_set_large = unordered_set<Key, Hash, Equals, Allocator, ThreadSafe>;
     394             : 
     395             : } // namespace mcrl2::utilities
     396             : 
     397             : #include "mcrl2/utilities/detail/unordered_set_implementation.h"
     398             : 
     399             : #endif // MCRL2_UTILITIES_UNORDERED_SET_H

Generated by: LCOV version 1.14