LCOV - code coverage report
Current view: top level - utilities/include/mcrl2/utilities/detail - unordered_set_implementation.h (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 88 95 92.6 %
Date: 2024-04-19 03:43:27 Functions: 874 1182 73.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Maurice Laveaux
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : 
       9             : #ifndef MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
      10             : #define MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H
      11             : #pragma once
      12             : 
      13             : #define MCRL2_UNORDERED_SET_TEMPLATES template<typename Key, typename Hash, typename Equals, typename Allocator, bool ThreadSafe, bool Resize>
      14             : #define MCRL2_UNORDERED_SET_CLASS unordered_set<Key, Hash, Equals, Allocator, ThreadSafe, Resize>
      15             : 
      16             : #include "mcrl2/utilities/unordered_set.h"
      17             : 
      18             : #include "mcrl2/utilities/power_of_two.h"
      19             : 
      20             : #include <algorithm>
      21             : 
      22             : namespace mcrl2::utilities
      23             : {
      24             : 
      25             : static constexpr std::size_t minimum_size = 4UL;
      26             : 
      27             : inline float bytes_to_megabytes(std::size_t bytes)
      28             : {
      29             :   return static_cast<float>(bytes) / (1024.0f * 1024.0f);
      30             : }
      31             : 
      32             : MCRL2_UNORDERED_SET_TEMPLATES
      33         916 : MCRL2_UNORDERED_SET_CLASS::unordered_set(const unordered_set& set)
      34             : {
      35         916 :   reserve(set.size());
      36             : 
      37        1883 :   for (auto& element : set)
      38             :   {
      39          51 :     emplace(element);
      40             :   }
      41         916 : }
      42             : 
      43             : MCRL2_UNORDERED_SET_TEMPLATES
      44           0 : MCRL2_UNORDERED_SET_CLASS& MCRL2_UNORDERED_SET_CLASS::operator=(const unordered_set& set)
      45             : {
      46           0 :   clear();
      47           0 :   reserve(set.size());
      48             : 
      49           0 :   for (auto& element : set)
      50             :   {
      51           0 :     emplace(element);
      52             :   }
      53             : 
      54           0 :   return *this;
      55             : }
      56             : 
      57             : MCRL2_UNORDERED_SET_TEMPLATES
      58       23441 : MCRL2_UNORDERED_SET_CLASS::~unordered_set()
      59             : {
      60             :   // This unordered_set is not moved-from.
      61       23441 :   if (m_buckets.size() > 0)
      62             :   {
      63       23440 :     clear();
      64             :   }
      65       23441 : }
      66             : 
      67             : MCRL2_UNORDERED_SET_TEMPLATES
      68       23441 : void MCRL2_UNORDERED_SET_CLASS::clear()
      69             : {
      70       23441 :   assert(m_buckets.size() != 0);
      71             : 
      72             :   // A naive implementation of erasing all elements.
      73       54726 :   for (auto it = begin(); it != end(); )
      74             :   {
      75       31285 :     it = erase(it);
      76             :   }
      77             : 
      78      145989 :   assert(std::all_of(m_buckets.begin(), m_buckets.end(), [](const bucket_type& bucket) { return bucket.empty(); }));
      79       23441 :   assert(m_number_of_elements == 0);
      80       23441 :   assert(m_buckets.size() != 0);
      81       23441 : }
      82             : 
      83             : MCRL2_UNORDERED_SET_TEMPLATES
      84             : template<typename ...Args>
      85   180136082 : auto MCRL2_UNORDERED_SET_CLASS::emplace(Args&&... args) -> std::pair<iterator, bool>
      86             : {
      87      104311 :   if constexpr (Resize) { rehash_if_needed(); }
      88             : 
      89             :   size_type bucket_index;
      90   180136082 :   iterator it;
      91             : 
      92             :   // Searching the current bucket list is safe and can be performed without locking.
      93             :   if constexpr (allow_transparent)
      94             :   {
      95             :     // Compute the hash and corresponding bucket.
      96   180036054 :     bucket_index = find_bucket_index(std::forward<Args>(args)...);
      97   180036054 :     it = find_impl(bucket_index, std::forward<Args>(args)...);
      98             :   }
      99             :   else
     100             :   {
     101             :     // Compute the hash and corresponding bucket.
     102      100028 :     Key object(std::forward<Args>(args)...);
     103      100028 :     bucket_index = find_bucket_index(object);
     104      100028 :     it = find_impl(bucket_index, object);
     105           1 :   }  
     106             : 
     107   180136082 :   if (it != end())
     108             :   {
     109   178848536 :     return std::make_pair(it, false);
     110             :   }
     111             :   else
     112             :   {
     113     1287546 :     return emplace_impl(bucket_index, std::forward<Args>(args)...);
     114             :   }
     115             : }
     116             : 
     117             : MCRL2_UNORDERED_SET_TEMPLATES
     118      355717 : auto MCRL2_UNORDERED_SET_CLASS::erase(typename MCRL2_UNORDERED_SET_CLASS::const_iterator it) -> iterator
     119             : {
     120             :   // Find the bucket that is pointed to and remove the key after the before iterator.
     121      355717 :   bucket_type& bucket = const_cast<bucket_type&>(*it.get_bucket_it());
     122             : 
     123             :   // An element was removed from the hash table.
     124      355717 :   --m_number_of_elements;
     125             : 
     126             :   // Remove the key that is after the before iterator.
     127      355717 :   iterator result_it(it.get_bucket_it(), m_buckets.end(), it.key_before_it(), bucket.erase_after(m_allocator, it.key_before_it()));
     128             : 
     129             :   // We must guarantee that the iterator points the a valid key (and this might not be the case after removal).
     130      355717 :   result_it.goto_next_bucket();
     131      355717 :   return result_it;
     132             : }
     133             : 
     134             : MCRL2_UNORDERED_SET_TEMPLATES
     135             : template<typename ...Args>
     136           3 : std::size_t MCRL2_UNORDERED_SET_CLASS::count(const Args&... args) const
     137             : {
     138             :   if constexpr (allow_transparent)
     139             :   {
     140             :     return find(args...) != end();
     141             :   }
     142             :   else
     143             :   {
     144           3 :     return find(Key(args...)) != end();
     145             :   }
     146             : }
     147             : 
     148             : MCRL2_UNORDERED_SET_TEMPLATES
     149             : template<typename ...Args>
     150             : void MCRL2_UNORDERED_SET_CLASS::erase(const Args&... args)
     151             : {
     152             :   if constexpr (allow_transparent)
     153             :   {
     154             :     erase_impl(args...);
     155             :   }
     156             :   else
     157             :   {
     158             :     erase_impl(Key(args...));
     159             :   }
     160             : }
     161             : 
     162             : MCRL2_UNORDERED_SET_TEMPLATES
     163             : template<typename ...Args>
     164       29087 : auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) const -> const_iterator
     165             : {
     166             :   if constexpr (allow_transparent)
     167             :   {
     168       29084 :     return find_impl(find_bucket_index(args...), args...);
     169             :   }
     170             :   else
     171             :   {
     172           3 :     Key element(args...);
     173           6 :     return find_impl(find_bucket_index(element), element);
     174             :   }
     175             : }
     176             : 
     177             : MCRL2_UNORDERED_SET_TEMPLATES
     178             : template<typename ...Args>
     179     1841100 : auto MCRL2_UNORDERED_SET_CLASS::find(const Args&... args) -> iterator
     180             : {
     181             :   if constexpr (allow_transparent)
     182             :   {
     183     1831094 :     return find_impl(find_bucket_index(args...), args...);
     184             :   }
     185             :   else
     186             :   {
     187       10006 :     Key element(args...);
     188       20012 :     return find_impl(find_bucket_index(element), element);
     189             :   }
     190             : }
     191             : 
     192             : MCRL2_UNORDERED_SET_TEMPLATES
     193       25859 : void MCRL2_UNORDERED_SET_CLASS::rehash(std::size_t number_of_buckets)
     194             : {
     195             :   // Ensure that the number of buckets is a power of two greater than the minimum size.
     196       25859 :   number_of_buckets = std::max(utilities::round_up_to_power_of_two(number_of_buckets), minimum_size);
     197             : 
     198             :   // If n is greater than the current number of buckets in the container (bucket_count), a rehash is forced.
     199             :   // The new bucket count can either be equal or greater than n. Otherwise, it has no effect.
     200       25859 :   if (number_of_buckets <= bucket_count())
     201             :   {
     202           0 :     return;
     203             :   }
     204             : 
     205             :   // Update the number of mutexes.
     206             :   if constexpr (!EnableLockfreeInsertion)
     207             :   {
     208             :     m_bucket_mutexes = std::vector<std::mutex>(std::max(number_of_buckets / BucketsPerMutex, 1ul));
     209             :   }
     210             : 
     211             :   // Create one bucket list for all elements in the hashtable.
     212       25859 :   bucket_type old_keys;
     213      315867 :   for (auto&& bucket : m_buckets)
     214             :   {
     215      290008 :     old_keys.splice_after(old_keys.before_begin(), bucket);
     216             :   }
     217             : 
     218       25859 :   assert(std::distance(old_keys.begin(), old_keys.end()) == static_cast<long>(m_number_of_elements));
     219             : 
     220             :   // Recreate the hash table, but don't move or copy the old elements.
     221             :   {
     222             :     // clear() doesn't actually free the memory used and still results in an 3n peak.
     223       25859 :     std::vector<bucket_type>().swap(m_buckets);
     224             :   }
     225       25859 :   m_buckets.resize(number_of_buckets);
     226       25859 :   m_buckets_mask = m_buckets.size() - 1;
     227             : 
     228             :   // Fill the set with all elements that are stored in old_keys.
     229      418992 :   while (!old_keys.empty())
     230             :   {
     231             :     // Move the current element to this bucket.
     232      393133 :     bucket_type& bucket = m_buckets[find_bucket_index(old_keys.front())];
     233      393133 :     bucket.splice_front(bucket.before_begin(), old_keys);
     234             :   }
     235             : }
     236             : 
     237             : template<typename T>
     238             : void print_performance_statistics(const T& unordered_set)
     239             : {
     240             :   // Calculate a histogram of the bucket lengths.
     241             :   std::vector<std::size_t> histogram;
     242             : 
     243             :   for (std::size_t index = 0; index < unordered_set.bucket_count(); ++index)
     244             :   {
     245             :     // Ensure that the current bucket fits within the histogram.
     246             :     std::size_t bucket_length = unordered_set.bucket_size(index);
     247             :     histogram.resize(std::max(histogram.size(), bucket_length + 1));
     248             :     ++histogram[bucket_length];
     249             :   }
     250             : 
     251             :   mCRL2log(mcrl2::log::info) << "Table stores " << unordered_set.size() << " keys in " << unordered_set.bucket_count() << " buckets.\n";
     252             : 
     253             :   for (std::size_t i = 0; i < histogram.size(); ++i)
     254             :   {
     255             :     mCRL2log(mcrl2::log::debug) << "There are " << histogram[i] << " buckets that store " << i << " keys.\n";
     256             :   }
     257             : }
     258             : 
     259             : /// Private functions
     260             : 
     261             : MCRL2_UNORDERED_SET_TEMPLATES
     262             : template<typename ...Args>
     263     1287547 : auto MCRL2_UNORDERED_SET_CLASS::emplace_impl(size_type bucket_index, Args&&... args) -> std::pair<iterator, bool>
     264             : {
     265     1287547 :   bucket_type& bucket = m_buckets[bucket_index];
     266             : 
     267             :   if constexpr (ThreadSafe)
     268             :   {
     269             :     if constexpr (EnableLockfreeInsertion)
     270             :     {
     271             :       // Construct a new node and put it at the front of the bucket list.
     272     1173829 :       auto [key_it, added] = bucket.emplace_front_unique(m_allocator, m_equals, std::forward<Args>(args)...);
     273     1173829 :       iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), key_it);
     274             : 
     275     1173829 :       if (added)
     276             :       {
     277     1173829 :         m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
     278             :       }
     279             : 
     280     2347658 :       return std::make_pair(it, added);
     281             :     }
     282             :     else
     283             :     {
     284             :       // Obtain exclusive access to this bucket.
     285             :       std::lock_guard g(m_bucket_mutexes[bucket_index % m_bucket_mutexes.size()]);
     286             : 
     287             :       iterator it = find_impl(bucket_index, std::forward<Args>(args)...);
     288             :       if (it != end())
     289             :       {
     290             :         // This element has been inserted in the mean time.
     291             :         return std::make_pair(it, false);
     292             :       }
     293             :       else
     294             :       {
     295             :         // Construct a new node and put it at the front of the bucket list.
     296             :         bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
     297             : 
     298             :         // Return the iterator.
     299             :         m_number_of_elements.fetch_add(1, std::memory_order_relaxed);
     300             :         iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
     301             :         return std::make_pair(it, true);
     302             :       }
     303             :     }
     304             : 
     305             :   }
     306             :   else
     307             :   {
     308             :     // Construct a new node and put it at the front of the bucket list.
     309      113718 :     bucket.emplace_front(m_allocator, std::forward<Args>(args)...);
     310      113718 :     ++m_number_of_elements;
     311             : 
     312             :     // Return the iterator.
     313      113718 :     iterator it(m_buckets.begin() + bucket_index, m_buckets.end(), bucket.before_begin(), bucket.begin());
     314      113718 :     return std::make_pair(it, true);
     315             :   }
     316             : 
     317             : }
     318             : 
     319             : MCRL2_UNORDERED_SET_TEMPLATES
     320             : template<typename ...Args>
     321             : void MCRL2_UNORDERED_SET_CLASS::erase_impl(const Args&... args)
     322             : {
     323             :   bucket_type& bucket = m_buckets[find_bucket_index(args...)];
     324             : 
     325             :   // Loop over the elements in the bucket until the key was found.
     326             :   typename bucket_type::const_iterator before_it = bucket.before_begin();
     327             :   for (typename bucket_type::iterator it = bucket.begin(); it != bucket.end();)
     328             :   {
     329             :     if (m_equals(*it, args...))
     330             :     {
     331             :       // Erase the current element and stop iterating.
     332             :       --m_number_of_elements;
     333             :       it = bucket.erase_after(m_allocator, before_it);
     334             :       break;
     335             :     }
     336             :     else
     337             :     {
     338             :       // Both iterators move one place forward.
     339             :       ++before_it;
     340             :       ++it;
     341             :     }
     342             :   }
     343             : }
     344             : 
     345             : 
     346             : MCRL2_UNORDERED_SET_TEMPLATES
     347             : template<typename ...Args>
     348   182399404 : std::size_t MCRL2_UNORDERED_SET_CLASS::find_bucket_index(const Args&... args) const
     349             : {
     350   182399404 :   std::size_t hash = m_hash(args...);
     351             :   /// n mod 2^i is equal to n & (2^i - 1).
     352   182399404 :   assert(m_buckets_mask == m_buckets.size() - 1);
     353   182399404 :   std::size_t index = hash & m_buckets_mask;
     354   182399404 :   assert(index < m_buckets.size());
     355   182399404 :   return index;
     356             : }
     357             : 
     358             : MCRL2_UNORDERED_SET_TEMPLATES
     359             : template<typename ...Args>
     360   182006271 : auto MCRL2_UNORDERED_SET_CLASS::find_impl(size_type bucket_index, const Args&... args) const -> const_iterator
     361             : {
     362   182006271 :   const auto& bucket = m_buckets[bucket_index];
     363             : 
     364             :   // Search through the bucket to find an element that is equivalent.
     365   182006271 :   auto before_it = bucket.before_begin();
     366   283745914 :   for(auto it = bucket.begin(); it != bucket.end(); ++it)
     367             :   {
     368   281934000 :     auto& key = *it;
     369   281934000 :     if (m_equals(key, args...))
     370             :     {
     371   180194357 :       return const_iterator(m_buckets.begin() + bucket_index, m_buckets.end(), before_it, it);
     372             :     }
     373             : 
     374   101739643 :     before_it = it;
     375             :   }
     376             : 
     377     1811914 :   return end();
     378             : }
     379             : 
     380             : MCRL2_UNORDERED_SET_TEMPLATES
     381      104731 : void MCRL2_UNORDERED_SET_CLASS::rehash_if_needed()
     382             : {
     383      104731 :   if (load_factor() >= max_load_factor())
     384             :   {
     385         299 :     rehash(m_buckets.size() * 2);
     386             :   }
     387      104731 : }
     388             : 
     389             : #undef MCRL2_UNORDERED_SET_CLASS
     390             : #undef MCRL2_UNORDERED_SET_TEMPLATES
     391             : 
     392             : } // namespace mcrl2::utilities
     393             : 
     394             : #endif // MCRL2_UTILITIES_UNORDERED_SET_IMPLEMENTATION_H

Generated by: LCOV version 1.14