LCOV - code coverage report
Current view: top level - atermpp/source - aterm_io_binary.cpp (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 113 115 98.3 %
Date: 2019-09-14 00:54:39 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Jan Friso Groote
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : //
       9             : 
      10             : #include "mcrl2/atermpp/aterm_io.h"
      11             : 
      12             : #include "mcrl2/atermpp/aterm.h"
      13             : #include "mcrl2/atermpp/aterm_int.h"
      14             : #include "mcrl2/atermpp/aterm_list.h"
      15             : 
      16             : #include <cassert>
      17             : #include <cmath>
      18             : 
      19             : namespace atermpp
      20             : {
      21             : using namespace mcrl2::utilities;
      22             : 
      23             : /// \brief The magic value for a binary aterm format stream.
      24             : /// \details As of version 0x8305 the magic and version are written as 2 bytes not encoded as variable-width integers. To ensure
      25             : ///          compatibility with older formats the previously variable-width encoding is mimicked by prefixing them with 1000 (0x8).
      26             : static constexpr std::uint16_t BAF_MAGIC = 0x8baf;
      27             : 
      28             : /// \brief The BAF_VERSION constant is the version number of the ATerms written in BAF
      29             : ///        format. As of 29 August 2013 this version number is used by the mCRL2
      30             : ///        toolset. Whenever the file format of mCRL2 files is changed, the BAF_VERSION
      31             : ///        has to be increased.
      32             : ///
      33             : /// \details History:
      34             : ///
      35             : /// before 2013       : version 0x0300
      36             : /// 29 August 2013    : version changed to 0x0301
      37             : /// 23 November 2013  : version changed to 0x0302 (introduction of index for variable types)
      38             : /// 24 September 2014 : version changed to 0x0303 (introduction of stochastic distribution)
      39             : ///  2 April 2017     : version changed to 0x0304 (removed a few superfluous fields in the format)
      40             : /// 19 Juli 2019      : version changed to 0x8305 (introduction of the streamable aterm format)
      41             : static constexpr std::uint16_t BAF_VERSION = 0x8305;
      42             : 
      43             : /// \brief Each packet has a header consisting of a type and an invisible bit (indicating no output).
      44             : enum class packet_type
      45             : {
      46             :   function_symbol = 0,
      47             :   aterm,
      48             :   aterm_output,
      49             :   aterm_int,
      50             : };
      51             : 
      52             : /// \brief The number of bits needed to store an element of packet_type.
      53             : static constexpr unsigned int packet_bits = 2;
      54             : 
      55         126 : binary_aterm_output::binary_aterm_output(std::ostream& stream, std::function<aterm_transformer> transformer)
      56             :   : m_stream(stream),
      57         126 :     m_transformer(transformer)
      58             : {
      59             :   // The term with function symbol index 0 indicates the end of the stream, its actual value does not matter.
      60         126 :   m_function_symbols.insert(detail::g_as_int);
      61         126 :   m_function_symbol_index_width = 1;
      62             : 
      63             :   // Write the header of the binary aterm format.
      64         126 :   m_stream.write_bits(0, 8);
      65         126 :   m_stream.write_bits(BAF_MAGIC, 16);
      66         126 :   m_stream.write_bits(BAF_VERSION, 16);
      67         126 : }
      68             : 
      69         252 : binary_aterm_output::~binary_aterm_output()
      70             : {
      71             :   // Write the end of the stream.
      72         126 :   m_stream.write_bits(static_cast<std::size_t>(packet_type::aterm), packet_bits);
      73         126 :   m_stream.write_integer(0);
      74         126 : }
      75             : 
      76             : /// \brief Keep track of whether the term can be written to the stream.
      77             : struct write_todo
      78             : {
      79             :   const aterm_appl& term;
      80             :   bool write = false;
      81             : 
      82       19288 :   write_todo(const aterm_appl& term)
      83       19288 :    : term(term)
      84       19288 :   {}
      85             : };
      86             : 
      87        3391 : void binary_aterm_output::write_term(const aterm& term)
      88             : {
      89        3391 :   assert(!term.type_is_int());
      90             : 
      91             :   // Traverse the term bottom up and store the subterms (and function symbol) before the actual term.
      92        6782 :   std::stack<write_todo> stack;
      93        3391 :   stack.emplace(static_cast<const aterm_appl&>(term));
      94             : 
      95       38336 :   do
      96             :   {
      97       38336 :     write_todo& current = stack.top();
      98       76672 :     aterm_appl transformed = m_transformer(current.term);
      99             : 
     100             :     // Indicates that this term is output and not a subterm, these should always be written.
     101       38336 :     bool is_output = stack.size() == 1;
     102       38336 :     if (m_terms.index(current.term) >= m_terms.size() || is_output)
     103             :     {
     104       38096 :       if (current.write)
     105             :       {
     106       19048 :         if (transformed.type_is_int())
     107             :         {
     108             :           // Write the packet identifier of an aterm_int followed by its value.
     109        1524 :           m_stream.write_bits(static_cast<std::size_t>(packet_type::aterm_int), packet_bits);
     110        1524 :           m_stream.write_integer(reinterpret_cast<const aterm_int&>(current.term).value());
     111             :         }
     112             :         else
     113             :         {
     114       17524 :           std::size_t symbol_index = write_function_symbol(transformed.function());
     115             : 
     116             :           // Write the packet identifier, followed by the indices of its function symbol and arguments.
     117       17524 :           m_stream.write_bits(static_cast<std::size_t>(is_output ? packet_type::aterm_output : packet_type::aterm), packet_bits);
     118       17524 :           m_stream.write_bits(symbol_index, function_symbol_index_width());
     119             : 
     120       50915 :           for (const aterm& argument : transformed)
     121             :           {
     122       33391 :             std::size_t index = m_terms.index(argument);
     123       33391 :             assert(index < m_terms.size()); // Every argument must already be written.
     124       33391 :             m_stream.write_bits(index, term_index_width());
     125             :           }
     126             :         }
     127             : 
     128       19048 :         if (!is_output)
     129             :         {
     130             :           // Output terms are not shared and thus can be forgotten.
     131       15657 :           bool assigned = m_terms.insert(current.term).second;
     132       15657 :           assert(assigned); mcrl2_unused(assigned); // This term should have a new index assigned.
     133       15657 :           m_term_index_width = static_cast<std::uint8_t>(std::log2(m_terms.size()) + 1);
     134             :         }
     135             : 
     136       19048 :         stack.pop();
     137             :       }
     138             :       else
     139             :       {
     140             :         // Add all the arguments to the stack; to be processed first.
     141       52439 :         for (const aterm& argument : transformed)
     142             :         {
     143       33391 :           const aterm_appl& term = static_cast<const aterm_appl&>(argument);
     144       33391 :           if (m_terms.index(term) >= m_terms.size())
     145             :           {
     146             :             // Only add arguments that have not been written before.
     147       15897 :             stack.emplace(term);
     148             :           }
     149             :         }
     150             : 
     151       19048 :         current.write = true;
     152             :       }
     153             :     }
     154             :     else
     155             :     {
     156         240 :      stack.pop(); // This term was already written and as such should be skipped. This can happen if
     157             :                   // one term has two equal subterms.
     158             :     }
     159             :   }
     160       38336 :   while (!stack.empty());
     161        3391 : }
     162             : 
     163       33391 : unsigned int binary_aterm_output::term_index_width()
     164             : {
     165       33391 :   assert(m_term_index_width == static_cast<unsigned int>(std::log2(m_terms.size()) + 1));
     166       33391 :   return m_term_index_width;
     167             : }
     168             : 
     169       17524 : unsigned int binary_aterm_output::function_symbol_index_width()
     170             : {
     171       17524 :   assert(m_function_symbol_index_width == static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1));
     172       17524 :   return m_function_symbol_index_width;
     173             : }
     174             : 
     175         128 : binary_aterm_input::binary_aterm_input(std::istream& is, std::function<aterm_transformer> transformer)
     176             :   : m_stream(is),
     177         129 :     m_transformer(transformer)
     178             : {
     179             :   // The term with function symbol index 0 indicates the end of the stream.
     180         128 :   m_function_symbols.emplace_back();
     181         128 :   m_function_symbol_index_width = 1;
     182             : 
     183             :   // Read the binary aterm format header.
     184         128 :   if (m_stream.read_bits(8) != 0 || m_stream.read_bits(16) != BAF_MAGIC)
     185             :   {
     186           1 :     throw mcrl2::runtime_error("Error while reading file: The file is not correct as it does not have the BAF_MAGIC control sequence at the right place.");
     187             :   }
     188             : 
     189         127 :   std::size_t version = m_stream.read_bits(16);
     190         127 :   if (version != BAF_VERSION)
     191             :   {
     192           0 :     throw mcrl2::runtime_error("The BAF version (" + std::to_string(version) + ") of the input file is incompatible with the version (" + std::to_string(BAF_VERSION) +
     193           0 :                                ") of this tool. The input file must be regenerated. ");
     194             :   }
     195         127 : }
     196             : 
     197       17524 : std::size_t binary_aterm_output::write_function_symbol(const function_symbol& symbol)
     198             : {
     199       17524 :   std::size_t result = m_function_symbols.index(symbol);
     200             : 
     201       17524 :   if (result < m_function_symbols.size())
     202             :   {
     203       12776 :     return result;
     204             :   }
     205             :   else
     206             :   {
     207             :     // The function symbol has not been written yet, write it now and return its index.
     208        4748 :     m_stream.write_bits(static_cast<std::size_t>(packet_type::function_symbol), packet_bits);
     209        4748 :     m_stream.write_string(symbol.name());
     210        4748 :     m_stream.write_integer(symbol.arity());
     211             : 
     212        4748 :     auto result = m_function_symbols.insert(symbol);
     213        4748 :     m_function_symbol_index_width = static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1);
     214             : 
     215        4748 :     return result.first;
     216             :   }
     217             : }
     218             : 
     219       24759 : aterm binary_aterm_input::read_term()
     220             : {
     221       21259 :   while(true)
     222             :   {
     223             :     // Determine the type of the next packet.
     224       24759 :     std::size_t header = m_stream.read_bits(packet_bits);
     225       24759 :     packet_type packet = static_cast<packet_type>(header);
     226             : 
     227       24759 :     if (packet == packet_type::function_symbol)
     228             :     {
     229             :       // Read a single function symbol and insert it into the already read function symbols.
     230        9858 :       std::string name = m_stream.read_string();
     231        4929 :       std::size_t arity = m_stream.read_integer();
     232        4929 :       m_function_symbols.emplace_back(name, arity);
     233        4929 :       m_function_symbol_index_width = static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1);
     234             :     }
     235       19830 :     else if (packet == packet_type::aterm || packet == packet_type::aterm_output)
     236             :     {
     237             :       // First read the function symbol of the following term.
     238       33112 :       function_symbol symbol = m_function_symbols[m_stream.read_bits(function_symbol_index_width())];
     239             : 
     240       18306 :       if (!symbol.defined())
     241             :       {
     242             :         // The term with function symbol zero marks the end of the stream.
     243         108 :         return aterm();
     244             :       }
     245             : 
     246             :       // Read arity number of arguments from the stream and search them in the already defined set of terms.
     247       33004 :       std::vector<aterm> arguments(symbol.arity());
     248       52800 :       for (std::size_t argument = 0; argument < symbol.arity(); ++argument)
     249             :       {
     250       34602 :         arguments[argument] = m_terms[m_stream.read_bits(term_index_width())];
     251             :       }
     252             : 
     253             :       // Transform the resulting term.
     254       33004 :       aterm transformed = m_transformer(aterm_appl(symbol, arguments.begin(), arguments.end()));
     255             : 
     256       18198 :       if (packet == packet_type::aterm_output)
     257             :       {
     258             :         // This aterm was marked as output in the file so return it.
     259        3392 :         return transformed;
     260             :       }
     261             :       else
     262             :       {
     263             :         // Construct the term appl from the function symbol and the already read arguments and insert it.
     264       14806 :         m_terms.emplace_back(transformed);
     265       14806 :         m_term_index_width = static_cast<unsigned int>(std::log2(m_terms.size()) + 1);
     266       14806 :       }
     267             :     }
     268        1524 :     else if (packet == packet_type::aterm_int)
     269             :     {
     270             :       // Read the integer from the stream and construct an aterm_int.
     271        1524 :       std::size_t value = m_stream.read_integer();
     272        1524 :       m_terms.emplace_back(aterm_int(value));
     273        1524 :       m_term_index_width = static_cast<unsigned int>(std::log2(m_terms.size()) + 1);
     274             :     }
     275             :   }
     276             : }
     277             : 
     278       34602 : unsigned int binary_aterm_input::term_index_width()
     279             : {
     280       34602 :   assert(m_term_index_width == static_cast<unsigned int>(std::log2(m_terms.size()) + 1));
     281       34602 :   return m_term_index_width;
     282             : }
     283             : 
     284       18306 : unsigned int binary_aterm_input::function_symbol_index_width()
     285             : {
     286       18306 :   assert(m_function_symbol_index_width == static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1));
     287       18306 :   return m_function_symbol_index_width;
     288             : }
     289             : 
     290          16 : void write_term_to_binary_stream(const aterm& t, std::ostream& os)
     291             : {
     292          32 :   binary_aterm_output output(os);
     293          16 :   output.write_term(t);
     294          16 : }
     295             : 
     296          18 : aterm read_term_from_binary_stream(std::istream& is)
     297             : {
     298          36 :   binary_aterm_input input(is);
     299          34 :   return input.read_term();
     300             : }
     301             : 
     302         420 : } // namespace atermpp

Generated by: LCOV version 1.12