Line data Source code
1 : // Author(s): Jan Friso Groote 2 : // Copyright: see the accompanying file COPYING or copy at 3 : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING 4 : // 5 : // Distributed under the Boost Software License, Version 1.0. 6 : // (See accompanying file LICENSE_1_0.txt or copy at 7 : // http://www.boost.org/LICENSE_1_0.txt) 8 : // 9 : 10 : #include "mcrl2/atermpp/aterm_io_binary.h" 11 : #include "mcrl2/atermpp/standard_containers/stack.h" 12 : 13 : 14 : namespace atermpp 15 : { 16 : using namespace mcrl2::utilities; 17 : 18 : /// \brief The magic value for a binary aterm format stream. 19 : /// \details As of version 0x8305 the magic and version are written as 2 bytes not encoded as variable-width integers. To ensure 20 : /// compatibility with older formats the previously variable-width encoding is mimicked by prefixing them with 1000 (0x8). 21 : static constexpr std::uint16_t BAF_MAGIC = 0x8baf; 22 : 23 : /// \brief The BAF_VERSION constant is the version number of the ATerms written in BAF 24 : /// format. As of 29 August 2013 this version number is used by the mCRL2 25 : /// toolset. Whenever the file format of mCRL2 files is changed, the BAF_VERSION 26 : /// has to be increased. 27 : /// 28 : /// \details History: 29 : /// 30 : /// before 2013 : version 0x0300 31 : /// 29 August 2013 : version changed to 0x0301 32 : /// 23 November 2013 : version changed to 0x0302 (introduction of index for variable types) 33 : /// 24 September 2014 : version changed to 0x0303 (introduction of stochastic distribution) 34 : /// 2 April 2017 : version changed to 0x0304 (removed a few superfluous fields in the format) 35 : /// 19 July 2019 : version changed to 0x8305 (introduction of the streamable aterm format) 36 : /// 28 February 2020 : version changed to 0x8306 (added ability to stream aterm_int, implemented structured streaming for all objects) 37 : /// 24 January 2023 : version changed to 0x8307 (removed NoIndex from Variables, Boolean variables. Made the .lts format more 38 : /// compact by not storing states with a default probability 1) 39 : static constexpr std::uint16_t BAF_VERSION = 0x8307; 40 : 41 : /// \brief Each packet has a header consisting of a type. 42 : /// \details Either indicates a function symbol, a term (either shared or output) or an arbitrary integer. 43 : enum class packet_type 44 : { 45 : function_symbol = 0, 46 : aterm, 47 : aterm_output, 48 : aterm_int_output 49 : }; 50 : 51 : /// \brief The number of bits needed to store an element of packet_type. 52 : static constexpr unsigned int packet_bits = 2; 53 : 54 78 : binary_aterm_ostream::binary_aterm_ostream(std::shared_ptr<mcrl2::utilities::obitstream> stream) 55 78 : : m_stream(stream) 56 : { 57 : // The term with function symbol index 0 indicates the end of the stream, its actual value does not matter. 58 78 : m_function_symbols.insert(function_symbol("end_of_stream", 0)); 59 78 : m_function_symbol_index_width = 1; 60 : 61 : // Write the header of the binary aterm format. 62 78 : m_stream->write_bits(0, 8); 63 78 : m_stream->write_bits(BAF_MAGIC, 16); 64 78 : m_stream->write_bits(BAF_VERSION, 16); 65 78 : } 66 : 67 78 : binary_aterm_ostream::binary_aterm_ostream(std::ostream& stream) 68 78 : : binary_aterm_ostream(std::make_shared<mcrl2::utilities::obitstream>(stream)) 69 78 : {} 70 : 71 78 : binary_aterm_ostream::~binary_aterm_ostream() 72 : { 73 : // Write the end of the stream. 74 78 : m_stream->write_bits(static_cast<std::size_t>(packet_type::aterm), packet_bits); 75 78 : m_stream->write_bits(0, function_symbol_index_width()); 76 78 : } 77 : 78 : /// \brief Keep track of whether the term can be written to the stream. 79 : struct write_todo 80 : { 81 : detail::reference_aterm<aterm> term; 82 : bool write = false; 83 : 84 11003 : write_todo(const aterm& term) 85 11003 : : term(term) 86 11003 : {} 87 : 88 0 : void mark(std::stack<std::reference_wrapper<detail::_aterm>>& todo) const 89 : { 90 0 : term.mark(todo); 91 0 : } 92 : }; 93 : 94 4918 : void binary_aterm_ostream::put(const aterm& term) 95 : { 96 : // Traverse the term bottom up and store the subterms (and function symbol) before the actual term. 97 4918 : atermpp::stack<write_todo> stack; 98 4918 : stack.emplace(static_cast<const aterm_appl&>(term)); 99 : 100 : do 101 : { 102 21945 : write_todo& current = stack.top(); 103 21945 : aterm_appl transformed = m_transformer(static_cast<const aterm_appl&>(static_cast<const aterm&>(current.term))); 104 : 105 : // Indicates that this term is output and not a subterm, these should always be written. 106 21945 : bool is_output = stack.size() == 1; 107 21945 : if (m_terms.index(current.term) >= m_terms.size() || is_output) 108 : { 109 21884 : if (current.write) 110 : { 111 10942 : if (transformed.type_is_int()) 112 : { 113 2251 : if (is_output) 114 : { 115 : // If the integer is output, write the header and just an integer 116 2247 : m_stream->write_bits(static_cast<std::size_t>(packet_type::aterm_int_output), packet_bits); 117 2247 : m_stream->write_integer(static_cast<const aterm_int&>(static_cast<const aterm&>(current.term)).value()); 118 : } 119 : else 120 : { 121 4 : std::size_t symbol_index = write_function_symbol(transformed.function()); 122 : 123 : // Write the packet identifier of an aterm_int followed by its value. 124 4 : m_stream->write_bits(static_cast<std::size_t>(packet_type::aterm), packet_bits); 125 4 : m_stream->write_bits(symbol_index, function_symbol_index_width()); 126 4 : m_stream->write_integer(static_cast<const aterm_int&>(static_cast<const aterm&>(current.term)).value()); 127 : } 128 : } 129 : else 130 : { 131 8691 : std::size_t symbol_index = write_function_symbol(transformed.function()); 132 : 133 : // Write the packet identifier, followed by the indices of its function symbol and arguments. 134 8691 : m_stream->write_bits(static_cast<std::size_t>(is_output ? packet_type::aterm_output : packet_type::aterm), packet_bits); 135 8691 : m_stream->write_bits(symbol_index, function_symbol_index_width()); 136 : 137 22744 : for (const aterm& argument : transformed) 138 : { 139 14053 : std::size_t index = m_terms.index(argument); 140 14053 : assert(index < m_terms.size()); // Every argument must already be written. 141 14053 : m_stream->write_bits(index, term_index_width()); 142 : } 143 : } 144 : 145 10942 : if (!is_output) 146 : { 147 : // Only regular terms (not output) are shared and as such need a unique index. 148 6024 : bool assigned = m_terms.insert(current.term).second; 149 6024 : assert(assigned); mcrl2_unused(assigned); // This term should have a new index assigned. 150 6024 : m_term_index_width = static_cast<std::uint8_t>(std::log2(m_terms.size()) + 1); 151 : } 152 : 153 10942 : stack.pop(); 154 : } 155 : else 156 : { 157 : // Add all the arguments to the stack; to be processed first. 158 24995 : for (const aterm& argument : transformed) 159 : { 160 14053 : const aterm_appl& term = static_cast<const aterm_appl&>(argument); 161 14053 : if (m_terms.index(term) >= m_terms.size()) 162 : { 163 : // Only add arguments that have not been written before. 164 6085 : stack.emplace(term); 165 : } 166 : } 167 : 168 10942 : current.write = true; 169 : } 170 : } 171 : else 172 : { 173 61 : stack.pop(); // This term was already written and as such should be skipped. This can happen if 174 : // one term has two equal subterms. 175 : } 176 21945 : } 177 21945 : while (!stack.empty()); 178 4918 : } 179 : 180 14053 : unsigned int binary_aterm_ostream::term_index_width() 181 : { 182 14053 : assert(m_term_index_width == static_cast<unsigned int>(std::log2(m_terms.size()) + 1)); 183 14053 : return m_term_index_width; 184 : } 185 : 186 8773 : unsigned int binary_aterm_ostream::function_symbol_index_width() 187 : { 188 8773 : assert(m_function_symbol_index_width == static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1)); 189 8773 : return m_function_symbol_index_width; 190 : } 191 : 192 82 : binary_aterm_istream::binary_aterm_istream(std::shared_ptr<mcrl2::utilities::ibitstream> stream) 193 82 : : m_stream(stream) 194 : { 195 : // The term with function symbol index 0 indicates the end of the stream. 196 82 : m_function_symbols.emplace_back(); 197 82 : m_function_symbol_index_width = 1; 198 : 199 : // Read the binary aterm format header. 200 82 : if (m_stream->read_bits(8) != 0 || m_stream->read_bits(16) != BAF_MAGIC) 201 : { 202 1 : throw mcrl2::runtime_error("Error while reading: missing the BAF_MAGIC control sequence."); 203 : } 204 : 205 81 : std::size_t version = m_stream->read_bits(16); 206 81 : if (version != BAF_VERSION) 207 : { 208 0 : throw mcrl2::runtime_error("The BAF version (" + std::to_string(version) + ") of the input file is incompatible with the version (" + std::to_string(BAF_VERSION) + 209 0 : ") of this tool. The input file must be regenerated. "); 210 : } 211 85 : } 212 : 213 82 : binary_aterm_istream::binary_aterm_istream(std::istream& is) 214 82 : : binary_aterm_istream(std::make_shared<mcrl2::utilities::ibitstream>(is)) 215 81 : {} 216 : 217 8695 : std::size_t binary_aterm_ostream::write_function_symbol(const function_symbol& symbol) 218 : { 219 8695 : std::size_t result = m_function_symbols.index(symbol); 220 : 221 8695 : if (result < m_function_symbols.size()) 222 : { 223 6602 : return result; 224 : } 225 : else 226 : { 227 : // The function symbol has not been written yet, write it now and return its index. 228 2093 : m_stream->write_bits(static_cast<std::size_t>(packet_type::function_symbol), packet_bits); 229 2093 : m_stream->write_string(symbol.name()); 230 2093 : m_stream->write_integer(symbol.arity()); 231 : 232 2093 : auto result = m_function_symbols.insert(symbol); 233 2093 : m_function_symbol_index_width = static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1); 234 : 235 2093 : return result.first; 236 : } 237 : } 238 : 239 13946 : void binary_aterm_istream::get(aterm& t) 240 : { 241 : while(true) 242 : { 243 : // Determine the type of the next packet. 244 13946 : std::size_t header = m_stream->read_bits(packet_bits); 245 13946 : packet_type packet = static_cast<packet_type>(header); 246 : 247 13946 : if (packet == packet_type::function_symbol) 248 : { 249 : // Read a single function symbol and insert it into the already read function symbols. 250 2265 : std::string name = m_stream->read_string(); 251 2265 : std::size_t arity = m_stream->read_integer(); 252 2265 : m_function_symbols.emplace_back(name, arity); 253 2265 : m_function_symbol_index_width = static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1); 254 2265 : } 255 11681 : else if (packet == packet_type::aterm_int_output) 256 : { 257 : // Read the integer from the stream and construct an aterm_int. 258 2270 : std::size_t value = m_stream->read_integer(); 259 2270 : make_aterm_int(static_cast<aterm_int&>(t), value); 260 2270 : return; 261 : } 262 9411 : else if (packet == packet_type::aterm || packet == packet_type::aterm_output) 263 : { 264 : // First read the function symbol of the following term. 265 9411 : function_symbol symbol = m_function_symbols[m_stream->read_bits(function_symbol_index_width())]; 266 : 267 9411 : if (!symbol.defined()) 268 : { 269 : // The term with function symbol zero marks the end of the stream. 270 63 : t=aterm(); 271 63 : return; 272 : } 273 9348 : else if (symbol == detail::g_as_int) 274 : { 275 : // Read the integer from the stream and construct a shared aterm_int in the index. 276 4 : std::size_t value = m_stream->read_integer(); 277 4 : m_terms.emplace_back(aterm_int(value)); 278 4 : m_term_index_width = static_cast<unsigned int>(std::log2(m_terms.size()) + 1); 279 : } 280 : else 281 : { 282 : // Read arity number of arguments from the stream and search them in the already defined set of terms. 283 9344 : std::vector<aterm> arguments(symbol.arity()); 284 24490 : for (std::size_t argument = 0; argument < symbol.arity(); ++argument) 285 : { 286 15146 : arguments[argument] = m_terms[m_stream->read_bits(term_index_width())]; 287 : } 288 : 289 : // Transform the resulting term. 290 18688 : aterm transformed = m_transformer(aterm_appl(symbol, arguments.begin(), arguments.end())); 291 : 292 9344 : if (packet == packet_type::aterm_output) 293 : { 294 : // This aterm was marked as output in the file so return it. 295 2784 : t=transformed; 296 2784 : return; 297 : } 298 : else 299 : { 300 : // Construct the term appl from the function symbol and the already read arguments and insert it. 301 6560 : m_terms.emplace_back(transformed); 302 6560 : m_term_index_width = static_cast<unsigned int>(std::log2(m_terms.size()) + 1); 303 : } 304 12128 : } 305 9411 : } 306 8829 : } 307 : } 308 : 309 15146 : unsigned int binary_aterm_istream::term_index_width() 310 : { 311 15146 : assert(m_term_index_width == static_cast<unsigned int>(std::log2(m_terms.size()) + 1)); 312 15146 : return m_term_index_width; 313 : } 314 : 315 9411 : unsigned int binary_aterm_istream::function_symbol_index_width() 316 : { 317 9411 : assert(m_function_symbol_index_width == static_cast<unsigned int>(std::log2(m_function_symbols.size()) + 1)); 318 9411 : return m_function_symbol_index_width; 319 : } 320 : 321 6 : void write_term_to_binary_stream(const aterm& t, std::ostream& os) 322 : { 323 6 : binary_aterm_ostream(os) << t; 324 6 : } 325 : 326 6 : void read_term_from_binary_stream(std::istream& is, aterm& t) 327 : { 328 6 : binary_aterm_istream(is).get(t); 329 6 : } 330 : 331 : } // namespace atermpp