LCOV - code coverage report
Current view: top level - utilities/source - text_utility.cpp (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 67 82 81.7 %
Date: 2020-02-28 00:44:21 Functions: 13 14 92.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Wieger Wesselink
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : //
       9             : /// \file text_utility.cpp
      10             : /// \brief String manipulation functions.
      11             : 
      12             : #include "mcrl2/utilities/exception.h"
      13             : #include "mcrl2/utilities/logger.h"
      14             : 
      15             : #include <boost/algorithm/string.hpp>
      16             : #include <boost/xpressive/xpressive.hpp>
      17             : #include <fstream>
      18             : 
      19             : namespace mcrl2
      20             : {
      21             : 
      22             : namespace utilities
      23             : {
      24             : 
      25             : /// \brief Split a string into paragraphs.
      26             : /// \param text A string
      27             : /// \return The paragraphs of <tt>text</tt>
      28           0 : std::vector<std::string> split_paragraphs(const std::string& text)
      29             : {
      30           0 :   std::vector<std::string> result;
      31             : 
      32             :   // find multiple line endings
      33           0 :   boost::xpressive::sregex paragraph_split = boost::xpressive::sregex::compile("\\n\\s*\\n");
      34             : 
      35             : // the -1 below directs the token iterator to yield the parts of
      36             : // the string that did NOT match the regular expression.
      37           0 :   boost::xpressive::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
      38           0 :   boost::xpressive::sregex_token_iterator end;
      39             : 
      40           0 :   for (; cur != end; ++cur)
      41             :   {
      42           0 :     std::string paragraph = *cur;
      43           0 :     boost::trim(paragraph);
      44           0 :     if (paragraph.size() > 0)
      45             :     {
      46           0 :       result.push_back(paragraph);
      47             :     }
      48             :   }
      49           0 :   return result;
      50             : }
      51             : 
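      52             : /// \brief Split a string at every occurrence of a separator character.
      53             : /// \param line The string to split
      54             : /// \param separators The characters that each act as a separator
      55             : /// \return The parts of <tt>line</tt> between the separators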
      56        5549 : std::vector<std::string> split(const std::string& line, const std::string& separators)
      57             : {
      58        5549 :   std::vector<std::string> result;
      59        5549 :   boost::algorithm::split(result, line, boost::algorithm::is_any_of(separators));
      60        5549 :   return result;
      61             : }
      62             : 
      63             : /// \brief Read text from a file.
      64             : /// \param filename The name of the file to read
      65             : /// \param warn If true, a warning is logged and an empty string is returned when the file cannot be opened; if false, an mcrl2::runtime_error is thrown instead
      66             : /// \return The contents of the file
      67           3 : std::string read_text(const std::string& filename, bool warn)
      68             : {
      69           6 :   std::ifstream in(filename.c_str());
      70           3 :   if (!in)
      71             :   {
      72           0 :     if (warn)
      73             :     {
      74           0 :       mCRL2log(log::warning) << "Could not open input file: " << filename << std::endl;
      75           0 :       return "";
      76             :     }
      77             :     else
      78             :     {
      79           0 :       throw mcrl2::runtime_error("Could not open input file: " + filename);
      80             :     }
      81             :   }
      82           3 :   in.unsetf(std::ios::skipws); //  Turn off white space skipping on the stream
      83             : 
      84           6 :   std::string s;
      85           3 :   std::copy(
      86           6 :     std::istream_iterator<char>(in),
      87           6 :     std::istream_iterator<char>(),
      88             :     std::back_inserter(s)
      89           3 :   );
      90             : 
      91           3 :   return s;
      92             : }
      93             : 
      94             : /// \brief Remove comments from a text (everything from '%' until end of line).
      95             : /// \param text A string
      96             : /// \return The removal result
      97         269 : std::string remove_comments(const std::string& text)
      98             : {
      99             :   // matches everything from '%' until end of line
     100         538 :   boost::xpressive::sregex src = boost::xpressive::sregex::compile("%[^\\n]*\\n");
     101             : 
     102         538 :   std::string dest("\n");
     103         538 :   return boost::xpressive::regex_replace(text, src, dest);
     104             : }
     105             : 
     106             : /// \brief Removes whitespace from a string.
     107             : /// \param text A string
     108             : /// \return The removal result
     109         302 : std::string remove_whitespace(const std::string& text)
     110             : {
     111         604 :   boost::xpressive::sregex src = boost::xpressive::sregex::compile("\\s");
     112         604 :   std::string dest("");
     113         604 :   return boost::xpressive::regex_replace(text, src, dest);
     114             : }
     115             : 
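     116             : /// \brief Regular expression replacement in a string.
     117             : /// \param src The regular expression to search for
     118             : /// \param dest The replacement for each match of <tt>src</tt>
     119             : /// \param text The string in which the replacement is carried out
     120             : /// \return The transformed string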
     121         365 : std::string regex_replace(const std::string& src, const std::string& dest, const std::string& text)
     122             : {
     123         365 :   return boost::xpressive::regex_replace(text, boost::xpressive::sregex::compile(src), dest);
     124             : }
     125             : 
     126             : /// \brief Split a string using a regular expression separator.
     127             : /// \param text The string to split
     128             : /// \param sep The regular expression that matches a separator
     129             : /// \return The trimmed, non-empty parts of <tt>text</tt> between the separators
     130         281 : std::vector<std::string> regex_split(const std::string& text, const std::string& sep)
     131             : {
     132         281 :   std::vector<std::string> result;
     133             :   // compile the separator pattern supplied by the caller
     134         562 :   boost::xpressive::sregex paragraph_split = boost::xpressive::sregex::compile(sep);
     135             :   // the -1 below directs the token iterator to display the parts of
     136             :   // the string that did NOT match the regular expression.
     137         562 :   boost::xpressive::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
     138         562 :   boost::xpressive::sregex_token_iterator end;
     139        1403 :   for (; cur != end; ++cur)
     140             :   {
     141        1122 :     std::string word = *cur;
     142         561 :     boost::trim(word);
     143         561 :     if (word.size() > 0)
     144             :     {
     145         509 :       result.push_back(word);
     146             :     }
     147             :   }
     148         562 :   return result;
     149             : }
     150             : 
     151             : /// \brief Apply word wrapping to a text that doesn't contain newlines.
     152             : /// \param line A string of text.
     153             : /// \param max_line_length The maximum line length.
     154             : /// \return The wrapped text.
     155             : static
     156           4 : std::vector<std::string> word_wrap_line(const std::string& line, unsigned int max_line_length)
     157             : {
     158           4 :   std::vector<std::string> result;
     159           8 :   std::string text = line;
     160             : 
     161             :   for (;;)
     162             :   {
     163           6 :     if (text.size() <= max_line_length)
     164             :     {
     165           4 :       result.push_back(boost::trim_right_copy(text));
     166           4 :       break;
     167             :     }
     168           2 :     std::string::size_type i = text.find_last_of(" \t", max_line_length);
     169           2 :     if (i == std::string::npos)
     170             :     {
     171           1 :       result.push_back(text.substr(0, max_line_length));
     172           1 :       text = text.substr(max_line_length);
     173             :     }
     174             :     else
     175             :     {
     176           1 :       result.push_back(text.substr(0, i));
     177           1 :       text = text.substr(i + 1);
     178             :     }
     179           2 :   }
     180           8 :   return result;
     181             : }
     182             : 
     183             : /// \brief Apply word wrapping to a text.
     184             : /// \param text A string of text.
     185             : /// \param max_line_length The maximum line length.
     186             : /// \return The wrapped text.
     187           1 : std::string word_wrap_text(const std::string& text, unsigned int max_line_length)
     188             : {
     189           2 :   std::vector<std::string> result;
     190             : 
     191             :   // split the lines and remove trailing white space
     192           2 :   std::vector<std::string> lines = split(text, "\n");
     193           5 :   for (auto & line : lines)
     194             :   {
     195           4 :     boost::trim_right(line);
     196             :   }
     197             : 
     198             :   // word wrap each of the lines
     199           5 :   for (auto & line : lines)
     200             :   {
     201           8 :     std::vector<std::string> v = word_wrap_line(line, max_line_length);
     202           4 :     result.insert(result.end(), v.begin(), v.end());
     203             :   }
     204             : 
     205           2 :   return string_join(result, "\n");
     206             : }
     207             : 
     208             : /// \brief Test if a string is a number.
     209             : /// \param s A string of text.
     210             : /// \return True if s is of the form "0 | -? [1-9][0-9]*", false otherwise
     211       28102 : bool is_numeric_string(const std::string& s)
     212             : {
     213             :   // The static below prevents the regular expression from being compiled
     214             :   // each time a string is matched, which would be far too time consuming.
     215       28102 :   static boost::xpressive::sregex re = boost::xpressive::sregex::compile("0|(-?[1-9][0-9]*)");
     216       28102 :   return boost::xpressive::regex_match(s, re);
     217             : }
     218             : 
     219        3092 : std::string trim_copy(const std::string& text)
     220             : {
     221        3092 :   return boost::trim_copy(text);
     222             : }
     223             : 
     224        3584 : void trim(std::string& text)
     225             : {
     226        3584 :   boost::trim(text);
     227        3584 : }
     228             : 
     229             : } // namespace utilities
     230             : 
     231         438 : } // namespace mcrl2

Generated by: LCOV version 1.13