LCOV - code coverage report
Current view: top level - utilities/source - text_utility.cpp (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 73 91 80.2 %
Date: 2024-04-13 03:38:08 Functions: 11 12 91.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Wieger Wesselink
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : //
       9             : /// \file text_utility.cpp
      10             : /// \brief String manipulation functions.
      11             : 
#include <fstream>
#include <iterator>
#include <regex>
#include <boost/algorithm/string.hpp>  // for the functions trim, split, trim_copy, is_any_of, trim_right, trim_right_copy.
#include "mcrl2/utilities/exception.h"
#include "mcrl2/utilities/logger.h"
      17             : 
      18             : namespace mcrl2
      19             : {
      20             : 
      21             : namespace utilities
      22             : {
      23             : 
      24             : /// \brief Split a string into paragraphs.
      25             : /// \param text A string
      26             : /// \return The paragraphs of <tt>text</tt>
      27           0 : std::vector<std::string> split_paragraphs(const std::string& text)
      28             : {
      29           0 :   std::vector<std::string> result;
      30             : 
      31             :   // find multiple line endings
      32           0 :   std::regex paragraph_split {"\\n\\s*\\n"};
      33             : 
      34             :   // the -1 below directs the token iterator to display the parts of
      35             :   // the string that did NOT match the regular expression.
      36           0 :   std::regex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
      37           0 :   std::regex_token_iterator<std::string::const_iterator> end;
      38             : 
      39           0 :   for (; cur != end; ++cur)
      40             :   {
      41           0 :     std::string paragraph = *cur;
      42           0 :     boost::trim(paragraph);
      43           0 :     if (paragraph.size() > 0)
      44             :     {
      45           0 :       result.push_back(paragraph);
      46             :     }
      47           0 :   }
      48           0 :   return result;
      49           0 : }
      50             : 
      51             : /// \brief Split the text.
      52             : /// \param line A string
      53             : /// \param separators A string
      54             : /// \return The splitted text
      55        5804 : std::vector<std::string> split(const std::string& line, const std::string& separators)
      56             : {
      57        5804 :   std::vector<std::string> result;
      58        5804 :   boost::algorithm::split(result, line, boost::algorithm::is_any_of(separators));
      59        5804 :   return result;
      60           0 : }
      61             : 
      62             : /// \brief Read text from a file.
      63             : /// \param filename A string
      64             : /// \param warn If true, a warning is printed to standard error if the file is not found
      65             : /// \return The contents of the file
      66           3 : std::string read_text(const std::string& filename, bool warn)
      67             : {
      68           3 :   std::ifstream in(filename.c_str());
      69           3 :   if (!in)
      70             :   {
      71           0 :     if (warn)
      72             :     {
      73           0 :       mCRL2log(log::warning) << "Could not open input file: " << filename << std::endl;
      74           0 :       return "";
      75             :     }
      76             :     else
      77             :     {
      78           0 :       throw mcrl2::runtime_error("Could not open input file: " + filename);
      79             :     }
      80             :   }
      81           3 :   in.unsetf(std::ios::skipws); //  Turn of white space skipping on the stream
      82             : 
      83           3 :   std::string s;
      84           6 :   std::copy(
      85           3 :     std::istream_iterator<char>(in),
      86           3 :     std::istream_iterator<char>(),
      87             :     std::back_inserter(s)
      88             :   );
      89             : 
      90           3 :   return s;
      91           3 : }
      92             : 
      93             : /// \brief Remove comments from a text (everything from '%' until end of line).
      94             : /// \param text A string
      95             : /// \return The removal result
      96         288 : std::string remove_comments(const std::string& text)
      97             : {
      98             :   // matches everything from '%' until end of line
      99         288 :   std::regex src {"%[^\\n]*\\n"};
     100             : 
     101         288 :   std::string dest("\n");
     102         576 :   return std::regex_replace(text, src, dest);
     103         288 : }
     104             : 
     105             : /// \brief Removes whitespace from a string.
     106             : /// \param text A string
     107             : /// \return The removal result
     108         329 : std::string remove_whitespace(const std::string& text)
     109             : {
     110         329 :   std::regex src {"\\s"};
     111         329 :   std::string dest("");
     112         658 :   return std::regex_replace(text, src, dest);
     113         329 : }
     114             : 
     115             : /// \brief Regular expression replacement in a string.
     116             : /// \param src A string
     117             : /// \param dest A string
     118             : /// \param text A string
     119             : /// \return The transformed string
     120         386 : std::string regex_replace(const std::string& src, const std::string& dest, const std::string& text)
     121             : {
     122         772 :   return std::regex_replace(text, std::regex(src), dest);
     123             : }
     124             : 
     125             : /// \brief Split a string using a regular expression separator.
     126             : /// \param text A string
     127             : /// \param sep A string
     128             : /// \return The splitted string
     129         281 : std::vector<std::string> regex_split(const std::string& text, const std::string& sep)
     130             : {
     131         281 :   std::vector<std::string> result;
     132             :   // find multiple line endings
     133         281 :   std::regex paragraph_split { sep };
     134             :   // the -1 below directs the token iterator to display the parts of
     135             :   // the string that did NOT match the regular expression.
     136         281 :   std::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
     137         281 :   std::sregex_token_iterator end;
     138         848 :   for (; cur != end; ++cur)
     139             :   {
     140         567 :     std::string word = *cur;
     141         567 :     boost::trim(word);
     142         567 :     if (word.size() > 0)
     143             :     {
     144         511 :       result.push_back(word);
     145             :     }
     146         567 :   }
     147         562 :   return result;
     148         281 : }
     149             : 
     150             : /// \brief Apply word wrapping to a text that doesn't contain newlines.
     151             : /// \param line A string of text.
     152             : /// \param max_line_length The maximum line length.
     153             : /// \return The wrapped text.
     154             : static
     155           4 : std::vector<std::string> word_wrap_line(const std::string& line, unsigned int max_line_length)
     156             : {
     157           4 :   std::vector<std::string> result;
     158           4 :   std::string text = line;
     159             : 
     160             :   for (;;)
     161             :   {
     162           6 :     if (text.size() <= max_line_length)
     163             :     {
     164           4 :       result.push_back(boost::trim_right_copy(text));
     165           4 :       break;
     166             :     }
     167           2 :     std::string::size_type i = text.find_last_of(" \t", max_line_length);
     168           2 :     if (i == std::string::npos)
     169             :     {
     170           1 :       result.push_back(text.substr(0, max_line_length));
     171           1 :       text = text.substr(max_line_length);
     172             :     }
     173             :     else
     174             :     {
     175           1 :       result.push_back(text.substr(0, i));
     176           1 :       text = text.substr(i + 1);
     177             :     }
     178           2 :   }
     179           8 :   return result;
     180           4 : }
     181             : 
     182             : /// \brief Apply word wrapping to a text.
     183             : /// \param text A string of text.
     184             : /// \param max_line_length The maximum line length.
     185             : /// \return The wrapped text.
     186           1 : std::string word_wrap_text(const std::string& text, unsigned int max_line_length)
     187             : {
     188           1 :   std::vector<std::string> result;
     189             : 
     190             :   // split the lines and remove trailing white space
     191           2 :   std::vector<std::string> lines = split(text, "\n");
     192           5 :   for (auto & line : lines)
     193             :   {
     194           4 :     boost::trim_right(line);
     195             :   }
     196             : 
     197             :   // word wrap each of the lines
     198           5 :   for (auto & line : lines)
     199             :   {
     200           4 :     std::vector<std::string> v = word_wrap_line(line, max_line_length);
     201           4 :     result.insert(result.end(), v.begin(), v.end());
     202           4 :   }
     203             : 
     204           2 :   return string_join(result, "\n");
     205           1 : }
     206             : 
     207             : /// \brief Test if a string is a number.
     208             : /// \param s A string of text.
     209             : /// \return True if s is of the form "0 | -? [1-9][0-9]*", false otherwise
     210       29582 : bool is_numeric_string(const std::string& s)
     211             : {
     212             :   // The static below prevents the regular expression recognizer to be compiled
     213             :   // each time a string is matched, which is far too time consuming.
     214       29582 :   static std::regex re {"0|(-?[1-9][0-9]*)"};
     215       29582 :   return std::regex_match(s, re);
     216             : }
     217             : 
     218        1934 : std::string trim_copy(const std::string& text)
     219             : {
     220        1934 :   return boost::trim_copy(text);
     221             : }
     222             : 
     223        2164 : void trim(std::string& text)
     224             : {
     225        2164 :   boost::trim(text);
     226        2164 : }
     227             : 
     228             : } // namespace utilities
     229             : 
     230             : } // namespace mcrl2

Generated by: LCOV version 1.14