mCRL2
Loading...
Searching...
No Matches
text_utility.cpp
Go to the documentation of this file.
1// Author(s): Wieger Wesselink
2// Copyright: see the accompanying file COPYING or copy at
3// https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4//
5// Distributed under the Boost Software License, Version 1.0.
6// (See accompanying file LICENSE_1_0.txt or copy at
7// http://www.boost.org/LICENSE_1_0.txt)
8//
/// \file text_utility.cpp
10/// \brief String manipulation functions.
11
#include <fstream>
#include <iterator>
#include <regex>
#include <string>
#include <utility>
#include <vector>
#include <boost/algorithm/string.hpp> // for the functions trim, split, trim_copy, is_any_of, trim_right, trim_right_copy.
#include "mcrl2/utilities/exception.h"
#include "mcrl2/utilities/logger.h"
17
18namespace mcrl2
19{
20
21namespace utilities
22{
23
24/// \brief Split a string into paragraphs.
25/// \param text A string
26/// \return The paragraphs of <tt>text</tt>
27std::vector<std::string> split_paragraphs(const std::string& text)
28{
29 std::vector<std::string> result;
30
31 // find multiple line endings
32 std::regex paragraph_split {"\\n\\s*\\n"};
33
34 // the -1 below directs the token iterator to display the parts of
35 // the string that did NOT match the regular expression.
36 std::regex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
37 std::regex_token_iterator<std::string::const_iterator> end;
38
39 for (; cur != end; ++cur)
40 {
41 std::string paragraph = *cur;
42 boost::trim(paragraph);
43 if (paragraph.size() > 0)
44 {
45 result.push_back(paragraph);
46 }
47 }
48 return result;
49}
50
51/// \brief Split the text.
52/// \param line A string
53/// \param separators A string
54/// \return The splitted text
55std::vector<std::string> split(const std::string& line, const std::string& separators)
56{
57 std::vector<std::string> result;
58 boost::algorithm::split(result, line, boost::algorithm::is_any_of(separators));
59 return result;
60}
61
62/// \brief Read text from a file.
63/// \param filename A string
64/// \param warn If true, a warning is printed to standard error if the file is not found
65/// \return The contents of the file
66std::string read_text(const std::string& filename, bool warn)
67{
68 std::ifstream in(filename.c_str());
69 if (!in)
70 {
71 if (warn)
72 {
73 mCRL2log(log::warning) << "Could not open input file: " << filename << std::endl;
74 return "";
75 }
76 else
77 {
78 throw mcrl2::runtime_error("Could not open input file: " + filename);
79 }
80 }
81 in.unsetf(std::ios::skipws); // Turn of white space skipping on the stream
82
83 std::string s;
84 std::copy(
85 std::istream_iterator<char>(in),
86 std::istream_iterator<char>(),
87 std::back_inserter(s)
88 );
89
90 return s;
91}
92
/// \brief Remove comments from a text (everything from '%' until end of line).
/// \details The line ending itself is kept. A comment on the last line is
/// removed as well, even if that line is not terminated by a newline.
/// \param text A string
/// \return The text with all comments removed
std::string remove_comments(const std::string& text)
{
  // Matches from '%' up to, but not including, the end of the line. Leaving the
  // newline out of the pattern means a comment that runs to the end of the text
  // is matched too. Compiled once, because building a std::regex is expensive.
  static const std::regex comment("%[^\\n]*");

  return std::regex_replace(text, comment, "");
}
104
/// \brief Removes whitespace from a string.
/// \param text A string
/// \return A copy of <tt>text</tt> in which every character matching the
///         regular expression class \\s has been deleted
std::string remove_whitespace(const std::string& text)
{
  // Compiled once and reused; building a std::regex on every call is expensive.
  static const std::regex whitespace("\\s");

  return std::regex_replace(text, whitespace, "");
}
114
/// \brief Replace all matches of a regular expression in a string.
/// \param src The regular expression to search for
/// \param dest The replacement (format) string
/// \param text The string in which the replacement takes place
/// \return A copy of <tt>text</tt> in which the matches of <tt>src</tt> are replaced using <tt>dest</tt>
std::string regex_replace(const std::string& src, const std::string& dest, const std::string& text)
{
  const std::regex pattern(src);
  return std::regex_replace(text, pattern, dest);
}
124
125/// \brief Split a string using a regular expression separator.
126/// \param text A string
127/// \param sep A string
128/// \return The splitted string
129std::vector<std::string> regex_split(const std::string& text, const std::string& sep)
130{
131 std::vector<std::string> result;
132 // find multiple line endings
133 std::regex paragraph_split { sep };
134 // the -1 below directs the token iterator to display the parts of
135 // the string that did NOT match the regular expression.
136 std::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
137 std::sregex_token_iterator end;
138 for (; cur != end; ++cur)
139 {
140 std::string word = *cur;
141 boost::trim(word);
142 if (word.size() > 0)
143 {
144 result.push_back(word);
145 }
146 }
147 return result;
148}
149
150/// \brief Apply word wrapping to a text that doesn't contain newlines.
151/// \param line A string of text.
152/// \param max_line_length The maximum line length.
153/// \return The wrapped text.
154static
156{
157 std::vector<std::string> result;
158 std::string text = line;
159
160 for (;;)
161 {
162 if (text.size() <= max_line_length)
163 {
164 result.push_back(boost::trim_right_copy(text));
165 break;
166 }
167 std::string::size_type i = text.find_last_of(" \t", max_line_length);
168 if (i == std::string::npos)
169 {
170 result.push_back(text.substr(0, max_line_length));
171 text = text.substr(max_line_length);
172 }
173 else
174 {
175 result.push_back(text.substr(0, i));
176 text = text.substr(i + 1);
177 }
178 }
179 return result;
180}
181
182/// \brief Apply word wrapping to a text.
183/// \param text A string of text.
184/// \param max_line_length The maximum line length.
185/// \return The wrapped text.
186std::string word_wrap_text(const std::string& text, unsigned int max_line_length)
187{
188 std::vector<std::string> result;
189
190 // split the lines and remove trailing white space
191 std::vector<std::string> lines = split(text, "\n");
192 for (auto & line : lines)
193 {
194 boost::trim_right(line);
195 }
196
197 // word wrap each of the lines
198 for (auto & line : lines)
199 {
200 std::vector<std::string> v = word_wrap_line(line, max_line_length);
201 result.insert(result.end(), v.begin(), v.end());
202 }
203
204 return string_join(result, "\n");
205}
206
/// \brief Test if a string is a number.
/// \param s A string of text.
/// \return True if the whole of s has the form "0 | -? [1-9][0-9]*", false otherwise
bool is_numeric_string(const std::string& s)
{
  // Keep the compiled expression in a static: compiling the regular expression
  // again for every string that is tested is far too time consuming.
  static const std::regex number_pattern("0|(-?[1-9][0-9]*)");

  return std::regex_match(s.begin(), s.end(), number_pattern);
}
217
218std::string trim_copy(const std::string& text)
219{
220 return boost::trim_copy(text);
221}
222
223void trim(std::string& text)
224{
225 boost::trim(text);
226}
227
228} // namespace utilities
229
230} // namespace mcrl2
logger(const log_level_t l)
Default constructor.
Definition logger.h:164
Standard exception class for reporting runtime errors.
Definition exception.h:27
#define mCRL2log(LEVEL)
mCRL2log(LEVEL) provides the stream used to log.
Definition logger.h:391
@ warning
Definition logger.h:34
static std::vector< std::string > word_wrap_line(const std::string &line, unsigned int max_line_length)
Apply word wrapping to a text that doesn't contain newlines.
A class that takes a linear process specification and checks all tau-summands of that LPS for conflue...
std::size_t operator()(const atermpp::detail::reference_aterm< T > &t) const