mCRL2
Loading...
Searching...
No Matches
text_utility.cpp
Go to the documentation of this file.
1// Author(s): Wieger Wesselink
2// Copyright: see the accompanying file COPYING or copy at
3// https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4//
5// Distributed under the Boost Software License, Version 1.0.
6// (See accompanying file LICENSE_1_0.txt or copy at
7// http://www.boost.org/LICENSE_1_0.txt)
8//
11
#include <fstream>
#include <iterator>
#include <regex>
#include <utility>
#include <boost/algorithm/string.hpp> // for the functions trim, split, trim_copy, is_any_of, trim_right, trim_right_copy.
17
18namespace mcrl2
19{
20
21namespace utilities
22{
23
27std::vector<std::string> split_paragraphs(const std::string& text)
28{
29 std::vector<std::string> result;
30
31 // find multiple line endings
32 std::regex paragraph_split {"\\n\\s*\\n"};
33
34 // the -1 below directs the token iterator to display the parts of
35 // the string that did NOT match the regular expression.
36 std::regex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
37 std::regex_token_iterator<std::string::const_iterator> end;
38
39 for (; cur != end; ++cur)
40 {
41 std::string paragraph = *cur;
42 boost::trim(paragraph);
43 if (paragraph.size() > 0)
44 {
45 result.push_back(paragraph);
46 }
47 }
48 return result;
49}
50
55std::vector<std::string> split(const std::string& line, const std::string& separators)
56{
57 std::vector<std::string> result;
58 boost::algorithm::split(result, line, boost::algorithm::is_any_of(separators));
59 return result;
60}
61
66std::string read_text(const std::string& filename, bool warn)
67{
68 std::ifstream in(filename.c_str());
69 if (!in)
70 {
71 if (warn)
72 {
73 mCRL2log(log::warning) << "Could not open input file: " << filename << std::endl;
74 return "";
75 }
76 else
77 {
78 throw mcrl2::runtime_error("Could not open input file: " + filename);
79 }
80 }
81 in.unsetf(std::ios::skipws); // Turn of white space skipping on the stream
82
83 std::string s;
84 std::copy(
85 std::istream_iterator<char>(in),
86 std::istream_iterator<char>(),
87 std::back_inserter(s)
88 );
89
90 return s;
91}
92
/// \brief Remove comments from a text.
/// \details A comment starts at a '%' and extends to the end of the line. The line
///          ending itself is kept. A comment on the last line is removed as well
///          when that line is not terminated by a newline.
/// \param text The text to remove the comments from.
/// \return A copy of \a text without comments.
std::string remove_comments(const std::string& text)
{
  // Matches from '%' up to, but not including, the newline. Keeping the newline out
  // of the match means an unterminated final line is handled like any other line.
  // Compiled only once; constructing a std::regex on every call is expensive.
  static const std::regex comment{"%[^\\n]*"};

  return std::regex_replace(text, comment, "");
}
104
/// \brief Remove all white space from a text.
/// \param text The text to remove the white space from.
/// \return A copy of \a text in which every character matching the regular
///         expression class \\s has been deleted.
std::string remove_whitespace(const std::string& text)
{
  // Compiled only once; constructing a std::regex on every call is expensive.
  static const std::regex whitespace{"\\s"};

  return std::regex_replace(text, whitespace, "");
}
114
/// \brief Replace every match of a regular expression in a text.
/// \param src The regular expression to search for.
/// \param dest The replacement format that is substituted for each match.
/// \param text The text in which the replacement takes place.
/// \return A copy of \a text with all matches of \a src replaced by \a dest.
std::string regex_replace(const std::string& src, const std::string& dest, const std::string& text)
{
  const std::regex pattern(src);
  return std::regex_replace(text, pattern, dest);
}
124
129std::vector<std::string> regex_split(const std::string& text, const std::string& sep)
130{
131 std::vector<std::string> result;
132 // find multiple line endings
133 std::regex paragraph_split { sep };
134 // the -1 below directs the token iterator to display the parts of
135 // the string that did NOT match the regular expression.
136 std::sregex_token_iterator cur(text.begin(), text.end(), paragraph_split, -1);
137 std::sregex_token_iterator end;
138 for (; cur != end; ++cur)
139 {
140 std::string word = *cur;
141 boost::trim(word);
142 if (word.size() > 0)
143 {
144 result.push_back(word);
145 }
146 }
147 return result;
148}
149
154static
155std::vector<std::string> word_wrap_line(const std::string& line, unsigned int max_line_length)
156{
157 std::vector<std::string> result;
158 std::string text = line;
159
160 for (;;)
161 {
162 if (text.size() <= max_line_length)
163 {
164 result.push_back(boost::trim_right_copy(text));
165 break;
166 }
167 std::string::size_type i = text.find_last_of(" \t", max_line_length);
168 if (i == std::string::npos)
169 {
170 result.push_back(text.substr(0, max_line_length));
171 text = text.substr(max_line_length);
172 }
173 else
174 {
175 result.push_back(text.substr(0, i));
176 text = text.substr(i + 1);
177 }
178 }
179 return result;
180}
181
186std::string word_wrap_text(const std::string& text, unsigned int max_line_length)
187{
188 std::vector<std::string> result;
189
190 // split the lines and remove trailing white space
191 std::vector<std::string> lines = split(text, "\n");
192 for (auto & line : lines)
193 {
194 boost::trim_right(line);
195 }
196
197 // word wrap each of the lines
198 for (auto & line : lines)
199 {
200 std::vector<std::string> v = word_wrap_line(line, max_line_length);
201 result.insert(result.end(), v.begin(), v.end());
202 }
203
204 return string_join(result, "\n");
205}
206
/// \brief Test if a string is a number.
/// \details Accepted are "0" and an optional '-' followed by a digit from 1 to 9 and
///          any number of further digits. Leading zeros, a '+' sign, "-0" and
///          surrounding white space are all rejected.
/// \param s The string to test.
/// \return True if and only if the whole string has the form described above.
bool is_numeric_string(const std::string& s)
{
  // A direct scan of the characters; no regular expression is needed for this form.
  if (s == "0")
  {
    return true;
  }

  std::string::size_type pos = 0;
  if (pos < s.size() && s[pos] == '-')
  {
    ++pos;
  }

  // The first digit must be present and may not be zero.
  if (pos == s.size() || s[pos] < '1' || s[pos] > '9')
  {
    return false;
  }

  // Every remaining character must be a digit.
  for (++pos; pos < s.size(); ++pos)
  {
    if (s[pos] < '0' || s[pos] > '9')
    {
      return false;
    }
  }
  return true;
}
217
218std::string trim_copy(const std::string& text)
219{
220 return boost::trim_copy(text);
221}
222
223void trim(std::string& text)
224{
225 boost::trim(text);
226}
227
228} // namespace utilities
229
230} // namespace mcrl2
Standard exception class for reporting runtime errors.
Definition exception.h:27
Exception classes for use in libraries and tools.
#define mCRL2log(LEVEL)
mCRL2log(LEVEL) provides the stream used to log.
Definition logger.h:391
@ warning
Definition logger.h:34
static std::vector< std::string > word_wrap_line(const std::string &line, unsigned int max_line_length)
Apply word wrapping to a text that doesn't contain newlines.
std::string string_join(const Container &c, const std::string &separator)
Joins a sequence of strings. This is a replacement for boost::algorithm::join, since it gives stack o...
std::vector< std::string > split_paragraphs(const std::string &text)
Split a string into paragraphs.
void trim(std::string &text)
Remove all trailing and leading spaces from the input.
std::string regex_replace(const std::string &src, const std::string &dest, const std::string &text)
Regular expression replacement in a string.
std::string read_text(const std::string &filename, bool warn=false)
Read text from a file.
std::string word_wrap_text(const std::string &text, unsigned int max_line_length=78)
Apply word wrapping to a text.
bool is_numeric_string(const std::string &s)
Test if a string is a number.
std::vector< std::string > regex_split(const std::string &text, const std::string &sep)
Split a string using a regular expression separator.
std::string remove_comments(const std::string &text)
Remove comments from a text (everything from '%' until end of line).
std::string trim_copy(const std::string &text)
Remove all trailing and leading spaces from the input.
std::string remove_whitespace(const std::string &text)
Removes whitespace from a string.
std::vector< std::string > split(const std::string &line, const std::string &separators)
Split the text.
A class that takes a linear process specification and checks all tau-summands of that LPS for conflue...
Definition indexed_set.h:72