Line data Source code
1 : // Author(s): Ruud Koolen, Thomas Neele
2 : // Copyright: see the accompanying file COPYING or copy at
3 : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
4 : //
5 : // Distributed under the Boost Software License, Version 1.0.
6 : // (See accompanying file LICENSE_1_0.txt or copy at
7 : // http://www.boost.org/LICENSE_1_0.txt)
8 :
9 : #ifndef MCRL2_DATA_UNFOLD_PATTERN_MATCHING_H
10 : #define MCRL2_DATA_UNFOLD_PATTERN_MATCHING_H
11 :
12 : #include "mcrl2/data/join.h"
13 : #include "mcrl2/data/replace.h"
14 : #include "mcrl2/data/representative_generator.h"
15 : #include "mcrl2/data/substitutions/map_substitution.h"
16 : #include "mcrl2/data/substitutions/variable_substitution.h"
17 :
18 : namespace mcrl2::data
19 : {
20 :
21 : namespace detail
22 : {
23 : /**
24 : * \brief A rule describes a partially pattern-matched rewrite rule.
25 : * \details match_criteria is a set of data_expression pairs (A, B)
26 : * where A is a data_expression over variables in the left hand
27 : * side of a function definition, and B is a pattern consisting of
28 : * constructor applications over free variables.
29 : *
30 : * The rule match_criteria = { (A, B), (C, D) }, condition = E, rhs = R
31 : * describes the following rewrite proposition:
32 : *
33 : * "if the data expression A pattern-matches to pattern B, and the
34 : * data expression C pattern-matches to pattern D, and the condition
35 : * E is true after substituting the proper pattern-matching variables,
36 : * then the right hand side R applies (again with substitution of
37 : * pattern-matched variables."
38 : *
39 : * Pattern matching can then be performed by deconstructing the patterns
40 : * in the right hand sides of match_criteria, and rewriting rules accordingly.
41 : * As an example, the following rewrite rule:
42 : *
43 : * is_even(n) -> sign_of_list_sum(n |> l) = sign_of_list_sum(l)
44 : *
45 : * Can be represented as the following rule:
46 : *
47 : * match_criteria = { v1 -> n |> l }, condition = is_even(n), rhs = sign_of_list_sum(l)
48 : *
49 : * Which, after one step of pattern matching, gets simplified to the following rule:
50 : *
51 : * match_criteria = { head(v1) -> n, tail(v1) -> l }, condition = is_even(n), rhs = sign_of_list_sum(l)
52 : */
53 : struct rule
54 : {
55 : std::map<data_expression, data_expression> match_criteria;
56 : data_expression rhs;
57 : data_expression condition;
58 : variable_list variables;
59 :
60 0 : rule(const std::map<data_expression, data_expression>& mc,
61 : const data_expression& r,
62 : const data_expression& c,
63 : const variable_list& v)
64 0 : : match_criteria(mc)
65 0 : , rhs(r)
66 0 : , condition(c)
67 0 : , variables(v)
68 : {
69 0 : assert(condition.sort() == sort_bool::bool_());
70 0 : }
71 :
72 : };
73 :
74 : inline
75 : std::ostream& operator<<(std::ostream& out, const rule& r)
76 : {
77 : return out << core::detail::print_list(r.variables) << ". " << r.condition << " -> " << core::detail::print_map(r.match_criteria) << " = " << r.rhs;
78 : }
79 :
80 :
81 : /// @brief Get the top level function symbol f if expr is of the shape f or f(t1,...,tn)
82 : /// @param expr The expression from which to extract the top function symbol
83 : /// @return The top function symbol, or function_symbol() if it has none
84 : inline
85 2 : function_symbol get_top_fs(const data_expression& expr)
86 : {
87 2 : if (is_function_symbol(expr))
88 : {
89 0 : return atermpp::down_cast<function_symbol>(expr);
90 : }
91 2 : if (is_application(expr))
92 : {
93 2 : const application& expr_appl = atermpp::down_cast<application>(expr);
94 2 : if (is_function_symbol(expr_appl.head()))
95 : {
96 2 : return atermpp::down_cast<function_symbol>(expr_appl.head());
97 : }
98 : }
99 0 : return function_symbol();
100 : }
101 :
102 : /**
103 : * \brief For a list of rules with equal left hand sides of match_criteria and only variables in
104 : * the right hand sides of match_criteria, construct a right hand side based on the
105 : * conditions and right hand sides of the rules.
106 : */
107 : inline
108 0 : data_expression construct_condition_rhs(const std::vector<rule>& rules, const data_expression& representative)
109 : {
110 : // data_expression_vector negated_conditions;
111 : // for (const rule& r: rules)
112 : // {
113 : // negated_conditions.push_back(sort_bool::not_(r.condition));
114 : // }
115 : // data_expression else_clause = join_and(negated_conditions.begin(), negated_conditions.end());
116 : // TODO: Check whether else_clause is equivalent to false. Can we use the enumerator for this?
117 :
118 0 : if (rules.size() == 0)
119 : {
120 0 : return representative;
121 : }
122 :
123 0 : data_expression result;
124 0 : for (const rule& r: rules)
125 : {
126 0 : std::map<variable, data_expression> substitution_map;
127 0 : for (const auto& [var_expr, pattern]: r.match_criteria)
128 : {
129 0 : assert(is_variable(pattern));
130 0 : substitution_map[atermpp::down_cast<variable>(pattern)] = var_expr;
131 : }
132 0 : map_substitution<std::map<variable, data_expression> > substitution(substitution_map);
133 :
134 0 : data_expression condition = replace_variables_capture_avoiding(r.condition, substitution);
135 0 : data_expression rhs = replace_variables_capture_avoiding(r.rhs, substitution);
136 :
137 0 : if (result == data_expression())
138 : {
139 0 : result = rhs;
140 : }
141 : else
142 : {
143 0 : result = lazy::if_(condition, rhs, result);
144 : }
145 0 : }
146 :
147 0 : return result;
148 0 : }
149 :
150 : /**
151 : * \brief For a list of rules with equal left hand sides of match_criteria, construct a right hand side.
152 : */
153 : template <typename StructInfo>
154 0 : data_expression construct_rhs(
155 : StructInfo& ssf,
156 : representative_generator& gen,
157 : const std::vector<rule>& rules,
158 : const sort_expression& sort
159 : )
160 : {
161 0 : if (rules.empty())
162 : {
163 0 : return construct_condition_rhs(rules, gen(sort));
164 : }
165 :
166 : /*
167 : * For each matching rule LHS, check whether pattern matching needs to happen on that LHS.
168 : *
169 : * We prefer to match on an expression such that *all* rules perform matching on that expression.
170 : * For example, for a ruleset representing the following rewrite rules:
171 : *
172 : * optionally_remove_first_element(false, l) = l
173 : * optionally_remove_first_element(true, []) = []
174 : * optionally_remove_first_element(true, x |> l) = l
175 : *
176 : * We prefer to pattern match on the first parameter, not the second. Pattern matching on the
177 : * second parameter will (recursively) happen only on the true-branch, not the false-branch.
178 : * Pattern matching on the second parameter is still possible if there is no other option, but
179 : * it requires splitting the first rewrite rule into different cases for different
180 : * constructors -- suboptimal.
181 : */
182 0 : data_expression matching_target;
183 : enum class MatchType
184 : {
185 : NONE, INCOMPLETE, PARTIAL, FULL
186 : };
187 0 : MatchType matching_type = MatchType::NONE;
188 : // iterate over matches, this is the same for all rules, so just use rules[0]
189 : // More formally, we have rules[i].match_criteria->first is equivalent for all i.
190 0 : for (const auto& [var_expr, _]: rules[0].match_criteria)
191 : {
192 0 : bool variable_seen = false;
193 0 : std::set<function_symbol> constructors_seen;
194 0 : for (const rule& r: rules)
195 : {
196 0 : const data_expression& pattern = r.match_criteria.at(var_expr);
197 0 : if (is_variable(pattern))
198 : {
199 0 : variable_seen = true;
200 : }
201 : else
202 : {
203 : // either function symbol or application of function symbol
204 0 : function_symbol fs = get_top_fs(pattern);
205 : // this also means fs != function_symbol()
206 0 : assert(ssf.is_constructor(fs));
207 0 : constructors_seen.insert(fs);
208 0 : }
209 : }
210 :
211 : MatchType new_matching_type;
212 0 : if (constructors_seen.empty())
213 : {
214 : // No pattern matching is possible on this variable.
215 0 : new_matching_type = MatchType::NONE;
216 : }
217 0 : else if (variable_seen)
218 : {
219 : // There are both rules that match on this variable and rules that do not.
220 : // That's better than an incomplete matching but worse than a full matching.
221 0 : new_matching_type = MatchType::PARTIAL;
222 : }
223 0 : else if (constructors_seen.size() != ssf.get_constructors(var_expr.sort()).size())
224 : {
225 : // There are constructors for which there are no rules.
226 : // Thus, we have an incomplete function definition, that needs to be completed artificially.
227 : // A partial matching would be a better choice.
228 0 : new_matching_type = MatchType::INCOMPLETE;
229 : }
230 : else
231 : {
232 : // There is a matching rule for each constructor, and no rule with a plain variable.
233 : // This variable is a perfect pattern matching candidate.
234 0 : new_matching_type = MatchType::FULL;
235 : }
236 :
237 0 : if (new_matching_type > matching_type)
238 : {
239 0 : matching_target = var_expr;
240 0 : matching_type = new_matching_type;
241 : }
242 0 : if (matching_type == MatchType::FULL)
243 : {
244 0 : break;
245 : }
246 : }
247 :
248 0 : if (matching_type == MatchType::NONE)
249 : {
250 : // No constructor-based matching needs to happen.
251 : // All that needs to happen is incorporating the rule conditions.
252 0 : return construct_condition_rhs(rules, gen(sort));
253 : }
254 :
255 : /*
256 : * For each constructor, find the set of rules that apply to it, rewritten to match the constructor.
257 : */
258 : // auto type below depends on the type of ssf
259 0 : const auto& match_constructors = ssf.get_constructors(matching_target.sort());
260 0 : std::map<function_symbol, std::vector<rule> > constructor_rules;
261 0 : for (const rule& r: rules)
262 : {
263 0 : const data_expression& pattern = r.match_criteria.at(matching_target);
264 0 : if (is_function_symbol(pattern) || is_application(pattern))
265 : {
266 : /*
267 : * For a rule with a constructor pattern, strip the constructor and
268 : * introduce patterns for the constructor parameters.
269 : */
270 0 : function_symbol constructor = get_top_fs(pattern);
271 0 : assert(constructor != function_symbol());
272 0 : assert(utilities::detail::contains(match_constructors, constructor));
273 :
274 0 : data_expression_vector parameters;
275 0 : if (is_application(pattern))
276 : {
277 0 : const application& pattern_appl = atermpp::down_cast<application>(pattern);
278 0 : parameters.insert(parameters.end(), pattern_appl.begin(), pattern_appl.end());
279 : }
280 :
281 0 : rule rule = r;
282 0 : rule.match_criteria.erase(matching_target);
283 : // To prevent creating expressions of the shape head(l) |> tail(l), we perform a substitution here
284 : // This is only safe if there are no binders in the right-hand side
285 0 : std::set<data_expression> subexpr = find_data_expressions(rule.rhs);
286 0 : if (!parameters.empty() && std::none_of(subexpr.begin(), subexpr.end(), [](const data_expression& e) { return is_abstraction(e); }))
287 : {
288 0 : auto sigma = [&](const data_expression& x) { return x == pattern ? matching_target : x; };
289 0 : rule.rhs = replace_data_expressions(rule.rhs, sigma, true);
290 0 : rule.condition = replace_data_expressions(rule.condition, sigma, true);
291 : }
292 0 : for (std::size_t j = 0; j < parameters.size(); j++)
293 : {
294 0 : function_symbol projection_function = ssf.get_projection_funcs(constructor)[j];
295 0 : data_expression lhs_expression = application(projection_function, matching_target);
296 0 : rule.match_criteria[lhs_expression] = parameters[j];
297 : }
298 0 : constructor_rules[constructor].push_back(rule);
299 0 : }
300 : else
301 : {
302 : /*
303 : * For a rule with a variable pattern that nonetheless needs to pattern match
304 : * against the possible constructors for that variable, copy the rule for each
305 : * possible constructor. Substitute the original un-matched term for the pattern
306 : * variable that disappears.
307 : */
308 0 : assert(is_variable(pattern));
309 0 : variable_substitution sigma(atermpp::down_cast<variable>(pattern), matching_target);
310 0 : data_expression condition = replace_variables_capture_avoiding(r.condition, sigma);
311 0 : data_expression rhs = replace_variables_capture_avoiding(r.rhs, sigma);
312 :
313 0 : for (const function_symbol& f: match_constructors)
314 : {
315 0 : rule rule(r.match_criteria, rhs, condition, r.variables);
316 0 : rule.match_criteria.erase(matching_target);
317 :
318 0 : set_identifier_generator generator;
319 0 : for (const variable& v: r.variables)
320 : {
321 0 : generator.add_identifier(v.name());
322 : }
323 :
324 0 : if (is_function_sort(f.sort()))
325 : {
326 0 : function_sort sort(f.sort());
327 0 : std::size_t index = 0;
328 0 : for (const sort_expression& s: sort.domain())
329 : {
330 0 : variable variable(generator("v"), s);
331 0 : function_symbol projection_function = ssf.get_projection_funcs(f)[index];
332 0 : data_expression lhs_expression = application(projection_function, matching_target);
333 0 : rule.match_criteria[lhs_expression] = variable;
334 0 : index++;
335 : }
336 0 : }
337 :
338 0 : constructor_rules[f].push_back(rule);
339 : }
340 0 : }
341 : }
342 :
343 : /*
344 : * Construct an rhs of the form if(is_cons1, rhs_cons1, if(is_cons2, rhs_cons2, ...)) or equivalent
345 : * The exact form depends on the implementation in ssf
346 : */
347 0 : data_expression_vector rhs;
348 0 : for (const auto& f: match_constructors)
349 : {
350 0 : rhs.push_back(construct_rhs(ssf, gen, constructor_rules[f], sort));
351 : }
352 0 : return ssf.create_cases(matching_target, rhs);
353 0 : }
354 :
355 : } // namespace detail
356 :
357 : /**
358 : * \brief Check whether the given rewrite rule can be classified as a pattern matching rule.
359 : * \details That is, its arguments are constructed only out of unique variable occurrences and
360 : * constructor function symbols and constructor function applications.
361 : */
362 : template <typename StructInfo>
363 0 : bool is_pattern_matching_rule(StructInfo& ssf, const data_equation& rewrite_rule)
364 : {
365 : // For this rewrite rule to be usable in pattern matching, its lhs must only contain
366 : // constructor functions and variables that occur at most once.
367 :
368 0 : std::set<data_expression> subexpressions = find_data_expressions(rewrite_rule.lhs());
369 0 : subexpressions.erase(rewrite_rule.lhs());
370 0 : if (is_application(rewrite_rule.lhs()))
371 : {
372 0 : subexpressions.erase(application(rewrite_rule.lhs()).head());
373 : }
374 :
375 0 : bool all_pattern = std::all_of(subexpressions.begin(), subexpressions.end(), [&ssf](const data_expression& expr) {
376 : return
377 0 : is_variable(expr) ||
378 0 : (is_function_symbol(expr) && ssf.is_constructor(atermpp::down_cast<function_symbol>(expr))) ||
379 0 : (is_application(expr) && is_function_symbol(atermpp::down_cast<application>(expr).head()) &&
380 0 : ssf.is_constructor(function_symbol(atermpp::down_cast<application>(expr).head())));
381 : });
382 0 : if (!all_pattern)
383 : {
384 0 : return false;
385 : }
386 0 : if (std::all_of(subexpressions.begin(), subexpressions.end(), [](const data_expression& x){ return is_variable(x); }))
387 : {
388 : // Each argument is a variable, this is just an ordinarily defined function
389 0 : return false;
390 : }
391 :
392 : // Check whether each variable occurs at most once
393 0 : std::set<variable> variable_set;
394 0 : std::multiset<variable> variable_multiset;
395 0 : find_all_variables(rewrite_rule.lhs(), std::inserter(variable_set, variable_set.end()));
396 0 : find_all_variables(rewrite_rule.lhs(), std::inserter(variable_multiset, variable_multiset.end()));
397 0 : return variable_set.size() == variable_multiset.size();
398 0 : }
399 :
400 : /**
401 : * \brief This converts a collection of rewrite rules for a give function symbol into a
402 : * one-rule specification of the function, using recogniser and projection functions
403 : * to implement pattern matching.
404 : * \details For example, the collection of rewrite rules below:
405 : *
406 : * sign_of_list_sum([]) = false;
407 : * is_even(n) -> sign_of_list_sum(n |> l) = sign_of_list_sum(l);
408 : * !is_even(n) -> sign_of_list_sum(n |> l) = !sign_of_list_sum(l);
409 : *
410 : * gets translated into the following function specification:
411 : *
412 : * sign_of_list_sum(l) = if(is_emptylist(l), false,
413 : * if(is_even(head(l)), sign_of_list_sum(tail(l)),
414 : * !sign_of_list_sum(tail(l))))
415 : *
416 : * Two complications can arise. The rewrite rule set may contain rules that do not
417 : * pattern-match on the function parameters, such as 'not(not(b)) = b`; rules like
418 : * these are discarded.
419 : * More problematically, the set of rewrite rules may not be complete, or may not
420 : * easily be proven complete; in the example above, if the rewriter cannot rewrite
421 : * 'is_even(n) || !is_even(n)' to 'true', the translation cannot tell that the
422 : * rewrite rule set is complete.
423 : * In cases where a (non-constructor )function invocation can genuinely not be
424 : * rewritten any further, the MCRL2 semantics are unspecified (TODO check this);
425 : * the translation resolves this situation by returning an arbitrary value in this
426 : * case. Thus, in practice, the function definion above might well be translated
427 : * into the following:
428 : *
429 : * sign_of_list_sum(l) = if(is_emptylist(l), false,
430 : * if(is_even(head(l)), sign_of_list_sum(tail(l)),
431 : * if(!is_even(head(l)), !sign_of_list_sum(tail(l)),
432 : * false)))
433 : *
434 : * Where 'false' is a representative of sort_bool.
435 : */
436 : template <typename StructInfo>
437 0 : data_equation unfold_pattern_matching(
438 : const function_symbol& mapping,
439 : const data_equation_vector& rewrite_rules,
440 : StructInfo& ssf,
441 : representative_generator& gen,
442 : set_identifier_generator& id_gen
443 : )
444 : {
445 0 : sort_expression codomain = mapping.sort().target_sort();
446 0 : variable_vector temp_par;
447 0 : if (is_function_sort(mapping.sort()))
448 : {
449 0 : const function_sort& sort = atermpp::down_cast<function_sort>(mapping.sort());
450 0 : for (sort_expression s: sort.domain())
451 : {
452 0 : temp_par.push_back(variable(id_gen("x"), s));
453 : }
454 : }
455 0 : variable_list new_parameters(temp_par.begin(), temp_par.end());
456 :
457 : // Create a rule for each data_equation
458 0 : std::vector<detail::rule> rules;
459 0 : for (const data_equation& eq: rewrite_rules)
460 : {
461 0 : assert(is_pattern_matching_rule(ssf, eq));
462 :
463 0 : std::map<data_expression, data_expression> match_criteria;
464 0 : if (is_application(eq.lhs()))
465 : {
466 0 : const application& lhs_appl = atermpp::down_cast<application>(eq.lhs());
467 :
468 0 : assert(lhs_appl.head() == mapping);
469 0 : assert(new_parameters.size() == lhs_appl.size());
470 :
471 : // Simultaneously iterate the parameters defined by the mapping and this
472 : // left-hand side to determine how matching occurs
473 0 : auto mappar_it = new_parameters.begin();
474 0 : auto lhspar_it = lhs_appl.begin();
475 0 : while(mappar_it != new_parameters.end())
476 : {
477 0 : match_criteria[*mappar_it] = *lhspar_it;
478 0 : ++mappar_it, ++lhspar_it;
479 : }
480 :
481 0 : assert(lhspar_it == lhs_appl.end());
482 : }
483 :
484 0 : detail::rule rule(match_criteria, eq.rhs(), eq.condition(), eq.variables());
485 0 : rules.push_back(rule);
486 : }
487 0 : assert(rules.size() != 0);
488 :
489 0 : data_expression new_lhs(application(mapping, new_parameters));
490 0 : data_expression new_rhs(construct_rhs(ssf, gen, rules, codomain));
491 0 : return data_equation(new_parameters, new_lhs, new_rhs);
492 0 : }
493 :
494 : } // namespace mcrl2::data
495 :
496 : #endif
|