LCOV - code coverage report
Current view: top level - data/include/mcrl2/data - unfold_pattern_matching.h (source / functions) Hit Total Coverage
Test: mcrl2_coverage.info.cleaned Lines: 6 164 3.7 %
Date: 2024-05-04 03:44:52 Functions: 1 17 5.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Author(s): Ruud Koolen, Thomas Neele
       2             : // Copyright: see the accompanying file COPYING or copy at
       3             : // https://github.com/mCRL2org/mCRL2/blob/master/COPYING
       4             : //
       5             : // Distributed under the Boost Software License, Version 1.0.
       6             : // (See accompanying file LICENSE_1_0.txt or copy at
       7             : // http://www.boost.org/LICENSE_1_0.txt)
       8             : 
       9             : #ifndef MCRL2_DATA_UNFOLD_PATTERN_MATCHING_H
      10             : #define MCRL2_DATA_UNFOLD_PATTERN_MATCHING_H
      11             : 
      12             : #include "mcrl2/data/join.h"
      13             : #include "mcrl2/data/replace.h"
      14             : #include "mcrl2/data/representative_generator.h"
      15             : #include "mcrl2/data/substitutions/map_substitution.h"
      16             : #include "mcrl2/data/substitutions/variable_substitution.h"
      17             : 
      18             : namespace mcrl2::data
      19             : {
      20             : 
      21             : namespace detail
      22             : {
      23             : /**
      24             :  * \brief A rule describes a partially pattern-matched rewrite rule.
      25             :  * \details match_criteria is a set of data_expression pairs (A, B)
      26             :  * where A is a data_expression over variables in the left hand
      27             :  * side of a function definition, and B is a pattern consisting of
      28             :  * constructor applications over free variables.
      29             :  *
      30             :  * The rule match_criteria = { (A, B), (C, D) }, condition = E, rhs = R
      31             :  * describes the following rewrite proposition:
      32             :  *
      33             :  *  "if the data expression A pattern-matches to pattern B, and the
      34             :  *   data expression C pattern-matches to pattern D, and the condition
      35             :  *   E is true after substituting the proper pattern-matching variables,
      36             :  *   then the right hand side R applies (again with substitution of
      37             :  *   pattern-matched variables."
      38             :  *
      39             :  * Pattern matching can then be performed by deconstructing the patterns
      40             :  * in the right hand sides of match_criteria, and rewriting rules accordingly.
      41             :  * As an example, the following rewrite rule:
      42             :  *
      43             :  *   is_even(n) -> sign_of_list_sum(n |> l) = sign_of_list_sum(l)
      44             :  *
      45             :  * Can be represented as the following rule:
      46             :  *
      47             :  *   match_criteria = { v1 -> n |> l }, condition = is_even(n), rhs = sign_of_list_sum(l)
      48             :  *
      49             :  * Which, after one step of pattern matching, gets simplified to the following rule:
      50             :  *
      51             :  *   match_criteria = { head(v1) -> n, tail(v1) -> l }, condition = is_even(n), rhs = sign_of_list_sum(l)
      52             :  */
      53             : struct rule
      54             : {
      55             :   std::map<data_expression, data_expression> match_criteria;
      56             :   data_expression rhs;
      57             :   data_expression condition;
      58             :   variable_list variables;
      59             : 
      60           0 :   rule(const std::map<data_expression, data_expression>& mc,
      61             :        const data_expression& r,
      62             :        const data_expression& c,
      63             :        const variable_list& v)
      64           0 :   : match_criteria(mc)
      65           0 :   , rhs(r)
      66           0 :   , condition(c)
      67           0 :   , variables(v)
      68             :   {
      69           0 :     assert(condition.sort() == sort_bool::bool_());
      70           0 :   }
      71             : 
      72             : };
      73             : 
      74             : inline
      75             : std::ostream& operator<<(std::ostream& out, const rule& r)
      76             : {
      77             :   return out << core::detail::print_list(r.variables) << ". " << r.condition << " -> " << core::detail::print_map(r.match_criteria) << " = " << r.rhs;
      78             : }
      79             : 
      80             : 
      81             : /// @brief Get the top level function symbol f if expr is of the shape f or f(t1,...,tn)
      82             : /// @param expr The expression from which to extract the top function symbol
      83             : /// @return The top function symbol, or function_symbol() if it has none
      84             : inline
      85           2 : function_symbol get_top_fs(const data_expression& expr)
      86             : {
      87           2 :   if (is_function_symbol(expr))
      88             :   {
      89           0 :     return atermpp::down_cast<function_symbol>(expr);
      90             :   }
      91           2 :   if (is_application(expr))
      92             :   {
      93           2 :     const application& expr_appl = atermpp::down_cast<application>(expr);
      94           2 :     if (is_function_symbol(expr_appl.head()))
      95             :     {
      96           2 :       return atermpp::down_cast<function_symbol>(expr_appl.head());
      97             :     }
      98             :   }
      99           0 :   return function_symbol();
     100             : }
     101             : 
     102             : /**
     103             :  * \brief For a list of rules with equal left hand sides of match_criteria and only variables in
     104             :  * the right hand sides of match_criteria, construct a right hand side based on the
     105             :  * conditions and right hand sides of the rules.
     106             :  */
     107             : inline
     108           0 : data_expression construct_condition_rhs(const std::vector<rule>& rules, const data_expression& representative)
     109             : {
     110             :   // data_expression_vector negated_conditions;
     111             :   // for (const rule& r: rules)
     112             :   // {
     113             :   //   negated_conditions.push_back(sort_bool::not_(r.condition));
     114             :   // }
     115             :   // data_expression else_clause = join_and(negated_conditions.begin(), negated_conditions.end());
     116             :   // TODO: Check whether else_clause is equivalent to false. Can we use the enumerator for this?
     117             : 
     118           0 :   if (rules.size() == 0)
     119             :   {
     120           0 :     return representative;
     121             :   }
     122             : 
     123           0 :   data_expression result;
     124           0 :   for (const rule& r: rules)
     125             :   {
     126           0 :     std::map<variable, data_expression> substitution_map;
     127           0 :     for (const auto& [var_expr, pattern]: r.match_criteria)
     128             :     {
     129           0 :       assert(is_variable(pattern));
     130           0 :       substitution_map[atermpp::down_cast<variable>(pattern)] = var_expr;
     131             :     }
     132           0 :     map_substitution<std::map<variable, data_expression> > substitution(substitution_map);
     133             : 
     134           0 :     data_expression condition = replace_variables_capture_avoiding(r.condition, substitution);
     135           0 :     data_expression rhs = replace_variables_capture_avoiding(r.rhs, substitution);
     136             : 
     137           0 :     if (result == data_expression())
     138             :     {
     139           0 :       result = rhs;
     140             :     }
     141             :     else
     142             :     {
     143           0 :       result = lazy::if_(condition, rhs, result);
     144             :     }
     145           0 :   }
     146             : 
     147           0 :   return result;
     148           0 : }
     149             : 
     150             : /**
     151             :  * \brief For a list of rules with equal left hand sides of match_criteria, construct a right hand side.
     152             :  */
     153             : template <typename StructInfo>
     154           0 : data_expression construct_rhs(
     155             :   StructInfo& ssf,
     156             :   representative_generator& gen,
     157             :   const std::vector<rule>& rules,
     158             :   const sort_expression& sort
     159             : )
     160             : {
     161           0 :   if (rules.empty())
     162             :   {
     163           0 :     return construct_condition_rhs(rules, gen(sort));
     164             :   }
     165             : 
     166             :   /*
     167             :    * For each matching rule LHS, check whether pattern matching needs to happen on that LHS.
     168             :    *
     169             :    * We prefer to match on an expression such that *all* rules perform matching on that expression.
     170             :    * For example, for a ruleset representing the following rewrite rules:
     171             :    *
     172             :    *   optionally_remove_first_element(false, l) = l
     173             :    *   optionally_remove_first_element(true, []) = []
     174             :    *   optionally_remove_first_element(true, x |> l) = l
     175             :    *
     176             :    * We prefer to pattern match on the first parameter, not the second. Pattern matching on the
     177             :    * second parameter will (recursively) happen only on the true-branch, not the false-branch.
     178             :    * Pattern matching on the second parameter is still possible if there is no other option, but
     179             :    * it requires splitting the first rewrite rule into different cases for different
     180             :    * constructors -- suboptimal.
     181             :    */
     182           0 :   data_expression matching_target;
     183             :   enum class MatchType
     184             :   {
     185             :     NONE, INCOMPLETE, PARTIAL, FULL
     186             :   };
     187           0 :   MatchType matching_type = MatchType::NONE;
     188             :   // iterate over matches, this is the same for all rules, so just use rules[0]
     189             :   // More formally, we have rules[i].match_criteria->first is equivalent for all i.
     190           0 :   for (const auto& [var_expr, _]: rules[0].match_criteria)
     191             :   {
     192           0 :     bool variable_seen = false;
     193           0 :     std::set<function_symbol> constructors_seen;
     194           0 :     for (const rule& r: rules)
     195             :     {
     196           0 :       const data_expression& pattern = r.match_criteria.at(var_expr);
     197           0 :       if (is_variable(pattern))
     198             :       {
     199           0 :         variable_seen = true;
     200             :       }
     201             :       else
     202             :       {
     203             :         // either function symbol or application of function symbol
     204           0 :         function_symbol fs = get_top_fs(pattern);
     205             :         // this also means fs != function_symbol()
     206           0 :         assert(ssf.is_constructor(fs));
     207           0 :         constructors_seen.insert(fs);
     208           0 :       }
     209             :     }
     210             : 
     211             :     MatchType new_matching_type;
     212           0 :     if (constructors_seen.empty())
     213             :     {
     214             :       // No pattern matching is possible on this variable.
     215           0 :       new_matching_type = MatchType::NONE;
     216             :     }
     217           0 :     else if (variable_seen)
     218             :     {
     219             :       // There are both rules that match on this variable and rules that do not.
     220             :       // That's better than an incomplete matching but worse than a full matching.
     221           0 :       new_matching_type = MatchType::PARTIAL;
     222             :     }
     223           0 :     else if (constructors_seen.size() != ssf.get_constructors(var_expr.sort()).size())
     224             :     {
     225             :       // There are constructors for which there are no rules.
     226             :       // Thus, we have an incomplete function definition, that needs to be completed artificially.
     227             :       // A partial matching would be a better choice.
     228           0 :       new_matching_type = MatchType::INCOMPLETE;
     229             :     }
     230             :     else
     231             :     {
     232             :       // There is a matching rule for each constructor, and no rule with a plain variable.
     233             :       // This variable is a perfect pattern matching candidate.
     234           0 :       new_matching_type = MatchType::FULL;
     235             :     }
     236             : 
     237           0 :     if (new_matching_type > matching_type)
     238             :     {
     239           0 :       matching_target = var_expr;
     240           0 :       matching_type = new_matching_type;
     241             :     }
     242           0 :     if (matching_type == MatchType::FULL)
     243             :     {
     244           0 :       break;
     245             :     }
     246             :   }
     247             : 
     248           0 :   if (matching_type == MatchType::NONE)
     249             :   {
     250             :     // No constructor-based matching needs to happen.
     251             :     // All that needs to happen is incorporating the rule conditions.
     252           0 :     return construct_condition_rhs(rules, gen(sort));
     253             :   }
     254             : 
     255             :   /*
     256             :    * For each constructor, find the set of rules that apply to it, rewritten to match the constructor.
     257             :    */
     258             :   // auto type below depends on the type of ssf
     259           0 :   const auto& match_constructors = ssf.get_constructors(matching_target.sort());
     260           0 :   std::map<function_symbol, std::vector<rule> > constructor_rules;
     261           0 :   for (const rule& r: rules)
     262             :   {
     263           0 :     const data_expression& pattern = r.match_criteria.at(matching_target);
     264           0 :     if (is_function_symbol(pattern) || is_application(pattern))
     265             :     {
     266             :       /*
     267             :        * For a rule with a constructor pattern, strip the constructor and
     268             :        * introduce patterns for the constructor parameters.
     269             :        */
     270           0 :       function_symbol constructor = get_top_fs(pattern);
     271           0 :       assert(constructor != function_symbol());
     272           0 :       assert(utilities::detail::contains(match_constructors, constructor));
     273             : 
     274           0 :       data_expression_vector parameters;
     275           0 :       if (is_application(pattern))
     276             :       {
     277           0 :         const application& pattern_appl = atermpp::down_cast<application>(pattern);
     278           0 :         parameters.insert(parameters.end(), pattern_appl.begin(), pattern_appl.end());
     279             :       }
     280             : 
     281           0 :       rule rule = r;
     282           0 :       rule.match_criteria.erase(matching_target);
     283             :       // To prevent creating expressions of the shape head(l) |> tail(l), we perform a substitution here
     284             :       // This is only safe if there are no binders in the right-hand side
     285           0 :       std::set<data_expression> subexpr = find_data_expressions(rule.rhs);
     286           0 :       if (!parameters.empty() && std::none_of(subexpr.begin(), subexpr.end(), [](const data_expression& e) { return is_abstraction(e); }))
     287             :       {
     288           0 :         auto sigma = [&](const data_expression& x) { return x == pattern ? matching_target : x; };
     289           0 :         rule.rhs = replace_data_expressions(rule.rhs, sigma, true);
     290           0 :         rule.condition = replace_data_expressions(rule.condition, sigma, true);
     291             :       }
     292           0 :       for (std::size_t j = 0; j < parameters.size(); j++)
     293             :       {
     294           0 :         function_symbol projection_function = ssf.get_projection_funcs(constructor)[j];
     295           0 :         data_expression lhs_expression = application(projection_function, matching_target);
     296           0 :         rule.match_criteria[lhs_expression] = parameters[j];
     297             :       }
     298           0 :       constructor_rules[constructor].push_back(rule);
     299           0 :     }
     300             :     else
     301             :     {
     302             :       /*
     303             :        * For a rule with a variable pattern that nonetheless needs to pattern match
     304             :        * against the possible constructors for that variable, copy the rule for each
     305             :        * possible constructor. Substitute the original un-matched term for the pattern
     306             :        * variable that disappears.
     307             :        */
     308           0 :       assert(is_variable(pattern));
     309           0 :       variable_substitution sigma(atermpp::down_cast<variable>(pattern), matching_target);
     310           0 :       data_expression condition = replace_variables_capture_avoiding(r.condition, sigma);
     311           0 :       data_expression rhs = replace_variables_capture_avoiding(r.rhs, sigma);
     312             : 
     313           0 :       for (const function_symbol& f: match_constructors)
     314             :       {
     315           0 :         rule rule(r.match_criteria, rhs, condition, r.variables);
     316           0 :         rule.match_criteria.erase(matching_target);
     317             : 
     318           0 :         set_identifier_generator generator;
     319           0 :         for (const variable& v: r.variables)
     320             :         {
     321           0 :           generator.add_identifier(v.name());
     322             :         }
     323             : 
     324           0 :         if (is_function_sort(f.sort()))
     325             :         {
     326           0 :           function_sort sort(f.sort());
     327           0 :           std::size_t index = 0;
     328           0 :           for (const sort_expression& s: sort.domain())
     329             :           {
     330           0 :             variable variable(generator("v"), s);
     331           0 :             function_symbol projection_function = ssf.get_projection_funcs(f)[index];
     332           0 :             data_expression lhs_expression = application(projection_function, matching_target);
     333           0 :             rule.match_criteria[lhs_expression] = variable;
     334           0 :             index++;
     335             :           }
     336           0 :         }
     337             : 
     338           0 :         constructor_rules[f].push_back(rule);
     339             :       }
     340           0 :     }
     341             :   }
     342             : 
     343             :   /*
     344             :    * Construct an rhs of the form if(is_cons1, rhs_cons1, if(is_cons2, rhs_cons2, ...)) or equivalent
     345             :    * The exact form depends on the implementation in ssf
     346             :    */
     347           0 :   data_expression_vector rhs;
     348           0 :   for (const auto& f: match_constructors)
     349             :   {
     350           0 :     rhs.push_back(construct_rhs(ssf, gen, constructor_rules[f], sort));
     351             :   }
     352           0 :   return ssf.create_cases(matching_target, rhs);
     353           0 : }
     354             : 
     355             : } // namespace detail
     356             : 
     357             : /**
     358             :  * \brief Check whether the given rewrite rule can be classified as a pattern matching rule.
     359             :  * \details That is, its arguments are constructed only out of unique variable occurrences and
     360             :  * constructor function symbols and constructor function applications.
     361             :  */
     362             : template <typename StructInfo>
     363           0 : bool is_pattern_matching_rule(StructInfo& ssf, const data_equation& rewrite_rule)
     364             : {
     365             :   // For this rewrite rule to be usable in pattern matching, its lhs must only contain
     366             :   // constructor functions and variables that occur at most once.
     367             : 
     368           0 :   std::set<data_expression> subexpressions = find_data_expressions(rewrite_rule.lhs());
     369           0 :   subexpressions.erase(rewrite_rule.lhs());
     370           0 :   if (is_application(rewrite_rule.lhs()))
     371             :   {
     372           0 :     subexpressions.erase(application(rewrite_rule.lhs()).head());
     373             :   }
     374             : 
     375           0 :   bool all_pattern = std::all_of(subexpressions.begin(), subexpressions.end(), [&ssf](const data_expression& expr) {
     376             :     return
     377           0 :       is_variable(expr) ||
     378           0 :       (is_function_symbol(expr) && ssf.is_constructor(atermpp::down_cast<function_symbol>(expr))) ||
     379           0 :       (is_application(expr) && is_function_symbol(atermpp::down_cast<application>(expr).head()) &&
     380           0 :                           ssf.is_constructor(function_symbol(atermpp::down_cast<application>(expr).head())));
     381             :   });
     382           0 :   if (!all_pattern)
     383             :   {
     384           0 :     return false;
     385             :   }
     386           0 :   if (std::all_of(subexpressions.begin(), subexpressions.end(), [](const data_expression& x){ return is_variable(x); }))
     387             :   {
     388             :     // Each argument is a variable, this is just an ordinarily defined function
     389           0 :     return false;
     390             :   }
     391             : 
     392             :   // Check whether each variable occurs at most once
     393           0 :   std::set<variable> variable_set;
     394           0 :   std::multiset<variable> variable_multiset;
     395           0 :   find_all_variables(rewrite_rule.lhs(), std::inserter(variable_set, variable_set.end()));
     396           0 :   find_all_variables(rewrite_rule.lhs(), std::inserter(variable_multiset, variable_multiset.end()));
     397           0 :   return variable_set.size() == variable_multiset.size();
     398           0 : }
     399             : 
     400             : /**
     401             :  * \brief This converts a collection of rewrite rules for a give function symbol into a
     402             :  * one-rule specification of the function, using recogniser and projection functions
     403             :  * to implement pattern matching.
     404             :  * \details For example, the collection of rewrite rules below:
     405             :  *
     406             :  *   sign_of_list_sum([]) = false;
     407             :  *   is_even(n) -> sign_of_list_sum(n |> l) = sign_of_list_sum(l);
     408             :  *   !is_even(n) -> sign_of_list_sum(n |> l) = !sign_of_list_sum(l);
     409             :  *
     410             :  * gets translated into the following function specification:
     411             :  *
     412             :  *   sign_of_list_sum(l) = if(is_emptylist(l), false,
     413             :  *                            if(is_even(head(l)), sign_of_list_sum(tail(l)),
     414             :  *                               !sign_of_list_sum(tail(l))))
     415             :  *
     416             :  * Two complications can arise. The rewrite rule set may contain rules that do not
     417             :  * pattern-match on the function parameters, such as 'not(not(b)) = b`; rules like
     418             :  * these are discarded.
     419             :  * More problematically, the set of rewrite rules may not be complete, or may not
     420             :  * easily be proven complete; in the example above, if the rewriter cannot rewrite
     421             :  * 'is_even(n) || !is_even(n)' to 'true', the translation cannot tell that the
     422             :  * rewrite rule set is complete.
     423             :  * In cases where a (non-constructor )function invocation can genuinely not be
     424             :  * rewritten any further, the MCRL2 semantics are unspecified (TODO check this);
     425             :  * the translation resolves this situation by returning an arbitrary value in this
     426             :  * case. Thus, in practice, the function definion above might well be translated
     427             :  * into the following:
     428             :  *
     429             :  *   sign_of_list_sum(l) = if(is_emptylist(l), false,
     430             :  *                            if(is_even(head(l)), sign_of_list_sum(tail(l)),
     431             :  *                               if(!is_even(head(l)), !sign_of_list_sum(tail(l)),
     432             :  *                                  false)))
     433             :  *
     434             :  * Where 'false' is a representative of sort_bool.
     435             :  */
     436             : template <typename StructInfo>
     437           0 : data_equation unfold_pattern_matching(
     438             :   const function_symbol& mapping,
     439             :   const data_equation_vector& rewrite_rules,
     440             :   StructInfo& ssf,
     441             :   representative_generator& gen,
     442             :   set_identifier_generator& id_gen
     443             : )
     444             : {
     445           0 :   sort_expression codomain = mapping.sort().target_sort();
     446           0 :   variable_vector temp_par;
     447           0 :   if (is_function_sort(mapping.sort()))
     448             :   {
     449           0 :     const function_sort& sort = atermpp::down_cast<function_sort>(mapping.sort());
     450           0 :     for (sort_expression s: sort.domain())
     451             :     {
     452           0 :       temp_par.push_back(variable(id_gen("x"), s));
     453             :     }
     454             :   }
     455           0 :   variable_list new_parameters(temp_par.begin(), temp_par.end());
     456             : 
     457             :   // Create a rule for each data_equation
     458           0 :   std::vector<detail::rule> rules;
     459           0 :   for (const data_equation& eq: rewrite_rules)
     460             :   {
     461           0 :     assert(is_pattern_matching_rule(ssf, eq));
     462             : 
     463           0 :     std::map<data_expression, data_expression> match_criteria;
     464           0 :     if (is_application(eq.lhs()))
     465             :     {
     466           0 :       const application& lhs_appl = atermpp::down_cast<application>(eq.lhs());
     467             : 
     468           0 :       assert(lhs_appl.head() == mapping);
     469           0 :       assert(new_parameters.size() == lhs_appl.size());
     470             : 
     471             :       // Simultaneously iterate the parameters defined by the mapping and this
     472             :       // left-hand side to determine how matching occurs
     473           0 :       auto mappar_it = new_parameters.begin();
     474           0 :       auto lhspar_it = lhs_appl.begin();
     475           0 :       while(mappar_it != new_parameters.end())
     476             :       {
     477           0 :         match_criteria[*mappar_it] = *lhspar_it;
     478           0 :         ++mappar_it, ++lhspar_it;
     479             :       }
     480             : 
     481           0 :       assert(lhspar_it == lhs_appl.end());
     482             :     }
     483             : 
     484           0 :     detail::rule rule(match_criteria, eq.rhs(), eq.condition(), eq.variables());
     485           0 :     rules.push_back(rule);
     486             :   }
     487           0 :   assert(rules.size() != 0);
     488             : 
     489           0 :   data_expression new_lhs(application(mapping, new_parameters));
     490           0 :   data_expression new_rhs(construct_rhs(ssf, gen, rules, codomain));
     491           0 :   return data_equation(new_parameters, new_lhs, new_rhs);
     492           0 : }
     493             : 
     494             : } // namespace mcrl2::data
     495             : 
     496             : #endif

Generated by: LCOV version 1.14