Boost spirit take away keyword and ignore skipper

2019-07-25 07:32发布

问题:

This is a small part of a grammar that uses expressions.

 // prefix: the keyword "not" or a '-' followed by another prefix expression;
 // otherwise falls through to a postfix expression.
 // Note that the "not" alternative does not check which character follows the literal.
 prefix =
     (lit(L"not") >> prefix) 
    |(lit('-') >> prefix)
    | postfix
    ;

Somewhere inside postfix I have name_pure, which parses an identifier:

// name_pure: one or more letters/underscores, then any number of letters, digits or underscores, all inside lexeme[].
name_pure = lexeme[+(boost::spirit::standard_wide::alpha | '_') >> *(boost::spirit::standard_wide::alnum | '_')];

So far all is fine. I can write something like

a=not b

But if I use a name that itself starts with not, like this one

a=not notvarname 

I get parser output from the AST which looks like this

a=not not varname

Which means the not is consumed by the prefix rule instead of being parsed as part of a name by the name_pure rule.

From my point of view it looks like the skipper is not being applied correctly.

This is my Skipper

 // Skipper grammar: skips any ascii::space character except an end-of-line.
 template <typename Iterator>
  struct eol_skipper : qi::grammar<Iterator> {

    // The single rule of this grammar; also its start rule.
    qi::rule<Iterator> skip;

    eol_skipper() : eol_skipper::base_type(skip)
    {
      // Difference parser: match whitespace, but refuse to match a line break.
      skip = ascii::space - qi::eol;
    }
  };

回答1:

Like last time, I don't think the skipper is your problem.

Your assumptions about what the skipper does might be the problem, though:

  1. space - eol is just blank.
  2. lexemes do not skip (that's the definition); see "Boost spirit skipper issues".
  3. PEG grammars are greedy and left-to-right. So, you need to make sure you're on a word boundary if you want to avoid matching "not" at the start of a name_pure; see "Prevent the Boost Spirit Symbol parser from accepting a keyword too early" or "How to parse reserved words correctly in boost spirit".

I'd write the rules a lot more self-descriptive (e.g. eol_skipper suggests it skips eol, but that's exactly what it DOESN'T skip?).

// Use the stock blank parser type as the skipper instead of a hand-written skipper grammar.
using Skipper = qi::blank_type;

Then, make your identifier rule (name_pure?) an implicit lexeme by just dropping the skipper from the declaration:

  private:
    // Entry rule: declared without a skipper.
    qi::rule<Iterator, Ast::AssignmentStatement()> start;
    // Rules declared with Skipper as their last template argument.
    qi::rule<Iterator, Ast::AssignmentStatement(), Skipper> assignment;
    qi::rule<Iterator, Ast::Expr(), Skipper> expr;
    qi::rule<Iterator, Ast::Negated(), Skipper> negation;
    // implicit lexemes
    // (no Skipper argument in the declaration, so nothing is skipped inside the rule)
    qi::rule<Iterator, Ast::Identifier()> identifier;

Finally, use the !p (not-predicate) parser to assert that not is only matched on a keyword/identifier boundary:

    // The prefix token is matched inside lexeme[] and must NOT be followed by an
    // identifier character: !(alnum|'_') is a lookahead that fails if one follows.
    // NOTE(review): the '0' alternative looks unusual for a negation prefix - confirm it is intended.
    negation
        = lexeme [(lit("not") | '0') >> !(alnum|'_')] >> expr 
        ;

Demo Time

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;

namespace Ast {
    // An identifier is stored as its text.
    using Identifier = std::string;
    // Forward declaration: Expr refers to Negated, and Negated contains an Expr.
    struct Negated;

    // An expression is either an identifier or a negated expression.
    // recursive_wrapper lets the variant refer to Negated, which is still incomplete here.
    using Expr = boost::variant<Identifier, boost::recursive_wrapper<Negated> >;

    // A negation applied to the expression `expr`.
    struct Negated {
        Expr expr;
    };

    // An assignment statement: target name `lhs` and assigned expression `rhs`.
    struct AssignmentStatement {
        Identifier lhs;
        Expr rhs;
    };
}

// Adapt the AST structs as Fusion sequences (members in the listed order) so
// the grammar rules can use them as attribute types.
BOOST_FUSION_ADAPT_STRUCT(Ast::Negated, expr)
BOOST_FUSION_ADAPT_STRUCT(Ast::AssignmentStatement, lhs, rhs)

/// Grammar for one assignment statement of the form `identifier = expr`, where
/// `expr` is either an identifier or a negation prefix applied to another expr.
///
/// The grammar is declared without a skipper; the `start` rule installs the
/// `blank` skipper for the rules below it through `skip(blank)[...]`.
template <typename Iterator> struct parser : qi::grammar<Iterator, Ast::AssignmentStatement()> {
    using Skipper = qi::blank_type;

    parser() : parser::base_type(start) {
        using namespace qi;

        start      = skip(blank) [ assignment ];

        assignment = identifier >> '=' >> expr;

        // Ordered choice: negation is tried first, identifier is the fallback.
        expr       = negation | identifier;

        // The prefix token sits inside lexeme[] and is followed by the
        // not-predicate !(alnum|'_'), so it only matches when the next
        // character cannot continue an identifier. This keeps "not" from being
        // split off the front of a name such as "notvarname".
        // NOTE(review): the '0' alternative looks unusual for a negation
        // prefix - confirm it is intended.
        negation
            = lexeme [(lit("not") | '0') >> !(alnum|'_')] >> expr
            ;

        // Exactly one definition of `identifier`: assigning the rule twice
        // silently discards the first definition.
        identifier = raw [ +(alpha | '_') >> *(alnum | '_') ];
        // Alternative spelling of the same rule:
        //   identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");

        BOOST_SPIRIT_DEBUG_NODES((start)(expr)(assignment)(identifier)(negation))
    }

  private:
    // Entry rule: no skipper of its own (it sets one up via skip(blank)[...]).
    qi::rule<Iterator, Ast::AssignmentStatement()> start;
    // Rules that run with the Skipper.
    qi::rule<Iterator, Ast::AssignmentStatement(), Skipper> assignment;
    qi::rule<Iterator, Ast::Expr(), Skipper> expr;
    qi::rule<Iterator, Ast::Negated(), Skipper> negation;
    // implicit lexemes: declared without a Skipper, so nothing is skipped inside
    qi::rule<Iterator, Ast::Identifier()> identifier;
};

namespace Ast {
    std::ostream& operator<<(std::ostream& os, Negated const& o)             { return os << "NOT[" << o.expr << "]"; } 
    std::ostream& operator<<(std::ostream& os, AssignmentStatement const& a) { return os << a.lhs << " = " << a.rhs; } 
}

int main() {
    using It = std::string::const_iterator;
    for (std::string const input : {
            "a=not _b",
            "a=not not_var_name",
        })
    {
        It f = input.begin(), l = input.end();

        Ast::AssignmentStatement assignment;
        if (parse(f, l, parser<It>{}, assignment))
            std::cout << "Parsed " << assignment << "\n";
        else
            std::cout << "Parse failed\n";

        if (f!=l)
            std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
    }
}

Prints

Parsed a = NOT[_b]
Parsed a = NOT[not_var_name]

NOTE how defining BOOST_SPIRIT_DEBUG also gives you debug output, in case you want to troubleshoot your rules next time (the trace below is for the inputs a=not b and a=not notvarname):

<start>
  <try>a=not b</try>
  <assignment>
    <try>a=not b</try>
    <identifier>
      <try>a=not b</try>
      <success>=not b</success>
      <attributes>[[a]]</attributes>
    </identifier>
    <expr>
      <try>not b</try>
      <negation>
        <try>not b</try>
        <expr>
          <try> b</try>
          <negation>
            <try> b</try>
            <fail/>
          </negation>
          <identifier>
            <try>b</try>
            <success></success>
            <attributes>[[b]]</attributes>
          </identifier>
          <success></success>
          <attributes>[[b]]</attributes>
        </expr>
        <success></success>
        <attributes>[[[b]]]</attributes>
      </negation>
      <success></success>
      <attributes>[[[b]]]</attributes>
    </expr>
    <success></success>
    <attributes>[[[a], [[b]]]]</attributes>
  </assignment>
  <success></success>
  <attributes>[[[a], [[b]]]]</attributes>
</start>
Parsed a = NOT[b]
<start>
  <try>a=not notvarname</try>
  <assignment>
    <try>a=not notvarname</try>
    <identifier>
      <try>a=not notvarname</try>
      <success>=not notvarname</success>
      <attributes>[[a]]</attributes>
    </identifier>
    <expr>
      <try>not notvarname</try>
      <negation>
        <try>not notvarname</try>
        <expr>
          <try> notvarname</try>
          <negation>
            <try> notvarname</try>
            <fail/>
          </negation>
          <identifier>
            <try>notvarname</try>
            <success></success>
            <attributes>[[n, o, t, v, a, r, n, a, m, e]]</attributes>
          </identifier>
          <success></success>
          <attributes>[[n, o, t, v, a, r, n, a, m, e]]</attributes>
        </expr>
        <success></success>
        <attributes>[[[n, o, t, v, a, r, n, a, m, e]]]</attributes>
      </negation>
      <success></success>
      <attributes>[[[n, o, t, v, a, r, n, a, m, e]]]</attributes>
    </expr>
    <success></success>
    <attributes>[[[a], [[n, o, t, v, a, r, n, a, m, e]]]]</attributes>
  </assignment>
  <success></success>
  <attributes>[[[a], [[n, o, t, v, a, r, n, a, m, e]]]]</attributes>
</start>
Parsed a = NOT[notvarname]