Implementing operator precedence with boost spirit

2020-03-31 03:31发布

I was attempting to replicate this example in order to implement C++ like operator precedence rules (I started with a subset, but I eventually plan to add the others).

Try as I might, I could not get the grammar to parse a single binary operation. It would parse literals (44, 3.42, "stackoverflow") just fine, but would fail anything like 3 + 4.

I did look at this question, and this one in an attempt to get my solution to work, but got the same result.

(In an attempt to keep things short, I'll post only the relevant bits here, the full code is here)

Relevant data structures for the AST:

enum class BinaryOperator
{
    ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};

typedef boost::variant<double, int, std::string> Litteral;

struct Identifier { std::string name; };

typedef boost::variant<
    Litteral, 
    Identifier,
    boost::recursive_wrapper<UnaryOperation>,
    boost::recursive_wrapper<BinaryOperation>,
    boost::recursive_wrapper<FunctionCall>
> Expression;

struct BinaryOperation
{
    Expression rhs, lhs;
    BinaryOperator op;

    BinaryOperation() {}
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};

The grammar:

template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
    BoltGrammar() : BoltGrammar::base_type(start, "start")
    {
        equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
        equal %= (lowerGreater >> equalOp >> lowerGreater);
        equal.name("equal");

        lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
        lowerGreater %= (shift >> lowerGreaterOp >> shift);
        lowerGreater.name("lower or greater");

        shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
        shift %= (addSub >> shiftOp >> addSub);
        shift.name("shift");

        addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
        addSub %= (multDivMod >> addSubOp >> multDivMod);
        addSub.name("add or sub");

        multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
        multDivMod %= (value >> multDivModOp >> value);
        multDivMod.name("mult, div, or mod");

        value %= identifier | litteral | ('(' > expression > ')');
        value.name("value");

        start %= qi::eps >> *(value >> qi::lit(';'));
        start.name("start");

        expression %= identifier | litteral | equal;
        expression.name("expression");

        identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
        identifier.name("identifier");

        litteral %= qi::double_ | qi::int_ | quotedString;
        litteral.name("litteral");

        quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
        quotedString.name("quoted string");

        namespace phx = boost::phoenix;
        using namespace qi::labels;
        qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}

qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;

qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};

1条回答
Anthone
2楼-- · 2020-03-31 04:30

The main problem (indeed) appears to be addressed in that second answer you linked to.

Let me address some points:

  1. the main problem was was compound:

    • your start rule is

      start     %= qi::eps >> *(value >> qi::lit(';'));
      

      this means it expects values:

      value     %= identifier | literal | ('(' > expression > ')');
      

      however, since this parses only identifiers and literals or parenthesized subexpressions, the 3+4 binary operation will never be parsed.

    • your expression rule, again allows identifier or literal first (redundant/confusing):

      expression %= identifier | literal | equal;
      

    I think you'd want something more like

    expression   = '(' >> expression >> ')' | equal | value;
    value        = identifier | literal;
    
    // and then
    start        = qi::eps >> -expression % ';';
    
  2. your BinaryOperation productions allow only for the case where the operator is present; this breaks the way the rules are nested for operator precedence: a multDivOp would never be accepted as match, unless it happens to be followed by an addSubOp:

    addSub %= (multDivMod >> addSubOp >> multDivMod);
    multDivMod %= (value >> multDivModOp >> value);
    

    This can best be fixed as shown in the linked answer:

    addSub     = multDivMod >> -(addSubOp     >> multDivMod);
    multDivMod = value      >> -(multDivModOp >> value);
    

    where you can use semantic actions to build the AST nodes "dynamically":

    addSub     = multDivMod >> -(addSubOp     >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    multDivMod = value      >> -(multDivModOp >> value)      [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    

    This beats the 'tedious" declarative approach hands-down (which leads to a lot of backtracking, see e.g. Boost spirit poor perfomance with Alternative parser)

  3. the literal rule will parse an integer as a double, because it isn't strict:

    literal   %= qi::double_ | qi::int_ | quotedString;
    

    you can fix this like:

    qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
    
    literal      = quotedString | strict_double | qi::int_;
    
  4. FunctionCall should adapt functionName as an Identifier (not std::string)

    BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
    
  5. You Expression operator<< could (should) be a boost::static_visitor so that you

    • eliminate magic type switch numbers
    • get compiler checking of completeness of the switch
    • can leverage overload resolution to switch on variant member types

    Using c++11, the code could still be inside the one function:

    std::ostream& operator<<(std::ostream& os, const Expression& expr)
    {
        os << "Expression ";
        struct v : boost::static_visitor<> {
            v(std::ostream& os) : os(os) {}
            std::ostream& os;
    
            void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
            void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
            void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
            void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
            void operator()(FunctionCall    const& e) const {
                os << "(function call: " << e.functionName << "("; 
                if (e.args.size() > 0) os << e.args.front();
                for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
                os << ")";
            }
        };
        boost::apply_visitor(v(os), expr);
        return os;
    }
    
  6. you can use the BOOST_SPIRIT_DEBUG_NODES macro to name your rules

    BOOST_SPIRIT_DEBUG_NODES(
        (start)(expression)(identifier)(literal)(quotedString)
        (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
    )
    
  7. you should include from the spirit/include/ directory, which then relays to spirit/home/ or phoenix/include/ instead of including them directly.

Here is a fully working sample, that also improved the grammar rules for readability Live On Coliru:

//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;
namespace ascii = boost::spirit::ascii;

enum class UnaryOperator
{
    NOT,
    PLUS,
    MINUS,
};

std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
    switch (op)
    {
        case UnaryOperator::NOT:   return os << "!";
        case UnaryOperator::PLUS:  return os << "+";
        case UnaryOperator::MINUS: return os << "-";
    }

    assert(false);
}

enum class BinaryOperator
{
    ADD,        SUBTRACT,      MULTIPLY, DIVIDE,
    MODULO,
    LEFT_SHIFT, RIGHT_SHIFT,
    EQUAL,      NOT_EQUAL,
    LOWER,      LOWER_EQUAL,
    GREATER,    GREATER_EQUAL,
};

std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
    switch (op)
    {
        case BinaryOperator::ADD:           return os << "+";
        case BinaryOperator::SUBTRACT:      return os << "-";
        case BinaryOperator::MULTIPLY:      return os << "*";
        case BinaryOperator::DIVIDE:        return os << "/";
        case BinaryOperator::MODULO:        return os << "%";
        case BinaryOperator::LEFT_SHIFT:    return os << "<<";
        case BinaryOperator::RIGHT_SHIFT:   return os << ">>";
        case BinaryOperator::EQUAL:         return os << "==";
        case BinaryOperator::NOT_EQUAL:     return os << "!=";
        case BinaryOperator::LOWER:         return os << "<";
        case BinaryOperator::LOWER_EQUAL:   return os << "<=";
        case BinaryOperator::GREATER:       return os << ">";
        case BinaryOperator::GREATER_EQUAL: return os << ">=";
    }

    assert(false);
}

typedef boost::variant<
    double, 
    int, 
    std::string
> Literal;

struct Identifier
{
    std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))

struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;

typedef boost::variant<
    Literal, 
    Identifier,
    boost::recursive_wrapper<UnaryOperation>,
    boost::recursive_wrapper<BinaryOperation>,
    boost::recursive_wrapper<FunctionCall>
> Expression;

struct UnaryOperation
{
    Expression rhs;
    UnaryOperator op;
};
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))

struct BinaryOperation
{
    Expression rhs;
    BinaryOperator op;
    Expression lhs;

    BinaryOperation() {}
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))

struct FunctionCall
{
    Identifier functionName;
    std::vector<Expression> args;
};
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))

struct Program
{
    std::vector<Expression> statements;
};
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))

std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
    os << "Expression ";
    struct v : boost::static_visitor<> {
        v(std::ostream& os) : os(os) {}
        std::ostream& os;

        void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
        void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
        void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
        void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
        void operator()(FunctionCall    const& e) const {
            os << "(function call: " << e.functionName << "("; 
            if (e.args.size() > 0) os << e.args.front();
            for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
            os << ")";
        }
    };
    boost::apply_visitor(v(os), expr);
    return os;
}

std::ostream& operator<<(std::ostream& os, const Program& prog)
{
    os << "Program" << std::endl << "{" << std::endl;
    for (const Expression& expr : prog.statements) 
    { 
        std::cout << "\t" << expr << std::endl; 
    }
    os << "}" << std::endl;

    return os;
}

template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
    BoltGrammar() : BoltGrammar::base_type(start, "start")
    {
        using namespace qi::labels;

        equalOp.add
            ("==", BinaryOperator::EQUAL)
            ("!=", BinaryOperator::NOT_EQUAL);
        lowerGreaterOp.add
            ("<", BinaryOperator::LOWER)
            ("<=", BinaryOperator::LOWER_EQUAL)
            (">", BinaryOperator::GREATER)
            (">=", BinaryOperator::GREATER_EQUAL);
        shiftOp.add
            ("<<", BinaryOperator::LEFT_SHIFT)
            (">>", BinaryOperator::RIGHT_SHIFT);
        addSubOp.add
            ("+", BinaryOperator::ADD)
            ("-", BinaryOperator::SUBTRACT);
        multDivModOp.add
            ("*", BinaryOperator::MULTIPLY)
            ("/", BinaryOperator::DIVIDE)
            ("%", BinaryOperator::MODULO);

        equal        = lowerGreater [ _val=_1 ] >> -(equalOp        >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        lowerGreater = shift        [ _val=_1 ] >> -(lowerGreaterOp >> shift)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        shift        = addSub       [ _val=_1 ] >> -(shiftOp        >> addSub)       [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        addSub       = multDivMod   [ _val=_1 ] >> -(addSubOp       >> multDivMod)   [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        multDivMod   = value        [ _val=_1 ] >> -(multDivModOp   >> value)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];

        addSub     = multDivMod >> -(addSubOp     >> multDivMod);
        multDivMod = value      >> -(multDivModOp >> value);

        addSub     = multDivMod >> -(addSubOp     >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
        multDivMod = value      >> -(multDivModOp >> value)      [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];

        start        = qi::eps >> -expression % ';';
        expression   = '(' >> expression >> ')' | equal | value;
        value        = identifier | literal;
        identifier   = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];

        qi::real_parser<double, qi::strict_real_policies<double> > strict_double;

        literal      = quotedString | strict_double | qi::int_;
        quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];

        qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);

        BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
                (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
                )
    }

    qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
    qi::rule<Iterator, Skipper, Expression()>  equal,  lowerGreater,  shift,  addSub,  multDivMod;
    qi::rule<Iterator, Skipper, Expression()>  value;

    qi::rule<Iterator, Skipper, Program()>     start;
    qi::rule<Iterator, Skipper, Expression()>  expression;
    qi::rule<Iterator, Skipper, Identifier()>  identifier;
    qi::rule<Iterator, Skipper, Literal()>     literal;
    qi::rule<Iterator, Skipper, std::string()> quotedString;
};

typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;

int main()
{
    BoltGrammar<Iterator, Skipper> grammar;

    std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");

    Program prog;
    Iterator iter = str.begin(), last = str.end();
    bool r = phrase_parse(iter, last, grammar, ascii::space, prog);

    if (r && iter == last)
    {
        std::cout << "Parsing succeeded: " << prog << std::endl;
    }
    else
    {
        std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
    }

    return 0;
}

Prints:

Parsing succeeded: Program
{
    Expression (literal: 3)
    Expression (literal: 4.2)
    Expression (literal: lounge <c++>)
    Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}
查看更多
登录 后发表回答