I was attempting to replicate this example in order to implement C++ like operator precedence rules (I started with a subset, but I eventually plan to add the others).
Try as I might, I could not get the grammar to parse a single binary operation. It would parse literals (44, 3.42, "stackoverflow") just fine, but would fail anything like 3 + 4
.
I did look at this question, and this one in an attempt to get my solution to work, but got the same result.
(In an attempt to keep things short, I'll post only the relevant bits here, the full code is here)
Relevant data structures for the AST:
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};
typedef boost::variant<double, int, std::string> Litteral;
struct Identifier { std::string name; };
typedef boost::variant<
Litteral,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct BinaryOperation
{
Expression rhs, lhs;
BinaryOperator op;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
The grammar:
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
equal %= (lowerGreater >> equalOp >> lowerGreater);
equal.name("equal");
lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
lowerGreater %= (shift >> lowerGreaterOp >> shift);
lowerGreater.name("lower or greater");
shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
shift %= (addSub >> shiftOp >> addSub);
shift.name("shift");
addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
addSub %= (multDivMod >> addSubOp >> multDivMod);
addSub.name("add or sub");
multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
multDivMod %= (value >> multDivModOp >> value);
multDivMod.name("mult, div, or mod");
value %= identifier | litteral | ('(' > expression > ')');
value.name("value");
start %= qi::eps >> *(value >> qi::lit(';'));
start.name("start");
expression %= identifier | litteral | equal;
expression.name("expression");
identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
identifier.name("identifier");
litteral %= qi::double_ | qi::int_ | quotedString;
litteral.name("litteral");
quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
quotedString.name("quoted string");
namespace phx = boost::phoenix;
using namespace qi::labels;
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
The main problem (indeed) appears to be addressed in that second answer you linked to.
Let me address some points:
the main problem was was compound:
your start rule is
start %= qi::eps >> *(value >> qi::lit(';'));
this means it expects value
s:
value %= identifier | literal | ('(' > expression > ')');
however, since this parses only identifiers and literals or parenthesized subexpressions, the 3+4
binary operation will never be parsed.
your expression rule, again allows identifier
or literal
first (redundant/confusing):
expression %= identifier | literal | equal;
I think you'd want something more like
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
// and then
start = qi::eps >> -expression % ';';
your BinaryOperation
productions allow only for the case where the operator is present; this breaks the way the rules are nested for operator precedence: a multDivOp
would never be accepted as match, unless it happens to be followed by an addSubOp
:
addSub %= (multDivMod >> addSubOp >> multDivMod);
multDivMod %= (value >> multDivModOp >> value);
This can best be fixed as shown in the linked answer:
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
where you can use semantic actions to build the AST nodes "dynamically":
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
This beats the 'tedious" declarative approach hands-down (which leads to a lot of backtracking, see e.g. Boost spirit poor perfomance with Alternative parser)
the literal rule will parse an integer as a double, because it isn't strict:
literal %= qi::double_ | qi::int_ | quotedString;
you can fix this like:
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
FunctionCall
should adapt functionName
as an Identifier
(not std::string
)
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
You Expression
operator<<
could (should) be a boost::static_visitor
so that you
- eliminate magic type switch numbers
- get compiler checking of completeness of the switch
- can leverage overload resolution to switch on variant member types
Using c++11, the code could still be inside the one function:
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
you can use the BOOST_SPIRIT_DEBUG_NODES
macro to name your rules
BOOST_SPIRIT_DEBUG_NODES(
(start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
you should include from the spirit/include/
directory, which then relays to spirit/home/
or phoenix/include/
instead of including them directly.
Here is a fully working sample, that also improved the grammar rules for readability Live On Coliru:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
enum class UnaryOperator
{
NOT,
PLUS,
MINUS,
};
std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
switch (op)
{
case UnaryOperator::NOT: return os << "!";
case UnaryOperator::PLUS: return os << "+";
case UnaryOperator::MINUS: return os << "-";
}
assert(false);
}
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE,
MODULO,
LEFT_SHIFT, RIGHT_SHIFT,
EQUAL, NOT_EQUAL,
LOWER, LOWER_EQUAL,
GREATER, GREATER_EQUAL,
};
std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
switch (op)
{
case BinaryOperator::ADD: return os << "+";
case BinaryOperator::SUBTRACT: return os << "-";
case BinaryOperator::MULTIPLY: return os << "*";
case BinaryOperator::DIVIDE: return os << "/";
case BinaryOperator::MODULO: return os << "%";
case BinaryOperator::LEFT_SHIFT: return os << "<<";
case BinaryOperator::RIGHT_SHIFT: return os << ">>";
case BinaryOperator::EQUAL: return os << "==";
case BinaryOperator::NOT_EQUAL: return os << "!=";
case BinaryOperator::LOWER: return os << "<";
case BinaryOperator::LOWER_EQUAL: return os << "<=";
case BinaryOperator::GREATER: return os << ">";
case BinaryOperator::GREATER_EQUAL: return os << ">=";
}
assert(false);
}
typedef boost::variant<
double,
int,
std::string
> Literal;
struct Identifier
{
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))
struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;
typedef boost::variant<
Literal,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct UnaryOperation
{
Expression rhs;
UnaryOperator op;
};
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))
struct BinaryOperation
{
Expression rhs;
BinaryOperator op;
Expression lhs;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))
struct FunctionCall
{
Identifier functionName;
std::vector<Expression> args;
};
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
struct Program
{
std::vector<Expression> statements;
};
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
std::ostream& operator<<(std::ostream& os, const Program& prog)
{
os << "Program" << std::endl << "{" << std::endl;
for (const Expression& expr : prog.statements)
{
std::cout << "\t" << expr << std::endl;
}
os << "}" << std::endl;
return os;
}
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
using namespace qi::labels;
equalOp.add
("==", BinaryOperator::EQUAL)
("!=", BinaryOperator::NOT_EQUAL);
lowerGreaterOp.add
("<", BinaryOperator::LOWER)
("<=", BinaryOperator::LOWER_EQUAL)
(">", BinaryOperator::GREATER)
(">=", BinaryOperator::GREATER_EQUAL);
shiftOp.add
("<<", BinaryOperator::LEFT_SHIFT)
(">>", BinaryOperator::RIGHT_SHIFT);
addSubOp.add
("+", BinaryOperator::ADD)
("-", BinaryOperator::SUBTRACT);
multDivModOp.add
("*", BinaryOperator::MULTIPLY)
("/", BinaryOperator::DIVIDE)
("%", BinaryOperator::MODULO);
equal = lowerGreater [ _val=_1 ] >> -(equalOp >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
lowerGreater = shift [ _val=_1 ] >> -(lowerGreaterOp >> shift) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
shift = addSub [ _val=_1 ] >> -(shiftOp >> addSub) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod [ _val=_1 ] >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value [ _val=_1 ] >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
start = qi::eps >> -expression % ';';
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
identifier = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, Expression()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Literal()> literal;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;
int main()
{
BoltGrammar<Iterator, Skipper> grammar;
std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");
Program prog;
Iterator iter = str.begin(), last = str.end();
bool r = phrase_parse(iter, last, grammar, ascii::space, prog);
if (r && iter == last)
{
std::cout << "Parsing succeeded: " << prog << std::endl;
}
else
{
std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
}
return 0;
}
Prints:
Parsing succeeded: Program
{
Expression (literal: 3)
Expression (literal: 4.2)
Expression (literal: lounge <c++>)
Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}