I'm trying to write a program to parse and evaluate mathematical, litteral and boolean expressions, for example :
- "(9/3) == 3+3*2" would be parsed as "(9/3) == (3+(3*2))" and evaluted as "false"
- "1+2/3" would be parsed as "1+(2/3)" and evaluated as "6".
- "this + is + a + test" would be parsed as "this+is+a+test" and evaluated as "thisisatest"
The program correctly parses and solves what I'm giving it, but as soon as I put parenthesis in the expressions, the parsing takes a ridiculous amount of time.
I've based my work on Sehe's impressively exhaustive answer on how to write a boolean grammar parser. I've added new operators (+, -, /, *, ==, !=) following that example.
Here is the parser
// DEFINING TYPES
struct op_not {};
struct op_or {};
struct op_and {};
struct op_xor {};
struct op_equal {};
struct op_unequal {};
struct op_sum {};
struct op_difference {};
struct op_factor {};
struct op_division {};
typedef ustring var;
template <typename tag> struct binop;
template <typename tag> struct unop;
typedef boost::variant<var,
boost::recursive_wrapper<unop <op_not> >,
boost::recursive_wrapper<binop<op_equal> >,
boost::recursive_wrapper<binop<op_unequal> >,
boost::recursive_wrapper<binop<op_and> >,
boost::recursive_wrapper<binop<op_xor> >,
boost::recursive_wrapper<binop<op_or> >,
boost::recursive_wrapper<binop<op_difference> >,
boost::recursive_wrapper<binop<op_sum> >,
boost::recursive_wrapper<binop<op_factor> >,
boost::recursive_wrapper<binop<op_division> >
> expressionContainer;
template <typename tag> struct binop
{
explicit binop(const expressionContainer& l
, const expressionContainer& r)
: oper1(l), oper2(r) { }
expressionContainer oper1, oper2;
};
template <typename tag> struct unop
{
explicit unop(const expressionContainer& o) : oper1(o) { }
expressionContainer oper1;
};
// EXPRESSION PARSER
template <typename It, typename Skipper = boost::spirit::standard_wide::space_type>
struct parserExpression : qi::grammar<It, expressionContainer(), Skipper>
{
parserExpression() : parserExpression::base_type(expr_)
{
using namespace qi;
expr_ = or_.alias();
// Logical Operators
or_ = (xor_ >> orOperator_ >> or_) [_val = boost::phoenix::construct<Expression::binop<op_or >>(_1, _3)] | xor_[_val = _1];
xor_ = (and_ >> xorOperator_ >> xor_) [_val = boost::phoenix::construct<Expression::binop<op_xor>>(_1, _3)] | and_[_val = _1];
and_ = (equal_ >> andOperator_ >> and_) [_val = boost::phoenix::construct<Expression::binop<op_and>>(_1, _3)] | equal_[_val = _1];
equal_ = (unequal_ >> equalOperator_ >> equal_) [_val = boost::phoenix::construct<Expression::binop<op_equal>>(_1, _3)] | unequal_[_val = _1];
unequal_ = (factor_ >> unequalOperator_ >> unequal_) [_val = boost::phoenix::construct<Expression::binop<op_unequal>>(_1, _3)] | factor_[_val = _1];
// Numerical Operators
factor_ = (division_ >> factorOperator_ >> factor_) [_val = boost::phoenix::construct<Expression::binop<op_factor>>(_1, _3)] | division_[_val = _1];
division_ = (sum_ >> divisionOperator_ >> division_) [_val = boost::phoenix::construct<Expression::binop<op_division>>(_1, _3)] | sum_[_val = _1];
sum_ = (difference_ >> sumOperator_ >> sum_) [_val = boost::phoenix::construct<Expression::binop<op_sum>>(_1, _3)] | difference_[_val = _1];
difference_ = (not_ >> differenceOperator_ >> difference_) [_val = boost::phoenix::construct<Expression::binop<op_difference>>(_1, _3)] | not_[_val = _1];
// UNARY OPERATIONS
not_ = (notOperator_ > simple) [_val = boost::phoenix::construct<Expression::unop <op_not>>(_2)] | simple[_val = _1];
simple = (('(' > expr_ > ')') | var_);
var_ = qi::lexeme[+alnum];
notOperator_ = qi::char_('!');
andOperator_ = qi::string("&&");
orOperator_ = qi::string("||");
xorOperator_ = qi::char_("^");
equalOperator_ = qi::string("==");
unequalOperator_ = qi::string("!=");
sumOperator_ = qi::char_("+");
differenceOperator_ = qi::char_("-");
factorOperator_ = qi::char_("*");
divisionOperator_ = qi::char_("/");
/*BOOST_SPIRIT_DEBUG_NODE(expr_);
BOOST_SPIRIT_DEBUG_NODE(or_);
BOOST_SPIRIT_DEBUG_NODE(xor_);
BOOST_SPIRIT_DEBUG_NODE(and_);
BOOST_SPIRIT_DEBUG_NODE(not_);
BOOST_SPIRIT_DEBUG_NODE(simple);
BOOST_SPIRIT_DEBUG_NODE(var_);
BOOST_SPIRIT_DEBUG_NODE(notOperator_);
BOOST_SPIRIT_DEBUG_NODE(andOperator_);
BOOST_SPIRIT_DEBUG_NODE(orOperator_);
BOOST_SPIRIT_DEBUG_NODE(xorOperator_);
BOOST_SPIRIT_DEBUG_NODE(sumOperator_);
BOOST_SPIRIT_DEBUG_NODE(differenceOperator_);
BOOST_SPIRIT_DEBUG_NODE(factorOperator_);
BOOST_SPIRIT_DEBUG_NODE(divisionOperator_);*/
}
private:
qi::rule<It, var(), Skipper> var_;
qi::rule<It, expressionContainer(), Skipper> not_
, and_
, xor_
, or_
, equal_
, unequal_
, sum_
, difference_
, factor_
, division_
, simple
, expr_;
qi::rule<It, ustring(), Skipper> notOperator_
, andOperator_
, orOperator_
, xorOperator_
, equalOperator_
, unequalOperator_
, sumOperator_
, differenceOperator_
, factorOperator_
, divisionOperator_;
};
With the above code, on my computer (running an Intel I5 CPU) :
- parsing "1 + 2 - 3 * 4 / 5 == 6 != 7 && 8 || 9 ^ 8 * 7 / 6 ^ 5 && 4 || 3 != 2 == 1" is instantaneous.
- parsing "(1)" takes approximately 200ms
Spirit's performance having been proved before, I'm left with the obvious question : what could I improve?
Your problem is that
(1)
is the worst possible case for backtracking with that grammar. Let's study a simplified example:And here is a step by step walkthrough:
or_
and_
not_
'!'
,'!' >> simple_
failssimple
'('
, it matchesor_
and_
not_
'!'
,'!' >> simple_
failssimple
'('
,'(' >> or_ >> ')'
failsvar_
, it matchessimple_
suceedsnot_
suceeds'&'
,not_ >> '&' >> and_
fails (the previoussimple_
andnot_
matches are discarded)not_
(the one that is alone)not_
succeedsand_
succeeds'|'
,and_ >> '|' >> or_
fails (and_
,not_
andsimple_
matches discarded)and_
(the one alone)and_
succeedsor_
succeeds')'
,'(' >> or_ >> ')'
succeedssimple_
succeedsnot_
succeeds'&'
,not_ >> '&' >> and_
fails (everything is discarded)not_
(the one alone)not_
suceedsand_
suceeds'|'
,and_ >> '|' >> or_
fails (everything is discarded)and_
(the one alone)and_
succeedsor_
succeedsAnd that is with only two binary rules, your case is much worse.
You could possibly use something like:
and, although even uglier than before, it will not discard any of the matches.
One problem I don't know you if you have noticed is that the result of the parse is right-associative (meaning
3-2-1
=>3-(2-1)
). I think that something like:could solve the problem, but I haven't tested it.
Also because of the way you have arranged your rules you have given
+
and-
a higher priority than*
and/
.Trying to solve these problems (and to remove the semantic actions) I have come up with a custom directive that seems to work, you would use it like this:
The directive parses the initial parser (
xor_.alias()
) and tries the subject many times. If the subject never succeeds the final attribute is the attribute of the initial parser. If the subject succeeds the final attribute will bebinop<op_or>(initial_attr,subject_attr)
/binop<op_or>(binop<op_or>(initial_attr,subject_attr1),subject_attr2)
/...Full Sample (Running on WandBox)
custom_fold_directive.hpp
main.cpp