Is there a good way to make a grammar nonterminal which is parsed differently, depending on results of some boost phoenix function?
In my use-case, I have a grammar which among other things includes CPP-style #define directives, and #ifdef #else #endif directives. (It's not actually the C preprocessor though its just some crude imitation made by someone else.) When I parse it in qi, I pass my grammar (in its ctor) a reference to a "preprocessor database" object which is adapted to a fusion structure, and I have adapted phoenix functions which allow to add PP definitions / check for PP definitions. I have made it so that the #define directives have a semantic action which registers new definitions.
When I try to implement the #ifdef #else directives I'm not sure what I should do. The only way that I can think of to do this is to add a boolean flag to all of the attributes types of all of my grammar nonterminals that marks whether it is in a discarded #ifdef branch, and after my AST is parsed then crawl through that again and toss the marked guys. But that's pretty inelegant, there has to be a better way, right?
If possible I would like to be able to keep track of the original line numbers (before ifdefs are resolved).
I hope the question is clear, if it's not I can cook up a minimal example to show what I'm trying to do but my actual grammar is large.
Edit: Okay, I cooked up an SSCCE:
So here is a program that parses a very simple grammar of pairs, and has some minimal preprocessor language which includes define and ifdef. I understand how to use semantic actions so that matching things causes C++ callbacks to get fired, and that part seems to be working. However what I don't understand is how to use callbacks to feedback info into the grammar, i.e. "if this phoenix function returns false then parse this differently". It would be enough I guess to know how to say "if this phoenix function returns boolean false as part of this semantic action, then arbitrarily declare the nonterminal not to have matched and backtrack." Actually now that I'm writing all this I guess I know that the "mini XML" example must somehow do this since it uses a local variable to enforce that start and close tags must match? So I guess I could reverse engineer how it works there maybe. But apparently I didn't figure it out yet from reading docs / studying the examples.
Note that I think it's different from your first suggestion, just make a skip grammar. The thing is that I don't know how to make the skip grammar's behavior depend on a boost phoenix function's output either, it's just the same problem again. The only thing I know how to do with phoenix inside of qi right now is, fire void callbacks, and make things that get assigned to the attributed values.
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <cassert>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
typedef std::string pp_sym;
typedef std::set<pp_sym> pp_data;
void add(pp_data & defines, const pp_sym & s) { defines.insert(s); }
void remove(pp_data & defines, const pp_sym & s) { defines.erase(s); }
bool search(pp_data & defines, const pp_sym & s) { return defines.count(s); }
BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_add_define_, add, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_remove_define_, remove, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(bool, pp_search_define_, search, 2);
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> PairVec;
/***
* Grammar definitions
*/
template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, PairVec()> {
qi::rule<Iterator, PairVec()> main;
qi::rule<Iterator, Pair()> pair;
qi::rule<Iterator, Str()> first;
qi::rule<Iterator, Str()> second;
qi::rule<Iterator, pp_sym()> pp_symbol;
qi::rule<Iterator> pp_directive;
qi::rule<Iterator, pp_sym()> define_directive;
qi::rule<Iterator, pp_sym()> undef_directive;
qi::rule<Iterator, pp_sym()> if_directive;
qi::rule<Iterator> else_directive;
qi::rule<Iterator> endif_directive;
qi::rule<Iterator> ws;
simple_grammar(pp_data & preprocessor_data)
: simple_grammar::base_type(main)
{
using qi::lit;
using qi::char_;
using namespace qi::labels;
ws = char_(" \t\r\n");
first = !lit('#') >> *(char_ - '=') >> lit('=');
second = *(char_ - '\n') >> lit('\n');
pair = first >> second;
pp_symbol = +char_("A-Za-z_");
pp_directive = &lit('#')
>> ((define_directive [ pp_add_define_(ref(preprocessor_data), _1) ] )
| (undef_directive [ pp_remove_define_(ref(preprocessor_data), _1) ] )
| if_directive // [ ??? ]
| else_directive
| endif_directive)
>> *(char_ - '\n') >> lit('\n');
main = (pp_directive >> -main) | (pair >> -main);
define_directive = lit("#define ") >> pp_symbol >> &ws;
undef_directive = lit("#undef ") >> pp_symbol >> &ws;
if_directive = lit("#ifdef ") >> pp_symbol >> &ws;
else_directive = lit("#else");
endif_directive = lit("#endif");
}
};
const char * example_1 = ""
"#define FOO\n"
"led_zeppelin=9\n"
"the_shins=9\n"
"dead_mau5=6\n"
"portishead=10\n"
"#ifdef FOO\n"
"foo_fighters=7\n"
"#else\n"
"the_who=6\n"
"#endif\n"
"kanye_west=4\n"
"#undef FOO\n"
"#define BAR\n";
int main() {
std::string temp{example_1};
typedef std::string::const_iterator str_it;
typedef simple_grammar<str_it> my_grammar;
pp_data defines;
my_grammar gram(defines); // Our grammar
PairVec ast; // Our tree
str_it it = temp.begin();
str_it end = temp.end();
bool b = qi::parse(it, end, gram, ast);
assert(b);
assert(defines.count("FOO") == 0);
assert(defines.count("BAR") == 1);
std::cout << "Parsed a list:\n\n";
for( const auto & p : ast) {
std::cout << p.first << "\n\t\t\t=\t" << p.second << std::endl;
}
return 0;
}
For me the output of the above is (as expected):
$ ./main
Parsed a list:
led_zeppelin
= 9
the_shins
= 9
dead_mau5
= 6
portishead
= 10
foo_fighters
= 7
the_who
= 6
kanye_west
= 4
However what I'd like to do is make the ifdef part do what you would naturally expect, and permit nested ifdef clauses.
In response to the "SSCCE" code added to the question:
The AST
The only way to treat nested defines correctly (including the case where conditional blocks contain #define
/#undef
directives!) is to use an AST that represents a tree of the blocks¹:
namespace Common {
typedef std::string pp_sym;
}
namespace Ast {
using Common::pp_sym;
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> Pairs;
struct ConditionalBlock;
namespace tag {
struct define;
struct undefine;
}
template <typename Tag> struct Directive {
pp_sym name;
};
typedef Directive<tag::define> Define;
typedef Directive<tag::undefine> Undef;
typedef boost::make_recursive_variant<
Pairs,
boost::recursive_wrapper<ConditionalBlock>,
Define,
Undef
>::type Block;
typedef std::vector<Block> Blocks;
struct ConditionalBlock {
pp_sym required;
Blocks if_, else_;
};
}
To facilitate parsing these without ever using a semantic action:
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)
Done.
Parsing
Because of the above work, we can now define the parser exactly as we would have liked!
Notes:
- now using a skipper to avoid having hardcoded amounts of whitespace required or whitespace intolerance
- now using
seek[eol]
to ignore until the end of a line
- now using
distinct
to parse identifiers (see boost::spirit::qi keywords and identifiers)
- Now made the appearance of
#else
optional (see -else
)
- Removes all semantic actions
Enables debug information without any more work
start = skip(blank) [ blocks ];
blocks = *block;
block = define | undef | conditional_block | +pair;
pair = !char_("#") >> +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];
define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
undef = '#' >> distinct(alnum | '_') [ "undef" ] >> pp_symbol >> seek[*eol];
else_ = '#' >> distinct(alnum | '_') [ "else" ] >> seek[*eol];
endif = '#' >> distinct(alnum | '_') [ "endif" ] >> seek[*eol];
conditional_block =
('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
>> *(!(else_|endif) >> block)
>> -else_
>> *(!endif >> block)
>> endif
;
BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
I'd say that is pretty legible, and it results in the ast containing all the information you could want to use later
Processing Logic
Now that we've separated the processing from the parsing, processing is a single visitation of the tree. We use a single function object Logic::Preprocessor
that doubles as the variant visitor:
Logic::Preprocess pp({{"EXTERNAL"}} , " ");
pp(ast);
In this sample, we start with the preprocessor symbol EXTERNAL
defined (as if it was defined "externally", like on a command line).
The implementation of the visitor is pretty straightforward, but let me show the action bits, namely where conditions are taken and branches ignored. To make things very complete I even traverse the branches that are not satisfied, just to show that the full AST is there, but with en isolated
instance of the function object so there is no effect:
void operator()(Ast::ConditionalBlock const& cb) const {
bool const satisfied = ctx.defined.count(cb.required);
auto old_indent = indent;
indent += "\t";
std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";
Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch
(satisfied? *this : isolated)(cb.if_);
std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
(satisfied? isolated : *this)(cb.else_);
std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
indent.resize(indent.size()-1);
}
void operator()(Ast::Define const& directive) const {
ctx.defined.insert(directive.name);
std::cout << indent << "#define\t" << directive.name;
report();
}
void operator()(Ast::Undef const& directive) const {
ctx.defined.erase(directive.name);
std::cout << indent << "#undef\t" << directive.name;
report();
}
Demo
Observe how this document, which even nests conditional blocks and defines symbols from within conditional branches (so, conditionally), is correctly interpreted:
#define FOO
led_zeppelin=9
the_shins=9
dead_mau5=6
portishead=10
#ifdef FOO
foo_fighters=7
#define ZOO
#else
the_who=6
#define QUX
#endif
#ifdef EXTERNAL
#ifdef ZOO
zoowasdefined=yes
#else
zoowasdefined=no
#endif
#ifdef QUX
quxwasdefined=yes
#else
quxwasdefined=no
#endif
#endif
kanye_west=4
#undef FOO
#define BAR
Our demo program prints: Live On Coliru
Preprocess results:
#define FOO // effective: EXTERNAL FOO
led_zeppelin=9
the_shins=9
dead_mau5=6
portishead=10
#ifdef FOO // true
foo_fighters=7
#define ZOO // effective: EXTERNAL FOO ZOO
#else // ifdef FOO
// the_who=6
// #define QUX // effective: EXTERNAL FOO QUX
#endif // ifdef FOO
#ifdef EXTERNAL // true
#ifdef ZOO // true
zoowasdefined=yes
#else // ifdef ZOO
// zoowasdefined=no
#endif // ifdef ZOO
#ifdef QUX // false
// quxwasdefined=yes
#else // ifdef QUX
quxwasdefined=no
#endif // ifdef QUX
#else // ifdef EXTERNAL
#endif // ifdef EXTERNAL
kanye_west=4
#undef FOO // effective: EXTERNAL ZOO
#define BAR // effective: BAR EXTERNAL ZOO
Defines still in effect: BAR EXTERNAL ZOO
Full Listing
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/variant.hpp>
#include <cassert>
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
namespace Common {
typedef std::string pp_sym;
}
namespace Ast {
using Common::pp_sym;
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> Pairs;
struct ConditionalBlock;
namespace tag {
struct define;
struct undefine;
}
template <typename Tag> struct Directive {
pp_sym name;
};
typedef Directive<tag::define> Define;
typedef Directive<tag::undefine> Undef;
typedef boost::make_recursive_variant<
Pairs,
boost::recursive_wrapper<ConditionalBlock>,
Define,
Undef
>::type Block;
typedef std::vector<Block> Blocks;
struct ConditionalBlock {
pp_sym required;
Blocks if_, else_;
};
}
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)
/***
* Grammar definitions
*/
template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, Ast::Blocks()> {
simple_grammar() : simple_grammar::base_type(start)
{
using namespace qi;
using qr::distinct;
using qr::seek;
start = skip(blank) [ blocks ];
blocks = *block;
block = define | undef | conditional_block | +pair;
pair = +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];
define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
undef = '#' >> distinct(alnum | '_') [ "undef" ] >> pp_symbol >> seek[*eol];
else_ = '#' >> distinct(alnum | '_') [ "else" ] >> seek[*eol];
endif = '#' >> distinct(alnum | '_') [ "endif" ] >> seek[*eol];
conditional_block =
('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
>> *(!(else_|endif) >> block)
>> -else_
>> *(!endif >> block)
>> endif
;
BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
}
private:
using Skipper = qi::blank_type;
qi::rule<Iterator, Ast::Blocks()> start;
qi::rule<Iterator, Ast::Blocks(), Skipper> blocks;
qi::rule<Iterator, Ast::Block(), Skipper> block;
// directive
qi::rule<Iterator, Ast::ConditionalBlock(), Skipper> conditional_block;
qi::rule<Iterator, Ast::Define(), Skipper> define;
qi::rule<Iterator, Ast::Undef(), Skipper> undef;
// empty directives
qi::rule<Iterator, Skipper> else_, endif;
// lexeme
qi::rule<Iterator, Ast::Pair()> pair;
qi::rule<Iterator, Ast::pp_sym()> pp_symbol;
};
namespace Logic {
using Common::pp_sym;
typedef std::set<pp_sym> pp_syms;
struct context {
pp_syms defined;
};
struct Preprocess : boost::static_visitor<void> {
context ctx;
std::string indent;
Preprocess(context ctx = {}, std::string indent = "")
: ctx(std::move(ctx)), indent(std::move(indent))
{ }
void operator()(Ast::Blocks const& blocks) {
for (auto& b : blocks)
boost::apply_visitor(*this, b);
}
void operator()(Ast::Block const& block) {
boost::apply_visitor(*this, block);
}
void operator()(Ast::Pairs const& pairs) {
for (auto& p : pairs)
std::cout << indent << p.first << "=" << p.second << "\n";
}
void operator()(Ast::ConditionalBlock const& cb) {
bool const satisfied = ctx.defined.count(cb.required);
auto old_indent = indent;
indent += "\t";
std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";
Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch
(satisfied? *this : isolated)(cb.if_);
std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
(satisfied? isolated : *this)(cb.else_);
std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
indent.resize(indent.size()-1);
}
void operator()(Ast::Define const& directive) {
ctx.defined.insert(directive.name);
std::cout << indent << "#define\t" << directive.name;
report();
}
void operator()(Ast::Undef const& directive) {
ctx.defined.erase(directive.name);
std::cout << indent << "#undef\t" << directive.name;
report();
}
private:
void report() const {
std::cout << "\t// effective: ";
for (auto& sym : ctx.defined) std::cout << sym << " ";
std::cout << "\n";
}
};
}
int main() {
typedef boost::spirit::istream_iterator It;
typedef simple_grammar<It> my_grammar;
my_grammar gram; // Our grammar
Ast::Blocks ast; // Our tree
It it(std::cin >> std::noskipws), end;
bool b = qi::parse(it, end, gram, ast);
if (it != end)
std::cout << "Remaining input: '" << std::string(it, end) << "'\n";
assert(b);
std::cout << "Preprocess results:\n\n";
Logic::Preprocess pp({{"EXTERNAL"}} , " ");
pp(ast);
std::cout << "\n\nDefines still in effect: ";
for (auto& sym : pp.ctx.defined) std::cout << sym << " ";
}
BONUS: Debug Info
Enabling debug information yields the following detailed trace information in addition to the above output:
<start>
<try>#define FOO\nled_zepp</try>
<blocks>
<try>#define FOO\nled_zepp</try>
<block>
<try>#define FOO\nled_zepp</try>
<define>
<try>#define FOO\nled_zepp</try>
<pp_symbol>
<try>FOO\nled_zeppelin=9\nt</try>
<success>\nled_zeppelin=9\nthe_</success>
<attributes>[[F, O, O]]</attributes>
</pp_symbol>
<success>led_zeppelin=9\nthe_s</success>
<attributes>[[[F, O, O]]]</attributes>
</define>
<success>led_zeppelin=9\nthe_s</success>
<attributes>[[[F, O, O]]]</attributes>
</block>
<block>
<try>led_zeppelin=9\nthe_s</try>
<define>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</define>
<undef>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</undef>
<conditional_block>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</conditional_block>
<pair>
<try>led_zeppelin=9\nthe_s</try>
<success>the_shins=9\ndead_mau</success>
<attributes>[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]]]</attributes>
</pair>
<pair>
<try>the_shins=9\ndead_mau</try>
<success>dead_mau5=6\nportishe</success>
<attributes>[[[t, h, e, _, s, h, i, n, s], [9]]]</attributes>
</pair>
<pair>
<try>dead_mau5=6\nportishe</try>
<success>portishead=10\n#ifdef</success>
<attributes>[[[d, e, a, d, _, m, a, u, 5], [6]]]</attributes>
</pair>
<pair>
<try>portishead=10\n#ifdef</try>
<success>#ifdef FOO\nfoo_fight</success>
<attributes>[[[p, o, r, t, i, s, h, e, a, d], [1, 0]]]</attributes>
</pair>
<pair>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</pair>
<success>#ifdef FOO\nfoo_fight</success>
<attributes>[[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]], [[t, h, e, _, s, h, i, n, s], [9]], [[d, e, a, d, _, m, a, u, 5], [6]], [[p, o, r, t, i, s, h, e, a, d], [1, 0]]]]</attributes>
</block>
<block>
<try>#ifdef FOO\nfoo_fight</try>
<define>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</define>
<undef>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef FOO\nfoo_fight</try>
<pp_symbol>
<try>FOO\nfoo_fighters=7\n#</try>
<success>\nfoo_fighters=7\n#def</success>
<attributes>[[F, O, O]]</attributes>
</pp_symbol>
<else_>
<try>foo_fighters=7\n#defi</try>
<fail/>
</else_>
<endif>
<try>foo_fighters=7\n#defi</try>
<fail/>
</endif>
<block>
<try>foo_fighters=7\n#defi</try>
<define>
<try>foo_fighters=7\n#defi</try>
<fail/>
</define>
<undef>
<try>foo_fighters=7\n#defi</try>
<fail/>
</undef>
<conditional_block>
<try>foo_fighters=7\n#defi</try>
<fail/>
</conditional_block>
<pair>
<try>foo_fighters=7\n#defi</try>
<success>#define ZOO\n#else\nth</success>
<attributes>[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]</attributes>
</pair>
<pair>
<try>#define ZOO\n#else\nth</try>
<fail/>
</pair>
<success>#define ZOO\n#else\nth</success>
<attributes>[[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]]</attributes>
</block>
<else_>
<try>#define ZOO\n#else\nth</try>
<fail/>
</else_>
<endif>
<try>#define ZOO\n#else\nth</try>
<fail/>
</endif>
<block>
<try>#define ZOO\n#else\nth</try>
<define>
<try>#define ZOO\n#else\nth</try>
<pp_symbol>
<try>ZOO\n#else\nthe_who=6\n</try>
<success>\n#else\nthe_who=6\n#de</success>
<attributes>[[Z, O, O]]</attributes>
</pp_symbol>
<success>#else\nthe_who=6\n#def</success>
<attributes>[[[Z, O, O]]]</attributes>
</define>
<success>#else\nthe_who=6\n#def</success>
<attributes>[[[Z, O, O]]]</attributes>
</block>
<else_>
<try>#else\nthe_who=6\n#def</try>
<success>the_who=6\n#define QU</success>
<attributes>[]</attributes>
</else_>
<else_>
<try>#else\nthe_who=6\n#def</try>
<success>the_who=6\n#define QU</success>
<attributes>[]</attributes>
</else_>
<endif>
<try>the_who=6\n#define QU</try>
<fail/>
</endif>
<block>
<try>the_who=6\n#define QU</try>
<define>
<try>the_who=6\n#define QU</try>
<fail/>
</define>
<undef>
<try>the_who=6\n#define QU</try>
<fail/>
</undef>
<conditional_block>
<try>the_who=6\n#define QU</try>
<fail/>
</conditional_block>
<pair>
<try>the_who=6\n#define QU</try>
<success>#define QUX\n#endif\n\n</success>
<attributes>[[[t, h, e, _, w, h, o], [6]]]</attributes>
</pair>
<pair>
<try>#define QUX\n#endif\n\n</try>
<fail/>
</pair>
<success>#define QUX\n#endif\n\n</success>
<attributes>[[[[t, h, e, _, w, h, o], [6]]]]</attributes>
</block>
<endif>
<try>#define QUX\n#endif\n\n</try>
<fail/>
</endif>
<block>
<try>#define QUX\n#endif\n\n</try>
<define>
<try>#define QUX\n#endif\n\n</try>
<pp_symbol>
<try>QUX\n#endif\n\n#ifdef E</try>
<success>\n#endif\n\n#ifdef EXTE</success>
<attributes>[[Q, U, X]]</attributes>
</pp_symbol>
<success>#endif\n\n#ifdef EXTER</success>
<attributes>[[[Q, U, X]]]</attributes>
</define>
<success>#endif\n\n#ifdef EXTER</success>
<attributes>[[[Q, U, X]]]</attributes>
</block>
<endif>
<try>#endif\n\n#ifdef EXTER</try>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[]</attributes>
</endif>
<endif>
<try>#endif\n\n#ifdef EXTER</try>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[]</attributes>
</endif>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
</conditional_block>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
</block>
<block>
<try>#ifdef EXTERNAL\n\n#if</try>
<define>
<try>#ifdef EXTERNAL\n\n#if</try>
<fail/>
</define>
<undef>
<try>#ifdef EXTERNAL\n\n#if</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef EXTERNAL\n\n#if</try>
<pp_symbol>
<try>EXTERNAL\n\n#ifdef ZOO</try>
<success>\n\n#ifdef ZOO\nzoowasd</success>
<attributes>[[E, X, T, E, R, N, A, L]]</attributes>
</pp_symbol>
<else_>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</else_>
<endif>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</endif>
<block>
<try>#ifdef ZOO\nzoowasdef</try>
<define>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</define>
<undef>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef ZOO\nzoowasdef</try>
<pp_symbol>
<try>ZOO\nzoowasdefined=ye</try>
<success>\nzoowasdefined=yes\n#</success>
<attributes>[[Z, O, O]]</attributes>
</pp_symbol>
<else_>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</else_>
<endif>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</endif>
<block>
<try>zoowasdefined=yes\n#e</try>
<define>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</define>
<undef>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</undef>
<conditional_block>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</conditional_block>
<pair>
<try>zoowasdefined=yes\n#e</try>
<success>#else\nzoowasdefined=</success>
<attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]</attributes>
</pair>
<pair>
<try>#else\nzoowasdefined=</try>
<fail/>
</pair>
<success>#else\nzoowasdefined=</success>
<attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]]</attributes>
</block>
<else_>
<try>#else\nzoowasdefined=</try>
<success>zoowasdefined=no\n#en</success>
<attributes>[]</attributes>
</else_>
<else_>
<try>#else\nzoowasdefined=</try>
<success>zoowasdefined=no\n#en</success>
<attributes>[]</attributes>
</else_>
<endif>
<try>zoowasdefined=no\n#en</try>
<fail/>
</endif>
<block>
<try>zoowasdefined=no\n#en</try>
<define>
<try>zoowasdefined=no\n#en</try>
<fail/>
</define>
<undef>
<try>zoowasdefined=no\n#en</try>
<fail/>
</undef>
<conditional_block>
<try>zoowasdefined=no\n#en</try>
<fail/>
</conditional_block>
<pair>
<try>zoowasdefined=no\n#en</try>
<success>#endif\n\n#ifdef QUX\nq</success>
<attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]</attributes>
</pair>
<pair>
<try>#endif\n\n#ifdef QUX\nq</try>
<fail/>
</pair>
<success>#endif\n\n#ifdef QUX\nq</success>
<attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]</attributes>
</block>
<endif>
<try>#endif\n\n#ifdef QUX\nq</try>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[]</attributes>
</endif>
<endif>
<try>#endif\n\n#ifdef QUX\nq</try>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[]</attributes>
</endif>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
</conditional_block>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
</block>
....
</start>
¹ or you should have a rather complicated tree to match at parse time. Whenever in doubt, separate parsing from processing. This ties in closely with Boost Spirit: "Semantic actions are evil"?
From reading spirit docs, I think the correct way to resolve the basic issue (quoting myself)
Is there a good way to make a grammar nonterminal which is parsed differently, depending on results of some boost phoenix function?
is to is to use boost::spirit::qi::eps
. From the docs (
http://www.boost.org/doc/libs/1_41_0/libs/spirit/doc/html/spirit/qi/reference/auxiliary/eps.html ):
Semantic Predicate
Semantic predicates allow you to attach a conditional function anywhere in the grammar. In this role, the epsilon takes a Lazy Argument that returns true or false. The Lazy Argument is typically a test that is called to resolve ambiguity in the grammar. A parse failure will be reported when the Lazy Argument result evaluates to false. Otherwise an empty match will be reported. The general form is:
eps(f) >> rest;
The Lazy Argument f is called to do a semantic test (say, checking if a symbol is in the symbol table). If test returns true, rest will be evaluated. Otherwise, the production will return early with a no-match without ever touching rest.
Going to try to extend the SSCCE using this technique and edit this answer shortly...
Okay, here's what I ended up with. I think it still has some short comings, like it won't completely handle nested ifdefs correctly, and my grammar has some code duplication. I think the short answer is that you should not try to implement ifdef inside of any even moderately complex grammar, you should probably always do some kind of two phase processing, even if the grammar is really simple, or it will create a lot of problems. But anyways I think this was a pretty good exercise in using spirit.
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <cassert>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
typedef std::string pp_sym;
typedef std::set<pp_sym> pp_data;
void add(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #define " << s << std::endl;*/ defines.insert(s); }
void remove(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #undef " << s << std::endl;*/ defines.erase(s); }
bool search(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #ifdef " << s << std::endl;*/ return defines.count(s); }
BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_add_define_, add, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_remove_define_, remove, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(bool, pp_search_define_, search, 2);
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> PairVec;
/***
* Grammar definitions
*/
template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, PairVec()> {
qi::rule<Iterator, PairVec()> main;
qi::rule<Iterator, PairVec(), qi::locals<std::string>> if_block;
qi::rule<Iterator, PairVec()> if_true_block;
qi::rule<Iterator, PairVec()> if_false_block;
qi::rule<Iterator, Pair()> pair;
qi::rule<Iterator, Str()> first;
qi::rule<Iterator, Str()> second;
qi::rule<Iterator, pp_sym()> pp_symbol;
qi::rule<Iterator> pp_directive;
qi::rule<Iterator, pp_sym()> define_directive;
qi::rule<Iterator, pp_sym()> undef_directive;
qi::rule<Iterator, pp_sym()> if_directive;
qi::rule<Iterator> else_directive;
qi::rule<Iterator> endif_directive;
qi::rule<Iterator> ws;
qi::rule<Iterator> skip_to_eol;
simple_grammar(pp_data & preprocessor_data)
: simple_grammar::base_type(main)
{
using qi::lit;
using qi::char_;
using qi::omit;
using qi::eps;
using namespace qi::labels;
ws = char_(" \t\r\n");
first = !lit('#') >> *(char_ - '=') >> lit('=');
second = *(char_ - '\n') >> lit('\n');
pair = first >> second;
pp_symbol = +char_("A-Za-z_");
skip_to_eol = *(char_ - '\n') >> lit('\n');
pp_directive = &lit('#')
>> ((define_directive [ pp_add_define_(ref(preprocessor_data), _1) ] )
| (undef_directive [ pp_remove_define_(ref(preprocessor_data), _1) ] )
| else_directive
| endif_directive)
>> skip_to_eol;
main = (if_block >> -main) | (pp_directive >> -main) | (pair >> -main);
define_directive = lit("#define ") >> pp_symbol >> &ws;
undef_directive = lit("#undef ") >> pp_symbol >> &ws;
if_directive = lit("#ifdef ") >> pp_symbol >> &ws;
else_directive = lit("#else");
endif_directive = lit("#endif");
if_block = omit[if_directive[_a = _1] ] >> skip_to_eol
>> ((eps( pp_search_define_(ref(preprocessor_data), _a) ) > if_true_block ) | if_false_block)
>> endif_directive >> skip_to_eol;
if_false_block = omit[ *(char_ - else_directive - endif_directive) ] >> -(else_directive >> skip_to_eol >> if_true_block);
if_true_block = !endif_directive >>
( (else_directive >> skip_to_eol >> if_false_block)
| (if_block >> -if_true_block)
| (pp_directive >> -if_true_block)
| (pair >> -if_true_block));
}
};
#define CHECK(C) \
do { \
if (!(C)) { \
std::cout << "Check \"" << #C << "\" failed!" << std::endl; \
} \
} while(0)
#define CHECK_ITS(STR, IT, END) \
do { \
if (IT != END) { \
std::cout << "Failed to fully parse \"" << STR << "\"\n"; \
std::cout << "Stopped at \"" << std::string(IT, END) << "\"" << std::endl; \
} \
} while(0)
typedef std::string::const_iterator str_it;
typedef simple_grammar<str_it> my_grammar;
void unit_test() {
std::cout << " --- unit tests ---" << std::endl;
pp_data defines;
my_grammar gram(defines); // Our grammar
{
std::cout << "test 1\n";
std::string temp = "#define ZED\n";
str_it it = temp.begin();
str_it end = temp.end();
std::string ast;
bool check1 = qi::parse(it, end, gram.define_directive >> gram.skip_to_eol, ast);
CHECK(check1);
CHECK_ITS(temp, it, end);
CHECK(ast == "ZED");
}
{
std::cout << "test 2\n";
std::string temp = "#define ZED\n";
str_it it = temp.begin();
str_it end = temp.end();
bool check1 = qi::parse(it, end, gram.pp_directive);
CHECK(check1);
CHECK_ITS(temp, it, end);
CHECK(defines.count("ZED") == 1);
}
{
std::cout << "test 3\n";
std::string temp = "#undef ZED\n";
str_it it = temp.begin();
str_it end = temp.end();
bool check1 = qi::parse(it, end, gram.pp_directive);
CHECK(check1);
CHECK_ITS(temp, it, end);
CHECK(defines.count("ZED") == 0);
}
std::cout << " --- end unit tests ---" << std::endl;
}
std::ostream & operator << (std::ostream & ss, const PairVec & pv) {
ss << "Parsed a list:\n\n";
for( const auto & p : pv) {
ss << p.first << "\n\t\t\t=\t" << p.second << std::endl;
}
return ss;
}
PairVec test_case(pp_data & defines, int & result, const std::string & temp) {
my_grammar gram(defines); // Our grammar
PairVec ast; // Our tree
str_it it = temp.begin();
str_it end = temp.end();
bool parse_successful = qi::parse(it, end, gram, ast);
CHECK(parse_successful);
CHECK_ITS(temp, it, end);
std::cout << ast;
result |= parse_successful ? 0 : 1;
return ast;
}
bool have_name(const PairVec & pv, const Str & name) {
return pv.end() != std::find_if(pv.begin(), pv.end(), [&](const Pair & p) { return p.first == name; });
}
int main() {
unit_test();
int result = 0;
{
std::cout << "Test case 1" << std::endl;
pp_data defines;
PairVec ast = test_case(defines, result, ""
"#define FOO\n"
"led_zeppelin=9\n"
"the_shins=9\n"
"dead_mau5=6\n"
"portishead=10\n"
"#ifdef FOO\n"
"foo_fighters=7\n"
"#else\n"
"the_who=6\n"
"#endif\n"
"kanye_west=4\n"
"#undef FOO\n"
"#define BAR\n");
CHECK(defines.count("FOO") == 0);
CHECK(defines.count("BAR") == 1);
if (!have_name (ast, "foo_fighters")) { std::cout << "error no foo" << std::endl;}
}
{
std::cout << "Test case 2" << std::endl;
pp_data defines;
PairVec ast = test_case(defines, result, ""
"#define WOO\n"
"led_zeppelin=9\n"
"the_shins=9\n"
"dead_mau5=6\n"
"portishead=10\n"
"#ifdef FOO\n"
"foo_fighters=7\n"
"#else\n"
"the_who=6\n"
"#endif\n"
"kanye_west=4\n"
"#undef FOO\n"
"#define BAR\n"
"#define ZED\n");
CHECK(defines.count("FOO") == 0);
CHECK(defines.count("BAR") == 1);
CHECK(defines.count("WOO") == 1);
CHECK(defines.count("ZED") == 1);
CHECK(defines.count("GOO") == 0);
CHECK(!have_name(ast, "foo_fighters"));
CHECK(have_name(ast, "the_who"));
}
return result;
}