I am trying to parse files of the following form:
// comment bla bla
[sectionname]
key = value
key2=value2
// comment
key = value
[anothersection]
...
using the following code. Unfortunately, it reports the last eol as an error although all eols at the end should be accepted by: (*qi::eol > -(sectionGrammar > *(+qi::eol > sectionGrammar)) > *qi::eol),
Besides I really don't know how to parse comments properly without taking the eol which is required for the next key-value pair which is the reason I didn't placed in in the Skipper (only ascii::blank).
The last issue I have is that I don't know how to add sections to a boost::ptr_vector without copying them.
This is my code:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp> // for more detailed error information
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/bind.hpp>
#include <boost/spirit/home/phoenix/core/argument.hpp>
#include <boost/foreach.hpp>
#include "txt.hpp"
// Only use in global namespace!
BOOST_FUSION_ADAPT_STRUCT(
wc3lib::map::Txt::Section,
(wc3lib::string, name)
(wc3lib::map::Txt::Pairs, entries)
)
namespace wc3lib
{
namespace map
{
namespace client
{
using namespace boost::spirit;
//using namespace boost::spirit::qi;
using qi::double_;
using qi::phrase_parse;
using standard::space;
using boost::phoenix::ref;
//typedef BOOST_TYPEOF(space | lit("//") >> *(standard::char_ - qi::eol) >> qi::eol) SkipperType;
/*
* Doesn't skip eols since value pairs are separated linewise which therefore can be specified easier in the rules
*/
template<typename Iterator>
struct CommentSkipper : public qi::grammar<Iterator> {
qi::rule<Iterator> skip;
CommentSkipper() : CommentSkipper::base_type(skip, "PL/0")
{
skip = ascii::blank | lit("//") >> *(standard::char_ - qi::eol) >> qi::eol;
}
};
template <typename Iterator, typename Skipper = CommentSkipper<Iterator> >
struct KeyValueSquence : qi::grammar<Iterator, Txt::Pairs(), Skipper>
{
//Txt::Pairs::value_type
qi::rule<Iterator, Txt::Pairs(), Skipper> query; // NOTE first rule used as parameter for base_type does always need the skipper type of the grammar
qi::rule<Iterator, std::pair<string, string>(), Skipper> pair;
qi::rule<Iterator, string()> key, value;
KeyValueSquence() : KeyValueSquence::base_type(query)
{
query = pair > *(pair); // use only > for backtracking
pair = +qi::eol > key > lit('=') > -value; // -('=' >> value)
key = standard::char_("a-zA-Z_") > *standard::char_("a-zA-Z_0-9");
value = +(standard::char_ - qi::eol); // values can be empty or all characters except eol which indicates the and of the value
}
};
template <typename Iterator, typename Skipper = CommentSkipper<Iterator> >
struct SectionRule : qi::grammar<Iterator, Txt::Section(), Skipper>
{
qi::rule<Iterator, Txt::Section(), Skipper> query;
qi::rule<Iterator, string()> name;
qi::rule<Iterator, Txt::Pairs(), Skipper> entries;
KeyValueSquence<Iterator, Skipper> keyValueSequence;
SectionRule() : SectionRule::base_type(query)
{
query = name > -entries;
name = lit('[') > standard::char_("a-zA-Z_") > *standard::char_("a-zA-Z_0-9") > lit(']');
entries = keyValueSequence;
}
};
template <typename Iterator>
bool parse(Iterator first, Iterator last, Txt::Sections §ions)
{
SectionRule<Iterator> sectionGrammar;
CommentSkipper<Iterator> commentSkipper;
std::vector<Txt::Section> tmpSections;
bool r = boost::spirit::qi::phrase_parse(
first,
last,
(*qi::eol > -(sectionGrammar > *(+qi::eol > sectionGrammar)) > *qi::eol),
// comment skipper
commentSkipper,
tmpSections //sections store into "sections"!
);
if (first != last) // fail if we did not get a full match
{
return false;
}
// TODO temporary workaround, add sections directly from heap to vector
BOOST_FOREACH(std::vector<Txt::Section>::const_reference ref, tmpSections) {
std::auto_ptr<Txt::Section> s(new Txt::Section());
s->name = ref.name;
s->entries = ref.entries;
sections.push_back(s);
}
return r;
}
}
</code>
From the comment
I get the impression that you're understanding this wrong.
>
will actually prevent backtracking past that point, because it mandates the next token.In order to present some techniques, I've mocked up the presumed missing header:
Now, I've "fixed" up your code to show how to do
See it live at Coliru
Note
please don't
using namespace
at namespace scope. Instead, use convenient namespace aliases:please don't (don't) use
auto_ptr<>
. It's error-prone, obsolete, deprecated, inflexible :/ Useunique_ptr
instead if you must have the sections on the heap (why?).eol
to terminate your lines...be careful of naming.
query
was double used as a rule nameKeyValueSquence
s?section_header
instead ofname
etc.I do have the feeling some confusion would not have occurred with more careful naming.
consider merging the grammars into one, unless
Without further ado:
Which prints
After/along with the debug trace of the parsers: