boost-spirit-lex: unifying multiple tokens into a single token type

Edit: I have ripped out the lexer, as it does not integrate cleanly with Qi and just obfuscates grammars (see the answer below).

My lexer looks as follows :

// Spirit.Lex token table for the event-description language.
//
// Punctuation and keyword tokens carry no attribute (lex::omit); `ordinal`
// exposes the matched digits as a boost::uint32_t and `identifier` exposes
// the matched text as a std::string. Keywords are matched case-insensitively
// through the `(?i:...)` regex group.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Mem-initializers are written in member *declaration* order. C++
        // always constructs members in that order, so listing them any other
        // way only produces a -Wreorder warning and misleads the reader.
        : left_curly("\"{\""),
          right_curly("\"}\""),
          colon(":"),
          scolon(";"),
          repeated("(?i:repeated)"),
          left_paren("\"(\""),
          right_paren("\")\""),
          namespace_("(?i:namespace)"),
          event("(?i:event)"),
          optional("(?i:optional)"),
          required("(?i:required)"),
          t_int_4("(?i:int4)"),
          t_int_8("(?i:int8)"),
          t_string("(?i:string)"),
          ordinal("\\d+"),
          identifier("\\w+")
    {
        // Registration order matters: the keyword and ordinal tokens are
        // listed before `identifier`, whose `\w+` pattern would otherwise
        // also match them (the lexer is expected to prefer the earlier
        // definition when two patterns match the same length).
        this->self
            = left_curly
            | right_curly
            | left_paren
            | right_paren
            | colon
            | scolon
            | namespace_
            | event
            | optional
            | required
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal
            | identifier
            // Whitespace is recognised but dropped before it reaches the parser.
            | lex::token_def<>("[ \\t\\n]+")   [lex::_pass = lex::pass_flags::pass_ignore];
    }

    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<std::string> identifier;
};

I want t_int_4, t_int_8, and t_string to be represented by a single token type attributed by an integral type. At the moment my Qi grammar has to do the lifting for this and then set the value in a qi::rule semantic action:

 // Each alternative matches one of the three type tokens and uses a semantic
 // action to set the rule's attribute to the matching RBL_* constant.
 atomic_type = tok.t_int_4     [ _val = RBL_INT4]
                | tok.t_int_8             [ _val = RBL_INT8]
                | tok.t_string            [ _val = RBL_STRING];

标签： c++ parsing boost boost-spirit lexer

1条回答

萌系小妹纸

2楼-- · 2019-01-19 09:51

Judging from your questions over the last few days about integrating Lex into a Qi grammar, it seems you have run into multiple integration issues. At this point you should ask yourself why you are even trying to integrate a lexer into a PEG grammar. PEG grammars can neatly capture tokenization in situ, so you don't really gain much from introducing a lexer, especially in the Lex->Qi case, where introducing a lexer has shown that you need hacks not only to express in your grammar what is neat in plain Qi, but also to get error handling and annotation working properly. Therefore I suggest removing Lex and sticking to Qi.

Here is your grammar with the lexer removed. The AST is in a file of its own.

#include "ast.hpp"
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/range/iterator_range.hpp>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;

// Skipper grammar handed to qi::phrase_parse: skips ASCII whitespace and
// `//` line comments between tokens.
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(start)
    {
        // A line comment runs up to the end of the line. Accepting qi::eoi in
        // addition to qi::eol lets a comment on the very last line be skipped
        // even when the input has no trailing newline.
        start = ascii::space
              | qi::lit("//") >> *(ascii::char_ - qi::eol) >> (qi::eol | qi::eoi);
    }

    qi::rule<Iterator> start;
};

// Function object that reports a parse error on std::cout.
//
// Prints three lines: what was expected (`w`), the whole input range
// [f, l), and a marker line indented by the distance from `f` to the error
// position `e` plus one space.
struct error_handler_
{
    typedef void result_type;

    template<typename First, typename Last, typename ErrorPos, typename What>
    void operator()(First f, Last l, ErrorPos e, What w) const
    {
        const std::string input(f, l);
        const int offset = std::distance(f, e);
        const std::string padding(offset + 1, ' ');

        std::cout << "Expected : " << w << std::endl
                  << input << std::endl
                  << padding << "^---- here" << std::endl;
    }
};

// Phoenix lazy-function wrapper around error_handler_, so it can be invoked
// with placeholders from the grammar's on_error hook.
px::function<error_handler_> error_handler;

// Holds the source ranges recorded for annotated AST nodes.
// annotate_ appends one range per node and stores the element's index
// in the node's `id` member.
template<typename Iterator>
struct annotation_state
{
    // A [first, last) span of the parsed input.
    typedef boost::iterator_range<Iterator>   annotation_iterator;
    // Sequence of recorded spans.
    typedef std::vector<annotation_iterator>  annotation_iterators;

    annotation_iterators annotations;
};

// Function object used from on_success handlers to tag an AST node with the
// input range it was parsed from.
//
// The node's `id` is set to the index at which its range is stored in the
// shared annotation_state, so the range can be looked up later from the id.
template<typename Iterator>
struct annotate_
{
    typedef void result_type;

    annotation_state<Iterator> & as;

    // Intentionally not `explicit`: the grammar initialises its
    // px::function<annotate_<Iterator> > member directly from an
    // annotation_state reference, which relies on this implicit conversion.
    annotate_(annotation_state<Iterator> & as) : as(as) {}

    // v : AST node to annotate; must have an assignable `id` member.
    // f : iterator to the start of the matched input.
    // l : iterator one past the end of the matched input.
    //
    // `v` is taken by reference. Taking it by value (as before) assigned the
    // id to a temporary copy, so the real AST node never received its id.
    template<typename Val, typename First, typename Last>
    void operator()(Val & v, First f, Last l) const
    {
      v.id = as.annotations.size();
      as.annotations.push_back(boost::make_iterator_range(f,l));
      // Trace output: echo the annotated source text.
      std::cout << std::string(f,l) << std::endl;
    }
};



// Qi grammar for a namespace description. The start rule is
// namespace_descriptor_ and the synthesized attribute is a
// namespace_descriptor.
//
// Iterator : iterator type over the input.
// Skipper  : skip parser used by the rules that are declared with a skipper.
template <typename Iterator, typename Skipper>
struct grammar : qi::grammar<Iterator,namespace_descriptor(),Skipper>
{
    // as : shared annotation storage; successful matches of the annotated
    //      rules record their source range in it through `annotate`.
    grammar(annotation_state<Iterator> & as) 
        : grammar::base_type(namespace_descriptor_),
          annotation_state_(as),
          annotate(as)

    {
        using namespace qi;

        // Keyword tables mapping type names and entry qualifiers to their
        // constants. Keys are lowercase; the tables are wrapped in no_case[]
        // where they are used, so input case does not matter.
        atomic_type.add
            ("int4", RBL_INT4)
            ("int8", RBL_INT8)
            ("string", RBL_STRING);

        event_entry_qualifier.add
            ("optional", ENTRY_OPTIONAL)
            ("required", ENTRY_REQUIRED)
            ("repeated", ENTRY_REPEATED);

        // `>` is Qi's expectation operator: once the left operand matched,
        // a failure of the right operand is reported as an error rather than
        // causing ordinary backtracking.
        oid_ = ordinal  > ':' > identifier;
        ordinal = uint_parser<boost::uint32_t>();
        identifier = +(char_("a","z") | char_("A","Z") | char_('_'));
        type_descriptor_ = atomic_type_ | compound_type_;
        // attr("") supplies an empty string for the second attribute element.
        atomic_type_ = no_case[atomic_type] > attr("");

        // event(<identifier>): attr(RBL_EVENT) injects the type constant
        // without consuming input.
        compound_type_ = 
            no_case[lit("event")] 
            > attr(RBL_EVENT) 
            > '(' 
            > identifier  
            > ')';

        // <qualifier> <ordinal>:<identifier> <type> ;
        event_entry_ = 
            no_case[event_entry_qualifier] 
            > oid_ 
            > type_descriptor_ 
            > ';';

        // event <ordinal>:<identifier> { <entry>* }
        event_descriptor_ = 
            no_case[lit("event")] 
            > oid_ 
            > '{' 
            > *(event_entry_) 
            > '}'; 

        // namespace <identifier> { <event>* }
        namespace_descriptor_ = 
            no_case[lit("namespace")] 
            > identifier 
            > '{' 
            > * (event_descriptor_) 
            > '}'; 

        // Names used when an expectation failure is reported.
        identifier.name("identifier");
        oid_.name("ordinal-identifier pair");
        ordinal.name("ordinal");

        // On an expectation failure inside the start rule, print a diagnostic
        // through the global error_handler and make the parse fail.
        on_error<fail>(namespace_descriptor_, ::error_handler(_1,_2,_3,_4));
        // After each of these rules succeeds, hand its attribute together
        // with the _1 and _3 iterators to annotate.
        on_success(oid_, annotate(_val,_1,_3));
        on_success(type_descriptor_, annotate(_val,_1,_3));
        on_success(event_entry_, annotate(_val,_1,_3));
        on_success(event_descriptor_, annotate(_val,_1,_3));
    }

    // Reference to the caller-owned annotation storage (not read elsewhere
    // in this struct; `annotate` holds its own reference).
    annotation_state<Iterator> & annotation_state_;
    px::function<annotate_<Iterator> > annotate;

    // These rules are declared without a Skipper, so no skipping takes place
    // between their inner elements (e.g. no whitespace around the ':' in oid_).
    qi::rule< Iterator, oid()> oid_;
    qi::rule< Iterator, boost::uint32_t()> ordinal;
    qi::rule< Iterator, std::string()> identifier;
    qi::rule< Iterator, type_descriptor()> type_descriptor_;
    qi::rule< Iterator, type_descriptor()> atomic_type_;
    qi::rule< Iterator, type_descriptor()> compound_type_; 

    // Phrase-level rules: the Skipper runs between their elements.
    qi::rule< Iterator, event_entry(), Skipper> event_entry_;
    qi::rule< Iterator, event_descriptor(), Skipper> event_descriptor_;
    qi::rule< Iterator, namespace_descriptor(), Skipper> namespace_descriptor_;

    qi::symbols<char, int> atomic_type;
    qi::symbols<char, int> event_entry_qualifier;
};

// Demo driver: parses a hard-coded namespace description and reports failure
// on std::cout. Returns 0 when the whole input was parsed, 1 otherwise.
int main()
{
    std::string test = "namespace ns { event 1:sihan { OpTIONAL 1:hassan event(haSsan);} }";
    typedef std::string::iterator it;

    it beg = test.begin();
    it end = test.end();

    annotation_state<it> as;
    skipper<it> skip;
    grammar<it, skipper<it> > g(as);

    // phrase_parse advances `beg` past everything it consumed.
    bool r = qi::phrase_parse(beg,end,g,skip);
    if(!r)
    {
        std::cout << "parsing failed" << std::endl;
        return 1;
    }

    // A successful match that stops early leaves unparsed trailing input;
    // treat that as a failure as well instead of silently accepting it.
    if(beg != end)
    {
        std::cout << "parsing failed: unparsed input remains: "
                  << std::string(beg,end) << std::endl;
        return 1;
    }

    return 0;
}

0人赞添加讨论(0) 举报

boost-spirit-lex: unifying multiple tokens into a single token type

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间