How do I capture the original input into the synthesized output of a Spirit grammar?

2020-03-06 04:18发布

问题:

I'm working on a boost::spirit::qi::grammar and would like to copy a portion of the original text into the synthesized output structure of the grammar (more specifically, the portion that matched one of the components of the rule). The grammar would ultimately be used as a sub-grammar for a more complicated grammar, so I don't really have access to the original input.

I'm guessing that this can be done through semantic actions or the grammar context, but I can't find an example that does this without access to the original parse().

Here's what I have so far:

#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;

// Synthesized attribute of the grammar defined below.
struct A
{
    std::string header;    // attribute of the `header` rule
    std::vector<int> ints; // attribute of the `ints` rule
    std::string inttext;   // not part of the adapted Fusion sequence, so the parser never fills it
};

// Adapt A as a two-element Fusion sequence (header, ints) so that Qi can
// propagate the attributes of `header >> ' ' >> ints` into it.
// The inttext member is left out of the sequence (entry commented out).
BOOST_FUSION_ADAPT_STRUCT(
    A,
    (std::string, header)
    (std::vector<int>, ints)
    //(std::string, inttext)
)

// Grammar synthesizing an A from: alphabetic header, one space, then a list of
// integers separated by ',' or '_'. No skipper is used.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
    parser() : parser::base_type(start)
    {
        // One or more alphabetic characters, collected into a std::string.
        header %= qi::lexeme[ +qi::alpha ];
        // Integer list; `%` is the list operator (element % separator).
        ints %= qi::lexeme[ qi::int_ % qi::char_(",_") ]; // <---- capture the original text that matches this into inttext
        // The literal ' ' has no attribute, so A receives (header, ints).
        start %= header >> ' ' >> ints;
    }

    qi::rule<Iterator, std::string()> header;
    qi::rule<Iterator, std::vector<int>() > ints;
    qi::rule<Iterator, A()> start;
};

int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;
    bool r = qi::parse(iter, input.end(), p, output);
    if( !r || iter != input.end() )
    {
        std::cout << "did not parse";
    }
    else
    {
        // would like output.inttext to be "1,2_3"
        std::cout << output.header << ": " << output.inttext << " -> [ ";
        for( auto & i: output.ints )
            std::cout << i << ' ';
        std::cout << ']' << std::endl;
    }
}

回答1:

Something similar to what you asked without using semantic actions:

#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>

namespace qi = boost::spirit::qi;
using boost::spirit::repository::qi::iter_pos;

// Attribute of the `ints` rule: the parsed integers plus the two input
// positions that bracket the text they were parsed from.
struct ints_type
{
   std::vector<int> data;              // parsed integers
   std::string::const_iterator begin;  // position captured before the integer list
   std::string::const_iterator end;    // position captured after the integer list
};

// Synthesized attribute of the whole grammar.
struct A
{
    std::string header; // attribute of the `header` rule
    ints_type ints;     // attribute of the `ints` rule
};

// The Fusion sequence order (begin, data, end) intentionally differs from the
// member declaration order: it must match the attribute order produced by
// `iter_pos >> list >> iter_pos` in the `ints` rule.
BOOST_FUSION_ADAPT_STRUCT(
    ints_type,
    (std::string::const_iterator, begin)
    (std::vector<int>, data)
    (std::string::const_iterator, end)
)

// A is adapted as (header, ints), matching `header >> ' ' >> ints`.
BOOST_FUSION_ADAPT_STRUCT(
     A,
     (std::string, header)
     (ints_type, ints)
)

// Grammar synthesizing an A; the `ints` rule additionally records the iterator
// positions before and after the integer list, without semantic actions.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
    parser() : parser::base_type(start)
    {
        // One or more alphabetic characters, collected into a std::string.
        header %= qi::lexeme[ +qi::alpha ];
        // iter_pos exposes the current iterator as its attribute; the two of them bracket the list,
        // giving the attribute sequence (begin, data, end) that ints_type is adapted as.
        ints %= qi::lexeme[ iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos ];
        start %= header >> ' ' >> ints;
    }

    qi::rule<Iterator, std::string()> header;
    qi::rule<Iterator, ints_type() > ints;
    qi::rule<Iterator, A()> start;
};

int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;
    bool r = qi::parse(iter, input.end(), p, output);
    if( !r || iter != input.end() )
    {
        std::cout << "did not parse";
    }
    else
    {
        // would like output.inttext to be "1,2_3"
        std::cout << output.header << ": " << std::string(output.ints.begin,output.ints.end) << " -> [ ";
        for( auto & i: output.ints.data )
            std::cout << i << ' ';
        std::cout << ']' << std::endl;
    }
}

Using semantic actions:

#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>

namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using boost::spirit::repository::qi::iter_pos;

// Attribute of the `ints` rule: the parsed integers and the text they came from.
struct ints_type
{
   std::vector<int> data; // parsed integers
   std::string inttext; // input text matched by the integer list
};

// Synthesized attribute of the whole grammar.
struct A
{
    std::string header; // attribute of the `header` rule
    ints_type ints;     // attribute of the `ints` rule

};

// ints_type as a Fusion sequence: index 0 is data, index 1 is inttext.
// The semantic action in the `ints` rule addresses these by index (at_c<0>, at_c<1>).
BOOST_FUSION_ADAPT_STRUCT(
    ints_type,
    (std::vector<int>, data)
    (std::string, inttext)
)

// A is adapted as (header, ints), matching `header >> ' ' >> ints`.
BOOST_FUSION_ADAPT_STRUCT(
     A,
     (std::string, header)
     (ints_type, ints)
)

// Grammar synthesizing an A; the `ints` rule uses a semantic action to store
// both the parsed integers and the text they were parsed from.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
    parser() : parser::base_type(start)
    {
        // One or more alphabetic characters, collected into a std::string.
        header %= qi::lexeme[ +qi::alpha ];
        // Plain `=` here: the attribute is assigned explicitly by the action below.
        // In the action, _1 and _3 are the positions exposed by the two iter_pos
        // parsers and _2 is the integer list.
        ints = qi::lexeme[
                  (iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos)
                     [phx::at_c<0>(qi::_val)=qi::_2,                                        // data    <- parsed integers
                      phx::at_c<1>(qi::_val)=phx::construct<std::string>(qi::_1,qi::_3)]   // inttext <- text in [_1, _3)
               ];
        start %= header >> ' ' >> ints;
    }

    qi::rule<Iterator, std::string()> header;
    qi::rule<Iterator, ints_type() > ints;
    qi::rule<Iterator, A()> start;
};

int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;
    bool r = qi::parse(iter, input.end(), p, output);
    if( !r || iter != input.end() )
    {
        std::cout << "did not parse";
    }
    else
    {
        // would like output.inttext to be "1,2_3"
        std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
        for( auto & i: output.ints.data )
            std::cout << i << ' ';
        std::cout << ']' << std::endl;
    }
}


回答2:

Another alternative uses a custom directive, dont_eat, that returns the subject's attribute but does not consume any input. This is possibly slower, since the rule ints is parsed twice, but I believe the syntax is nicer (and it's a good excuse to try creating your own directive). The directive is a slightly modified version of "boost/spirit/home/qi/directive/lexeme.hpp".

dont_eat.hpp

#if !defined(DONT_EAT_HPP)
#define DONT_EAT_HPP

#if defined(_MSC_VER)
#pragma once
#endif

#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>

namespace custom 
{ 
    // Declares the placeholder custom::dont_eat (used as custom::dont_eat[...])
    // together with its tag type custom::tag::dont_eat.
    BOOST_SPIRIT_TERMINAL(dont_eat); 
}

namespace boost { namespace spirit
{
    ///////////////////////////////////////////////////////////////////////////
    // Enablers
    //
    // Specializing use_directive to mpl::true_ for the dont_eat tag makes
    // custom::dont_eat[...] usable as a directive in the Qi domain.
    ///////////////////////////////////////////////////////////////////////////
    template <>
    struct use_directive<qi::domain, custom::tag::dont_eat> // enables dont_eat
      : mpl::true_ {};
}}

namespace custom
{

    /// Directive parser behind custom::dont_eat[subject].
    ///
    /// Runs the subject on a *copy* of the input iterator, so the caller's
    /// iterator is never advanced, while still exposing the subject's attribute.
    template <typename Subject>
    struct dont_eat_directive : boost::spirit::qi::unary_parser<dont_eat_directive<Subject> >
    {
        typedef Subject subject_type;
        dont_eat_directive(Subject const& subject)
          : subject(subject) {}

        /// The directive's attribute is exactly the subject's attribute.
        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef typename
                boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
            type;
        };

        /// Pre-skips and parses the subject on a temporary iterator.
        /// @param first  current position; left untouched on success and failure
        /// @param attr   receives the subject's attribute
        /// @return the subject's parse result
        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper
          , Attribute& attr) const
        {
            // All work happens on `temp`; `first` is deliberately never written.
            Iterator temp = first;
            boost::spirit::qi::skip_over(temp, last, skipper);
            return subject.parse(temp, last, context, skipper, attr);
        }

        /// Describes this parser for diagnostics.
        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            // `info` must be qualified: it is a class in boost::spirit, which is
            // not visible from namespace custom, and argument-dependent lookup
            // never finds class names. Unqualified, this fails to compile as
            // soon as what() is instantiated.
            return boost::spirit::info("dont_eat", subject.what(context));
        }

        Subject subject;
    };
}//custom
    ///////////////////////////////////////////////////////////////////////////
    // Parser generators: make_xxx function (objects)
    ///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
    // Generator hook: turns custom::dont_eat[subject] into a
    // custom::dont_eat_directive wrapping that subject. The terminal and the
    // modifiers carry no information for this directive and are ignored.
    template <typename Subject, typename Modifiers>
    struct make_directive<custom::tag::dont_eat, Subject, Modifiers>
    {
        typedef custom::dont_eat_directive<Subject> result_type;

        result_type operator()(boost::spirit::unused_type
                             , Subject const& subject
                             , boost::spirit::unused_type) const
        {
            result_type directive(subject);
            return directive;
        }
    };
}}}

namespace boost { namespace spirit { namespace traits
{
    ///////////////////////////////////////////////////////////////////////////
    // dont_eat[subject] reports semantic actions exactly as its subject does.
    template <typename Subject>
    struct has_semantic_action<custom::dont_eat_directive<Subject> >
      : unary_has_semantic_action<Subject> {};

    ///////////////////////////////////////////////////////////////////////////
    // Container handling is likewise delegated to the subject.
    template <typename Subject, typename Attribute, typename Context
        , typename Iterator>
    struct handles_container<custom::dont_eat_directive<Subject>, Attribute
        , Context, Iterator>
      : unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}

#endif

main.cpp

#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "dont_eat.hpp"

namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

// Attribute of the `ints_string` rule: the parsed integers and their source text.
struct ints_type
{
   std::vector<int> data; // parsed integers
   std::string inttext; // input text matched by the integer list
};

// Synthesized attribute of the whole grammar.
struct A
{
    std::string header; // attribute of the `header` rule
    ints_type ints;     // attribute of the `ints_string` rule

};

// ints_type as a Fusion sequence (data, inttext), matching the attribute order
// of `dont_eat[ints] >> as_string[raw[ints]]`.
BOOST_FUSION_ADAPT_STRUCT(
    ints_type,
    (std::vector<int>, data)
    (std::string, inttext)
)

// A is adapted as (header, ints), matching `header >> ' ' >> ints_string`.
BOOST_FUSION_ADAPT_STRUCT(
     A,
     (std::string, header)
     (ints_type, ints)
)

// Grammar synthesizing an A; the integer list is parsed twice so that both its
// value and its source text can be captured without semantic actions.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
    parser() : parser::base_type(start)
    {
        // One or more alphabetic characters, collected into a std::string.
        header %= qi::lexeme[ +qi::alpha ];
        // Integers separated by ',' or '_'.
        ints = qi::lexeme[qi::int_ % qi::char_(",_")]; 
        // First pass: dont_eat yields the vector<int> without advancing the input.
        // Second pass: the same rule is parsed again and the matched text is
        // exposed as a std::string.
        ints_string = custom::dont_eat[ints] >> qi::as_string[qi::raw[ints]];
        start %= header >> ' ' >> ints_string;
    }

    qi::rule<Iterator, std::string()> header;
    qi::rule<Iterator, std::vector<int>() > ints;
    qi::rule<Iterator, ints_type() > ints_string;
    qi::rule<Iterator, A()> start;
};

int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;
    bool r = qi::parse(iter, input.end(), p, output);
    if( !r || iter != input.end() )
    {
        std::cout << "did not parse";
    }
    else
    {
        // would like output.inttext to be "1,2_3"
        std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
        for( auto & i: output.ints.data )
            std::cout << i << ' ';
        std::cout << ']' << std::endl;
    }
}


回答3:

This directive returns a fusion::vector2<> with the subject's attribute as its first member and the portion of the input matched by the subject (as a std::string) as its second. I think this is the easiest method to reuse, as long as you adapt your structs accordingly. I'm not sure that this fusion::vector2<> is the best way to handle the attributes, but in the limited testing I've done it has worked fine. With this directive the ints_string rule would simply be:

// annotate[...] yields (subject attribute, matched text), which maps onto ints_type (data, inttext).
ints_string=custom::annotate[ints];
//or ints_string=custom::annotate[qi::lexeme[qi::int_ % qi::char_(",_")]];

Example on LWS.

annotate.hpp

#if !defined(ANNOTATE_HPP)
#define ANNOTATE_HPP

#if defined(_MSC_VER)
#pragma once
#endif

#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>

namespace custom 
{ 
    // Declares the placeholder custom::annotate (used as custom::annotate[...])
    // together with its tag type custom::tag::annotate.
    BOOST_SPIRIT_TERMINAL(annotate); 
}

namespace boost { namespace spirit
{
    ///////////////////////////////////////////////////////////////////////////
    // Enablers
    //
    // Specializing use_directive to mpl::true_ for the annotate tag makes
    // custom::annotate[...] usable as a directive in the Qi domain.
    ///////////////////////////////////////////////////////////////////////////
    template <>
    struct use_directive<qi::domain, custom::tag::annotate> // enables annotate
      : mpl::true_ {};
}}

namespace custom
{

    /// Directive parser behind custom::annotate[subject].
    ///
    /// Parses the subject once and exposes a two-element Fusion vector:
    /// the subject's attribute and the input text the subject matched.
    template <typename Subject>
    struct annotate_directive : boost::spirit::qi::unary_parser<annotate_directive<Subject> >
    {
        typedef Subject subject_type;
        annotate_directive(Subject const& subject)
          : subject(subject) {}

        /// Attribute: (subject attribute, std::string of the matched input).
        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef 
                boost::fusion::vector2<
                    typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
                    ,std::string
                >
            type;
        };

        /// Pre-skips, parses the subject and records the matched range.
        /// @param first  advanced past the match on success; reset to the
        ///               post-skip position on failure
        /// @param attr   two-element sequence receiving (attribute, matched text)
        /// @return true when the subject matched
        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper
          , Attribute& attr) const
        {
            // Skip first so the recorded text starts at the subject's first
            // character rather than at leading skipped input.
            boost::spirit::qi::skip_over(first, last, skipper);
            Iterator save = first;
            // Parse into a local so `attr` is only written on success.
            typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type attr_;
            if(subject.parse(first, last, context, skipper, attr_))
            {
                boost::spirit::traits::assign_to(attr_,boost::fusion::at_c<0>(attr));
                boost::spirit::traits::assign_to(std::string(save,first),boost::fusion::at_c<1>(attr));
                return true;
            }
            first = save;
            return false;
        }

        /// Describes this parser for diagnostics.
        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            // `info` must be qualified: it is a class in boost::spirit, which is
            // not visible from namespace custom, and argument-dependent lookup
            // never finds class names. Unqualified, this fails to compile as
            // soon as what() is instantiated.
            return boost::spirit::info("annotate", subject.what(context));
        }

        Subject subject;
    };
}//custom
    ///////////////////////////////////////////////////////////////////////////
    // Parser generators: make_xxx function (objects)
    ///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
    // Generator hook: turns custom::annotate[subject] into a
    // custom::annotate_directive wrapping that subject. The terminal and the
    // modifiers carry no information for this directive and are ignored.
    template <typename Subject, typename Modifiers>
    struct make_directive<custom::tag::annotate, Subject, Modifiers>
    {
        typedef custom::annotate_directive<Subject> result_type;

        result_type operator()(boost::spirit::unused_type
                             , Subject const& subject
                             , boost::spirit::unused_type) const
        {
            result_type directive(subject);
            return directive;
        }
    };
}}}

namespace boost { namespace spirit { namespace traits
{
    ///////////////////////////////////////////////////////////////////////////
    // annotate[subject] reports semantic actions exactly as its subject does.
    template <typename Subject>
    struct has_semantic_action<custom::annotate_directive<Subject> >
      : unary_has_semantic_action<Subject> {};

    ///////////////////////////////////////////////////////////////////////////
    // Container handling is delegated to the subject.
    // NOTE(review): the directive's own attribute is a two-element Fusion
    // vector rather than the subject's attribute — confirm that delegating
    // here is intended.
    template <typename Subject, typename Attribute, typename Context
        , typename Iterator>
    struct handles_container<custom::annotate_directive<Subject>, Attribute
        , Context, Iterator>
      : unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}

#endif

main.cpp

#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "annotate.hpp"

namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

// Attribute of the `ints_string` rule: the parsed integers and their source text.
struct ints_type
{
   std::vector<int> data; // parsed integers
   std::string inttext; // input text matched by the integer list
};

// Synthesized attribute of the whole grammar.
struct A
{
    std::string header; // attribute of the `header` rule
    ints_type ints;     // attribute of the `ints_string` rule

};

// ints_type as a Fusion sequence (data, inttext), matching the two-element
// attribute (subject attribute, matched text) produced by custom::annotate.
BOOST_FUSION_ADAPT_STRUCT(
    ints_type,
    (std::vector<int>, data)
    (std::string, inttext)
)

// A is adapted as (header, ints), matching `header >> ' ' >> ints_string`.
BOOST_FUSION_ADAPT_STRUCT(
     A,
     (std::string, header)
     (ints_type, ints)
)

// Grammar synthesizing an A; custom::annotate captures the integer list and
// the text it was parsed from in a single pass.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
    parser() : parser::base_type(start)
    {
        // One or more alphabetic characters, collected into a std::string.
        header %= qi::lexeme[ +qi::alpha ];
        // Integers separated by ',' or '_'.
        ints = qi::lexeme[qi::int_ % qi::char_(",_")]; 
        // annotate yields (vector<int>, matched text), i.e. (data, inttext).
        ints_string = custom::annotate[ints];
        start %= header >> ' ' >> ints_string;
    }

    qi::rule<Iterator, std::string()> header;
    qi::rule<Iterator, std::vector<int>() > ints;
    qi::rule<Iterator, ints_type() > ints_string;
    qi::rule<Iterator, A()> start;
};

int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;
    std::string annotation;
    bool r = qi::parse(iter, input.end(), custom::annotate[p], output, annotation);
    if( !r || iter != input.end() )
    {
        std::cout << "did not parse";
    }
    else
    {
        // would like output.inttext to be "1,2_3"
        std::cout << "annotation: " << annotation << std::endl;
        std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
        for( auto & i: output.ints.data )
            std::cout << i << ' ';
        std::cout << ']' << std::endl;
    }
}