boost spirit - improving error output

2019-06-10 13:03发布

This question leads on from its predecessor here: decoding an http header value

The Question:

In my test assertion failure, I am printing out the following contents of error_message:

Error! Expecting <alternative><media_type_no_parameters><media_type_with_parameters> in header value: "text/html garbage ; charset = \"ISO-8859-5\"" at position: 0

Which is unhelpful...

What is the correct way to get a nice syntax error that says:

Error! token_pair has invalid syntax here:
    text/html garbage ; charset = "ISO-8859-5"
              ^ must be eoi or separator of type ;

Background:

HTTP Content-Type in a request has the following form:

type/subtype *( ; param[=param_value]) <eoi>

Where type and subtype many not be quoted or be separated by spaces, param is not quoted, and param_value is both optional and optionally quoted.

Other than between type/subtype spaces or horizontal tabs may be used as white space. There may also be space before type/subtype.

For now I am ignoring the possibility of HTTP line breaks or comments as I understand that they are deprecated.

Summary:

There shall be one type, one subtype and zero or more parameters. type and subtype are HTTP tokens, which is to say that they may not contain delimiters ("/\[]<>,; and so on) or spaces.

Thus, the following header is legal:

text/html ; charset = "ISO-8859-5"

And the following header is illegal:

text/html garbage ; charset = "ISO-8859-5"
          ^^^^^^^ illegal - must be either ; or <eoi>

The code I am using to parse this (seemingly simple, but actually quite devious) protocol component is below.

Code

My code, adapted from sehe's fantasic answer here (warning, prerequisites are google test and boost)

//#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <gtest/gtest.h>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapted.hpp>

#include <utility>
#include <vector>
#include <string>
#include <iostream>


using token_pair = std::pair<std::string, std::string>;

struct parameter {
    std::string name;
    std::string value;
    bool has_value;
};

struct media_type {
    token_pair type_subtype;
    std::vector<parameter> params;
};


BOOST_FUSION_ADAPT_STRUCT(parameter, name, value, has_value)
BOOST_FUSION_ADAPT_STRUCT(media_type, type_subtype, params)

namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
using namespace std::literals;

template<class Iterator>
struct components
{

    components()
    {
        using qi::ascii::char_;
        spaces        = char_(" \t");
        token         = +~char_( "()<>@,;:\\\"/[]?={} \t");
        token_pair_rule = token >> '/' >> token;
        quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
        value         = quoted_string | token;

        name_only         = token >> qi::attr("") >> qi::attr(false);
        nvp               = token >> '=' >> value >> qi::attr(true);
        any_parameter     = ';' >> (nvp | name_only);
        some_parameters   = +any_parameter;
        parameters        = *any_parameter;

        qi::on_error<qi::fail>(
                               token,
                               this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
                               );

        BOOST_SPIRIT_DEBUG_NODES((token)
                                 (quoted_string)
                                 (value)
                                 (name_only)
                                 (nvp)
                                 (any_parameter)
                                 (parameters)
                                 )
    }

protected:
    using Skipper = qi::space_type;
    Skipper spaces;
    qi::rule<Iterator, std::string()>        quoted_string, token, value;
    qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter;
    qi::rule<Iterator, std::vector<parameter>(), Skipper> parameters, some_parameters;
    qi::rule<Iterator, token_pair()>        token_pair_rule;

public:
    std::string error_message;

protected:
    struct ReportError {
        // the result type must be explicit for Phoenix
        template<typename, typename, typename, typename>
        struct result { typedef void type; };

        ReportError(std::string& error_message)
        : error_message(error_message) {}

        // contract the string to the surrounding new-line characters
        template<typename Iter>
        void operator()(Iter first, Iter last,
                        Iter error, const qi::info& what) const
        {
            using namespace std::string_literals;
            std::ostringstream ss;
            ss << "Error! Expecting "
            << what
            << " in header value: " << std::quoted(std::string(first, last))
            << " at position: " << error - first;
            error_message = ss.str();
        }
        std::string& error_message;
    };

    const phoenix::function<ReportError> report_error = ReportError(error_message);
};

template<class Iterator>
struct token_grammar
: components<Iterator>
, qi::grammar<Iterator, media_type()>
{

    token_grammar() : token_grammar::base_type(media_type_rule)
    {

        media_type_with_parameters = token_pair_rule >> qi::skip(spaces)[some_parameters];
        media_type_no_parameters = token_pair_rule >> qi::attr(std::vector<parameter>()) >> qi::skip(spaces)[qi::eoi];
        media_type_rule = qi::eps > (qi::hold[media_type_no_parameters]
                                     | qi::hold[media_type_with_parameters]);

        BOOST_SPIRIT_DEBUG_NODES((media_type_with_parameters)
                                 (media_type_no_parameters)
                                 (media_type_rule))

        qi::on_error<qi::fail>(
                               media_type_rule,
                               this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
                               );

    }

private:
    using Skipper = typename token_grammar::components::Skipper;
    using token_grammar::components::spaces;
    using token_grammar::components::token;
    using token_grammar::components::token_pair_rule;
    using token_grammar::components::value;
    using token_grammar::components::any_parameter;
    using token_grammar::components::parameters;
    using token_grammar::components::some_parameters;

public:
    qi::rule<Iterator, media_type()>        media_type_no_parameters, media_type_with_parameters, media_type_rule;
};



TEST(spirit_test, test1)
{
    token_grammar<std::string::const_iterator> grammar{};

    auto test = R"__test(application/json )__test"s;
    auto ct = media_type {};
    bool r = parse(test.cbegin(), test.cend(), grammar, ct);
    EXPECT_EQ("application", ct.type_subtype.first);
    EXPECT_EQ("json", ct.type_subtype.second);
    EXPECT_EQ(0, ct.params.size());

    ct = {};
    test = R"__test(text/html ; charset = "ISO-8859-5")__test"s;
    parse(test.cbegin(), test.cend(), grammar, ct);
    EXPECT_EQ("text", ct.type_subtype.first);
    EXPECT_EQ("html", ct.type_subtype.second);
    ASSERT_EQ(1, ct.params.size());
    EXPECT_TRUE(ct.params[0].has_value);
    EXPECT_EQ("charset", ct.params[0].name);
    EXPECT_EQ("ISO-8859-5", ct.params[0].value);

    auto mt = media_type {};
    parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
    EXPECT_EQ("text", mt.type_subtype.first);
    EXPECT_EQ("html", mt.type_subtype.second);
    EXPECT_EQ(1, mt.params.size());

    //
    // Introduce a failure case
    //

    mt = media_type {};
    test = R"__test(text/html garbage ; charset = "ISO-8859-5")__test"s;
    r = parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
    EXPECT_FALSE(r);
    EXPECT_EQ("", grammar.error_message);
}

0条回答
登录 后发表回答