This question leads on from its predecessor here: decoding an http header value
The Question:
In my test assertion failure, I am printing out the following contents of error_message
:
Error! Expecting <alternative><media_type_no_parameters><media_type_with_parameters> in header value: "text/html garbage ; charset = \"ISO-8859-5\"" at position: 0
Which is unhelpful...
What is the correct way to get a nice syntax error that says:
Error! token_pair has invalid syntax here:
text/html garbage ; charset = "ISO-8859-5"
^ must be eoi or separator of type ;
Background:
HTTP Content-Type in a request has the following form:
type/subtype *( ; param[=param_value]) <eoi>
Where type and subtype many not be quoted or be separated by spaces, param is not quoted, and param_value is both optional and optionally quoted.
Other than between type/subtype
spaces or horizontal tabs may be used as white space. There may also be space before type/subtype
.
For now I am ignoring the possibility of HTTP line breaks or comments as I understand that they are deprecated.
Summary:
There shall be one type, one subtype and zero or more parameters. type and subtype are HTTP tokens, which is to say that they may not contain delimiters ("/\[]<>,;
and so on) or spaces.
Thus, the following header is legal:
text/html ; charset = "ISO-8859-5"
And the following header is illegal:
text/html garbage ; charset = "ISO-8859-5"
^^^^^^^ illegal - must be either ; or <eoi>
The code I am using to parse this (seemingly simple, but actually quite devious) protocol component is below.
Code
My code, adapted from sehe's fantasic answer here (warning, prerequisites are google test and boost)
//#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <gtest/gtest.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <utility>
#include <vector>
#include <string>
#include <iostream>
using token_pair = std::pair<std::string, std::string>;
struct parameter {
std::string name;
std::string value;
bool has_value;
};
struct media_type {
token_pair type_subtype;
std::vector<parameter> params;
};
BOOST_FUSION_ADAPT_STRUCT(parameter, name, value, has_value)
BOOST_FUSION_ADAPT_STRUCT(media_type, type_subtype, params)
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
using namespace std::literals;
template<class Iterator>
struct components
{
components()
{
using qi::ascii::char_;
spaces = char_(" \t");
token = +~char_( "()<>@,;:\\\"/[]?={} \t");
token_pair_rule = token >> '/' >> token;
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
value = quoted_string | token;
name_only = token >> qi::attr("") >> qi::attr(false);
nvp = token >> '=' >> value >> qi::attr(true);
any_parameter = ';' >> (nvp | name_only);
some_parameters = +any_parameter;
parameters = *any_parameter;
qi::on_error<qi::fail>(
token,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
BOOST_SPIRIT_DEBUG_NODES((token)
(quoted_string)
(value)
(name_only)
(nvp)
(any_parameter)
(parameters)
)
}
protected:
using Skipper = qi::space_type;
Skipper spaces;
qi::rule<Iterator, std::string()> quoted_string, token, value;
qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter;
qi::rule<Iterator, std::vector<parameter>(), Skipper> parameters, some_parameters;
qi::rule<Iterator, token_pair()> token_pair_rule;
public:
std::string error_message;
protected:
struct ReportError {
// the result type must be explicit for Phoenix
template<typename, typename, typename, typename>
struct result { typedef void type; };
ReportError(std::string& error_message)
: error_message(error_message) {}
// contract the string to the surrounding new-line characters
template<typename Iter>
void operator()(Iter first, Iter last,
Iter error, const qi::info& what) const
{
using namespace std::string_literals;
std::ostringstream ss;
ss << "Error! Expecting "
<< what
<< " in header value: " << std::quoted(std::string(first, last))
<< " at position: " << error - first;
error_message = ss.str();
}
std::string& error_message;
};
const phoenix::function<ReportError> report_error = ReportError(error_message);
};
template<class Iterator>
struct token_grammar
: components<Iterator>
, qi::grammar<Iterator, media_type()>
{
token_grammar() : token_grammar::base_type(media_type_rule)
{
media_type_with_parameters = token_pair_rule >> qi::skip(spaces)[some_parameters];
media_type_no_parameters = token_pair_rule >> qi::attr(std::vector<parameter>()) >> qi::skip(spaces)[qi::eoi];
media_type_rule = qi::eps > (qi::hold[media_type_no_parameters]
| qi::hold[media_type_with_parameters]);
BOOST_SPIRIT_DEBUG_NODES((media_type_with_parameters)
(media_type_no_parameters)
(media_type_rule))
qi::on_error<qi::fail>(
media_type_rule,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
}
private:
using Skipper = typename token_grammar::components::Skipper;
using token_grammar::components::spaces;
using token_grammar::components::token;
using token_grammar::components::token_pair_rule;
using token_grammar::components::value;
using token_grammar::components::any_parameter;
using token_grammar::components::parameters;
using token_grammar::components::some_parameters;
public:
qi::rule<Iterator, media_type()> media_type_no_parameters, media_type_with_parameters, media_type_rule;
};
TEST(spirit_test, test1)
{
token_grammar<std::string::const_iterator> grammar{};
auto test = R"__test(application/json )__test"s;
auto ct = media_type {};
bool r = parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("application", ct.type_subtype.first);
EXPECT_EQ("json", ct.type_subtype.second);
EXPECT_EQ(0, ct.params.size());
ct = {};
test = R"__test(text/html ; charset = "ISO-8859-5")__test"s;
parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("text", ct.type_subtype.first);
EXPECT_EQ("html", ct.type_subtype.second);
ASSERT_EQ(1, ct.params.size());
EXPECT_TRUE(ct.params[0].has_value);
EXPECT_EQ("charset", ct.params[0].name);
EXPECT_EQ("ISO-8859-5", ct.params[0].value);
auto mt = media_type {};
parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_EQ("text", mt.type_subtype.first);
EXPECT_EQ("html", mt.type_subtype.second);
EXPECT_EQ(1, mt.params.size());
//
// Introduce a failure case
//
mt = media_type {};
test = R"__test(text/html garbage ; charset = "ISO-8859-5")__test"s;
r = parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_FALSE(r);
EXPECT_EQ("", grammar.error_message);
}