This question leads on from its predecessor here: decoding an http header value
The Question:
In my test assertion failure, I am printing out the following contents of error_message
Error! Expecting <alternative><media_type_no_parameters><media_type_with_parameters> in header value: "text/html garbage ; charset = \"ISO-8859-5\"" at position: 0
Which is unhelpful...
What is the correct way to get a nice syntax error that says:
Error! token_pair has invalid syntax here:
text/html garbage ; charset = "ISO-8859-5"
^ must be eoi or separator of type ;
HTTP Content-Type in a request has the following form:
type/subtype *( ; param[=param_value]) <eoi>
Where type and subtype may not be quoted or separated by spaces, param is not quoted, and param_value is both optional and optionally quoted.
Other than between type and subtype, spaces or horizontal tabs may be used as white space. There may also be white space before type/subtype.
For now I am ignoring the possibility of HTTP line breaks or comments as I understand that they are deprecated.
There shall be one type, one subtype and zero or more parameters. type and subtype are HTTP tokens, which is to say that they may not contain delimiters (any of "/\[]<>,; and so on) or spaces.
Thus, the following header is legal:
text/html ; charset = "ISO-8859-5"
And the following header is illegal:
text/html garbage ; charset = "ISO-8859-5"
^^^^^^^ illegal - must be either ; or <eoi>
The code I am using to parse this (seemingly simple, but actually quite devious) protocol component is below.
My code, adapted from sehe's fantastic answer here (warning: prerequisites are Google Test and Boost):
#include <boost/config/warning_disable.hpp>
#include <gtest/gtest.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <utility>
#include <vector>
#include <string>
#include <iostream>
/// The `type/subtype` part of a media type: `first` is the type, `second` the subtype.
using token_pair = std::pair<std::string, std::string>;

/// One `;`-introduced parameter of a media type.
struct parameter {
    std::string name;        ///< Parameter name (always present).
    std::string value;       ///< Parameter value; empty when the parameter has no `=value` part.
    bool has_value = false;  ///< True only when an `=value` part was parsed.
};

/// A parsed media type: the type/subtype pair plus its parameters in input order.
struct media_type {
    token_pair type_subtype;
    std::vector<parameter> params;
};
// Adapt both structs as Boost.Fusion sequences. Members are listed in declaration order,
// presumably so that the Qi rules declared with parameter() / media_type() attributes can
// fill them in member by member.
BOOST_FUSION_ADAPT_STRUCT(parameter, name, value, has_value)
BOOST_FUSION_ADAPT_STRUCT(media_type, type_subtype, params)
// Short aliases for the Boost.Spirit Qi and Boost.Phoenix namespaces used throughout.
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
// Brings the standard literal suffixes into scope; the test case uses the ""s std::string suffix.
using namespace std::literals;
// Shared building blocks for parsing a media type, templated on the input iterator type:
// the lexical rules (token, quoted string), the parameter rules, the error-message storage
// and the functor that fills it.
// NOTE(review): this listing appears to have lost its brace-only lines and some statements.
// The enclosing braces, the constructor header for the rule assignments, and whatever call
// wraps the report_error expression are not visible here — confirm against the original.
template<class Iterator>
struct components
using qi::ascii::char_;
// White space for this grammar: a single space or horizontal tab.
// NOTE(review): `spaces` is declared below as Skipper (qi::space_type); confirm that this
// assignment is valid for that type.
spaces = char_(" \t");
// A token is one or more characters that are none of ( ) < > @ , ; : \ " / [ ] ? = { } space or tab.
token = +~char_( "()<>@,;:\\\"/[]?={} \t");
// type '/' subtype, each side a token.
token_pair_rule = token >> '/' >> token;
// A double-quoted string: inside the quotes, either a backslash followed by any character,
// or any character other than a double quote.
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
// A parameter value is a quoted string or, failing that, a bare token.
value = quoted_string | token;
// A parameter with no '=value' part: the value attribute is "" and has_value is false.
name_only = token >> qi::attr("") >> qi::attr(false);
// A name=value parameter: has_value is true.
nvp = token >> '=' >> value >> qi::attr(true);
// Every parameter is introduced by ';'. The name=value form is tried before the name-only form.
any_parameter = ';' >> (nvp | name_only);
// One or more parameters.
some_parameters = +any_parameter;
// Zero or more parameters.
parameters = *any_parameter;
// NOTE(review): dangling expression — presumably the handler argument of an error-handler
// registration (e.g. qi::on_error) whose surrounding lines are missing from this listing.
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
using Skipper = qi::space_type;
Skipper spaces;
// The lexical rules are declared without a skipper; the parameter rules use Skipper.
qi::rule<Iterator, std::string()> quoted_string, token, value;
qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter;
qi::rule<Iterator, std::vector<parameter>(), Skipper> parameters, some_parameters;
qi::rule<Iterator, token_pair()> token_pair_rule;
// Receives the text produced by ReportError::operator(); read by the test case.
std::string error_message;
// Functor that formats a parse error into the std::string it was constructed with.
struct ReportError {
// the result type must be explicit for Phoenix
template<typename, typename, typename, typename>
struct result { typedef void type; };
// Stores a reference to the string that operator() overwrites.
ReportError(std::string& error_message)
: error_message(error_message) {}
// Builds "Error! Expecting <what> in header value: <quoted input> at position: <offset>",
// where the quoted input is the whole [first, last) range and the offset is error - first,
// then stores the result in error_message.
// NOTE(review): std::ostringstream and std::quoted need <sstream> and <iomanip>, which are
// not among the includes visible at the top of this listing (they may arrive transitively).
template<typename Iter>
void operator()(Iter first, Iter last,
Iter error, const qi::info& what) const
// NOTE(review): no literal suffix is used in the visible body, so this directive looks unused.
using namespace std::string_literals;
std::ostringstream ss;
ss << "Error! Expecting "
<< what
<< " in header value: " << std::quoted(std::string(first, last))
<< " at position: " << error - first;
error_message = ss.str();
// Reference to the destination string supplied at construction.
std::string& error_message;
// Lazy Phoenix wrapper around a ReportError bound to this object's error_message.
const phoenix::function<ReportError> report_error = ReportError(error_message);
// Qi grammar producing a media_type. It derives from components<Iterator> and reuses the
// rules declared there.
// NOTE(review): as with components, the brace-only lines and the statement wrapping the
// report_error expression are missing from this listing — confirm against the original.
template<class Iterator>
struct token_grammar
: components<Iterator>
, qi::grammar<Iterator, media_type()>
// media_type_rule is the start rule.
token_grammar() : token_grammar::base_type(media_type_rule)
// type/subtype followed by one or more parameters, skipping `spaces` inside the parameter list.
media_type_with_parameters = token_pair_rule >> qi::skip(spaces)[some_parameters];
// type/subtype with an empty parameter vector, which must then reach end of input
// (skipping `spaces` first).
media_type_no_parameters = token_pair_rule >> qi::attr(std::vector<parameter>()) >> qi::skip(spaces)[qi::eoi];
// The expectation operator (>) after eps requires one of the two alternatives to match.
// Each alternative is wrapped in qi::hold[].
// The expectation point sits before any input is consumed, which presumably explains the
// "<alternative>... at position: 0" text quoted in the question.
media_type_rule = qi::eps > (qi::hold[media_type_no_parameters]
| qi::hold[media_type_with_parameters]);
// NOTE(review): dangling expression — presumably the handler argument of an error-handler
// registration (e.g. qi::on_error) whose surrounding lines are missing from this listing.
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
// Re-export names from the dependent base class so they can be used unqualified above.
using Skipper = typename token_grammar::components::Skipper;
using token_grammar::components::spaces;
using token_grammar::components::token;
using token_grammar::components::token_pair_rule;
using token_grammar::components::value;
using token_grammar::components::any_parameter;
using token_grammar::components::parameters;
using token_grammar::components::some_parameters;
// Top-level rules. None of them takes a skipper; the test case accesses media_type_rule directly.
qi::rule<Iterator, media_type()> media_type_no_parameters, media_type_with_parameters, media_type_rule;
// Exercises the grammar on two inputs the test expects to parse, then on the input the
// question describes as illegal.
// NOTE(review): the braces around the test body are missing from this listing.
TEST(spirit_test, test1)
token_grammar<std::string::const_iterator> grammar{};
// Case 1: type/subtype followed by trailing white space and no parameters.
auto test = R"__test(application/json )__test"s;
auto ct = media_type {};
// NOTE(review): r is assigned here and again for the failure case, but never asserted on.
bool r = parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("application", ct.type_subtype.first);
EXPECT_EQ("json", ct.type_subtype.second);
EXPECT_EQ(0, ct.params.size());
// Case 2: one parameter with a quoted value and white space around ';' and '='.
ct = {};
test = R"__test(text/html ; charset = "ISO-8859-5")__test"s;
parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("text", ct.type_subtype.first);
EXPECT_EQ("html", ct.type_subtype.second);
// ASSERT (not EXPECT) so that params[0] is not indexed when the vector is empty.
ASSERT_EQ(1, ct.params.size());
EXPECT_EQ("charset", ct.params[0].name);
EXPECT_EQ("ISO-8859-5", ct.params[0].value);
// Case 3: the same input as case 2, parsed through the start rule directly instead of the grammar object.
auto mt = media_type {};
parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_EQ("text", mt.type_subtype.first);
EXPECT_EQ("html", mt.type_subtype.second);
EXPECT_EQ(1, mt.params.size());
// Introduce a failure case
mt = media_type {};
test = R"__test(text/html garbage ; charset = "ISO-8859-5")__test"s;
r = parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
// Expects an empty error message for the illegal input. Per the question text, this assertion
// fails, and the failure output is how the current error_message gets displayed.
EXPECT_EQ("", grammar.error_message);