I wanted to raise a subject that just sent me down a rabbit hole and brought up a question about qi::symbols.
It all started while I was looking into the new beast library and read a tutorial example
It starts with a function that guesses mime types from http path extensions. I started to look more closely and saw this:
auto const ext = [&path]
{
auto const pos = path.rfind(".");
if(pos == boost::beast::string_view::npos)
return boost::beast::string_view{};
return path.substr(pos);
}();
It took me a while to figure out it was a IIFE in C++ style, and was used to initialize ext
while declaring it constant.
Anyway, I started testing if this made any performance difference that would justify the horrible readability vs the straight forward implementation.
Doing so I started wondering is this wouldn't be much better implemented in qi::symbols. So I came up with two alternative implementations:
#include <boost/smart_ptr/scoped_array.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/moment.hpp>
#include <boost/chrono.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/vector.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/assign.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <random>
using namespace boost::accumulators;
typedef boost::chrono::duration<long long, boost::micro> microseconds;
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace ascii = qi::ascii;
namespace phx = boost::phoenix;
const std::map<const std::string, const std::string> mime_exts = {
{ ".htm", "text/html" },
{ ".html", "text/html" },
{ ".php", "text/html" },
{ ".css", "text/css" },
{ ".js", "application/javascript" },
{ ".json", "application/json" },
{ ".xml", "application/xml" },
{ ".swf", "application/x-shockwave-flash" },
{ ".flv", "video/x-flv" },
{ ".png", "image/png" },
{ ".jpe", "image/jpeg" },
{ ".jpeg", "image/jpeg" },
{ ".jpg", "image/jpeg" },
{ ".gif", "image/gif" },
{ ".bmp", "image/bmp" },
{ ".ico", "image/vnd.microsoft.icon" },
{ ".tif", "image/tiff" },
{ ".tiff", "image/tiff" },
{ ".svg", "image/svg+xml"},
{ ".svgz", "image/svg+xml"}
};
const char *mime_literals[] = {
"text/html",
"text/css",
"text/plain",
"application/javascript",
"application/json",
"application/xml",
"application/x-shockwave-flash",
"video/x-flv",
"image/png",
"image/jpeg",
"image/gif",
"image/bmp",
"image/vnd.microsoft.icon",
"image/tiff",
"image/svg+xml"
};
template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, unsigned int()> {
mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start, "mimetype_matching_parser") {
m_mime_extensions.add
(".htm", 0)
(".html", 0)
(".php", 0)
(".css", 1)
(".txt", 2)
(".js", 3)
(".json", 4)
(".xml", 5)
(".swf", 6)
(".flv", 7)
(".png", 8)
(".jpe", 9)
(".jpeg", 9)
(".jpg", 9)
(".gif", 10)
(".bmp", 11)
(".ico", 12)
(".tiff", 13)
(".tif", 13)
(".svg", 14)
(".svgz", 14)
;
using qi::no_case;
m_start %= no_case[m_mime_extensions] >> qi::eoi;
}
qi::symbols<char, unsigned int> m_mime_extensions;
qi::rule<Iterator, unsigned int()> m_start;
};
std::string mime_extension(const std::string &n_path) {
// First locate the extension itself
const std::size_t last_dot = n_path.rfind(".");
if (last_dot == std::string::npos) {
return "application/text";
}
// and now pipe the extension into a qi symbols parser.
// I don't know if this is any faster than a more trivial algorithm::ends_with
// approach but I guess it won't be any slower
const mimetype_matching_parser<std::string::const_iterator> p;
unsigned int result;
std::string::const_iterator begin = n_path.begin() + last_dot;
const std::string::const_iterator end = n_path.end();
try {
if (qi::parse(begin, end, p, result) && (begin == end)) {
return mime_literals[result];
} else {
return "application/text";
}
} catch (const std::exception &) { // asio throws on invalid parse
return "application/text";
}
}
std::string mime_extension2(const std::string &n_path) {
using boost::algorithm::iequals;
auto const ext = [&n_path] {
auto const pos = n_path.rfind(".");
if (pos == std::string::npos)
return std::string{};
return n_path.substr(pos);
}();
// const std::size_t pos = n_path.rfind(".");
// if (pos == std::string::npos) {
// return std::string{};
// }
// const std::string ext = n_path.substr(pos);
if (iequals(ext, ".htm")) return "text/html";
if (iequals(ext, ".html")) return "text/html";
if (iequals(ext, ".php")) return "text/html";
if (iequals(ext, ".css")) return "text/css";
if (iequals(ext, ".txt")) return "text/plain";
if (iequals(ext, ".js")) return "application/javascript";
if (iequals(ext, ".json")) return "application/json";
if (iequals(ext, ".xml")) return "application/xml";
if (iequals(ext, ".swf")) return "application/x-shockwave-flash";
if (iequals(ext, ".flv")) return "video/x-flv";
if (iequals(ext, ".png")) return "image/png";
if (iequals(ext, ".jpe")) return "image/jpeg";
if (iequals(ext, ".jpeg")) return "image/jpeg";
if (iequals(ext, ".jpg")) return "image/jpeg";
if (iequals(ext, ".gif")) return "image/gif";
if (iequals(ext, ".bmp")) return "image/bmp";
if (iequals(ext, ".ico")) return "image/vnd.microsoft.icon";
if (iequals(ext, ".tiff")) return "image/tiff";
if (iequals(ext, ".tif")) return "image/tiff";
if (iequals(ext, ".svg")) return "image/svg+xml";
if (iequals(ext, ".svgz")) return "image/svg+xml";
return "application/text";
}
std::string mime_extension3(const std::string &n_path) {
using boost::algorithm::iequals;
auto ext = [&n_path] {
auto const pos = n_path.rfind(".");
if (pos == std::string::npos) {
return std::string{};
} else {
return n_path.substr(pos);
}
}();
boost::algorithm::to_lower(ext);
const std::map<const std::string, const std::string>::const_iterator i = mime_exts.find(ext);
if (i != mime_exts.cend()) {
return i->second;
} else {
return "application/text";
}
}
const std::string samples[] = {
"test.txt",
"test.html",
"longer/test.tiff",
"www.webSite.de/ico.ico",
"www.websIte.de/longEr/path/ico.bmp",
"www.TEST.com/longer/path/ico.svg",
"googlecom/shoRT/path/index.HTM",
"googlecom/bild.jpg",
"WWW.FLASH.COM/app.swf",
"WWW.FLASH.COM/BILD.GIF"
};
int test_qi_impl() {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> dis(0, 10);
const std::string sample = samples[dis(gen)];
const std::string result = mime_extension(sample);
int ret = dis(gen);
for (const char &c : result) { ret += c; }
return ret;
}
int test_lambda_impl() {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> dis(0, 10);
const std::string sample = samples[dis(gen)];
const std::string result = mime_extension2(sample);
int ret = dis(gen);
for (const char &c : result) { ret += c; }
return ret;
}
int test_map_impl() {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> dis(0, 10);
const std::string sample = samples[dis(gen)];
const std::string result = mime_extension3(sample);
int ret = dis(gen);
for (const char &c : result) { ret += c; }
return ret;
}
int main(int argc, char **argv) {
const unsigned int loops = 100000;
accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_qi;
accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_lambda;
accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_map;
std::cout << "Measure execution times for " << loops << " lambda runs" << std::endl;
for (unsigned int i = 0; i < loops; i++) {
boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
test_lambda_impl();
boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
times_lambda(end - start);
}
std::cout << "Measure execution times for " << loops << " qi runs" << std::endl;
for (unsigned int i = 0; i < loops; i++) {
boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
test_qi_impl();
boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
times_qi(end - start);
}
std::cout << "Measure execution times for " << loops << " map runs" << std::endl;
for (unsigned int i = 0; i < loops; i++) {
boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
test_map_impl();
boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
times_map(end - start);
}
std::cout << "Lambda runs took " << mean(times_lambda) << std::endl;
std::cout << "Qi runs took " << mean(times_qi) << std::endl;
std::cout << "Map runs took " << mean(times_map) << std::endl;
return EXIT_SUCCESS;
}
To my surprise, the lambda does matter (a little bit). What surprised me even more was that the qi implementation is much slower.
Measure execution times for 100000 lambda runs
Measure execution times for 100000 qi runs
Measure execution times for 100000 map runs
Lambda runs took 12443 nanoseconds
Qi runs took 15311 nanoseconds
Map runs took 10466 nanoseconds
Attempted Optimization #1
First, I was using symbols like this
template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, std::string()> {
mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start,
"mimetype_matching_parser") {
m_mime_extensions.add
(".htm", "text/html")
(".html", "text/html")
(".php", "text/html")
(".css", "text/css")
(".svg", "whatever...")
;
using qi::no_case;
m_start %= no_case[m_mime_extensions] >> qi::eoi;
}
qi::symbols<char, std::string> m_mime_extensions;
qi::rule<Iterator, std::string()> m_start;
};
That returned the string directly as attribute. A colleague pointed out that this is one extra std::string copy so I changed it so it only returns an index to a static char array:
const char *mime_literals[] = {
"text/html",
"text/css",
"text/plain",
// ... and so forth
};
template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, unsigned int()> {
mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start, "mimetype_matching_parser")
{
m_mime_extensions.add
(".htm",0)
(".html",0)
(".php",0)
(".css",1)
(".svg",... etc.
;
using qi::no_case;
m_start %= no_case[m_mime_extensions] >> qi::eoi;
}
qi::symbols<char, unsigned int> m_mime_extensions;
qi::rule<Iterator, unsigned int()> m_start;
};
This was a little bit faster but not really worth mentioning.
On my laptop, in Release mode, I am getting: - Beast Tutorial (Lambda) implementation averages at 6200 nanoseconds a run. - Qi implementation averages at about 7100 nanoseconds.
Now, that would be my first question: Why is that?
The 'beast' implementation strikes me as very inefficient as it goes through all the substrings, calls iequals every time, while it would be able to cache the lowercase.
I thought surely qi would do a binary search upon the keywords I added to a symbols parser. But is doesn't look like it does.
Attempted Optimization #2
So I came up with my own, also trivial implementation and use a static map with a cached lowercase temporary (see impl3 in attached source).
Test yielded:
- Static Map implementation averages at about 2400 nanoseconds a run
Sooo, I guess the question is why?
Am I misusing qi::symbols
somehow? Does it actually do a binary search but
the performance is lost some other place?
Stephan¹
(I am on windows MSVC14 64 bits with boost 1.66)
(¹ this question was paraphrased from the Spirit General mailing list, where it was posted 20180112T14:15CET; the online archives seem sadly broken)