Qi Symbols slow performance?

2020-07-14 06:07发布

问题:

I wanted to raise a subject that just sent me down a rabbit hole and brought up a question about qi::symbols.

It all started while I was looking into the new beast library and read a tutorial example

It starts with a function that guesses mime types from http path extensions. I started to look more closely and saw this:

 auto const ext = [&path]
    {
        auto const pos = path.rfind(".");
        if(pos == boost::beast::string_view::npos)
            return boost::beast::string_view{};
        return path.substr(pos);
    }();

It took me a while to figure out it was a IIFE in C++ style, and was used to initialize ext while declaring it constant.

Anyway, I started testing if this made any performance difference that would justify the horrible readability vs the straight forward implementation.

Doing so I started wondering is this wouldn't be much better implemented in qi::symbols. So I came up with two alternative implementations:

#include <boost/smart_ptr/scoped_array.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/moment.hpp>
#include <boost/chrono.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/vector.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/assign.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <random>

using namespace boost::accumulators;
typedef boost::chrono::duration<long long, boost::micro> microseconds;
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace ascii = qi::ascii;
namespace phx = boost::phoenix;

const std::map<const std::string, const std::string> mime_exts = {
    { ".htm",  "text/html" },
    { ".html", "text/html" },
    { ".php",  "text/html" },
    { ".css",  "text/css"  },
    { ".js",   "application/javascript" },
    { ".json", "application/json" },
    { ".xml",  "application/xml" },
    { ".swf",  "application/x-shockwave-flash" },
    { ".flv",  "video/x-flv" },
    { ".png",  "image/png" },
    { ".jpe",  "image/jpeg" },
    { ".jpeg", "image/jpeg" },
    { ".jpg",  "image/jpeg" },
    { ".gif",  "image/gif" },
    { ".bmp",  "image/bmp" },
    { ".ico",  "image/vnd.microsoft.icon" },
    { ".tif",  "image/tiff" },
    { ".tiff", "image/tiff" },
    { ".svg",  "image/svg+xml"},
    { ".svgz", "image/svg+xml"}
};


const char *mime_literals[] = {
    "text/html",
    "text/css",
    "text/plain",
    "application/javascript",
    "application/json",
    "application/xml",
    "application/x-shockwave-flash",
    "video/x-flv",
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/bmp",
    "image/vnd.microsoft.icon",
    "image/tiff",
    "image/svg+xml"
};

template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, unsigned int()> {

    mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start, "mimetype_matching_parser") {

        m_mime_extensions.add
            (".htm", 0)
            (".html", 0)
            (".php", 0)
            (".css", 1)
            (".txt", 2)
            (".js", 3)
            (".json", 4)
            (".xml", 5)
            (".swf", 6)
            (".flv", 7)
            (".png", 8)
            (".jpe", 9)
            (".jpeg", 9)
            (".jpg", 9)
            (".gif", 10)
            (".bmp", 11)
            (".ico", 12)
            (".tiff", 13)
            (".tif", 13)
            (".svg", 14)
            (".svgz", 14)
            ;

        using qi::no_case;

        m_start %= no_case[m_mime_extensions] >> qi::eoi;
    }

    qi::symbols<char, unsigned int>   m_mime_extensions;
    qi::rule<Iterator, unsigned int()> m_start;
};

std::string mime_extension(const std::string &n_path) {

    // First locate the extension itself
    const std::size_t last_dot = n_path.rfind(".");
    if (last_dot == std::string::npos) {
        return "application/text";
    }

    // and now pipe the extension into a qi symbols parser.
    // I don't know if this is any faster than a more trivial algorithm::ends_with
    // approach but I guess it won't be any slower
    const mimetype_matching_parser<std::string::const_iterator> p;
    unsigned int                        result;
    std::string::const_iterator         begin = n_path.begin() + last_dot;
    const std::string::const_iterator   end = n_path.end();

    try {
        if (qi::parse(begin, end, p, result) && (begin == end)) {
            return mime_literals[result];
        } else {
            return "application/text";
        }
    } catch (const std::exception &) {  // asio throws on invalid parse
        return "application/text";
    }
}

std::string mime_extension2(const std::string &n_path) {

    using boost::algorithm::iequals;

    auto const ext = [&n_path] {
        auto const pos = n_path.rfind(".");
        if (pos == std::string::npos)
            return std::string{};
        return n_path.substr(pos);
    }();

    //  const std::size_t pos = n_path.rfind(".");
    //  if (pos == std::string::npos) {
    //      return std::string{};
    //  }
    //  const std::string ext = n_path.substr(pos);

    if (iequals(ext, ".htm"))  return "text/html";
    if (iequals(ext, ".html")) return "text/html";
    if (iequals(ext, ".php"))  return "text/html";
    if (iequals(ext, ".css"))  return "text/css";
    if (iequals(ext, ".txt"))  return "text/plain";
    if (iequals(ext, ".js"))   return "application/javascript";
    if (iequals(ext, ".json")) return "application/json";
    if (iequals(ext, ".xml"))  return "application/xml";
    if (iequals(ext, ".swf"))  return "application/x-shockwave-flash";
    if (iequals(ext, ".flv"))  return "video/x-flv";
    if (iequals(ext, ".png"))  return "image/png";
    if (iequals(ext, ".jpe"))  return "image/jpeg";
    if (iequals(ext, ".jpeg")) return "image/jpeg";
    if (iequals(ext, ".jpg"))  return "image/jpeg";
    if (iequals(ext, ".gif"))  return "image/gif";
    if (iequals(ext, ".bmp"))  return "image/bmp";
    if (iequals(ext, ".ico"))  return "image/vnd.microsoft.icon";
    if (iequals(ext, ".tiff")) return "image/tiff";
    if (iequals(ext, ".tif"))  return "image/tiff";
    if (iequals(ext, ".svg"))  return "image/svg+xml";
    if (iequals(ext, ".svgz")) return "image/svg+xml";
    return "application/text";
}

std::string mime_extension3(const std::string &n_path) {

    using boost::algorithm::iequals;

    auto ext = [&n_path] {
        auto const pos = n_path.rfind(".");
        if (pos == std::string::npos) {
            return std::string{};
        } else {
            return n_path.substr(pos);
        }
    }();

    boost::algorithm::to_lower(ext);

    const std::map<const std::string, const std::string>::const_iterator i = mime_exts.find(ext);

    if (i != mime_exts.cend()) {
        return i->second;
    } else {
        return "application/text";
    }
}

const std::string samples[] = {
    "test.txt",
    "test.html",
    "longer/test.tiff",
    "www.webSite.de/ico.ico",
    "www.websIte.de/longEr/path/ico.bmp",
    "www.TEST.com/longer/path/ico.svg",
    "googlecom/shoRT/path/index.HTM",
    "googlecom/bild.jpg",
    "WWW.FLASH.COM/app.swf",
    "WWW.FLASH.COM/BILD.GIF"
};

int test_qi_impl() {

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 10);

    const std::string sample = samples[dis(gen)];
    const std::string result = mime_extension(sample);

    int ret = dis(gen);
    for (const char &c : result) { ret += c; }
    return ret;
}

int test_lambda_impl() {

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 10);

    const std::string sample = samples[dis(gen)];
    const std::string result = mime_extension2(sample);

    int ret = dis(gen);
    for (const char &c : result) { ret += c; }
    return ret;
}

int test_map_impl() {

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 10);

    const std::string sample = samples[dis(gen)];
    const std::string result = mime_extension3(sample);

    int ret = dis(gen);
    for (const char &c : result) { ret += c; }
    return ret;
}

int main(int argc, char **argv) {

    const unsigned int loops = 100000;

    accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_qi;
    accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_lambda;
    accumulator_set<boost::chrono::high_resolution_clock::duration, features<tag::mean> > times_map;

    std::cout << "Measure execution times for " << loops << " lambda runs" << std::endl;
    for (unsigned int i = 0; i < loops; i++) {
        boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
        test_lambda_impl();
        boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
        times_lambda(end - start);
    }

    std::cout << "Measure execution times for " << loops << " qi runs" << std::endl;
    for (unsigned int i = 0; i < loops; i++) {
        boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
        test_qi_impl();
        boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
        times_qi(end - start);
    }

    std::cout << "Measure execution times for " << loops << " map runs" << std::endl;
    for (unsigned int i = 0; i < loops; i++) {
        boost::chrono::high_resolution_clock::time_point start = boost::chrono::high_resolution_clock::now();
        test_map_impl();
        boost::chrono::high_resolution_clock::time_point end = boost::chrono::high_resolution_clock::now();
        times_map(end - start);
    }

    std::cout << "Lambda runs took " << mean(times_lambda) << std::endl;
    std::cout << "Qi runs took " << mean(times_qi) << std::endl;
    std::cout << "Map runs took " << mean(times_map) << std::endl;
    return EXIT_SUCCESS;
}

To my surprise, the lambda does matter (a little bit). What surprised me even more was that the qi implementation is much slower.

Measure execution times for 100000 lambda runs
Measure execution times for 100000 qi runs
Measure execution times for 100000 map runs
Lambda runs took 12443 nanoseconds
Qi runs took 15311 nanoseconds
Map runs took 10466 nanoseconds

Attempted Optimization #1

First, I was using symbols like this

template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, std::string()> {

    mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start,
            "mimetype_matching_parser") {

        m_mime_extensions.add
            (".htm", "text/html")
            (".html",  "text/html")
            (".php",  "text/html")
            (".css",  "text/css")
            (".svg",  "whatever...")
            ;

        using qi::no_case;

        m_start %= no_case[m_mime_extensions] >> qi::eoi;
    }

    qi::symbols<char, std::string>   m_mime_extensions;
    qi::rule<Iterator, std::string()> m_start;
};

That returned the string directly as attribute. A colleague pointed out that this is one extra std::string copy so I changed it so it only returns an index to a static char array:

const char *mime_literals[] = {
    "text/html",
    "text/css",
    "text/plain",
    // ... and so forth
};

template <typename Iterator>
struct mimetype_matching_parser : qi::grammar<Iterator, unsigned int()> {

    mimetype_matching_parser() : mimetype_matching_parser::base_type(m_start, "mimetype_matching_parser")
    {
        m_mime_extensions.add
            (".htm",0)
            (".html",0)
            (".php",0)
            (".css",1)
            (".svg",... etc.
            ;

        using qi::no_case;

        m_start %= no_case[m_mime_extensions] >> qi::eoi;
    }

    qi::symbols<char, unsigned int>   m_mime_extensions;
    qi::rule<Iterator, unsigned int()> m_start;
};

This was a little bit faster but not really worth mentioning.

On my laptop, in Release mode, I am getting: - Beast Tutorial (Lambda) implementation averages at 6200 nanoseconds a run. - Qi implementation averages at about 7100 nanoseconds.

Now, that would be my first question: Why is that?

The 'beast' implementation strikes me as very inefficient as it goes through all the substrings, calls iequals every time, while it would be able to cache the lowercase.

I thought surely qi would do a binary search upon the keywords I added to a symbols parser. But is doesn't look like it does.

Attempted Optimization #2

So I came up with my own, also trivial implementation and use a static map with a cached lowercase temporary (see impl3 in attached source).

Test yielded:

  • Static Map implementation averages at about 2400 nanoseconds a run

Sooo, I guess the question is why?

Am I misusing qi::symbols somehow? Does it actually do a binary search but the performance is lost some other place?

Stephan¹

(I am on windows MSVC14 64 bits with boost 1.66)


(¹ this question was paraphrased from the Spirit General mailing list, where it was posted 20180112T14:15CET; the online archives seem sadly broken)

回答1:

On 12-01-18 14:15, Stephan Menzel wrote:

So I came up with two different implementations. Please find the source attached.

I've looked at it. A few superficial observations first:

  1. You're comparing apples and pears, since Beast uses zero-copy string-views, where Qi doesn't.

  2. Also, the sample selection invokes UB because uniform_int_distribution(0,10) is out of range for the sample array (should be (0, 9)).

  3. Lastly, the map approach didn't have a mapping for the .txt extension.

With these out of the way I simplified/structured the test program to the following:

Live On Coliru

Prints the following on my system:

Lambda runs took 2319 nanoseconds
Qi     runs took 2841 nanoseconds
Map    runs took 193 nanoseconds

Now, the biggest culprit is (obviously?) that you're constructing the grammar each time through the loop (compiling the rules). Of course, there's no need. Removing that yields:

Live On Coliru

Lambda runs took 2676 nanoseconds
Qi     runs took 98 nanoseconds
Map    runs took 189 nanoseconds

That's already faster, even though you're still copying strings when there's no actual need for it. Using the inspiration from the answer linked above, I'd probably write it like:

#include <boost/spirit/include/qi.hpp>
namespace qi_impl {
    namespace qi = boost::spirit::qi;

    struct mimetype_symbols_type : qi::symbols<char, char const*> {
        mimetype_symbols_type() {
            auto rev = [](string_view s) -> std::string { return { s.rbegin(), s.rend() }; };

            this->add
                (rev(".htm"),  "text/html")
                (rev(".html"), "text/html")
                (rev(".php"),  "text/html")
                (rev(".css"),  "text/css")
                (rev(".txt"),  "text/plain")
                (rev(".js"),   "application/javascript")
                (rev(".json"), "application/json")
                (rev(".xml"),  "application/xml")
                (rev(".swf"),  "application/x-shockwave-flash")
                (rev(".flv"),  "video/x-flv")
                (rev(".png"),  "image/png")
                (rev(".jpe"),  "image/jpeg")
                (rev(".jpeg"), "image/jpeg")
                (rev(".jpg"),  "image/jpeg")
                (rev(".gif"),  "image/gif")
                (rev(".bmp"),  "image/bmp")
                (rev(".ico"),  "image/vnd.microsoft.icon")
                (rev(".tiff"), "image/tiff")
                (rev(".tif"),  "image/tiff")
                (rev(".svg"),  "image/svg+xml")
                (rev(".svgz"), "image/svg+xml")
                ;
        }
    } static const mime_symbols;

    char const* using_spirit(const string_view &n_path) {
        char const* result = "application/text";
        qi::parse(n_path.crbegin(), n_path.crend(), qi::no_case[mime_symbols], result);
        return result;
    }
}

There is no more no need to muck with finding "the last dot" in the first place, no need to "check for the match being at the end", and you get the value directly from the symbols. You are free to assign to a string_view or a std::string as desired.

Full Listing

Using string_views (both std::string_view and boost::string_view supported/shown) throughout.

Note also this shows a custom comparator being used on the map<> approach, just to prove that indeed there's a benefit from knowing that the map keys are all lower-case. (It's not, in fact, because it "cached the lowercase" since it's only used once!)

Live On Coliru

#include <boost/chrono.hpp>
#include <string>

#ifdef BOOST_STRING_VIEW
    #include <boost/utility/string_view.hpp>
    using string_view = boost::string_view;
#else
    #include <string_view>
    using string_view = std::string_view;
#endif

static auto constexpr npos = string_view::npos;

#include <boost/spirit/include/qi.hpp>
namespace qi_impl {
    namespace qi = boost::spirit::qi;

    struct mimetype_symbols_type : qi::symbols<char, char const*> {
        mimetype_symbols_type() {
            auto rev = [](string_view s) -> std::string { return { s.rbegin(), s.rend() }; };

            this->add
                (rev(".htm"),  "text/html")
                (rev(".html"), "text/html")
                (rev(".php"),  "text/html")
                (rev(".css"),  "text/css")
                (rev(".txt"),  "text/plain")
                (rev(".js"),   "application/javascript")
                (rev(".json"), "application/json")
                (rev(".xml"),  "application/xml")
                (rev(".swf"),  "application/x-shockwave-flash")
                (rev(".flv"),  "video/x-flv")
                (rev(".png"),  "image/png")
                (rev(".jpe"),  "image/jpeg")
                (rev(".jpeg"), "image/jpeg")
                (rev(".jpg"),  "image/jpeg")
                (rev(".gif"),  "image/gif")
                (rev(".bmp"),  "image/bmp")
                (rev(".ico"),  "image/vnd.microsoft.icon")
                (rev(".tiff"), "image/tiff")
                (rev(".tif"),  "image/tiff")
                (rev(".svg"),  "image/svg+xml")
                (rev(".svgz"), "image/svg+xml")
                ;
        }
    } static const mime_symbols;

    char const* using_spirit(const string_view &n_path) {
        char const* result = "application/text";
        qi::parse(n_path.crbegin(), n_path.crend(), qi::no_case[mime_symbols], result);
        return result;
    }
}

#include <boost/algorithm/string.hpp>
namespace impl {
    string_view using_iequals(const string_view &n_path) {

        using boost::algorithm::iequals;

        auto const ext = [&n_path] {
            auto pos = n_path.rfind(".");
            return pos != npos? n_path.substr(pos) : string_view {};
        }();

        if (iequals(ext, ".htm"))  return "text/html";
        if (iequals(ext, ".html")) return "text/html";
        if (iequals(ext, ".php"))  return "text/html";
        if (iequals(ext, ".css"))  return "text/css";
        if (iequals(ext, ".txt"))  return "text/plain";
        if (iequals(ext, ".js"))   return "application/javascript";
        if (iequals(ext, ".json")) return "application/json";
        if (iequals(ext, ".xml"))  return "application/xml";
        if (iequals(ext, ".swf"))  return "application/x-shockwave-flash";
        if (iequals(ext, ".flv"))  return "video/x-flv";
        if (iequals(ext, ".png"))  return "image/png";
        if (iequals(ext, ".jpe"))  return "image/jpeg";
        if (iequals(ext, ".jpeg")) return "image/jpeg";
        if (iequals(ext, ".jpg"))  return "image/jpeg";
        if (iequals(ext, ".gif"))  return "image/gif";
        if (iequals(ext, ".bmp"))  return "image/bmp";
        if (iequals(ext, ".ico"))  return "image/vnd.microsoft.icon";
        if (iequals(ext, ".tiff")) return "image/tiff";
        if (iequals(ext, ".tif"))  return "image/tiff";
        if (iequals(ext, ".svg"))  return "image/svg+xml";
        if (iequals(ext, ".svgz")) return "image/svg+xml";
        return "application/text";
    }
}

#include <boost/algorithm/string.hpp>
#include <map>

namespace impl {
    struct CiCmp {
        template <typename R1, typename R2>
        bool operator()(R1 const& a, R2 const& b) const {
            return boost::algorithm::ilexicographical_compare(a, b);
        }
    };

    static const std::map<string_view, string_view, CiCmp> s_mime_exts_map  {
        { ".txt", "text/plain" },
        { ".htm",  "text/html" },
        { ".html", "text/html" },
        { ".php",  "text/html" },
        { ".css",  "text/css"  },
        { ".js",   "application/javascript" },
        { ".json", "application/json" },
        { ".xml",  "application/xml" },
        { ".swf",  "application/x-shockwave-flash" },
        { ".flv",  "video/x-flv" },
        { ".png",  "image/png" },
        { ".jpe",  "image/jpeg" },
        { ".jpeg", "image/jpeg" },
        { ".jpg",  "image/jpeg" },
        { ".gif",  "image/gif" },
        { ".bmp",  "image/bmp" },
        { ".ico",  "image/vnd.microsoft.icon" },
        { ".tif",  "image/tiff" },
        { ".tiff", "image/tiff" },
        { ".svg",  "image/svg+xml"},
        { ".svgz", "image/svg+xml"},
    };

    string_view using_map(const string_view& n_path) {
        auto const ext = [](string_view n_path) {
            auto pos = n_path.rfind(".");
            return pos != npos? n_path.substr(pos) : string_view {};
        };

        auto i = s_mime_exts_map.find(ext(n_path));

        if (i != s_mime_exts_map.cend()) {
            return i->second;
        } else {
            return "application/text";
        }
    }
}

#include <random>
namespace samples {

    static string_view const s_samples[] = {
    "test.txt",
    "test.html",
    "longer/test.tiff",
    "www.webSite.de/ico.ico",
    "www.websIte.de/longEr/path/ico.bmp",
    "www.TEST.com/longer/path/ico.svg",
    "googlecom/shoRT/path/index.HTM",
    "googlecom/bild.jpg",
    "WWW.FLASH.COM/app.swf",
    "WWW.FLASH.COM/BILD.GIF"
    };

    std::mt19937 s_random_generator(std::random_device{}());
    std::uniform_int_distribution<> s_dis(0, boost::size(s_samples) - 1);

    string_view random_sample() {
        return s_samples[s_dis(s_random_generator)];
    }
}

#include <boost/functional/hash.hpp>
#include <iostream>
template <typename F>
int generic_test(F f) {
    auto sample = samples::random_sample();
    string_view result = f(sample);

    //std::cout << "DEBUG " << sample << " -> " << result << "\n";

    return boost::hash_range(result.begin(), result.end());
}

#include <boost/serialization/array_wrapper.hpp> // missing include in boost version on coliru
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>

template <typename F>
auto benchmark(F f) {
    using C = boost::chrono::high_resolution_clock;
    using duration = C::duration;

    const unsigned int loops = 100000;

    namespace ba = boost::accumulators;
    ba::accumulator_set<duration, ba::features<ba::tag::mean>> times;

    for (unsigned int i = 0; i < loops; i++) {
        auto start = C::now();
        generic_test(f);
        times(C::now() - start);
    }

    return ba::mean(times);
}


int main() {
    std::cout << std::unitbuf;
    std::cout << "Lambda runs took " << benchmark(impl::using_iequals)   << std::endl;
    std::cout << "Qi     runs took " << benchmark(qi_impl::using_spirit) << std::endl;
    std::cout << "Map    runs took " << benchmark(impl::using_map)       << std::endl;
}

Prints

Lambda runs took 2470 nanoseconds
Qi     runs took 119 nanoseconds
Map    runs took 2239 nanoseconds // see Note above