Several matches in a one pass parser?

2019-02-28 03:48发布

I am still trying to populate several vectors with data parsed from a log. The key is to do it as fast and efficiently as possible, so I would like to collect all the data in a single pass (not with an "or" between rules).

I have found the next problems:

1) Every time I use Spirit and it does not work as expected, I find myself totally lost and resort to trial and error for two hours. Is there any debug directive that gives some hint about what has gone wrong?

2) Is the way I use phoenix::construct valid? I mean, can it be used as I have done in the code to avoid using a symbol table?

3) Is there any way of getting information from one rule and using it in another rule? I have tried with phoenix::ref, but it mixes up the data when combined with BOOST_FUSION_ADAPT_STRUCT.

4) Am I making a serious mistake by writing code like this? I mean, should I use a grammar wrapping this with auto rules, or just simplify by using two rules, one for "location" and another for "location + event", and then using phoenix?

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <cstring> // strlen

// Iterator type handed to the parsers: a pointer into a raw character buffer.
using It = const char*;
// Event kinds referenced by the slope and gear rules.
enum kind { SLOPE, GEAR };

// One location record parsed from a log line.
// The declaration order differs from the order in which the grammar yields
// the values; BOOST_FUSION_ADAPT_STRUCT lists the members in parse order.
struct Location {
    int driver;        // integer following " => Driver: "
    double time;       // number preceding " s"
    double vel;        // number following " - Speed: "
    double km;         // number following " - Km: "
    std::string date;  // bracketed timestamp text, captured with raw[]
    std::string road;  // run of graphic characters following " - Road: "
};

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind; printed as EVENT(<event>) by main
    double time;   // not listed in the Fusion adaptation of Event
    double value;  // number following the event label
};

// Expose the members to Boost.Fusion in the order the grammar produces them.
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km)
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)// "time" is meant to equal the time of the preceding "Location" header. Please do not adapt "time" unless necessary.


// Destination containers written by dispatcher::add.
// They shall be defined in another compilation unit and declared extern in production code. Please do not move them into the dispatcher struct.
std::vector<Location> container1;
std::vector<Event> container2;

struct dispatcher
{
    static void add(const Location& loc) { container1.push_back(loc); }
    static void add(const Event& ev)     { container2.push_back(ev);  }
};

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Spirit customization points that make dispatcher usable as a container
// attribute: each element parsed by a repeating parser is passed to
// push_back_container::call, which forwards it to dispatcher::add.
namespace boost { namespace spirit { namespace traits
{
    // Tell Spirit to treat dispatcher as a container attribute.
    template <> struct is_container<dispatcher> : std::true_type { };

    // Element type of that "container": either a Location or an Event.
    template <> struct container_value<dispatcher>
    {
        typedef boost::variant<Location, Event> type;
    };

    // Insertion hook invoked for every parsed element.
    template <typename T> struct push_back_container<dispatcher, T>
    {
        // Forwards whichever alternative the variant holds to the matching
        // dispatcher::add overload.
        struct Visitor
        {
            typedef void result_type;
            template <typename U> void operator()(U const& ev) const { dispatcher::add(ev); }
        };

        // log is unused because dispatcher::add is static; always reports success.
        static bool call(dispatcher& log, T const& attribute)
        {
            boost::apply_visitor(Visitor(), attribute);
            return true;
        }
    };
} } }

// Parses [b, e) with `*seek[line]`, so text that does not match `line` is
// skipped, and passes a temporary dispatcher as the attribute argument.
void parse_test_1(It b, It e) {
    using namespace qi;

    // Timestamp shaped like "2018-Mar-13 13:13:59.580482".
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // NOTE(review): the answer further down reports that these semantic actions
    // have no effect and suppress automatic attribute propagation - confirm
    // before relying on Event::event being set.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One complete log line: timestamp, time, driver, speed, road, km, an
    // optional slope/gear event, then end of line or end of input.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    parse(b, e, *boost::spirit::repository::qi::seek[line], dispatcher());
}

// Parses [b, e) with a plain `*line` (no seek[]), so repetition ends at the
// first position where `line` does not match. A temporary dispatcher is
// passed as the attribute argument.
void parse_test_2(It b, It e) {
    using namespace qi;

    // Timestamp shaped like "2018-Mar-13 13:13:59.580482".
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // NOTE(review): the answer further down reports that these semantic actions
    // have no effect and suppress automatic attribute propagation - confirm
    // before relying on Event::event being set.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One complete log line: timestamp, time, driver, speed, road, km, an
    // optional slope/gear event, then end of line or end of input.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    parse(b, e, *line, dispatcher());
}

// Sample log for parse_test_1: two of its lines do not match the parser.
static char input1[] =
    "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n"
    "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n"
    "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Number of characters in input1, not counting the terminating NUL.
static const size_t len1 = strlen(input1);

// Sample log for parse_test_2: every line shall match the parser.
static char input2[] =
    "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Number of characters in input2, not counting the terminating NUL.
static const size_t len2 = strlen(input2);

// Runs both parser variants over their sample input and prints whatever ended
// up in the two global containers after each run.
int main()
{
    // Printers shared by both test runs.
    const auto show_location = [](const Location& loc)
    {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
    };
    const auto show_event = [](const Event& ev)
    {
        std::cout << ev.time << " s => EVENT(" << ev.event << ") : " << ev.value << std::endl;
    };

    parse_test_1(input1, input1 + len1);
    std::cout << "TEST 1:\n" << "Locations:\n";
    std::for_each(container1.begin(), container1.end(), show_location);
    std::cout << "Events:\n";
    std::for_each(container2.begin(), container2.end(), show_event);

    // Start the second run from empty containers.
    container1.clear();
    container2.clear();

    parse_test_2(input2, input2 + len2);
    std::cout << "\nTEST 2:\n" << "Locations:\n";
    std::for_each(container1.begin(), container1.end(), show_location);
    std::cout << "Events:\n";
    std::for_each(container2.begin(), container2.end(), show_event);

    return 0;
}

RESULT: The expected result should be this one:

TEST 1:
Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
Events:
0.2 s => EVENT(0): 5.5
1.79 s => EVENT(1): 1
3.44 s => EVENT(0): 10

TEST 2:
Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
Events:
0.2 s => EVENT(0): 5.5
1.79 s => EVENT(1): 1
3.44 s => EVENT(0): 10

1条回答
狗以群分
2楼-- · 2019-02-28 04:12

First off: I gave you all of that in that answer, under "Separate vectors with a trait". The only difference appears to be the types and the fact that you made LogEvents members global variables (ick).

On to your question code:

 parse(b, e, *boost::spirit::repository::qi::seek[line], dispatcher());

Why are you passing the dispatcher there? Dispatcher is not a compatible attribute (in fact has only static non-data members).

So, let's fix it back to a sane data structure (instead of global variables):

struct ParsedData
{
    std::vector<Location> _locations;
    std::vector<Event> _events;
    void add(const Location& loc) { _locations.push_back(loc); }
    void add(const Event& ev)     { _events.push_back(ev);  }
};

Note that the containers aren't global any more and they have proper names.

The boost::spirit::traits specializations are the same (mutatis mutandis), except that we now have a data instance, so we bind it (again, as in the original example linked above, line 52). So let's fix the usage:

ParsedData data;
parse(b, e, *boost::spirit::repository::qi::seek[line], data);
return data;

From here, it all worked.

Further Cleanup And Demo

Notes:

  • there is no reason to use raw char arrays and strlen in C++ (I used std::string)
  • there is no reason to duplicate all the code and name everything _1 or _2. I made main:

    // Run each parser variant through the shared test driver.
    int main() {
        do_test("TEST 1", input1, parse_test_1);
        do_test("TEST 2", input2, parse_test_2);
    }
    
  • there is no reason to use for_each with a lambda where a ranged-for would suffice. This is do_test:

    void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
        ParsedData const data = f(input.begin(), input.end());
        std::cout << caption << ":\n";
        std::cout << "Locations:\n";
        for (Location const& loc : data._locations) {
            std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
        }
    
        std::cout << "Events:\n";
        for (Event const& ev : data._events) {
            std::cout << " EVENT(" << ev.event << ") : " << ev.value << std::endl;
        }
    }
    
  • I dropped the time member from Event since it was unused.

Full Listing

Live On Coliru

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <cstring> // strlen

// Iterator type handed to the parsers: a read-only iterator into a std::string.
using It = std::string::const_iterator;
// Event kinds referenced by the slope and gear rules.
enum kind { SLOPE, GEAR };

// One location record parsed from a log line.
// The declaration order differs from the order in which the grammar yields
// the values; BOOST_FUSION_ADAPT_STRUCT lists the members in parse order.
struct Location {
    int driver;        // integer following " => Driver: "
    double time;       // number preceding " s"
    double vel;        // number following " - Speed: "
    double km;         // number following " - Km: "
    std::string date;  // bracketed timestamp text, captured with raw[]
    std::string road;  // run of graphic characters following " - Road: "
};

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind; printed as EVENT(<event>) by do_test
    double value;  // number following the event label
};

// Expose the members to Boost.Fusion in the order the grammar produces them.
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km)
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)

struct ParsedData {
    std::vector<Location> _locations;
    std::vector<Event> _events;
    void add(const Location& loc) { _locations.push_back(loc); }
    void add(const Event& ev)     { _events.push_back(ev);  }
};

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Spirit customization points that make ParsedData usable as a container
// attribute: each element parsed by a repeating parser is passed to
// push_back_container::call, which stores it through ParsedData::add.
namespace boost { namespace spirit { namespace traits {
    // Tell Spirit to treat ParsedData as a container attribute.
    template <> struct is_container<ParsedData> : std::true_type {};
    // Element type of that "container": either a Location or an Event.
    template <> struct container_value<ParsedData> { typedef boost::variant<Location, Event> type; };

    // Insertion hook invoked for every parsed element.
    template <typename T> struct push_back_container<ParsedData, T> {
        // Forwards whichever alternative the variant holds to the matching
        // ParsedData::add overload of the bound instance.
        struct Visitor {
            ParsedData &data;
            typedef void result_type;
            template <typename U> void operator()(U const &ev) const { data.add(ev); }
        };

        // Stores attribute in log; always reports success.
        static bool call(ParsedData &log, T const &attribute) {
            boost::apply_visitor(Visitor{ log }, attribute);
            return true;
        }
    };
} } } // namespace boost::spirit::traits

// Parses [b, e) with `*seek[line]`, so text that does not match `line` is
// skipped, and returns the collected records.
ParsedData parse_test_1(It b, It e) {
    using namespace qi;

    // Timestamp shaped like "2018-Mar-13 13:13:59.580482".
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // NOTE(review): "Further Observations" below reports that these semantic
    // actions have no effect and suppress automatic attribute propagation.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One complete log line: timestamp, time, driver, speed, road, km, an
    // optional slope/gear event, then end of line or end of input.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    ParsedData data;
    parse(b, e, *boost::spirit::repository::qi::seek[line], data);
    return data;
}

// Parses [b, e) with a plain `*line` (no seek[]), so repetition ends at the
// first position where `line` does not match, and returns the collected records.
ParsedData parse_test_2(It b, It e) {
    using namespace qi;

    // Timestamp shaped like "2018-Mar-13 13:13:59.580482".
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // NOTE(review): "Further Observations" below reports that these semantic
    // actions have no effect and suppress automatic attribute propagation.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One complete log line: timestamp, time, driver, speed, road, km, an
    // optional slope/gear event, then end of line or end of input.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    ParsedData data;
    parse(b, e, *line, data);
    return data;
}

// Sample log for parse_test_1: two of its lines do not match the parser.
static std::string const input1 =
    "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n"
    "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n"
    "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Sample log for parse_test_2: every line shall match the parser.
static std::string const input2 =
    "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
    ParsedData const data = f(input.begin(), input.end());
    std::cout << caption << ":\n";
    std::cout << "Locations:\n";
    for (Location const& loc : data._locations) {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
    }

    std::cout << "Events:\n";
    for (Event const& ev : data._events) {
        std::cout << " EVENT(" << ev.event << ") : " << ev.value << std::endl;
    }
}

// Run each parser variant through the shared test driver.
int main() {
    do_test("TEST 1", input1, parse_test_1);
    do_test("TEST 2", input2, parse_test_2);
}

Further Observations:

  1. It is unclear to me when you'd expect the Event rules (slope/gear) to match or synthesize an attribute. It's also unclear to me why those would be optional (the location part of a line cannot possibly match without that part).

  2. Also, the natural attribute exposed by a rule like

    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);
    

    Would have Location contain an extra field:

    // Location extended with the optional trailing event, so that it mirrors
    // every element of the `line` rule.
    struct Location {
        int driver;
        double time;
        double vel;
        double km;
        std::string date;
        std::string road;
        boost::optional<Event> event;  // empty when the line has no SLOPE/GEAR part
    };
    
    // Event is adapted first because Location now contains one.
    BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
    BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)
    
  3. These rules are odd:

    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;
    

    Why not use the symbols approach exactly as I showed in the linked answer (line 57/98)? If you insist on doing it "clumsy", do not use semantic actions (Boost Spirit: "Semantic actions are evil"?) but use qi::attr:

    qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
    qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;
    

    Among the helpful effects are that your compilation times can be cut in half, and also the attribute values actually propagate (your semantic actions had no effect at all, and actively suppressed automatic attribute propagation...).

With these improvements in place we get:

Live On Coliru

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>

// Iterator type handed to the parser: a read-only iterator into a std::string.
using It = std::string::const_iterator;
// Event kinds injected by the slope and gear rules.
enum kind { SLOPE, GEAR };

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind, supplied by attr(kind::SLOPE) / attr(kind::GEAR)
    double value;  // number following the event label
};

// One record parsed from a log line, including its optional trailing event.
// The declaration order differs from the order in which the grammar yields
// the values; BOOST_FUSION_ADAPT_STRUCT lists the members in parse order.
struct Location {
    int driver;        // integer following " => Driver: "
    double time;       // number preceding " s"
    double vel;        // number following " - Speed: "
    double km;         // number following " - Km: "
    std::string date;  // bracketed timestamp text, captured with raw[]
    std::string road;  // run of graphic characters following " - Road: "
    boost::optional<Event> event;  // empty when the line has no SLOPE/GEAR part
};

// Expose the members to Boost.Fusion in the order the grammar produces them.
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)

using ParsedData = std::vector<Location>;

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Parses every log line in [b, e) that matches `line` and returns the records
// in input order. Text that does not match is skipped by seek[].
ParsedData parse_test(It b, It e) {
    using namespace qi;

    // Timestamp shaped like "2018-Mar-13 13:13:59.580482".
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // attr() supplies the event kind as an attribute without consuming input.
    qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
    qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;

    // One complete log line: timestamp, time, driver, speed, road, km, an
    // optional slope/gear event, then end of line or end of input.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    ParsedData data;
    // hold[] parses into a copy of the attribute and commits it only on
    // success. Without it, a line that matches the timestamp but fails later
    // leaves its date in the shared Location attribute, and the next matching
    // line appends its own date to it (two timestamps glued together).
    parse(b, e, *boost::spirit::repository::qi::seek[qi::hold[line]], data);
    return data;
}

// Sample log: two of its lines do not match the parser.
static std::string const input =
    "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n"
    "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
    "[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n"
    "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

int main() {
    auto parsed = parse_test(input.begin(), input.end());
    std::cout << "Locations:\n";
    for (Location const& loc : parsed) {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
        if (loc.event)
            std::cout << " - event: " << loc.event->event << " value: " << loc.event->value << "\n";
    }
}

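Printing (note: this output was captured with `line` used directly inside `seek[]`. In the last two location lines the date of the preceding skipped line is glued in front of the real date, because a failed partial match leaves the already-parsed date in the attribute and the next successful match appends to it. Wrapping the rule as `seek[hold[line]]` avoids this.)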

Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
 - event: 0 value: 5.5
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
 - event: 1 value: 1
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.1702032018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.1702032018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
 - event: 0 value: 10
查看更多
登录 后发表回答