In boost spirit, use of multi_pass with streaming

2019-07-04 22:18发布

问题:

I want to read a large CSV file and parse it with Spirit Qi (using Boost 1.59.0). There are examples of this and it looks straightforward, but the obvious setup results in a compile error in which the first parameter to qi::phrase_parse(...) is not accepted. What works here? (One example is at: How to pass the iterator to a function in spirit qi ) The code:

#define BOOST_SPIRIT_DEBUG
//#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
//#define BOOST_SPIRIT_DEBUG_OUT std::cerr

#include <stdio.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <fstream>

// Path of the CSV file that main() opens.
std::string dataLoc = "afile.csv";

// Short alias for the Spirit Qi namespace.
namespace qi = boost::spirit::qi;

// Attribute types produced by the grammar: a parsed file is a vector of
// lines, and each line is a vector of column strings.
using Column  = std::string;
using Columns = std::vector<Column>;
using CsvLine = Columns;
using CsvParsed = std::vector<CsvLine>;

// Qi grammar for a '|'-separated CSV document, parameterised on the iterator
// type It. It synthesises a CsvParsed attribute and is declared with a
// qi::blank_type skipper.
template <typename It>
struct CsvGrammar : qi::grammar<It, CsvParsed(), qi::blank_type>
{
    CsvGrammar() : CsvGrammar::base_type(start)
    {
        using namespace qi;

        // Column separator shared by the line and column rules.
        static const char colsep = '|';

        // Optional lines separated by end-of-line markers.
        start  = -line % eol;
        // One or more columns separated by colsep.
        line   = column % colsep;
        // Either a quoted field or any run of characters other than colsep
        // (eol is not excluded here).
        column = quoted | *~char_(colsep);
        // Double-quoted field: the body is a sequence of doubled quote
        // characters (matched as a string literal) or non-quote characters.
        quoted = '"' >> *("\"\"" | ~char_('"')) >> '"';

        // Registers the four rules for BOOST_SPIRIT_DEBUG tracing.
        BOOST_SPIRIT_DEBUG_NODES((start)(line)(column)(quoted));
    }
private:
    qi::rule<It, CsvParsed(), qi::blank_type> start;
    qi::rule<It, CsvLine(), qi::blank_type> line;
    qi::rule<It, Column(),  qi::blank_type> column;
    // Declared without a skipper, unlike the three rules above.
    qi::rule<It, std::string()> quoted;
};

// Opens dataLoc, wraps the stream in multi_pass iterators, runs the grammar
// with a blank skipper and prints every parsed column in square brackets.
int main()
{
    std::ifstream inFile(dataLoc, std::ifstream::in);
    // Only reports success; parsing is attempted below whatever the stream state.
    if (inFile.good()) {
        std::cout << "input found" << std::endl;
    }
/*
    // use either this block of code
    typedef boost::spirit::istream_iterator istreamIter;
    istreamIter fwd_begin = istreamIter(inFile);
    istreamIter fwd_end = istreamIter();
*/
    // or this block
    // Adapt the single-pass stream-buffer iterator with multi_pass before
    // handing it to the parser.
    typedef std::istreambuf_iterator<char> base_iterator_type;
    typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
    base_iterator_type in_begin(inFile);
    base_iterator_type in_end;
    forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
    forward_iterator_type fwd_end  = boost::spirit::make_default_multi_pass(in_end);

    // NOTE: the grammar is instantiated for std::string::const_iterator, whereas
    // the iterators passed to phrase_parse below are forward_iterator_type.
    CsvGrammar<std::string::const_iterator> p;
    CsvParsed parsed;
    bool ok = qi::phrase_parse(fwd_begin, fwd_end, p, qi::blank, parsed);
    if (ok)
    {
        // One output row per parsed line, each column wrapped in brackets.
        for(auto& line : parsed) {
            for(auto& col : line)
                std::cout << '[' << col << ']';
            std::cout << std::endl;
        }
    } else
    {
        std::cout << "Parse failed\n";
    }

    // Report whatever input lies between the final parse position and the end.
    if (fwd_begin != fwd_end)
        std::cout << "Remaining unparsed: '" << std::string(fwd_begin, fwd_end ) << "'\n";
}

The compiler (Apple clang 6.1 via CLion) gives the following error:

    In file included from /Users/alan/ClionProjects/csvreader/csvReader.cpp:16:
    In file included from /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/include/qi.hpp:16:
    In file included from /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi.hpp:21:
    In file included from /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/nonterminal.hpp:14:
    In file included from /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/nonterminal/rule.hpp:35:
    /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/reference.hpp:43:30: error: no matching member function for call to 'parse'
                return ref.get().parse(first, last, context, skipper, attr_);
                       ~~~~~~~~~~^~~~~
    /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/parse.hpp:164:40: note: in instantiation of function template specialization 'boost::spirit::qi::reference, std::__1::vector, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > (), boost::proto::exprns_::expr >, 0>, boost::spirit::unused_type, boost::spirit::unused_type> >::parse >, boost::spirit::iterator_policies::default_policy >, boost::spirit::context, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > &, boost::fusion::nil_>, boost::spirit::locals >, boost::spirit::qi::char_class >, std::__1::vector, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > >' requested here
            if (!compile(expr).parse(
                                           ^
    /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/parse.hpp:197:20: note: in instantiation of function template specialization 'boost::spirit::qi::phrase_parse >, boost::spirit::iterator_policies::default_policy >, CsvGrammar >, boost::proto::exprns_::expr >, 0>, std::__1::vector, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > >' requested here
            return qi::phrase_parse(first, last, expr, skipper, skip_flag::postskip, attr);
                       ^
    /Users/alan/ClionProjects/csvreader/csvReader.cpp:74:19: note: in instantiation of function template specialization 'boost::spirit::qi::phrase_parse >, boost::spirit::iterator_policies::default_policy >, CsvGrammar >, boost::proto::exprns_::expr >, 0>, std::__1::vector, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > >' requested here
        bool ok = qi::phrase_parse(fwd_begin, fwd_end, p, qi::blank, parsed);
                      ^
    /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/nonterminal/rule.hpp:274:14: note: candidate function [with Context = boost::spirit::context, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > > &, boost::fusion::nil_>, boost::spirit::locals >, Skipper = boost::spirit::qi::char_class >, Attribute = std::__1::vector, std::__1::allocator > >, std::__1::allocator, std::__1::allocator > > > >] not viable: no known conversion from 'boost::spirit::multi_pass >, boost::spirit::iterator_policies::default_policy >' to 'std::__1::__wrap_iter &' for 1st argument
            bool parse(Iterator& first, Iterator const& last
                 ^
    /Users/alan/ClionProjects/csvreader/boost/boost_1_59_0/boost/spirit/home/qi/nonterminal/rule.hpp:320:14: note: candidate function template not viable: requires 6 arguments, but 5 were provided
            bool parse(Iterator& first, Iterator const& last
                 ^

So it looks like the wrong type of iterator is being fed into qi::phrase_parse as the first parameter. What should go here?

回答1:

You had the grammar declared using std::string::const_iterator...

CsvGrammar<forward_iterator_type> p;

Is more to the point.

Besides:

  1. you can use boost::spirit::istream_iterator directly (which is almost equivalent but much more convenient); but don't forget to unset the std::ios::skipws flag in that case
  2. consider parsing (zero-copy) from a memory mapped file; I have some answers on SO doing this. This should scale very well, beyond what streaming parsing can promise because the AST can be lazy/lightweight
  3. you probably want "" to parse into ", so make the rule:

    quoted = '"' >> *("\"" >> char_('"') | ~char_('"')) >> '"';
    
  4. you want unquoted columns to stop at eol; so make that rule

    column = quoted | *(char_ - colsep - eol);
    
  5. To avoid the empty record at the end:

    start  = *(line >> eol);
    column = quoted | +(char_ - colsep - eol);
    
  6. and to skip empty lines:

    start  = *(line >> +eol);
    

Live On Coliru

#define BOOST_SPIRIT_DEBUG
//#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
//#define BOOST_SPIRIT_DEBUG_OUT std::cerr

#include <stdio.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>

// Path of the CSV file that main() opens.
std::string dataLoc = "afile.csv";

// Short alias for the Spirit Qi namespace.
namespace qi = boost::spirit::qi;

// Attribute types produced by the grammar: a parsed file is a vector of
// lines, and each line is a vector of column strings.
using Column  = std::string;
using Columns = std::vector<Column>;
using CsvLine = Columns;
using CsvParsed = std::vector<CsvLine>;

// Qi grammar for a '|'-separated CSV document, parameterised on the iterator
// type It (it must match the iterators later passed to qi::phrase_parse).
//
// Synthesised attribute: CsvParsed, i.e. one vector of column strings per
// non-empty input line. The grammar is meant to be run with qi::blank as the
// skipper, so spaces and tabs are skipped while line breaks stay significant.
// NOTE: because the column rule carries the blank skipper, blanks inside an
// unquoted column are skipped as well; quote a field to keep them.
template <typename It>
struct CsvGrammar : qi::grammar<It, CsvParsed(), qi::blank_type>
{
    CsvGrammar() : CsvGrammar::base_type(start)
    {
        using namespace qi;

        // Column separator shared by the line and column rules.
        static const char colsep = '|';

        // A record is terminated either by one or more line breaks (which
        // also swallows blank lines) or by the end of the input, so a final
        // record that lacks a trailing newline is still accepted instead of
        // being left behind as unparsed input.
        start  = *(line >> (+eol | eoi));
        // One or more columns separated by colsep.
        line   = column % colsep;
        // A quoted field, or a non-empty run of characters up to the next
        // separator or line break. Requiring at least one character keeps
        // the rule from matching nothing at end of input.
        column = quoted | +(char_ - colsep - eol);
        // Double-quoted field; a doubled quote inside it yields one quote
        // character in the result.
        quoted = '"' >> *("\"" >> char_('"') | ~char_('"')) >> '"';

        // Registers the four rules for BOOST_SPIRIT_DEBUG tracing.
        BOOST_SPIRIT_DEBUG_NODES((start)(line)(column)(quoted));
    }
private:
    qi::rule<It, CsvParsed(), qi::blank_type> start;
    qi::rule<It, CsvLine(),   qi::blank_type> line;
    qi::rule<It, Column(),    qi::blank_type> column;
    // No skipper: blanks inside a quoted field are preserved.
    qi::rule<It, std::string()> quoted;
};

// Reads the CSV file named by dataLoc through a multi_pass-adapted stream
// iterator, parses it with CsvGrammar and prints every column in brackets,
// one output row per parsed line.
//
// Returns 0 when the file was opened and the whole input was parsed, and 1
// when the file cannot be opened, the parse fails, or input is left over.
int main()
{
    std::ifstream inFile(dataLoc);
    if (!inFile) {
        // Without this early exit the parser would run on an empty range and
        // report success for a file that was never read.
        std::cerr << "Cannot open '" << dataLoc << "'\n";
        return 1;
    }
    std::cout << "input found" << std::endl;
/*
    // use either this block of code
    inFile.unsetf(std::ios::skipws); // otherwise the stream iterator drops whitespace
    using istreamIter = boost::spirit::istream_iterator;
    istreamIter fwd_begin = istreamIter(inFile);
    istreamIter fwd_end = istreamIter();
*/
    // or this block
    // istreambuf_iterator is single-pass; multi_pass buffers it so that the
    // parser can backtrack.
    using base_iterator_type    = std::istreambuf_iterator<char>;
    using forward_iterator_type = boost::spirit::multi_pass<base_iterator_type>;
    base_iterator_type in_begin(inFile);
    base_iterator_type in_end;
    forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
    forward_iterator_type fwd_end   = boost::spirit::make_default_multi_pass(in_end);

    // The grammar must be instantiated with the same iterator type that is
    // handed to phrase_parse.
    CsvGrammar<forward_iterator_type> p;
    CsvParsed parsed;
    const bool ok = qi::phrase_parse(fwd_begin, fwd_end, p, qi::blank, parsed);
    if (ok) {
        for (const auto& line : parsed) {
            for (const auto& col : line)
                std::cout << '[' << col << ']';
            std::cout << '\n';
        }
    } else {
        std::cout << "Parse failed\n";
    }

    // phrase_parse can succeed without consuming everything, so a partial
    // parse is checked for and reported separately.
    const bool complete = (fwd_begin == fwd_end);
    if (!complete)
        std::cout << "Remaining unparsed: '" << std::string(fwd_begin, fwd_end) << "'\n";

    return (ok && complete) ? 0 : 1;
}

Printing

<start>
  <try>a|b|c\n1|2|3\nX|Y|Z\n</try>
  <line>
    <try>a|b|c\n1|2|3\nX|Y|Z\n</try>
    <column>
      <try>a|b|c\n1|2|3\nX|Y|Z\n</try>
      <quoted>
        <try>a|b|c\n1|2|3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|b|c\n1|2|3\nX|Y|Z\n</success>
      <attributes>[[a]]</attributes>
    </column>
    <column>
      <try>b|c\n1|2|3\nX|Y|Z\n</try>
      <quoted>
        <try>b|c\n1|2|3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|c\n1|2|3\nX|Y|Z\n</success>
      <attributes>[[b]]</attributes>
    </column>
    <column>
      <try>c\n1|2|3\nX|Y|Z\n</try>
      <quoted>
        <try>c\n1|2|3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>\n1|2|3\nX|Y|Z\n</success>
      <attributes>[[c]]</attributes>
    </column>
    <success>\n1|2|3\nX|Y|Z\n</success>
    <attributes>[[[a], [b], [c]]]</attributes>
  </line>
  <line>
    <try>1|2|3\nX|Y|Z\n</try>
    <column>
      <try>1|2|3\nX|Y|Z\n</try>
      <quoted>
        <try>1|2|3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|2|3\nX|Y|Z\n</success>
      <attributes>[[1]]</attributes>
    </column>
    <column>
      <try>2|3\nX|Y|Z\n</try>
      <quoted>
        <try>2|3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|3\nX|Y|Z\n</success>
      <attributes>[[2]]</attributes>
    </column>
    <column>
      <try>3\nX|Y|Z\n</try>
      <quoted>
        <try>3\nX|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>\nX|Y|Z\n</success>
      <attributes>[[3]]</attributes>
    </column>
    <success>\nX|Y|Z\n</success>
    <attributes>[[[1], [2], [3]]]</attributes>
  </line>
  <line>
    <try>X|Y|Z\n</try>
    <column>
      <try>X|Y|Z\n</try>
      <quoted>
        <try>X|Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|Y|Z\n</success>
      <attributes>[[X]]</attributes>
    </column>
    <column>
      <try>Y|Z\n</try>
      <quoted>
        <try>Y|Z\n</try>
        <fail/>
      </quoted>
      <success>|Z\n</success>
      <attributes>[[Y]]</attributes>
    </column>
    <column>
      <try>Z\n</try>
      <quoted>
        <try>Z\n</try>
        <fail/>
      </quoted>
      <success>\n</success>
      <attributes>[[Z]]</attributes>
    </column>
    <success>\n</success>
    <attributes>[[[X], [Y], [Z]]]</attributes>
  </line>
  <line>
    <try></try>
    <column>
      <try></try>
      <quoted>
        <try></try>
        <fail/>
      </quoted>
      <fail/>
    </column>
    <fail/>
  </line>
  <success></success>
  <attributes>[[[[a], [b], [c]], [[1], [2], [3]], [[X], [Y], [Z]]]]</attributes>
</start>
[a][b][c]
[1][2][3]
[X][Y][Z]