I managed to parse a PGN file with the Boost Spirit library, but parsing fails as soon as the input contains characters I did not anticipate.
Here is my Spirit grammar:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapts loloof64::pgn_tag as a Boost.Fusion sequence with the members
// (key, value), in that order.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapts loloof64::game_move as a Boost.Fusion sequence with the members
// (move_number, move_turn, white_move, black_move, result), in that order.
// NOTE(review): the order listed here presumably has to match the order in
// which the `full_move` rule produces its values - keep the two in sync.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapts loloof64::pgn_game as a Boost.Fusion sequence with the members
// (header, moves), in that order.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// NOTE(review): the `phoenix` alias is not referenced by the code shown here.
namespace phoenix = boost::phoenix;
// Spirit Qi grammar for a sequence of PGN games. The start rule is `games`
// and the attribute is a std::vector<pgn_game>.
// NOTE(review): the grammar and every rule pass qi::unused_type as their last
// template argument - presumably so that no skipper is applied inside the
// rules and all whitespace is matched explicitly; confirm against the Qi docs.
// NOTE(review): no rule below accepts characters outside the listed literals
// and character sets (for instance '{', '(', '$', '!' or '?'), so input that
// contains such characters is not consumed by this grammar.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
// Defines all rules; `games` is registered as the grammar's entry point.
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// A double quote, any run of characters other than '"', and a closing quote.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// One header line: '[' key (one or more alphanumerics), whitespace,
// a quoted value, ']' and at least one end of line.
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
// The header is one or more tags.
header %= +tag;
// "..." is tried before "." so that the longer marker is matched first.
move_turn %= qi::string("...") | qi::string(".");
// One or more characters from "a-hNBRQK", then one or more characters from
// "a-h1-8x=NBRQK", then an optional "e.p." suffix.
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is tried before "O-O" because the latter is a prefix of the former.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A regular or castling move, optionally followed by '+' or '#'.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
// The four accepted game-result strings.
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, optional whitespace, a first move, then an
// optional second move and an optional result, each preceded by whitespace.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
// One full_move followed by any number of further ones, optionally separated
// by whitespace.
game_description %= full_move
>> *(omit[*space] >> full_move);
// An optional header followed by the move list.
single_game %=
-header
>> game_description
;
// One game, then any number of further games, each optionally preceded by
// whitespace and/or line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
// Rule objects; the second template argument names the attribute of each rule.
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
How can I simply consume any character I did not anticipate? In other words, how can I ignore any character that is not matched by any of my grammar rules?
For testing purposes,
here is my parser header (pgn_games_extractor.hpp):
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

/// One header entry of a game, i.e. a key together with its value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered entry of a game's move list.
struct game_move {
    unsigned move_number;    ///< Number written in front of the entry.
    std::string move_turn;   ///< Text that followed the number, e.g. "." or "...".
    std::string white_move;  ///< First move of the entry.
    std::string black_move;  ///< Second move of the entry; empty when absent.
    std::string result;      ///< Game result attached to the entry; empty when absent.
};

/// A complete game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Parses PGN input on construction and exposes the resulting games.
class PgnGamesExtractor
{
public:
    /// Both constructors may throw PgnParsingException (if the PGN format is
    /// bad) and InputFileException (if the file is missing).
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::ifstream &inputFile);

    /// Returns a copy of the games collected during construction.
    std::vector<pgn_game> getGames() const { return games; }

    virtual ~PgnGamesExtractor();

private:
    std::vector<pgn_game> games;

    /// Reads `inputFile` and fills `games`; defined in the source file.
    void parseInput(std::ifstream &inputFile);
};

/// Error type for input that is not valid PGN.
class PgnParsingException : public std::runtime_error
{
public:
    // Taken by const reference: std::runtime_error makes its own copy, so a
    // by-value parameter would only add a second, useless copy.
    PgnParsingException(const std::string &message) : std::runtime_error(message) {}
};

/// Error type for an input file that is missing or cannot be read.
class InputFileException : public std::runtime_error
{
public:
    // See PgnParsingException for why the message is taken by const reference.
    InputFileException(const std::string &message) : std::runtime_error(message) {}
};

}
#endif // PGNGAMESEXTRACTOR_HPP
Here is my parser source (pgn_games_extractor.cpp):
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapts loloof64::pgn_tag as a Boost.Fusion sequence with the members
// (key, value), in that order.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapts loloof64::game_move as a Boost.Fusion sequence with the members
// (move_number, move_turn, white_move, black_move, result), in that order.
// NOTE(review): the order listed here presumably has to match the order in
// which the `full_move` rule produces its values - keep the two in sync.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapts loloof64::pgn_game as a Boost.Fusion sequence with the members
// (header, moves), in that order.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// NOTE(review): the `phoenix` alias is not referenced by the code shown here.
namespace phoenix = boost::phoenix;
// Spirit Qi grammar for a sequence of PGN games. The start rule is `games`
// and the attribute is a std::vector<pgn_game>.
// NOTE(review): the grammar and every rule pass qi::unused_type as their last
// template argument - presumably so that no skipper is applied inside the
// rules and all whitespace is matched explicitly; confirm against the Qi docs.
// NOTE(review): no rule below accepts characters outside the listed literals
// and character sets (for instance '{', '(', '$', '!' or '?'), so input that
// contains such characters is not consumed by this grammar.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
// Defines all rules; `games` is registered as the grammar's entry point.
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// A double quote, any run of characters other than '"', and a closing quote.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// One header line: '[' key (one or more alphanumerics), whitespace,
// a quoted value, ']' and at least one end of line.
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
// The header is one or more tags.
header %= +tag;
// "..." is tried before "." so that the longer marker is matched first.
move_turn %= qi::string("...") | qi::string(".");
// One or more characters from "a-hNBRQK", then one or more characters from
// "a-h1-8x=NBRQK", then an optional "e.p." suffix.
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is tried before "O-O" because the latter is a prefix of the former.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A regular or castling move, optionally followed by '+' or '#'.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
// The four accepted game-result strings.
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, optional whitespace, a first move, then an
// optional second move and an optional result, each preceded by whitespace.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
// One full_move followed by any number of further ones, optionally separated
// by whitespace.
game_description %= full_move
>> *(omit[*space] >> full_move);
// An optional header followed by the move list.
single_game %=
-header
>> game_description
;
// One game, then any number of further games, each optionally preceded by
// whitespace and/or line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
// Rule objects; the second template argument names the attribute of each rule.
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
/// Opens the file named by `inputFilePath` and hands the stream to
/// parseInput(), which reports an unusable stream or bad content by throwing.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream;
    pgnStream.open(inputFilePath);
    this->parseInput(pgnStream);
}
/// Parses the games from a stream supplied by the caller; all work is
/// delegated to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    this->parseInput(inputFile);
}
/// Nothing to release by hand: the only member cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
/// Reads the content of `inputFile`, parses it with pgn_parser and stores the
/// parsed games in `games`.
///
/// @param inputFile stream to read; it is consumed up to its end (or up to the
///                  first NUL character, see below).
/// @throws InputFileException  if the stream is not usable on entry or if
///                             nothing could be read from it.
/// @throws PgnParsingException if the parser fails or stops before the end of
///                             the input; the message carries the unparsed
///                             remainder.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Slurp the stream in one call. '\0' is the delimiter, so reading only
    // stops early if the input contains an embedded NUL character.
    std::string content;
    std::getline(inputFile, content, '\0');

    // fail() already reports badbit as well as failbit. The exception is
    // thrown by value so that `catch (const InputFileException&)` and
    // `catch (const std::exception&)` handlers actually see it.
    if (inputFile.fail()) throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> parsedGames;
    std::string::const_iterator iter = content.begin();
    const std::string::const_iterator end = content.end();

    const bool success = boost::spirit::qi::phrase_parse(
        iter, end, parser, boost::spirit::qi::eol, parsedGames);

    // A successful parse that leaves input behind is still an error.
    if (!success || iter != end)
    {
        const std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }

    // Publish the result only after a complete parse, without copying it.
    games.swap(parsedGames);
}
I am asking this question because I could not parse the following PGN file: ScotchGambitPgn.zip. I think this is due to an encoding issue with the file.
I am using Spirit 2 and C++11 (GNU).