From ff2261623e2637c97052635d721557fb3de454af Mon Sep 17 00:00:00 2001 From: Adam Kasztenny Date: Sat, 6 May 2017 15:15:32 -0400 Subject: [PATCH] Add ECO to parser --- pgnparser/.gitignore | 1 + pgnparser/README.rst | 62 ++++++++ pgnparser/pgn.py | 278 ++++++++++++++++++++++++++++++++++++ pgnparser/setup.py | 46 ++++++ pgnparser/test_pngparser.py | 175 +++++++++++++++++++++++ 5 files changed, 562 insertions(+) create mode 100644 pgnparser/.gitignore create mode 100644 pgnparser/README.rst create mode 100644 pgnparser/pgn.py create mode 100644 pgnparser/setup.py create mode 100644 pgnparser/test_pngparser.py diff --git a/pgnparser/.gitignore b/pgnparser/.gitignore new file mode 100644 index 0000000..7e99e36 --- /dev/null +++ b/pgnparser/.gitignore @@ -0,0 +1 @@ +*.pyc \ No newline at end of file diff --git a/pgnparser/README.rst b/pgnparser/README.rst new file mode 100644 index 0000000..82fe26d --- /dev/null +++ b/pgnparser/README.rst @@ -0,0 +1,62 @@ +========== +PGN Parser +========== + +A simple python PGN parser. + +PGN (Portable Game Notation) is computer-processible format for recording chess +games, both the moves and related data. + +This module is based on features of others python parser modules (such json and +yaml). The basic usage:: + + import pgn + + pgn_text = open('morphy.pgn').read() + pgn_game = pgn.PGNGame() + + print pgn.loads(pgn_text) # Returns a list of PGNGame + print pgn.dumps(pgn_game) # Returns a string with a pgn game + +**Note**: + +The above basic example doesn't work properly with huge files (hundreds of +megabytes and more): reading the whole file at once is slow and uses much +memory, pgn.loads(big_string) uses even more memory. + +To process huge PGN files, do it like this:: + + import pgn + + for game in pgn.GameIterator("bigfile.pgn"): + print game # or do something else with it + +**Features**: + +- Required tags: "Event", "Site", "Date", "Round", "White", "Black", and + "Result". +- Optional tags: "Annotator", "PlyCount", "TimeControl", "Time", "Termination", + "Mode", and "FEN". +- Commentaries: "**;**" (a comment that continues to the end of the line) and + "**{**" (which continues until a matching "**}**"). The last one just in + moves list. + + +**PGN example**:: + + [Event "F/S Return Match"] + [Site "Belgrade, Serbia Yugoslavia|JUG"] + [Date "1992.11.04"] + [Round "29"] + [White "Fischer, Robert J."] + [Black "Spassky, Boris V."] + [Result "1/2-1/2"] + + 1. e4 e5 2. Nf3 Nc6 3. Bb5 {This opening is called the Ruy Lopez.} 3... a6 + 4. Ba4 Nf6 5. O-O Be7 6. Re1 b5 7. Bb3 d6 8. c3 O-O 9. h3 Nb8 10. d4 Nbd7 + 11. c4 c6 12. cxb5 axb5 13. Nc3 Bb7 14. Bg5 b4 15. Nb1 h6 16. Bh4 c5 17. dxe5 + Nxe4 18. Bxe7 Qxe7 19. exd6 Qf6 20. Nbd2 Nxd6 21. Nc4 Nxc4 22. Bxc4 Nb6 + 23. Ne5 Rae8 24. Bxf7+ Rxf7 25. Nxf7 Rxe1+ 26. Qxe1 Kxf7 27. Qe3 Qg5 28. Qxg5 + hxg5 29. b3 Ke6 30. a3 Kd6 31. axb4 cxb4 32. Ra5 Nd5 33. f3 Bc8 34. Kf2 Bf5 + 35. Ra7 g6 36. Ra6+ Kc5 37. Ke1 Nf4 38. g3 Nxh3 39. Kd2 Kb5 40. Rd6 Kc5 41. Ra6 + Nf2 42. g4 Bd3 43. Re6 1/2-1/2 diff --git a/pgnparser/pgn.py b/pgnparser/pgn.py new file mode 100644 index 0000000..ab59f8d --- /dev/null +++ b/pgnparser/pgn.py @@ -0,0 +1,278 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2011 Renato de Pontes Pereira, renato.ppontes at gmail dot com +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import re + +''' +A simple PGN parser. + +PGN (Portable Game Notation) is computer-processible format for recording chess +games, both the moves and related data. + +This module is based on features of others parser modules (such json and yaml). +The basic usage:: + + import pgn + + pgn_text = open('morphy.pgn).read() + pgn_game = pgn.PGNGame() + + print pgn.loads(pgn_text) # Returns a list of PGNGame + print pgn.dumps(pgn_game) # Returns a string with a pgn game + +''' + +class PGNGame(object): + ''' + Describes a single chess game in PGN format. + ''' + + TAG_ORDER = ['Event', 'Site', 'Date', 'Round', 'White', 'Black', 'Result', 'ECO', + 'Annotator', 'PlyCount', 'TimeControl', 'Time', 'Termination', + 'Mode', 'FEN'] + + def __init__(self, event=None, site=None, date=None, round=None, + white=None, + black=None, + result=None): + ''' + Initializes the PGNGame, receiving the requireds tags. + ''' + self.event = event + self.site = site + self.date = date + self.round = round + self.white = white + self.black = black + self.result = result + self.eco = None + self.annotator = None + self.plycount = None + self.timecontrol = None + self.time = None + self.termination = None + self.mode = None + self.fen = None + + self.moves = [] + + def dumps(self): + return dumps(self) + + def __repr__(self): + return '' % (self.white, self.black) + +class GameStringIterator(object): + """ + Iterator containing multiline strings + that represent games from a PGN file + """ + + def __init__(self, file_name): + """ + Args: + file_name (str): PGN file name + """ + self.file_name = file_name + self.file_iter = iter(open(self.file_name)) + self.game_lines = [] + self.end = False + + def __iter__(self): + """doc""" + return self + + def next(self): + """doc""" + if self.end is True: + raise StopIteration + try: + while True: + line = self.file_iter.next() + if line.startswith("[Event"): + if len(self.game_lines) == 0: + self.game_lines.append(line) + continue + else: + game_lines = self.game_lines[:] + self.game_lines = [] + self.game_lines.append(line) + game_str = "".join(game_lines) + return game_str + else: + self.game_lines.append(line) + except StopIteration: + game_lines = self.game_lines[:] + game_str = "".join(game_lines) + self.end = True + return game_str + +class GameIterator(object): + """ + Iterator containing games from a PGN file + """ + + def __init__(self, file_name): + """ + Args: + file_name (str): PGN file name + """ + self.game_str_iterator = GameStringIterator(file_name) + + def __iter__(self): + """doc""" + return self + + def next(self): + """doc""" + for game_str in self.game_str_iterator: + game = loads(game_str)[0] + return game + +def _pre_process_text(text): + ''' + This function is responsible for removal of end line commentarys + (;commentary), blank lines and aditional spaces. Also, it converts + ``\\r\\n`` to ``\\n``. + ''' + text = re.sub(r'\s*(\\r)?\\n\s*', '\n', text.strip()) + lines = [] + for line in text.split('\n'): + line = re.sub(r'(\s*;.*|^\s*)', '', line) + if line: + lines.append(line) + + return lines + +def _next_token(lines): + ''' + Get the next token from lines (list of text pgn file lines). + + There is 2 kind of tokens: tags and moves. Tags tokens starts with ``[`` + char, e.g. ``[TagName "Tag Value"]``. Moves tags follows the example: + ``1. e4 e5 2. d4``. + ''' + if not lines: + return None + + token = lines.pop(0).strip() + if token.startswith('['): + return token + + while lines and not lines[0].startswith('['): + token += ' '+lines.pop(0).strip() + + return token.strip() + +def _parse_tag(token): + ''' + Parse a tag token and returns a tuple with (tagName, tagValue). + ''' + tag, value = re.match(r'\[(\w*)\s*(.+)', token).groups() + return tag.lower(), value.strip('"[] ') + +def _parse_moves(token): + ''' + Parse a moves token and returns a list with moviments + ''' + moves = [] + while token: + token = re.sub(r'^\s*(\d+\.+\s*)?', '', token) + + if token.startswith('{'): + pos = token.find('}')+1 + else: + pos1 = token.find(' ') + pos2 = token.find('{') + if pos1 <= 0: + pos = pos2 + elif pos2 <= 0: + pos = pos1 + else: + pos = min([pos1, pos2]) + + if pos > 0: + moves.append(token[:pos]) + token = token[pos:] + else: + moves.append(token) + token = '' + + return moves + +def loads(text): + ''' + Converts a string ``text`` into a list of PNGGames + ''' + games = [] + game = None + lines = _pre_process_text(text) + + while True: + token = _next_token(lines) + + if not token: + break + + if token.startswith('['): + tag, value = _parse_tag(token) + if not game or (game and game.moves): + game = PGNGame() + games.append(game) + + setattr(game, tag, value) + else: + game.moves = _parse_moves(token) + + return games + +def dumps(games): + ''' + Serialize a list os PGNGames (or a single game) into text format. + ''' + all_dumps = [] + + if not isinstance(games, (list, tuple)): + games = [games] + + for game in games: + dump = '' + for i, tag in enumerate(PGNGame.TAG_ORDER): + if getattr(game, tag.lower()): + dump += '[%s "%s"]\n' % (tag, getattr(game, tag.lower())) + elif i <= 6: + dump += '[%s "?"]\n' % tag + + + dump += '\n' + i = 0 + for move in game.moves: + if not move.startswith('{'): + if i%2 == 0: + dump += str(i/2+1)+'. ' + + i += 1 + + dump += move + ' ' + + all_dumps.append(dump.strip()) + + return '\n\n\n'.join(all_dumps) diff --git a/pgnparser/setup.py b/pgnparser/setup.py new file mode 100644 index 0000000..b0a192f --- /dev/null +++ b/pgnparser/setup.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +__author__ = 'Renato de Pontes Pereira' +__author_email__ = 'renato.ppontes@gmail.com' +__version__ = '1.0' +__date__ = '2011 10 13' + +try: + import setuptools +except ImportError: + from ez_setup import use_setuptools + use_setuptools() + +from setuptools import setup, find_packages + +f = open('README.rst','rU') +long_description = f.read() +f.close() + +setup( + name = 'pgnparser', + version = __version__, + author = __author__, + license='MIT License', + description = 'A python PGN parser', + long_description=long_description, + url = 'http://renatopp.com/pgnparser', + download_url = 'https://github.com/renatopp/pgnparser', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Environment :: MacOS X', + 'Environment :: Win32 (MS Windows)', + 'Environment :: X11 Applications', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python', + ('Topic :: Software Development :: Libraries :: Python Modules'), + ('Topic :: Games/Entertainment'), + ], + keywords='chess game pgn parser python', + py_modules=['pgn'], + ) \ No newline at end of file diff --git a/pgnparser/test_pngparser.py b/pgnparser/test_pngparser.py new file mode 100644 index 0000000..da2f6b9 --- /dev/null +++ b/pgnparser/test_pngparser.py @@ -0,0 +1,175 @@ +import unittest +import pgn + +def game_fixture(): + game = pgn.PGNGame( + 'F/S Return Match', + 'Belgrade, Serbia Yugoslavia|JUG', + '1992.11.04', + '29', + 'Fischer, Robert J.', + 'Spassky, Boris V.', + '1/2-1/2' + ) + + game.annotator = 'Renato' + game.plycount = '3' + game.timecontrol = '40/7200:3600' + game.time = '12:32:43' + game.termination = 'abandoned' + game.mode = 'ICS' + game.fen = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/R1BQKBNR' + game.eco = 'C60' + + game.moves = ['e4', 'e5', 'd4', 'd5', 'f3', '1/2-1/2'] + + return game + +PGN_TEXT = '''[Event "F/S Return Match"] +[Site "Belgrade, Serbia Yugoslavia|JUG"] +[Date "1992.11.04"] +[Round "29"] +[White "Fischer, Robert J."] +[Black "Spassky, Boris V."] +[Result "1/2-1/2"] +[ECO "C60"] +[Annotator "Renato"] +[PlyCount "3"] +[TimeControl "40/7200:3600"] +[Time "12:32:43"] +[Termination "abandoned"] +[Mode "ICS"] +[FEN "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/R1BQKBNR"] + +1. e4 e5 2. d4 d5 3. f3 1/2-1/2''' + +class PGNGame_Test(unittest.TestCase): + def test_init(self): + game = game_fixture() + assert game.event == 'F/S Return Match' + assert game.site == 'Belgrade, Serbia Yugoslavia|JUG' + assert game.date == '1992.11.04' + assert game.round == '29' + assert game.white == 'Fischer, Robert J.' + assert game.black == 'Spassky, Boris V.' + assert game.result == '1/2-1/2' + assert game.eco == 'C60' + assert game.annotator == 'Renato' + assert game.plycount == '3' + assert game.timecontrol == '40/7200:3600' + assert game.time == '12:32:43' + assert game.termination == 'abandoned' + assert game.mode == 'ICS' + assert game.fen == 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/R1BQKBNR' + + +class PGN_Test(unittest.TestCase): + def test_next_token(self): + '''Tests ``_next_token`` function''' + + lines = [ + '[Site "Belgrade, Serbia Yugoslavia|JUG"] ', + '[Date "1234.32.32"]', + '', + '1. e4 e5 2. Nf3 Nc6 ', + '3. Bb5 1-0', + ' ' + ] + + token = pgn._next_token(lines) + assert token == '[Site "Belgrade, Serbia Yugoslavia|JUG"]' + assert len(lines) == 5 + + token = pgn._next_token(lines) + assert token == '[Date "1234.32.32"]' + assert len(lines) == 4 + + token = pgn._next_token(lines) + assert token == '1. e4 e5 2. Nf3 Nc6 3. Bb5 1-0' + assert len(lines) == 0 + + token = pgn._next_token(lines) + assert not token + + def test_pre_process_text(self): + '''Tests ``_pre_process_text`` function''' + + text = ''' + [tag "value"] ;comment + + ; commentary + 1. e4 e5 2. d4 d5 ;commentary + 3. f3 1/2-1/2''' + + lines = pgn._pre_process_text(text) + expt = ['[tag "value"]', '1. e4 e5 2. d4 d5', '3. f3 1/2-1/2'] + assert lines == expt + + def test_parse_tag(self): + '''Tests ``_parse_tag`` function''' + + token = '[Site "Belgrade, Serbia Yugoslavia|JUG"]' + tag, value = pgn._parse_tag(token) + assert tag == 'site' + assert value == 'Belgrade, Serbia Yugoslavia|JUG' + + def test_parse_moves(self): + '''Tests ``_parse_moves`` function''' + + token = '1. e4 e5 2. Nf3 Nc6 3. Bb5 1/2-1/2' + moves = pgn._parse_moves(token) + assert moves == ['e4', 'e5', 'Nf3', 'Nc6', 'Bb5', '1/2-1/2'] + + def test_parse_moves_with_commentary(self): + '''Tests ``_parse_moves`` function with commentary ({})''' + + token = '{start comment}1. e4{middlecomment}e5 2. {dunno}Nf3 Nc6' +\ + ' 3. Bb5 1/2-1/2{end}' + + moves = pgn._parse_moves(token) + expected = ['{start comment}', 'e4', '{middlecomment}', 'e5', '{dunno}', + 'Nf3', 'Nc6', 'Bb5', '1/2-1/2', '{end}'] + + assert moves == expected + + def test_loads(self): + '''Tests ``loads`` function''' + + text = ''' + [Site "Belgrade, Serbia Yugoslavia|JUG"] + [Date "1234.32.32"] + + 1. e4 e5 2. Nf3 Nc6 + 3. Bb5 1-0''' + + games = pgn.loads(text) + assert len(games) == 1 + + def test_dumps_single(self): + '''Tests ``dumps`` function for a single game''' + game = game_fixture() + dump = pgn.dumps(game) + + assert dump == PGN_TEXT + + def test_dumps_multi(self): + '''Tests ``dumps`` function for a list of games''' + games = [game_fixture(), game_fixture()] + dump = pgn.dumps(games) + + assert dump == PGN_TEXT+'\n\n\n'+PGN_TEXT + + def test_dumps_special(self): + '''Tests ``dumps`` function with move commentary and null tag''' + game = pgn.PGNGame('XYZ') + game.moves = ['{comment}', 'e4', 'e5', '{in}', 'd4', '{lol}', '1-0'] + + dump = pgn.dumps(game) + first_expected = '[Event "XYZ"]\n[Site "?"]' + last_expected = '{comment} 1. e4 e5 {in} 2. d4 {lol} 1-0' + + assert dump.startswith(first_expected) + assert dump.endswith(last_expected) + +if __name__ == '__main__': + unittest.main()