HaileyStorm committed on
Commit
990da19
1 Parent(s): 22cfeab

Create filter_lichess_zst.py

Browse files
Files changed (1) hide show
  1. filter_lichess_zst.py +46 -0
filter_lichess_zst.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chess
2
+ import chess.pgn
3
+ import csv
4
+ import sys
5
+
6
def _elo_in_range(headers, key, low, high):
    """Return True when ``headers[key]`` is an integer strictly between low and high."""
    try:
        return low < int(headers[key]) < high
    except (KeyError, ValueError):
        # A missing tag or a non-numeric rating cannot satisfy the filter;
        # skip that game instead of aborting the whole stream.
        return False


def _matches_filter(headers):
    """Return True for rated games won by White inside the rating bands."""
    return (
        headers.get('Result') == '1-0'
        and 'Rated' in headers.get('Event', '')
        and _elo_in_range(headers, 'WhiteElo', 1500, 2400)
        and _elo_in_range(headers, 'BlackElo', 1400, 2800)
    )


def _build_transcript(game):
    """Replay the game's mainline and return ``(final_board, transcript)``.

    The transcript looks like ``"1.e4 e5 2.Nf3 Nc6"``: a move number glued to
    White's move, with a single space after every move (trailing space removed).
    """
    board = chess.Board()
    parts = []
    move_number = 1
    for move in game.mainline_moves():
        if board.turn == chess.WHITE:
            parts.append(f"{move_number}.")
            move_number += 1
        # SAN must be computed before the move is pushed onto the board.
        parts.append(board.san(move) + " ")
        board.push(move)
    return board, ''.join(parts).rstrip()


def process_pgn_stream(output_file):
    """Filter PGN games read from stdin and append their transcripts to a CSV.

    Games are read one at a time from ``sys.stdin`` until the stream is
    exhausted. A game is kept when its headers describe a rated game with
    result ``1-0``, WhiteElo strictly between 1500 and 2400, BlackElo strictly
    between 1400 and 2800, and the replayed final position is itself a
    finished game whose result is ``1-0``. Each kept game becomes one
    single-column CSV row.

    Args:
        output_file: Path of the CSV file to append to. The ``transcript``
            header row is written only when the file is empty, so repeated
            runs do not insert extra header rows between data rows.

    Progress is reported on stderr after every 100 games added.
    """
    with open(output_file, 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        # Whence 2 means "end of file"; position 0 there means an empty file.
        if csv_file.seek(0, 2) == 0:
            csv_writer.writerow(['transcript'])

        games_seen = 0
        games_added = 0
        while True:
            game = chess.pgn.read_game(sys.stdin)
            if game is None:
                break
            games_seen += 1

            if not _matches_filter(game.headers):
                continue

            board, transcript = _build_transcript(game)
            # The header result alone does not show how the game ended;
            # require that the final board position is itself a finished
            # game with result 1-0.
            if board.is_game_over() and board.result() == "1-0":
                csv_writer.writerow([transcript])
                games_added += 1
                if games_added % 100 == 0:
                    print(f"Added {games_added} of {games_seen} games.", file=sys.stderr)
43
+
44
# Script entry point: PGN text is read from stdin and matching transcripts
# are appended to the CSV below (presumably fed from a decompressed Lichess
# .pgn.zst dump, going by the script's name -- confirm against how it is run).
output_file = './lichess_transcripts_phase2_stable.csv'
process_pgn_stream(output_file)