PAWN / scripts /generate_stockfish_data.py
thomas-schweich's picture
Fix outcome array dtype in ceiling computation, np.asarray in script
75cf8a6
#!/usr/bin/env python3
"""Generate Stockfish self-play data as UCI move sequences.
Runs a pool of single-threaded Stockfish engines in parallel, one per worker process (--workers, default 14).
Output format: one game per line, space-separated UCI moves followed by the result.
e2e4 e7e5 g1f3 b8c6 ... 1-0
Each tier uses MultiPV + softmax temperature sampling over the opening plies of
each game (top-1 play afterwards) to produce diverse games from deterministic
Stockfish search. Seeds are hardcoded per tier so runs are reproducible for a
fixed --workers count. Worker seeds are derived as tier_seed + worker_id.
Tiers (by node count):
nodes_0001: 128K games, 1 node (near-random)
nodes_0032: 128K games, 32 nodes
nodes_0128: 128K games, 128 nodes
nodes_0256: 128K games, 256 nodes
nodes_1024: 128K games, 1024 nodes (strongest)
Usage:
python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/
python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/ --tier nodes_0128
python scripts/generate_stockfish_data.py --stockfish ~/bin/stockfish --output data/stockfish/ --workers 8
"""
from __future__ import annotations
import argparse
import math
import os
import random
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import get_context
from pathlib import Path
# One entry per strength tier. Seeds are hardcoded and spaced 10,000 apart so
# that worker seeds (tier_seed + worker_id) never collide across tiers as long
# as fewer than 10,000 workers are used.
TIERS = [
    {"name": f"nodes_{node_budget:04d}", "nodes": node_budget, "games": 128_000, "seed": base_seed}
    for node_budget, base_seed in (
        (1, 10_000),
        (32, 20_000),
        (128, 30_000),
        (256, 40_000),
        (1024, 50_000),
    )
]

# Number of candidate moves requested from the engine while sampling.
MULTI_PV = 5
# Softmax temperature applied to centipawn scores (higher = more random).
TEMPERATURE = 1.0
# Sample with MultiPV + temperature for this many opening plies, then play top-1.
SAMPLE_PLIES = 15
class StockfishEngine:
    """Thin UCI wrapper around one single-threaded Stockfish subprocess.

    The engine is driven over text pipes: commands are written to stdin and
    replies are read line by line from stdout. Positions are always given as
    ``position startpos moves ...``, so callers pass the full move history.
    """

    def __init__(self, path: str, hash_mb: int = 16, multi_pv: int = 1):
        """Launch the engine at ``path`` and complete the UCI handshake.

        Args:
            path: Path to the Stockfish executable.
            hash_mb: Value sent for the ``Hash`` option (MB).
            multi_pv: Value sent for the ``MultiPV`` option when > 1.

        Raises:
            RuntimeError: If the engine exits before the handshake completes.
        """
        # Set by candidates(): "checkmate", "stalemate", or None.
        self.last_terminal: str | None = None
        self.proc = subprocess.Popen(
            [path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            bufsize=1,
        )
        try:
            self._send("uci")
            self._wait_for("uciok")
            self._send(f"setoption name Hash value {hash_mb}")
            self._send("setoption name Threads value 1")
            if multi_pv > 1:
                self._send(f"setoption name MultiPV value {multi_pv}")
            self._send("isready")
            self._wait_for("readyok")
        except Exception:
            # Do not leave a half-initialised engine process behind.
            self.close()
            raise

    def _send(self, cmd: str):
        """Write one command line to the engine and flush it immediately."""
        self.proc.stdin.write(cmd + "\n")
        self.proc.stdin.flush()

    def _wait_for(self, token: str) -> list[str]:
        """Read engine output until a line starts with ``token``.

        Returns:
            All stripped lines read, including the matching one (last).

        Raises:
            RuntimeError: If stdout reaches EOF first (the engine exited).
        """
        lines = []
        while True:
            raw = self.proc.stdout.readline()
            if not raw:
                # readline() returns "" only at EOF; without this check a dead
                # engine would make the loop spin forever.
                raise RuntimeError(
                    f"Stockfish exited before sending {token!r}"
                )
            line = raw.strip()
            lines.append(line)
            if line.startswith(token):
                return lines

    @staticmethod
    def _parse_multipv_info(line: str) -> tuple[int, str, float] | None:
        """Parse one ``info ... multipv`` line into (pv_index, move, score).

        Mate scores are mapped to +/-30000 centipawns. Returns None for lines
        that are not MultiPV info lines or cannot be parsed.
        """
        if not line.startswith("info") or " multipv " not in line:
            return None
        parts = line.split()
        try:
            pv_idx = int(parts[parts.index("multipv") + 1])
            si = parts.index("score")
            kind, value = parts[si + 1], parts[si + 2]
            if kind == "cp":
                score = float(value)
            elif kind == "mate":
                score = 30_000.0 if int(value) > 0 else -30_000.0
            else:
                return None
            # First move of the principal variation.
            move = parts[parts.index("pv") + 1]
        except (ValueError, IndexError):
            return None
        return pv_idx, move, score

    def candidates(self, moves: list[str], nodes: int) -> list[tuple[str, float]]:
        """Return list of (uci_move, score_cp) from a node-limited search.

        Results are ordered by MultiPV index. If no MultiPV info line could
        be parsed, the ``bestmove`` reply is used with a score of 0.0.

        Side effect: sets self.last_terminal to "checkmate" or "stalemate"
        when bestmove is (none), otherwise None.
        """
        self.last_terminal = None
        pos = "position startpos"
        if moves:
            pos += " moves " + " ".join(moves)
        self._send(pos)
        self._send(f"go nodes {nodes}")
        lines = self._wait_for("bestmove")

        # Later info lines overwrite earlier ones, so the last (deepest) line
        # for each multipv index wins.
        best_by_pv: dict[int, tuple[str, float]] = {}
        for line in lines:
            parsed = self._parse_multipv_info(line)
            if parsed is not None:
                pv_idx, move, score = parsed
                best_by_pv[pv_idx] = (move, score)
        if best_by_pv:
            return [best_by_pv[k] for k in sorted(best_by_pv)]

        # Fallback: _wait_for stops at the first "bestmove" line, so it is
        # always the last line collected.
        parts = lines[-1].split()
        best = parts[1] if len(parts) > 1 else None
        if best and best != "(none)":
            return [(best, 0.0)]

        # No legal moves: a "score mate" info line means checkmate, anything
        # else is treated as stalemate.
        self.last_terminal = "stalemate"
        for info_line in lines:
            if not info_line.startswith("info") or "score" not in info_line:
                continue
            info_parts = info_line.split()
            try:
                si = info_parts.index("score")
                if info_parts[si + 1] == "mate":
                    self.last_terminal = "checkmate"
            except (ValueError, IndexError):
                pass
        return []

    def set_multi_pv(self, n: int):
        """Set the MultiPV option and wait until the engine is ready."""
        self._send(f"setoption name MultiPV value {n}")
        self._send("isready")
        self._wait_for("readyok")

    def new_game(self):
        """Send ``ucinewgame`` and wait until the engine is ready."""
        self._send("ucinewgame")
        self._send("isready")
        self._wait_for("readyok")

    def close(self):
        """Shut the engine down (best effort) and release its pipes."""
        try:
            self._send("quit")
            self.proc.wait(timeout=5)
        except (OSError, ValueError, subprocess.TimeoutExpired):
            # Pipe already broken/closed or engine unresponsive: force it,
            # then reap the process so it does not linger as a zombie.
            self.proc.kill()
            self.proc.wait()
        finally:
            for pipe in (self.proc.stdin, self.proc.stdout):
                if pipe is None:
                    continue
                try:
                    pipe.close()
                except OSError:
                    # Flushing into a dead process can fail; nothing to do.
                    pass
def softmax_sample(
    candidates: list[tuple[str, float]], temperature: float, rng: random.Random
) -> str | None:
    """Draw one move from ``candidates`` with softmax weights over cp scores.

    Args:
        candidates: (uci_move, score_cp) pairs.
        temperature: Softmax temperature; values <= 0 select the
            highest-scoring move deterministically.
        rng: Source of randomness; exactly one ``rng.random()`` call is made
            when sampling actually happens.

    Returns:
        The chosen move, or None when ``candidates`` is empty.
    """
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0][0]
    if temperature <= 0:
        # Degenerate temperature: plain argmax.
        return max(candidates, key=lambda c: c[1])[0]

    # Subtract the best score before exponentiating for numerical stability.
    top = max(score for _, score in candidates)
    # 100 cp ~ 1 pawn, so at temperature 1.0 a one-pawn gap is a factor of e.
    scale = 100.0 * temperature
    weights = [math.exp((score - top) / scale) for _, score in candidates]
    norm = sum(weights)

    # Inverse-CDF sampling: walk the cumulative distribution until it
    # covers the uniform draw.
    draw = rng.random()
    running = 0.0
    for (move, _), weight in zip(candidates, weights):
        running += weight / norm
        if draw <= running:
            return move
    # Rounding can leave the cumulative sum just below the draw.
    return candidates[-1][0]
def play_game(
    engine: StockfishEngine,
    nodes: int,
    rng: random.Random,
    temperature: float,
    multi_pv: int,
    sample_plies: int,
    max_ply: int = 500,
) -> tuple[list[str], str]:
    """Play a single self-play game and return (moves_uci, result).

    The first ``sample_plies`` plies are chosen by softmax sampling over the
    engine's MultiPV candidates; after that the engine is switched to
    MultiPV=1 and its top move is played. The game stops when the engine
    offers no move or ``max_ply`` moves have been played.

    The result is "1-0", "0-1", "1/2-1/2" (stalemate or ply cap reached), or
    "*" when the game ended for a reason the engine did not classify.
    """
    engine.new_game()
    engine.set_multi_pv(multi_pv)

    history: list[str] = []
    sampling = True
    for ply in range(max_ply):
        # Leave the sampled opening phase exactly once.
        if sampling and ply >= sample_plies:
            engine.set_multi_pv(1)
            sampling = False

        options = engine.candidates(history, nodes)
        if sampling:
            chosen = softmax_sample(options, temperature, rng)
        elif options:
            # Top-1 mode: the first candidate is the engine's best move.
            chosen = options[0][0]
        else:
            chosen = None

        if chosen is None:
            break
        history.append(chosen)

    played = len(history)
    if played == 0:
        return history, "*"
    if played >= max_ply:
        return history, "1/2-1/2"

    terminal = engine.last_terminal
    if terminal == "checkmate":
        # The side to move is the one mated: White moves after an even
        # number of plies, so an even count means Black won.
        return history, ("0-1" if played % 2 == 0 else "1-0")
    if terminal == "stalemate":
        return history, "1/2-1/2"
    return history, "*"
def worker_generate(
    stockfish_path: str,
    nodes: int,
    num_games: int,
    hash_mb: int,
    worker_id: int,
    seed: int,
    temperature: float,
    multi_pv: int,
    sample_plies: int,
) -> tuple[list[str], int, float]:
    """Worker entry point: play ``num_games`` games on a private engine.

    Args:
        stockfish_path: Path to the Stockfish executable.
        nodes: Node budget per move.
        num_games: Number of games this worker plays.
        hash_mb: Hash table size (MB) for the engine.
        worker_id: Index used only in progress messages.
        seed: Seed for this worker's private random generator.
        temperature: Softmax temperature for opening sampling.
        multi_pv: MultiPV candidates requested while sampling.
        sample_plies: Number of opening plies chosen by sampling.

    Returns:
        (lines, total_ply, elapsed): one "moves... result" line per game,
        the total number of plies played, and the elapsed seconds.
    """
    rng = random.Random(seed)
    engine = StockfishEngine(stockfish_path, hash_mb=hash_mb, multi_pv=multi_pv)
    lines: list[str] = []
    total_ply = 0
    t0 = time.perf_counter()
    try:
        for i in range(num_games):
            moves, result = play_game(
                engine, nodes, rng, temperature, multi_pv, sample_plies
            )
            total_ply += len(moves)
            lines.append(" ".join(moves) + " " + result)
            # Periodic progress report.
            if (i + 1) % 500 == 0:
                elapsed = time.perf_counter() - t0
                rate = (i + 1) / elapsed
                print(
                    f" [worker {worker_id:>2}] {i + 1:>6,}/{num_games:,} "
                    f"{rate:.1f} games/s",
                    flush=True,
                )
        elapsed = time.perf_counter() - t0
    finally:
        # Always shut the engine down, even if a game raised, so the
        # Stockfish subprocess is not left running.
        engine.close()
    return lines, total_ply, elapsed
def generate_tier(
    stockfish_path: str,
    output_dir: Path,
    tier: dict,
    num_workers: int,
    hash_mb: int,
    temperature: float,
    multi_pv: int,
    sample_plies: int,
) -> Path:
    """Generate all games for one tier in parallel and write them to disk.

    Games are split as evenly as possible across ``num_workers`` processes;
    worker ``i`` is seeded with ``tier["seed"] + i``. Results are written to
    ``output_dir / f"{tier['name']}.txt"`` in worker order, one game per line.

    Args:
        stockfish_path: Path to the Stockfish executable.
        output_dir: Existing directory that receives the tier file.
        tier: Dict with the keys "name", "nodes", "games" and "seed".
        num_workers: Number of worker processes (>= 1).
        hash_mb: Hash table size (MB) per engine.
        temperature: Softmax temperature for opening sampling.
        multi_pv: MultiPV candidates requested while sampling.
        sample_plies: Number of opening plies chosen by sampling.

    Returns:
        Path of the written file.

    Raises:
        ValueError: If ``num_workers`` is less than 1.
    """
    if num_workers < 1:
        # Fail early with a clear message instead of a ZeroDivisionError
        # from the per-worker split below.
        raise ValueError(f"num_workers must be >= 1, got {num_workers}")

    nodes = tier["nodes"]
    total_games = tier["games"]
    tier_seed = tier["seed"]
    name = tier["name"]
    out_path = output_dir / f"{name}.txt"

    # Distribute games across workers; the first `remainder` workers get one
    # extra game each.
    base, remainder = divmod(total_games, num_workers)
    per_worker = [base + (1 if i < remainder else 0) for i in range(num_workers)]

    print(f"\n{'=' * 60}")
    print(f"Generating {total_games:,} games: {name} (nodes={nodes})")
    print(f"Workers: {num_workers}, games/worker: ~{base}")
    print(f"MultiPV: {multi_pv} for first {sample_plies} plies, then top-1; temperature: {temperature}")
    print(f"Seed base: {tier_seed} (workers {tier_seed}..{tier_seed + num_workers - 1})")
    print(f"Output: {out_path}")
    print(f"{'=' * 60}")

    ctx = get_context("spawn")
    wall_t0 = time.perf_counter()
    with ProcessPoolExecutor(max_workers=num_workers, mp_context=ctx) as pool:
        futures = [
            pool.submit(
                worker_generate,
                stockfish_path,
                nodes,
                per_worker[i],
                hash_mb,
                i,
                tier_seed + i,
                temperature,
                multi_pv,
                sample_plies,
            )
            for i in range(num_workers)
        ]
        # Collect in submission order so the output file layout does not
        # depend on which worker finishes first.
        results = [f.result() for f in futures]
    wall_elapsed = time.perf_counter() - wall_t0

    # Collect and write
    total_ply = 0
    total_written = 0
    with open(out_path, "w", encoding="utf-8") as f:
        for lines, ply, _ in results:
            f.writelines(line + "\n" for line in lines)
            total_ply += ply
            total_written += len(lines)

    avg_ply = total_ply / total_written if total_written else 0
    rate = total_written / wall_elapsed if wall_elapsed > 0 else 0.0
    print(f"\n Done: {total_written:,} games in {wall_elapsed / 60:.1f}m")
    print(f" Rate: {rate:.1f} games/s Avg ply: {avg_ply:.0f}")
    print(f" File: {out_path} ({out_path.stat().st_size / 1e6:.1f} MB)")
    return out_path
def main():
    """Command-line entry point: parse options and generate the selected tiers.

    Exits with status 1 when the Stockfish binary is missing or ``--tier``
    names an unknown tier.
    """
    cli = argparse.ArgumentParser(
        description="Generate Stockfish self-play UCI data (parallel)"
    )
    cli.add_argument(
        "--stockfish",
        type=str,
        default=os.path.expanduser("~/bin/stockfish"),
        help="Path to stockfish binary",
    )
    cli.add_argument(
        "--output", type=str, default="data/stockfish", help="Output directory"
    )
    cli.add_argument(
        "--tier",
        type=str,
        default=None,
        help="Only generate this tier (e.g. nodes_0128)",
    )
    cli.add_argument(
        "--workers", type=int, default=14, help="Number of parallel engines"
    )
    cli.add_argument(
        "--games", type=int, default=None, help="Override number of games per tier"
    )
    cli.add_argument(
        "--hash", type=int, default=16, help="Hash table MB per engine"
    )
    cli.add_argument(
        "--temperature", type=float, default=TEMPERATURE, help="Softmax temperature"
    )
    cli.add_argument(
        "--multi-pv", type=int, default=MULTI_PV, help="MultiPV candidates per move"
    )
    cli.add_argument(
        "--sample-plies",
        type=int,
        default=SAMPLE_PLIES,
        help="Use MultiPV+temperature for the first N plies, then top-1",
    )
    opts = cli.parse_args()

    engine_path = os.path.expanduser(opts.stockfish)
    if not os.path.isfile(engine_path):
        print(f"ERROR: Stockfish not found at {engine_path}")
        sys.exit(1)

    out_dir = Path(opts.output)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Either every tier, or just the one requested by name.
    if opts.tier is None:
        selected = TIERS
    else:
        selected = [t for t in TIERS if t["name"] == opts.tier]
        if not selected:
            valid = ", ".join(t["name"] for t in TIERS)
            print(f"ERROR: unknown tier '{opts.tier}'. Valid: {valid}")
            sys.exit(1)

    for spec in selected:
        # --games overrides the tier's game count on a copy, leaving the
        # module-level TIERS table untouched.
        effective = spec if opts.games is None else {**spec, "games": opts.games}
        generate_tier(
            engine_path,
            out_dir,
            effective,
            opts.workers,
            opts.hash,
            opts.temperature,
            opts.multi_pv,
            opts.sample_plies,
        )

    print(f"\nAll done. Files in {out_dir}/")
# Run the CLI only when executed as a script. The guard is required here:
# worker processes are started with the "spawn" method, which re-imports this
# module, and they must not re-enter main().
if __name__ == "__main__":
    main()