diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..dc01b496a4ed7f333a7d9953665381ebfa5bed77 Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..917bd5c06c7a83f9d4c48f9a531df196b0cb2d9d --- /dev/null +++ b/src/Makefile @@ -0,0 +1,1009 @@ +# Stockfish, a UCI chess playing engine derived from Glaurung 2.1 +# Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) +# +# Stockfish is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Stockfish is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +### ========================================================================== +### Section 1. General Configuration +### ========================================================================== + +### Establish the operating system name +KERNEL = $(shell uname -s) +ifeq ($(KERNEL),Linux) + OS = $(shell uname -o) +endif + +### Target Windows OS +ifeq ($(OS),Windows_NT) + ifneq ($(COMP),ndk) + target_windows = yes + endif +else ifeq ($(COMP),mingw) + target_windows = yes + ifeq ($(WINE_PATH),) + WINE_PATH = $(shell which wine) + endif +endif + +### Executable name +ifeq ($(target_windows),yes) + EXE = stockfish.exe +else + EXE = stockfish +endif + +### Installation dir definitions +PREFIX = /usr/local +BINDIR = $(PREFIX)/bin + +### Built-in benchmark for pgo-builds +ifeq ($(SDE_PATH),) + PGOBENCH = $(WINE_PATH) ./$(EXE) bench +else + PGOBENCH = $(SDE_PATH) -- $(WINE_PATH) ./$(EXE) bench +endif + +### Source and object files +SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ + material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ + search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ + nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp + +OBJS = $(notdir $(SRCS:.cpp=.o)) + +VPATH = syzygy:nnue:nnue/features + +### ========================================================================== +### Section 2. High-level Configuration +### ========================================================================== +# +# flag --- Comp switch --- Description +# ---------------------------------------------------------------------------- +# +# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode +# sanitize = none/ ... (-fsanitize ) +# --- ( undefined ) --- enable undefined behavior checks +# --- ( thread ) --- enable threading error checks +# --- ( address ) --- enable memory access checks +# --- ...etc... --- see compiler documentation for supported sanitizers +# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations +# arch = (name) --- (-arch) --- Target architecture +# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system +# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction +# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction +# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction +# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions +# mmx = yes/no --- -mmmx --- Use Intel MMX instructions +# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 +# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 +# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 +# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 +# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX +# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture +# +# Note that Makefile is space sensitive, so when adding new architectures +# or modifying existing flags, you have to make sure there are no extra spaces +# at the end of the line for flag values. +# +# Example of use for these flags: +# make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined" + + +### 2.1. General and architecture defaults + +ifeq ($(ARCH),) + ARCH = x86-64-modern + help_skip_sanity = yes +endif +# explicitly check for the list of supported architectures (as listed with make help), +# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ + x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32 riscv64)) + SUPPORTED_ARCH=true +else + SUPPORTED_ARCH=false +endif + +optimize = yes +debug = no +sanitize = none +bits = 64 +prefetch = no +popcnt = no +pext = no +sse = no +mmx = no +sse2 = no +ssse3 = no +sse41 = no +avx2 = no +avxvnni = no +avx512 = no +vnni256 = no +vnni512 = no +neon = no +arm_version = 0 +STRIP = strip + +### 2.2 Architecture specific + +ifeq ($(findstring x86,$(ARCH)),x86) + +# x86-32/64 + +ifeq ($(findstring x86-32,$(ARCH)),x86-32) + arch = i386 + bits = 32 + sse = no + mmx = yes +else + arch = x86_64 + sse = yes + sse2 = yes +endif + +ifeq ($(findstring -sse,$(ARCH)),-sse) + sse = yes +endif + +ifeq ($(findstring -popcnt,$(ARCH)),-popcnt) + popcnt = yes +endif + +ifeq ($(findstring -mmx,$(ARCH)),-mmx) + mmx = yes +endif + +ifeq ($(findstring -sse2,$(ARCH)),-sse2) + sse = yes + sse2 = yes +endif + +ifeq ($(findstring -ssse3,$(ARCH)),-ssse3) + sse = yes + sse2 = yes + ssse3 = yes +endif + +ifeq ($(findstring -sse41,$(ARCH)),-sse41) + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(findstring -modern,$(ARCH)),-modern) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(findstring -avx2,$(ARCH)),-avx2) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes +endif + +ifeq ($(findstring -avxvnni,$(ARCH)),-avxvnni) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + avxvnni = yes + pext = yes +endif + +ifeq ($(findstring -bmi2,$(ARCH)),-bmi2) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes +endif + +ifeq ($(findstring -avx512,$(ARCH)),-avx512) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + avx512 = yes +endif + +ifeq ($(findstring -vnni256,$(ARCH)),-vnni256) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + vnni256 = yes +endif + +ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + avx512 = yes + vnni512 = yes +endif + +ifeq ($(sse),yes) + prefetch = yes +endif + +# 64-bit pext is not available on x86-32 +ifeq ($(bits),32) + pext = no +endif + +else + +# all other architectures + +ifeq ($(ARCH),general-32) + arch = any + bits = 32 +endif + +ifeq ($(ARCH),general-64) + arch = any +endif + +ifeq ($(ARCH),armv7) + arch = armv7 + prefetch = yes + bits = 32 + arm_version = 7 +endif + +ifeq ($(ARCH),armv7-neon) + arch = armv7 + prefetch = yes + popcnt = yes + neon = yes + bits = 32 + arm_version = 7 +endif + +ifeq ($(ARCH),armv8) + arch = armv8 + prefetch = yes + popcnt = yes + neon = yes + arm_version = 8 +endif + +ifeq ($(ARCH),apple-silicon) + arch = arm64 + prefetch = yes + popcnt = yes + neon = yes + arm_version = 8 +endif + +ifeq ($(ARCH),ppc-32) + arch = ppc + bits = 32 +endif + +ifeq ($(ARCH),ppc-64) + arch = ppc64 + popcnt = yes + prefetch = yes +endif + +ifeq ($(findstring e2k,$(ARCH)),e2k) + arch = e2k + mmx = yes + bits = 64 + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + popcnt = yes +endif + +ifeq ($(ARCH),riscv64) + arch = riscv64 +endif +endif + + +### ========================================================================== +### Section 3. Low-level Configuration +### ========================================================================== + +### 3.1 Selecting compiler (default = gcc) +ifeq ($(MAKELEVEL),0) + export ENV_CXXFLAGS := $(CXXFLAGS) + export ENV_DEPENDFLAGS := $(DEPENDFLAGS) + export ENV_LDFLAGS := $(LDFLAGS) +endif + +CXXFLAGS = $(ENV_CXXFLAGS) -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) +DEPENDFLAGS = $(ENV_DEPENDFLAGS) -std=c++17 +LDFLAGS = $(ENV_LDFLAGS) $(EXTRALDFLAGS) + +ifeq ($(COMP),) + COMP=gcc +endif + +ifeq ($(COMP),gcc) + comp=gcc + CXX=g++ + CXXFLAGS += -pedantic -Wextra -Wshadow + + ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) + ifeq ($(OS),Android) + CXXFLAGS += -m$(bits) + LDFLAGS += -m$(bits) + endif + ifeq ($(ARCH),riscv64) + CXXFLAGS += -latomic + endif + else + CXXFLAGS += -m$(bits) + LDFLAGS += -m$(bits) + endif + + ifeq ($(arch),$(filter $(arch),armv7)) + LDFLAGS += -latomic + endif + + ifneq ($(KERNEL),Darwin) + LDFLAGS += -Wl,--no-as-needed + endif +endif + +ifeq ($(target_windows),yes) + LDFLAGS += -static +endif + +ifeq ($(COMP),mingw) + comp=mingw + + ifeq ($(bits),64) + ifeq ($(shell which x86_64-w64-mingw32-c++-posix 2> /dev/null),) + CXX=x86_64-w64-mingw32-c++ + else + CXX=x86_64-w64-mingw32-c++-posix + endif + else + ifeq ($(shell which i686-w64-mingw32-c++-posix 2> /dev/null),) + CXX=i686-w64-mingw32-c++ + else + CXX=i686-w64-mingw32-c++-posix + endif + endif + CXXFLAGS += -pedantic -Wextra -Wshadow +endif + +ifeq ($(COMP),icc) + comp=icc + CXX=icpc + CXXFLAGS += -diag-disable 1476,10120 -Wcheck -Wabi -Wdeprecated -strict-ansi +endif + +ifeq ($(COMP),clang) + comp=clang + CXX=clang++ + ifeq ($(target_windows),yes) + CXX=x86_64-w64-mingw32-clang++ + endif + + CXXFLAGS += -pedantic -Wextra -Wshadow + + ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),) + ifeq ($(target_windows),) + ifneq ($(RTLIB),compiler-rt) + LDFLAGS += -latomic + endif + endif + endif + + ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) + ifeq ($(OS),Android) + CXXFLAGS += -m$(bits) + LDFLAGS += -m$(bits) + endif + ifeq ($(ARCH),riscv64) + CXXFLAGS += -latomic + endif + else + CXXFLAGS += -m$(bits) + LDFLAGS += -m$(bits) + endif +endif + +ifeq ($(KERNEL),Darwin) + CXXFLAGS += -mmacosx-version-min=10.14 + LDFLAGS += -mmacosx-version-min=10.14 + ifneq ($(arch),any) + CXXFLAGS += -arch $(arch) + LDFLAGS += -arch $(arch) + endif + XCRUN = xcrun +endif + +# To cross-compile for Android, NDK version r21 or later is recommended. +# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils. +# Currently we don't know how to make PGO builds with the NDK yet. +ifeq ($(COMP),ndk) + CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang + ifeq ($(arch),armv7) + CXX=armv7a-linux-androideabi16-clang++ + CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon + ifneq ($(shell which arm-linux-androideabi-strip 2>/dev/null),) + STRIP=arm-linux-androideabi-strip + else + STRIP=llvm-strip + endif + endif + ifeq ($(arch),armv8) + CXX=aarch64-linux-android21-clang++ + ifneq ($(shell which aarch64-linux-android-strip 2>/dev/null),) + STRIP=aarch64-linux-android-strip + else + STRIP=llvm-strip + endif + endif + LDFLAGS += -static-libstdc++ -pie -lm -latomic +endif + +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use + ifeq ($(KERNEL),Darwin) + EXTRAPROFILEFLAGS = -fvisibility=hidden + endif +endif + +### Travis CI script uses COMPILER to overwrite CXX +ifdef COMPILER + COMPCXX=$(COMPILER) +endif + +### Allow overwriting CXX from command line +ifdef COMPCXX + CXX=$(COMPCXX) +endif + +### Sometimes gcc is really clang +ifeq ($(COMP),gcc) + gccversion = $(shell $(CXX) --version) + gccisclang = $(findstring clang,$(gccversion)) + ifneq ($(gccisclang),) + profile_make = clang-profile-make + profile_use = clang-profile-use + endif +endif + +### On mingw use Windows threads, otherwise POSIX +ifneq ($(comp),mingw) + CXXFLAGS += -DUSE_PTHREADS + # On Android Bionic's C library comes with its own pthread implementation bundled in + ifneq ($(OS),Android) + # Haiku has pthreads in its libroot, so only link it in on other platforms + ifneq ($(KERNEL),Haiku) + ifneq ($(COMP),ndk) + LDFLAGS += -lpthread + endif + endif + endif +endif + +### 3.2.1 Debugging +ifeq ($(debug),no) + CXXFLAGS += -DNDEBUG +else + CXXFLAGS += -g +endif + +### 3.2.2 Debugging with undefined behavior sanitizers +ifneq ($(sanitize),none) + CXXFLAGS += -g3 $(addprefix -fsanitize=,$(sanitize)) + LDFLAGS += $(addprefix -fsanitize=,$(sanitize)) +endif + +### 3.3 Optimization +ifeq ($(optimize),yes) + + CXXFLAGS += -O3 + + ifeq ($(comp),gcc) + ifeq ($(OS), Android) + CXXFLAGS += -fno-gcse -mthumb -march=armv7-a -mfloat-abi=softfp + endif + endif + + ifeq ($(KERNEL),Darwin) + ifeq ($(comp),$(filter $(comp),clang icc)) + CXXFLAGS += -mdynamic-no-pic + endif + + ifeq ($(comp),gcc) + ifneq ($(arch),arm64) + CXXFLAGS += -mdynamic-no-pic + endif + endif + endif + + ifeq ($(comp),clang) + CXXFLAGS += -fexperimental-new-pass-manager + endif +endif + +### 3.4 Bits +ifeq ($(bits),64) + CXXFLAGS += -DIS_64BIT +endif + +### 3.5 prefetch and popcount +ifeq ($(prefetch),yes) + ifeq ($(sse),yes) + CXXFLAGS += -msse + endif +else + CXXFLAGS += -DNO_PREFETCH +endif + +ifeq ($(popcnt),yes) + ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) + CXXFLAGS += -DUSE_POPCNT + else ifeq ($(comp),icc) + CXXFLAGS += -msse3 -DUSE_POPCNT + else + CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT + endif +endif + +### 3.6 SIMD architectures +ifeq ($(avx2),yes) + CXXFLAGS += -DUSE_AVX2 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx2 -mbmi + endif +endif + +ifeq ($(avxvnni),yes) + CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavxvnni + endif +endif + +ifeq ($(avx512),yes) + CXXFLAGS += -DUSE_AVX512 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512f -mavx512bw + endif +endif + +ifeq ($(vnni256),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 + endif +endif + +ifeq ($(vnni512),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl + endif +endif + +ifeq ($(sse41),yes) + CXXFLAGS += -DUSE_SSE41 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse4.1 + endif +endif + +ifeq ($(ssse3),yes) + CXXFLAGS += -DUSE_SSSE3 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mssse3 + endif +endif + +ifeq ($(sse2),yes) + CXXFLAGS += -DUSE_SSE2 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse2 + endif +endif + +ifeq ($(mmx),yes) + CXXFLAGS += -DUSE_MMX + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mmmx + endif +endif + +ifeq ($(neon),yes) + CXXFLAGS += -DUSE_NEON=$(arm_version) + ifeq ($(KERNEL),Linux) + ifneq ($(COMP),ndk) + ifneq ($(arch),armv8) + CXXFLAGS += -mfpu=neon + endif + endif + endif +endif + +### 3.7 pext +ifeq ($(pext),yes) + CXXFLAGS += -DUSE_PEXT + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mbmi2 + endif +endif + +### 3.7.1 Try to include git commit sha for versioning +GIT_SHA = $(shell git rev-parse --short HEAD 2>/dev/null) +ifneq ($(GIT_SHA), ) + CXXFLAGS += -DGIT_SHA=\"$(GIT_SHA)\" +endif + +### 3.7.2 Try to include git commit date for versioning +GIT_DATE = $(shell git show -s --date=format:'%Y%m%d' --format=%cd HEAD 2>/dev/null) +ifneq ($(GIT_DATE), ) + CXXFLAGS += -DGIT_DATE=\"$(GIT_DATE)\" +endif + +### 3.8 Link Time Optimization +### This is a mix of compile and link time options because the lto link phase +### needs access to the optimization flags. +ifeq ($(optimize),yes) +ifeq ($(debug), no) + ifeq ($(comp),clang) + CXXFLAGS += -flto=full + ifeq ($(target_windows),yes) + CXXFLAGS += -fuse-ld=lld + endif + LDFLAGS += $(CXXFLAGS) + +# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be +# GCC on some systems. + else ifeq ($(comp),gcc) + ifeq ($(gccisclang),) + CXXFLAGS += -flto -flto-partition=one + LDFLAGS += $(CXXFLAGS) -flto=jobserver + else + CXXFLAGS += -flto=full + LDFLAGS += $(CXXFLAGS) + endif + +# To use LTO and static linking on Windows, +# the tool chain requires gcc version 10.1 or later. + else ifeq ($(comp),mingw) + CXXFLAGS += -flto -flto-partition=one + LDFLAGS += $(CXXFLAGS) -save-temps + endif +endif +endif + +### 3.9 Android 5 can only run position independent executables. Note that this +### breaks Android 4.0 and earlier. +ifeq ($(OS), Android) + CXXFLAGS += -fPIE + LDFLAGS += -fPIE -pie +endif + +### ========================================================================== +### Section 4. Public Targets +### ========================================================================== + + +help: + @echo "" + @echo "To compile stockfish, type: " + @echo "" + @echo "make target ARCH=arch [COMP=compiler] [COMPCXX=cxx]" + @echo "" + @echo "Supported targets:" + @echo "" + @echo "help > Display architecture details" + @echo "build > Standard build" + @echo "net > Download the default nnue net" + @echo "profile-build > Faster build (with profile-guided optimization)" + @echo "strip > Strip executable" + @echo "install > Install executable" + @echo "clean > Clean up" + @echo "" + @echo "Supported archs:" + @echo "" + @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" + @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" + @echo "x86-64-avx512 > x86 64-bit with avx512 support" + @echo "x86-64-avxvnni > x86 64-bit with avxvnni support" + @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" + @echo "x86-64-avx2 > x86 64-bit with avx2 support" + @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" + @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt" + @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" + @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" + @echo "x86-64 > x86 64-bit generic (with sse2 support)" + @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" + @echo "x86-32-sse2 > x86 32-bit with sse2 support" + @echo "x86-32 > x86 32-bit generic (with mmx and sse support)" + @echo "ppc-64 > PPC 64-bit" + @echo "ppc-32 > PPC 32-bit" + @echo "armv7 > ARMv7 32-bit" + @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" + @echo "armv8 > ARMv8 64-bit with popcnt and neon" + @echo "e2k > Elbrus 2000" + @echo "apple-silicon > Apple silicon ARM64" + @echo "general-64 > unspecified 64-bit" + @echo "general-32 > unspecified 32-bit" + @echo "riscv64 > RISC-V 64-bit" + @echo "" + @echo "Supported compilers:" + @echo "" + @echo "gcc > Gnu compiler (default)" + @echo "mingw > Gnu compiler with MinGW under Windows" + @echo "clang > LLVM Clang compiler" + @echo "icc > Intel compiler" + @echo "ndk > Google NDK to cross-compile for Android" + @echo "" + @echo "Simple examples. If you don't know what to do, you likely want to run: " + @echo "" + @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)" + @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)" + @echo "" + @echo "Advanced examples, for experienced users looking for performance: " + @echo "" + @echo "make help ARCH=x86-64-bmi2" + @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" + @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" + @echo "" + @echo "-------------------------------" +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) + @echo "The selected architecture $(ARCH) will enable the following configuration: " + @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity +else + @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" +endif + + +.PHONY: help build profile-build strip install clean net objclean profileclean \ + config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ + clang-profile-use clang-profile-make FORCE + +build: net config-sanity + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all + +profile-build: net config-sanity objclean profileclean + @echo "" + @echo "Step 1/4. Building instrumented executable ..." + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) + @echo "" + @echo "Step 2/4. Running benchmark for pgo-build ..." + $(PGOBENCH) 2>&1 | tail -n 4 + @echo "" + @echo "Step 3/4. Building optimized executable ..." + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) + @echo "" + @echo "Step 4/4. Deleting profile data ..." + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean + +strip: + $(STRIP) $(EXE) + +install: + -mkdir -p -m 755 $(BINDIR) + -cp $(EXE) $(BINDIR) + $(STRIP) $(BINDIR)/$(EXE) + +# clean all +clean: objclean profileclean + @rm -f .depend *~ core + +# evaluation network (nnue) +net: + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + @echo "Default net: $(nnuenet)" + $(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet)) + $(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) + $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) + @if [ "x$(curl_or_wget)" = "x" ]; then \ + echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \ + fi + $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) + @if [ "x$(shasum_command)" = "x" ]; then \ + echo "shasum / sha256sum not found, skipping net validation"; \ + fi + @for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \ + if test -f "$(nnuenet)"; then \ + echo "$(nnuenet) available."; \ + else \ + if [ "x$(curl_or_wget)" != "x" ]; then \ + echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ + fi; \ + fi; \ + if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Removing failed download"; rm -f $(nnuenet); \ + else \ + echo "Network validated"; break; \ + fi; \ + fi; \ + done + @if ! test -f "$(nnuenet)"; then \ + echo "Failed to download $(nnuenet)."; \ + fi + +# clean binaries and objects +objclean: + @rm -f stockfish stockfish.exe *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o + +# clean auxiliary profiling files +profileclean: + @rm -rf profdir + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s + @rm -f stockfish.profdata *.profraw + @rm -f stockfish.*args* + @rm -f stockfish.*lt* + @rm -f stockfish.res + @rm -f ./-lstdc++.res + +default: + help + +### ========================================================================== +### Section 5. Private Targets +### ========================================================================== + +all: $(EXE) .depend + +config-sanity: net + @echo "" + @echo "Config:" + @echo "debug: '$(debug)'" + @echo "sanitize: '$(sanitize)'" + @echo "optimize: '$(optimize)'" + @echo "arch: '$(arch)'" + @echo "bits: '$(bits)'" + @echo "kernel: '$(KERNEL)'" + @echo "os: '$(OS)'" + @echo "prefetch: '$(prefetch)'" + @echo "popcnt: '$(popcnt)'" + @echo "pext: '$(pext)'" + @echo "sse: '$(sse)'" + @echo "mmx: '$(mmx)'" + @echo "sse2: '$(sse2)'" + @echo "ssse3: '$(ssse3)'" + @echo "sse41: '$(sse41)'" + @echo "avx2: '$(avx2)'" + @echo "avxvnni: '$(avxvnni)'" + @echo "avx512: '$(avx512)'" + @echo "vnni256: '$(vnni256)'" + @echo "vnni512: '$(vnni512)'" + @echo "neon: '$(neon)'" + @echo "arm_version: '$(arm_version)'" + @echo "" + @echo "Flags:" + @echo "CXX: $(CXX)" + @echo "CXXFLAGS: $(CXXFLAGS)" + @echo "LDFLAGS: $(LDFLAGS)" + @echo "" + @echo "Testing config sanity. If this fails, try 'make help' ..." + @echo "" + @test "$(debug)" = "yes" || test "$(debug)" = "no" + @test "$(optimize)" = "yes" || test "$(optimize)" = "no" + @test "$(SUPPORTED_ARCH)" = "true" + @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ + test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \ + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || test "$(arch)" = "riscv64" + @test "$(bits)" = "32" || test "$(bits)" = "64" + @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" + @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" + @test "$(pext)" = "yes" || test "$(pext)" = "no" + @test "$(sse)" = "yes" || test "$(sse)" = "no" + @test "$(mmx)" = "yes" || test "$(mmx)" = "no" + @test "$(sse2)" = "yes" || test "$(sse2)" = "no" + @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" + @test "$(sse41)" = "yes" || test "$(sse41)" = "no" + @test "$(avx2)" = "yes" || test "$(avx2)" = "no" + @test "$(avx512)" = "yes" || test "$(avx512)" = "no" + @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" + @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" + @test "$(neon)" = "yes" || test "$(neon)" = "no" + @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ + || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" + +$(EXE): $(OBJS) + +$(CXX) -o $@ $(OBJS) $(LDFLAGS) + +# Force recompilation to ensure version info is up-to-date +misc.o: FORCE +FORCE: + +clang-profile-make: + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-fprofile-instr-generate ' \ + EXTRALDFLAGS=' -fprofile-instr-generate' \ + all + +clang-profile-use: + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ + EXTRALDFLAGS='-fprofile-use ' \ + all + +gcc-profile-make: + @mkdir -p profdir + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-fprofile-generate=profdir' \ + EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \ + EXTRALDFLAGS='-lgcov' \ + all + +gcc-profile-use: + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-fprofile-use=profdir -fno-peel-loops -fno-tracer' \ + EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \ + EXTRALDFLAGS='-lgcov' \ + all + +icc-profile-make: + @mkdir -p profdir + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-prof-gen=srcpos -prof_dir ./profdir' \ + all + +icc-profile-use: + $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ + EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \ + all + +.depend: $(SRCS) + -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null + +-include .depend diff --git a/src/benchmark.cpp b/src/benchmark.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1c025adb9afc7d955d9b763154b2002b27e3394 --- /dev/null +++ b/src/benchmark.cpp @@ -0,0 +1,175 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include + +#include "position.h" + +using namespace std; + +namespace { + +const vector Defaults = { + "setoption name UCI_Chess960 value false", + "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", + "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", + "8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11", + "4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19", + "rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves d4e6", + "r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves g2g4", + "r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15", + "r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13", + "r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16", + "4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17", + "2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11", + "r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16", + "3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22", + "r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18", + "4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22", + "3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26", + "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", + "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", + "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", + "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", + "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", + "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", + "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", + "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", + "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", + "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", + "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", + "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", + "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", + "8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1", + "5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90", + "4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21", + "r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16", + "3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40", + "4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1", + + // 5-man positions + "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate + "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate + "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw + + // 6-man positions + "8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", // Re5 - mate + "8/2p4P/8/kr6/6R1/8/8/1K6 w - - 0 1", // Ka2 - mate + "8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", // Nd2 - draw + + // 7-man positions + "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw + + // Mate and stalemate positions + "6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1", + "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", + "8/8/8/8/8/6k1/6p1/6K1 w - -", + "7k/7P/6K1/8/3B4/8/8/8 b - -", + + // Chess 960 + "setoption name UCI_Chess960 value true", + "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", + "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", + "setoption name UCI_Chess960 value false" +}; + +} // namespace + +namespace Stockfish { + +/// setup_bench() builds a list of UCI commands to be run by bench. There +/// are five parameters: TT size in MB, number of search threads that +/// should be used, the limit value spent for each position, a file name +/// where to look for positions in FEN format, the type of the limit: +/// depth, perft, nodes and movetime (in millisecs), and evaluation type +/// mixed (default), classical, NNUE. +/// +/// bench -> search default positions up to depth 13 +/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) +/// bench 64 4 5000 current movetime -> search current position with 4 threads for 5 sec +/// bench 64 1 100000 default nodes -> search default positions for 100K nodes each +/// bench 16 1 5 default perft -> run a perft 5 on default positions + +vector setup_bench(const Position& current, istream& is) { + + vector fens, list; + string go, token; + + // Assign default values to missing arguments + string ttSize = (is >> token) ? token : "16"; + string threads = (is >> token) ? token : "1"; + string limit = (is >> token) ? token : "13"; + string fenFile = (is >> token) ? token : "default"; + string limitType = (is >> token) ? token : "depth"; + string evalType = (is >> token) ? token : "mixed"; + + go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; + + if (fenFile == "default") + fens = Defaults; + + else if (fenFile == "current") + fens.push_back(current.fen()); + + else + { + string fen; + ifstream file(fenFile); + + if (!file.is_open()) + { + cerr << "Unable to open file " << fenFile << endl; + exit(EXIT_FAILURE); + } + + while (getline(file, fen)) + if (!fen.empty()) + fens.push_back(fen); + + file.close(); + } + + list.emplace_back("setoption name Threads value " + threads); + list.emplace_back("setoption name Hash value " + ttSize); + list.emplace_back("ucinewgame"); + + size_t posCounter = 0; + + for (const string& fen : fens) + if (fen.find("setoption") != string::npos) + list.emplace_back(fen); + else + { + if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0)) + list.emplace_back("setoption name Use NNUE value false"); + else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0)) + list.emplace_back("setoption name Use NNUE value true"); + list.emplace_back("position fen " + fen); + list.emplace_back(go); + ++posCounter; + } + + list.emplace_back("setoption name Use NNUE value true"); + + return list; +} + +} // namespace Stockfish diff --git a/src/bitbase.cpp b/src/bitbase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..84300baf92abb0dc0519ff4659081f1319190eff --- /dev/null +++ b/src/bitbase.cpp @@ -0,0 +1,172 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "bitboard.h" +#include "types.h" + +namespace Stockfish { + +namespace { + + // There are 24 possible pawn squares: files A to D and ranks from 2 to 7. + // Positions with the pawn on files E to H will be mirrored before probing. + constexpr unsigned MAX_INDEX = 2*24*64*64; // stm * psq * wksq * bksq = 196608 + + std::bitset KPKBitbase; + + // A KPK bitbase index is an integer in [0, IndexMax] range + // + // Information is mapped in a way that minimizes the number of iterations: + // + // bit 0- 5: white king square (from SQ_A1 to SQ_H8) + // bit 6-11: black king square (from SQ_A1 to SQ_H8) + // bit 12: side to move (WHITE or BLACK) + // bit 13-14: white pawn file (from FILE_A to FILE_D) + // bit 15-17: white pawn RANK_7 - rank (from RANK_7 - RANK_7 to RANK_7 - RANK_2) + unsigned index(Color stm, Square bksq, Square wksq, Square psq) { + return int(wksq) | (bksq << 6) | (stm << 12) | (file_of(psq) << 13) | ((RANK_7 - rank_of(psq)) << 15); + } + + enum Result { + INVALID = 0, + UNKNOWN = 1, + DRAW = 2, + WIN = 4 + }; + + Result& operator|=(Result& r, Result v) { return r = Result(r | v); } + + struct KPKPosition { + KPKPosition() = default; + explicit KPKPosition(unsigned idx); + operator Result() const { return result; } + Result classify(const std::vector& db); + + Color stm; + Square ksq[COLOR_NB], psq; + Result result; + }; + +} // namespace + +bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) { + + assert(file_of(wpsq) <= FILE_D); + + return KPKBitbase[index(stm, bksq, wksq, wpsq)]; +} + + +void Bitbases::init() { + + std::vector db(MAX_INDEX); + unsigned idx, repeat = 1; + + // Initialize db with known win / draw positions + for (idx = 0; idx < MAX_INDEX; ++idx) + db[idx] = KPKPosition(idx); + + // Iterate through the positions until none of the unknown positions can be + // changed to either wins or draws (15 cycles needed). + while (repeat) + for (repeat = idx = 0; idx < MAX_INDEX; ++idx) + repeat |= (db[idx] == UNKNOWN && db[idx].classify(db) != UNKNOWN); + + // Fill the bitbase with the decisive results + for (idx = 0; idx < MAX_INDEX; ++idx) + if (db[idx] == WIN) + KPKBitbase.set(idx); +} + +namespace { + + KPKPosition::KPKPosition(unsigned idx) { + + ksq[WHITE] = Square((idx >> 0) & 0x3F); + ksq[BLACK] = Square((idx >> 6) & 0x3F); + stm = Color ((idx >> 12) & 0x01); + psq = make_square(File((idx >> 13) & 0x3), Rank(RANK_7 - ((idx >> 15) & 0x7))); + + // Invalid if two pieces are on the same square or if a king can be captured + if ( distance(ksq[WHITE], ksq[BLACK]) <= 1 + || ksq[WHITE] == psq + || ksq[BLACK] == psq + || (stm == WHITE && (pawn_attacks_bb(WHITE, psq) & ksq[BLACK]))) + result = INVALID; + + // Win if the pawn can be promoted without getting captured + else if ( stm == WHITE + && rank_of(psq) == RANK_7 + && ksq[WHITE] != psq + NORTH + && ( distance(ksq[BLACK], psq + NORTH) > 1 + || (distance(ksq[WHITE], psq + NORTH) == 1))) + result = WIN; + + // Draw if it is stalemate or the black king can capture the pawn + else if ( stm == BLACK + && ( !(attacks_bb(ksq[BLACK]) & ~(attacks_bb(ksq[WHITE]) | pawn_attacks_bb(WHITE, psq))) + || (attacks_bb(ksq[BLACK]) & ~attacks_bb(ksq[WHITE]) & psq))) + result = DRAW; + + // Position will be classified later + else + result = UNKNOWN; + } + + Result KPKPosition::classify(const std::vector& db) { + + // White to move: If one move leads to a position classified as WIN, the result + // of the current position is WIN. If all moves lead to positions classified + // as DRAW, the current position is classified as DRAW, otherwise the current + // position is classified as UNKNOWN. + // + // Black to move: If one move leads to a position classified as DRAW, the result + // of the current position is DRAW. If all moves lead to positions classified + // as WIN, the position is classified as WIN, otherwise the current position is + // classified as UNKNOWN. + const Result Good = (stm == WHITE ? WIN : DRAW); + const Result Bad = (stm == WHITE ? DRAW : WIN); + + Result r = INVALID; + Bitboard b = attacks_bb(ksq[stm]); + + while (b) + r |= stm == WHITE ? db[index(BLACK, ksq[BLACK], pop_lsb(b), psq)] + : db[index(WHITE, pop_lsb(b), ksq[WHITE], psq)]; + + if (stm == WHITE) + { + if (rank_of(psq) < RANK_7) // Single push + r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH)]; + + if ( rank_of(psq) == RANK_2 // Double push + && psq + NORTH != ksq[WHITE] + && psq + NORTH != ksq[BLACK]) + r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH + NORTH)]; + } + + return result = r & Good ? Good : r & UNKNOWN ? UNKNOWN : Bad; + } + +} // namespace + +} // namespace Stockfish diff --git a/src/bitboard.cpp b/src/bitboard.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fd0ba235cb471f70853b76723564cb69c4f17bea --- /dev/null +++ b/src/bitboard.cpp @@ -0,0 +1,222 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include + +#include "bitboard.h" +#include "misc.h" + +namespace Stockfish { + +uint8_t PopCnt16[1 << 16]; +uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; + +Bitboard SquareBB[SQUARE_NB]; +Bitboard LineBB[SQUARE_NB][SQUARE_NB]; +Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; +Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; +Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; + +Magic RookMagics[SQUARE_NB]; +Magic BishopMagics[SQUARE_NB]; + +namespace { + + Bitboard RookTable[0x19000]; // To store rook attacks + Bitboard BishopTable[0x1480]; // To store bishop attacks + + void init_magics(PieceType pt, Bitboard table[], Magic magics[]); + +} + +/// safe_destination() returns the bitboard of target square for the given step +/// from the given square. If the step is off the board, returns empty bitboard. + +inline Bitboard safe_destination(Square s, int step) { + Square to = Square(s + step); + return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); +} + + +/// Bitboards::pretty() returns an ASCII representation of a bitboard suitable +/// to be printed to standard output. Useful for debugging. + +std::string Bitboards::pretty(Bitboard b) { + + std::string s = "+---+---+---+---+---+---+---+---+\n"; + + for (Rank r = RANK_8; r >= RANK_1; --r) + { + for (File f = FILE_A; f <= FILE_H; ++f) + s += b & make_square(f, r) ? "| X " : "| "; + + s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; + } + s += " a b c d e f g h\n"; + + return s; +} + + +/// Bitboards::init() initializes various bitboard tables. It is called at +/// startup and relies on global objects to be already zero-initialized. + +void Bitboards::init() { + + for (unsigned i = 0; i < (1 << 16); ++i) + PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); + + for (Square s = SQ_A1; s <= SQ_H8; ++s) + SquareBB[s] = (1ULL << s); + + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); + + init_magics(ROOK, RookTable, RookMagics); + init_magics(BISHOP, BishopTable, BishopMagics); + + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + { + PawnAttacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); + PawnAttacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); + + for (int step : {-9, -8, -7, -1, 1, 7, 8, 9} ) + PseudoAttacks[KING][s1] |= safe_destination(s1, step); + + for (int step : {-17, -15, -10, -6, 6, 10, 15, 17} ) + PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step); + + PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb(s1, 0); + PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ ROOK][s1] = attacks_bb< ROOK>(s1, 0); + + for (PieceType pt : { BISHOP, ROOK }) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + { + if (PseudoAttacks[pt][s1] & s2) + { + LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; + BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); + } + BetweenBB[s1][s2] |= s2; + } + } +} + +namespace { + + Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { + + Bitboard attacks = 0; + Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; + Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; + + for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) + { + Square s = sq; + while (safe_destination(s, d) && !(occupied & s)) + attacks |= (s += d); + } + + return attacks; + } + + + // init_magics() computes all rook and bishop attacks at startup. Magic + // bitboards are used to look up attacks of sliding pieces. As a reference see + // www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so + // called "fancy" approach. + + void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { + + // Optimal PRNG seeds to pick the correct magics in the shortest time + int seeds[][RANK_NB] = { { 8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020 }, + { 728, 10316, 55013, 32803, 12281, 15100, 16645, 255 } }; + + Bitboard occupancy[4096], reference[4096], edges, b; + int epoch[4096] = {}, cnt = 0, size = 0; + + for (Square s = SQ_A1; s <= SQ_H8; ++s) + { + // Board edges are not considered in the relevant occupancies + edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); + + // Given a square 's', the mask is the bitboard of sliding attacks from + // 's' computed on an empty board. The index must be big enough to contain + // all the attacks for each possible subset of the mask and so is 2 power + // the number of 1s of the mask. Hence we deduce the size of the shift to + // apply to the 64 or 32 bits word to get the index. + Magic& m = magics[s]; + m.mask = sliding_attack(pt, s, 0) & ~edges; + m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); + + // Set the offset for the attacks table of the square. We have individual + // table sizes for each square with "Fancy Magic Bitboards". + m.attacks = s == SQ_A1 ? table : magics[s - 1].attacks + size; + + // Use Carry-Rippler trick to enumerate all subsets of masks[s] and + // store the corresponding sliding attack bitboard in reference[]. + b = size = 0; + do { + occupancy[size] = b; + reference[size] = sliding_attack(pt, s, b); + + if (HasPext) + m.attacks[pext(b, m.mask)] = reference[size]; + + size++; + b = (b - m.mask) & m.mask; + } while (b); + + if (HasPext) + continue; + + PRNG rng(seeds[Is64Bit][rank_of(s)]); + + // Find a magic for square 's' picking up an (almost) random number + // until we find the one that passes the verification test. + for (int i = 0; i < size; ) + { + for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6; ) + m.magic = rng.sparse_rand(); + + // A good magic must map every possible occupancy to an index that + // looks up the correct sliding attack in the attacks[s] database. + // Note that we build up the database for square 's' as a side + // effect of verifying the magic. Keep track of the attempt count + // and save it in epoch[], little speed-up trick to avoid resetting + // m.attacks[] after every failed attempt. + for (++cnt, i = 0; i < size; ++i) + { + unsigned idx = m.index(occupancy[i]); + + if (epoch[idx] < cnt) + { + epoch[idx] = cnt; + m.attacks[idx] = reference[i]; + } + else if (m.attacks[idx] != reference[i]) + break; + } + } + } + } +} + +} // namespace Stockfish diff --git a/src/bitboard.h b/src/bitboard.h new file mode 100644 index 0000000000000000000000000000000000000000..2b6e2a6920c646486c1ccbe942e980396f571fc3 --- /dev/null +++ b/src/bitboard.h @@ -0,0 +1,451 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef BITBOARD_H_INCLUDED +#define BITBOARD_H_INCLUDED + +#include + +#include "types.h" + +namespace Stockfish { + +namespace Bitbases { + +void init(); +bool probe(Square wksq, Square wpsq, Square bksq, Color us); + +} // namespace Stockfish::Bitbases + +namespace Bitboards { + +void init(); +std::string pretty(Bitboard b); + +} // namespace Stockfish::Bitboards + +constexpr Bitboard AllSquares = ~Bitboard(0); +constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL; + +constexpr Bitboard FileABB = 0x0101010101010101ULL; +constexpr Bitboard FileBBB = FileABB << 1; +constexpr Bitboard FileCBB = FileABB << 2; +constexpr Bitboard FileDBB = FileABB << 3; +constexpr Bitboard FileEBB = FileABB << 4; +constexpr Bitboard FileFBB = FileABB << 5; +constexpr Bitboard FileGBB = FileABB << 6; +constexpr Bitboard FileHBB = FileABB << 7; + +constexpr Bitboard Rank1BB = 0xFF; +constexpr Bitboard Rank2BB = Rank1BB << (8 * 1); +constexpr Bitboard Rank3BB = Rank1BB << (8 * 2); +constexpr Bitboard Rank4BB = Rank1BB << (8 * 3); +constexpr Bitboard Rank5BB = Rank1BB << (8 * 4); +constexpr Bitboard Rank6BB = Rank1BB << (8 * 5); +constexpr Bitboard Rank7BB = Rank1BB << (8 * 6); +constexpr Bitboard Rank8BB = Rank1BB << (8 * 7); + +constexpr Bitboard QueenSide = FileABB | FileBBB | FileCBB | FileDBB; +constexpr Bitboard CenterFiles = FileCBB | FileDBB | FileEBB | FileFBB; +constexpr Bitboard KingSide = FileEBB | FileFBB | FileGBB | FileHBB; +constexpr Bitboard Center = (FileDBB | FileEBB) & (Rank4BB | Rank5BB); + +constexpr Bitboard KingFlank[FILE_NB] = { + QueenSide ^ FileDBB, QueenSide, QueenSide, + CenterFiles, CenterFiles, + KingSide, KingSide, KingSide ^ FileEBB +}; + +extern uint8_t PopCnt16[1 << 16]; +extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; + +extern Bitboard SquareBB[SQUARE_NB]; +extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; +extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; +extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; +extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; + + +/// Magic holds all magic bitboards relevant data for a single square +struct Magic { + Bitboard mask; + Bitboard magic; + Bitboard* attacks; + unsigned shift; + + // Compute the attack's index using the 'magic bitboards' approach + unsigned index(Bitboard occupied) const { + + if (HasPext) + return unsigned(pext(occupied, mask)); + + if (Is64Bit) + return unsigned(((occupied & mask) * magic) >> shift); + + unsigned lo = unsigned(occupied) & unsigned(mask); + unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); + return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; + } +}; + +extern Magic RookMagics[SQUARE_NB]; +extern Magic BishopMagics[SQUARE_NB]; + +inline Bitboard square_bb(Square s) { + assert(is_ok(s)); + return SquareBB[s]; +} + + +/// Overloads of bitwise operators between a Bitboard and a Square for testing +/// whether a given bit is set in a bitboard, and for setting and clearing bits. + +inline Bitboard operator&( Bitboard b, Square s) { return b & square_bb(s); } +inline Bitboard operator|( Bitboard b, Square s) { return b | square_bb(s); } +inline Bitboard operator^( Bitboard b, Square s) { return b ^ square_bb(s); } +inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); } +inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); } + +inline Bitboard operator&(Square s, Bitboard b) { return b & s; } +inline Bitboard operator|(Square s, Bitboard b) { return b | s; } +inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } + +inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } + +constexpr bool more_than_one(Bitboard b) { + return b & (b - 1); +} + + +constexpr bool opposite_colors(Square s1, Square s2) { + return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1; +} + + +/// rank_bb() and file_bb() return a bitboard representing all the squares on +/// the given file or rank. + +constexpr Bitboard rank_bb(Rank r) { + return Rank1BB << (8 * r); +} + +constexpr Bitboard rank_bb(Square s) { + return rank_bb(rank_of(s)); +} + +constexpr Bitboard file_bb(File f) { + return FileABB << f; +} + +constexpr Bitboard file_bb(Square s) { + return file_bb(file_of(s)); +} + + +/// shift() moves a bitboard one or two steps as specified by the direction D + +template +constexpr Bitboard shift(Bitboard b) { + return D == NORTH ? b << 8 : D == SOUTH ? b >> 8 + : D == NORTH+NORTH? b <<16 : D == SOUTH+SOUTH? b >>16 + : D == EAST ? (b & ~FileHBB) << 1 : D == WEST ? (b & ~FileABB) >> 1 + : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7 + : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9 + : 0; +} + + +/// pawn_attacks_bb() returns the squares attacked by pawns of the given color +/// from the squares in the given bitboard. + +template +constexpr Bitboard pawn_attacks_bb(Bitboard b) { + return C == WHITE ? shift(b) | shift(b) + : shift(b) | shift(b); +} + +inline Bitboard pawn_attacks_bb(Color c, Square s) { + + assert(is_ok(s)); + return PawnAttacks[c][s]; +} + + +/// pawn_double_attacks_bb() returns the squares doubly attacked by pawns of the +/// given color from the squares in the given bitboard. + +template +constexpr Bitboard pawn_double_attacks_bb(Bitboard b) { + return C == WHITE ? shift(b) & shift(b) + : shift(b) & shift(b); +} + + +/// adjacent_files_bb() returns a bitboard representing all the squares on the +/// adjacent files of a given square. + +constexpr Bitboard adjacent_files_bb(Square s) { + return shift(file_bb(s)) | shift(file_bb(s)); +} + + +/// line_bb() returns a bitboard representing an entire line (from board edge +/// to board edge) that intersects the two given squares. If the given squares +/// are not on a same file/rank/diagonal, the function returns 0. For instance, +/// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. + +inline Bitboard line_bb(Square s1, Square s2) { + + assert(is_ok(s1) && is_ok(s2)); + + return LineBB[s1][s2]; +} + + +/// between_bb(s1, s2) returns a bitboard representing the squares in the semi-open +/// segment between the squares s1 and s2 (excluding s1 but including s2). If the +/// given squares are not on a same file/rank/diagonal, it returns s2. For instance, +/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but +/// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick +/// allows to generate non-king evasion moves faster: the defending piece must either +/// interpose itself to cover the check or capture the checking piece. + +inline Bitboard between_bb(Square s1, Square s2) { + + assert(is_ok(s1) && is_ok(s2)); + + return BetweenBB[s1][s2]; +} + + +/// forward_ranks_bb() returns a bitboard representing the squares on the ranks in +/// front of the given one, from the point of view of the given color. For instance, +/// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2. + +constexpr Bitboard forward_ranks_bb(Color c, Square s) { + return c == WHITE ? ~Rank1BB << 8 * relative_rank(WHITE, s) + : ~Rank8BB >> 8 * relative_rank(BLACK, s); +} + + +/// forward_file_bb() returns a bitboard representing all the squares along the +/// line in front of the given one, from the point of view of the given color. + +constexpr Bitboard forward_file_bb(Color c, Square s) { + return forward_ranks_bb(c, s) & file_bb(s); +} + + +/// pawn_attack_span() returns a bitboard representing all the squares that can +/// be attacked by a pawn of the given color when it moves along its file, starting +/// from the given square. + +constexpr Bitboard pawn_attack_span(Color c, Square s) { + return forward_ranks_bb(c, s) & adjacent_files_bb(s); +} + + +/// passed_pawn_span() returns a bitboard which can be used to test if a pawn of +/// the given color and on the given square is a passed pawn. + +constexpr Bitboard passed_pawn_span(Color c, Square s) { + return pawn_attack_span(c, s) | forward_file_bb(c, s); +} + + +/// aligned() returns true if the squares s1, s2 and s3 are aligned either on a +/// straight or on a diagonal line. + +inline bool aligned(Square s1, Square s2, Square s3) { + return line_bb(s1, s2) & s3; +} + + +/// distance() functions return the distance between x and y, defined as the +/// number of steps for a king in x to reach y. + +template inline int distance(Square x, Square y); +template<> inline int distance(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); } +template<> inline int distance(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); } +template<> inline int distance(Square x, Square y) { return SquareDistance[x][y]; } + +inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } +inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } + + +/// attacks_bb(Square) returns the pseudo attacks of the give piece type +/// assuming an empty board. + +template +inline Bitboard attacks_bb(Square s) { + + assert((Pt != PAWN) && (is_ok(s))); + + return PseudoAttacks[Pt][s]; +} + + +/// attacks_bb(Square, Bitboard) returns the attacks by the given piece +/// assuming the board is occupied according to the passed Bitboard. +/// Sliding piece attacks do not continue passed an occupied square. + +template +inline Bitboard attacks_bb(Square s, Bitboard occupied) { + + assert((Pt != PAWN) && (is_ok(s))); + + switch (Pt) + { + case BISHOP: return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; + case ROOK : return RookMagics[s].attacks[ RookMagics[s].index(occupied)]; + case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : return PseudoAttacks[Pt][s]; + } +} + +inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) { + + assert((pt != PAWN) && (is_ok(s))); + + switch (pt) + { + case BISHOP: return attacks_bb(s, occupied); + case ROOK : return attacks_bb< ROOK>(s, occupied); + case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : return PseudoAttacks[pt][s]; + } +} + + +/// popcount() counts the number of non-zero bits in a bitboard + +inline int popcount(Bitboard b) { + +#ifndef USE_POPCNT + + union { Bitboard bb; uint16_t u[4]; } v = { b }; + return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]; + +#elif defined(_MSC_VER) || defined(__INTEL_COMPILER) + + return (int)_mm_popcnt_u64(b); + +#else // Assumed gcc or compatible compiler + + return __builtin_popcountll(b); + +#endif +} + + +/// lsb() and msb() return the least/most significant bit in a non-zero bitboard + +#if defined(__GNUC__) // GCC, Clang, ICC + +inline Square lsb(Bitboard b) { + assert(b); + return Square(__builtin_ctzll(b)); +} + +inline Square msb(Bitboard b) { + assert(b); + return Square(63 ^ __builtin_clzll(b)); +} + +#elif defined(_MSC_VER) // MSVC + +#ifdef _WIN64 // MSVC, WIN64 + +inline Square lsb(Bitboard b) { + assert(b); + unsigned long idx; + _BitScanForward64(&idx, b); + return (Square) idx; +} + +inline Square msb(Bitboard b) { + assert(b); + unsigned long idx; + _BitScanReverse64(&idx, b); + return (Square) idx; +} + +#else // MSVC, WIN32 + +inline Square lsb(Bitboard b) { + assert(b); + unsigned long idx; + + if (b & 0xffffffff) { + _BitScanForward(&idx, int32_t(b)); + return Square(idx); + } else { + _BitScanForward(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } +} + +inline Square msb(Bitboard b) { + assert(b); + unsigned long idx; + + if (b >> 32) { + _BitScanReverse(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } else { + _BitScanReverse(&idx, int32_t(b)); + return Square(idx); + } +} + +#endif + +#else // Compiler is neither GCC nor MSVC compatible + +#error "Compiler not supported." + +#endif + +/// least_significant_square_bb() returns the bitboard of the least significant +/// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). + +inline Bitboard least_significant_square_bb(Bitboard b) { + assert(b); + return b & -b; +} + +/// pop_lsb() finds and clears the least significant bit in a non-zero bitboard + +inline Square pop_lsb(Bitboard& b) { + assert(b); + const Square s = lsb(b); + b &= b - 1; + return s; +} + + +/// frontmost_sq() returns the most advanced square for the given color, +/// requires a non-zero bitboard. +inline Square frontmost_sq(Color c, Bitboard b) { + assert(b); + return c == WHITE ? msb(b) : lsb(b); +} + +} // namespace Stockfish + +#endif // #ifndef BITBOARD_H_INCLUDED diff --git a/src/endgame.cpp b/src/endgame.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e773e7a91199353881648890490c58a54b5134ea --- /dev/null +++ b/src/endgame.cpp @@ -0,0 +1,747 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "bitboard.h" +#include "endgame.h" +#include "movegen.h" + +namespace Stockfish { + +namespace { + + // Used to drive the king towards the edge of the board + // in KX vs K and KQ vs KR endgames. + // Values range from 27 (center squares) to 90 (in the corners) + inline int push_to_edge(Square s) { + int rd = edge_distance(rank_of(s)), fd = edge_distance(file_of(s)); + return 90 - (7 * fd * fd / 2 + 7 * rd * rd / 2); + } + + // Used to drive the king towards A1H8 corners in KBN vs K endgames. + // Values range from 0 on A8H1 diagonal to 7 in A1H8 corners + inline int push_to_corner(Square s) { + return abs(7 - rank_of(s) - file_of(s)); + } + + // Drive a piece close to or away from another piece + inline int push_close(Square s1, Square s2) { return 140 - 20 * distance(s1, s2); } + inline int push_away(Square s1, Square s2) { return 120 - push_close(s1, s2); } + +#ifndef NDEBUG + bool verify_material(const Position& pos, Color c, Value npm, int pawnsCnt) { + return pos.non_pawn_material(c) == npm && pos.count(c) == pawnsCnt; + } +#endif + + // Map the square as if strongSide is white and strongSide's only pawn + // is on the left half of the board. + Square normalize(const Position& pos, Color strongSide, Square sq) { + + assert(pos.count(strongSide) == 1); + + if (file_of(pos.square(strongSide)) >= FILE_E) + sq = flip_file(sq); + + return strongSide == WHITE ? sq : flip_rank(sq); + } + +} // namespace + + +namespace Endgames { + + std::pair, Map> maps; + + void init() { + + add("KPK"); + add("KNNK"); + add("KBNK"); + add("KRKP"); + add("KRKB"); + add("KRKN"); + add("KQKP"); + add("KQKR"); + add("KNNKP"); + + add("KRPKR"); + add("KRPKB"); + add("KBPKB"); + add("KBPKN"); + add("KBPPKB"); + add("KRPPKRP"); + } +} + + +/// Mate with KX vs K. This function is used to evaluate positions with +/// king and plenty of material vs a lone king. It simply gives the +/// attacking side a bonus for driving the defending king towards the edge +/// of the board, and for keeping the distance between the two kings small. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); + assert(!pos.checkers()); // Eval is never called when in check + + // Stalemate detection with lone king + if (pos.side_to_move() == weakSide && !MoveList(pos).size()) + return VALUE_DRAW; + + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + + Value result = pos.non_pawn_material(strongSide) + + pos.count(strongSide) * PawnValueEg + + push_to_edge(weakKing) + + push_close(strongKing, weakKing); + + if ( pos.count(strongSide) + || pos.count(strongSide) + ||(pos.count(strongSide) && pos.count(strongSide)) + || ( (pos.pieces(strongSide, BISHOP) & ~DarkSquares) + && (pos.pieces(strongSide, BISHOP) & DarkSquares))) + result = std::min(result + VALUE_KNOWN_WIN, VALUE_TB_WIN_IN_MAX_PLY - 1); + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// Mate with KBN vs K. This is similar to KX vs K, but we have to drive the +/// defending king towards a corner square that our bishop attacks. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, KnightValueMg + BishopValueMg, 0)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); + + Square strongKing = pos.square(strongSide); + Square strongBishop = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + + // If our bishop does not attack A1/H8, we flip the enemy king square + // to drive to opposite corners (A8/H1). + + Value result = (VALUE_KNOWN_WIN + 3520) + + push_close(strongKing, weakKing) + + 420 * push_to_corner(opposite_colors(strongBishop, SQ_A1) ? flip_file(weakKing) : weakKing); + + assert(abs(result) < VALUE_TB_WIN_IN_MAX_PLY); + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KP vs K. This endgame is evaluated with the help of a bitbase +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, VALUE_ZERO, 1)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); + + // Assume strongSide is white and the pawn is on files A-D + Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); + Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); + Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); + + Color us = strongSide == pos.side_to_move() ? WHITE : BLACK; + + if (!Bitbases::probe(strongKing, strongPawn, weakKing, us)) + return VALUE_DRAW; + + Value result = VALUE_KNOWN_WIN + PawnValueEg + Value(rank_of(strongPawn)); + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KR vs KP. This is a somewhat tricky endgame to evaluate precisely without +/// a bitbase. The function below returns drawish scores when the pawn is +/// far advanced with support of the king, while the attacking king is far +/// away. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 0)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); + + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + Square strongRook = pos.square(strongSide); + Square weakPawn = pos.square(weakSide); + Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8)); + Value result; + + // If the stronger side's king is in front of the pawn, it's a win + if (forward_file_bb(strongSide, strongKing) & weakPawn) + result = RookValueEg - distance(strongKing, weakPawn); + + // If the weaker side's king is too far from the pawn and the rook, + // it's a win. + else if ( distance(weakKing, weakPawn) >= 3 + (pos.side_to_move() == weakSide) + && distance(weakKing, strongRook) >= 3) + result = RookValueEg - distance(strongKing, weakPawn); + + // If the pawn is far advanced and supported by the defending king, + // the position is drawish + else if ( relative_rank(strongSide, weakKing) <= RANK_3 + && distance(weakKing, weakPawn) == 1 + && relative_rank(strongSide, strongKing) >= RANK_4 + && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide)) + result = Value(80) - 8 * distance(strongKing, weakPawn); + + else + result = Value(200) - 8 * ( distance(strongKing, weakPawn + pawn_push(weakSide)) + - distance(weakKing, weakPawn + pawn_push(weakSide)) + - distance(weakPawn, queeningSquare)); + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KR vs KB. This is very simple, and always returns drawish scores. The +/// score is slightly bigger when the defending king is close to the edge. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 0)); + assert(verify_material(pos, weakSide, BishopValueMg, 0)); + + Value result = Value(push_to_edge(pos.square(weakSide))); + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KR vs KN. The attacking side has slightly better winning chances than +/// in KR vs KB, particularly if the king and the knight are far apart. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 0)); + assert(verify_material(pos, weakSide, KnightValueMg, 0)); + + Square weakKing = pos.square(weakSide); + Square weakKnight = pos.square(weakSide); + Value result = Value(push_to_edge(weakKing) + push_away(weakKing, weakKnight)); + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KQ vs KP. In general, this is a win for the stronger side, but there are a +/// few important exceptions. A pawn on 7th rank and on the A,C,F or H files +/// with a king positioned next to it can be a draw, so in that case, we only +/// use the distance between the kings. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, QueenValueMg, 0)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); + + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + Square weakPawn = pos.square(weakSide); + + Value result = Value(push_close(strongKing, weakKing)); + + if ( relative_rank(weakSide, weakPawn) != RANK_7 + || distance(weakKing, weakPawn) != 1 + || ((FileBBB | FileDBB | FileEBB | FileGBB) & weakPawn)) + result += QueenValueEg - PawnValueEg; + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KQ vs KR. This is almost identical to KX vs K: we give the attacking +/// king a bonus for having the kings close together, and for forcing the +/// defending king towards the edge. If we also take care to avoid null move for +/// the defending side in the search, this is usually sufficient to win KQ vs KR. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, QueenValueMg, 0)); + assert(verify_material(pos, weakSide, RookValueMg, 0)); + + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + + Value result = QueenValueEg + - RookValueEg + + push_to_edge(weakKing) + + push_close(strongKing, weakKing); + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// KNN vs KP. Very drawish, but there are some mate opportunities if we can +/// press the weakSide King to a corner before the pawn advances too much. +template<> +Value Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, 2 * KnightValueMg, 0)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); + + Square weakKing = pos.square(weakSide); + Square weakPawn = pos.square(weakSide); + + Value result = PawnValueEg + + 2 * push_to_edge(weakKing) + - 10 * relative_rank(weakSide, weakPawn); + + return strongSide == pos.side_to_move() ? result : -result; +} + + +/// Some cases of trivial draws +template<> Value Endgame::operator()(const Position&) const { return VALUE_DRAW; } + + +/// KB and one or more pawns vs K. It checks for draws with rook pawns and +/// a bishop of the wrong color. If such a draw is detected, SCALE_FACTOR_DRAW +/// is returned. If not, the return value is SCALE_FACTOR_NONE, i.e. no scaling +/// will be used. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(pos.non_pawn_material(strongSide) == BishopValueMg); + assert(pos.count(strongSide) >= 1); + + // No assertions about the material of weakSide, because we want draws to + // be detected even when the weaker side has some pawns. + + Bitboard strongPawns = pos.pieces(strongSide, PAWN); + Bitboard allPawns = pos.pieces(PAWN); + + Square strongBishop = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + Square strongKing = pos.square(strongSide); + + // All strongSide pawns are on a single rook file? + if (!(strongPawns & ~FileABB) || !(strongPawns & ~FileHBB)) + { + Square queeningSquare = relative_square(strongSide, make_square(file_of(lsb(strongPawns)), RANK_8)); + + if ( opposite_colors(queeningSquare, strongBishop) + && distance(queeningSquare, weakKing) <= 1) + return SCALE_FACTOR_DRAW; + } + + // If all the pawns are on the same B or G file, then it's potentially a draw + if ((!(allPawns & ~FileBBB) || !(allPawns & ~FileGBB)) + && pos.non_pawn_material(weakSide) == 0 + && pos.count(weakSide) >= 1) + { + // Get the least advanced weakSide pawn + Square weakPawn = frontmost_sq(strongSide, pos.pieces(weakSide, PAWN)); + + // There's potential for a draw if our pawn is blocked on the 7th rank, + // the bishop cannot attack it or they only have one pawn left. + if ( relative_rank(strongSide, weakPawn) == RANK_7 + && (strongPawns & (weakPawn + pawn_push(weakSide))) + && (opposite_colors(strongBishop, weakPawn) || !more_than_one(strongPawns))) + { + int strongKingDist = distance(weakPawn, strongKing); + int weakKingDist = distance(weakPawn, weakKing); + + // It's a draw if the weak king is on its back two ranks, within 2 + // squares of the blocking pawn and the strong king is not + // closer. (I think this rule only fails in practically + // unreachable positions such as 5k1K/6p1/6P1/8/8/3B4/8/8 w + // and positions where qsearch will immediately correct the + // problem such as 8/4k1p1/6P1/1K6/3B4/8/8/8 w). + if ( relative_rank(strongSide, weakKing) >= RANK_7 + && weakKingDist <= 2 + && weakKingDist <= strongKingDist) + return SCALE_FACTOR_DRAW; + } + } + + return SCALE_FACTOR_NONE; +} + + +/// KQ vs KR and one or more pawns. It tests for fortress draws with a rook on +/// the third rank defended by a pawn. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, QueenValueMg, 0)); + assert(pos.count(weakSide) == 1); + assert(pos.count(weakSide) >= 1); + + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + Square weakRook = pos.square(weakSide); + + if ( relative_rank(weakSide, weakKing) <= RANK_2 + && relative_rank(weakSide, strongKing) >= RANK_4 + && relative_rank(weakSide, weakRook) == RANK_3 + && ( pos.pieces(weakSide, PAWN) + & attacks_bb(weakKing) + & pawn_attacks_bb(strongSide, weakRook))) + return SCALE_FACTOR_DRAW; + + return SCALE_FACTOR_NONE; +} + + +/// KRP vs KR. This function knows a handful of the most important classes of +/// drawn positions, but is far from perfect. It would probably be a good idea +/// to add more knowledge in the future. +/// +/// It would also be nice to rewrite the actual code for this function, +/// which is mostly copied from Glaurung 1.x, and isn't very pretty. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 1)); + assert(verify_material(pos, weakSide, RookValueMg, 0)); + + // Assume strongSide is white and the pawn is on files A-D + Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); + Square strongRook = normalize(pos, strongSide, pos.square(strongSide)); + Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); + Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); + Square weakRook = normalize(pos, strongSide, pos.square(weakSide)); + + File pawnFile = file_of(strongPawn); + Rank pawnRank = rank_of(strongPawn); + Square queeningSquare = make_square(pawnFile, RANK_8); + int tempo = (pos.side_to_move() == strongSide); + + // If the pawn is not too far advanced and the defending king defends the + // queening square, use the third-rank defence. + if ( pawnRank <= RANK_5 + && distance(weakKing, queeningSquare) <= 1 + && strongKing <= SQ_H5 + && (rank_of(weakRook) == RANK_6 || (pawnRank <= RANK_3 && rank_of(strongRook) != RANK_6))) + return SCALE_FACTOR_DRAW; + + // The defending side saves a draw by checking from behind in case the pawn + // has advanced to the 6th rank with the king behind. + if ( pawnRank == RANK_6 + && distance(weakKing, queeningSquare) <= 1 + && rank_of(strongKing) + tempo <= RANK_6 + && (rank_of(weakRook) == RANK_1 || (!tempo && distance(weakRook, strongPawn) >= 3))) + return SCALE_FACTOR_DRAW; + + if ( pawnRank >= RANK_6 + && weakKing == queeningSquare + && rank_of(weakRook) == RANK_1 + && (!tempo || distance(strongKing, strongPawn) >= 2)) + return SCALE_FACTOR_DRAW; + + // White pawn on a7 and rook on a8 is a draw if black's king is on g7 or h7 + // and the black rook is behind the pawn. + if ( strongPawn == SQ_A7 + && strongRook == SQ_A8 + && (weakKing == SQ_H7 || weakKing == SQ_G7) + && file_of(weakRook) == FILE_A + && (rank_of(weakRook) <= RANK_3 || file_of(strongKing) >= FILE_D || rank_of(strongKing) <= RANK_5)) + return SCALE_FACTOR_DRAW; + + // If the defending king blocks the pawn and the attacking king is too far + // away, it's a draw. + if ( pawnRank <= RANK_5 + && weakKing == strongPawn + NORTH + && distance(strongKing, strongPawn) - tempo >= 2 + && distance(strongKing, weakRook) - tempo >= 2) + return SCALE_FACTOR_DRAW; + + // Pawn on the 7th rank supported by the rook from behind usually wins if the + // attacking king is closer to the queening square than the defending king, + // and the defending king cannot gain tempi by threatening the attacking rook. + if ( pawnRank == RANK_7 + && pawnFile != FILE_A + && file_of(strongRook) == pawnFile + && strongRook != queeningSquare + && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo) + && (distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo)) + return ScaleFactor(SCALE_FACTOR_MAX - 2 * distance(strongKing, queeningSquare)); + + // Similar to the above, but with the pawn further back + if ( pawnFile != FILE_A + && file_of(strongRook) == pawnFile + && strongRook < strongPawn + && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo) + && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn + NORTH) - 2 + tempo) + && ( distance(weakKing, strongRook) + tempo >= 3 + || ( distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo + && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn) + tempo)))) + return ScaleFactor( SCALE_FACTOR_MAX + - 8 * distance(strongPawn, queeningSquare) + - 2 * distance(strongKing, queeningSquare)); + + // If the pawn is not far advanced and the defending king is somewhere in + // the pawn's path, it's probably a draw. + if (pawnRank <= RANK_4 && weakKing > strongPawn) + { + if (file_of(weakKing) == file_of(strongPawn)) + return ScaleFactor(10); + if ( distance(weakKing, strongPawn) == 1 + && distance(strongKing, weakKing) > 2) + return ScaleFactor(24 - 2 * distance(strongKing, weakKing)); + } + return SCALE_FACTOR_NONE; +} + +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 1)); + assert(verify_material(pos, weakSide, BishopValueMg, 0)); + + // Test for a rook pawn + if (pos.pieces(PAWN) & (FileABB | FileHBB)) + { + Square weakKing = pos.square(weakSide); + Square weakBishop = pos.square(weakSide); + Square strongKing = pos.square(strongSide); + Square strongPawn = pos.square(strongSide); + Rank pawnRank = relative_rank(strongSide, strongPawn); + Direction push = pawn_push(strongSide); + + // If the pawn is on the 5th rank and the pawn (currently) is on + // the same color square as the bishop then there is a chance of + // a fortress. Depending on the king position give a moderate + // reduction or a stronger one if the defending king is near the + // corner but not trapped there. + if (pawnRank == RANK_5 && !opposite_colors(weakBishop, strongPawn)) + { + int d = distance(strongPawn + 3 * push, weakKing); + + if (d <= 2 && !(d == 0 && weakKing == strongKing + 2 * push)) + return ScaleFactor(24); + else + return ScaleFactor(48); + } + + // When the pawn has moved to the 6th rank we can be fairly sure + // it's drawn if the bishop attacks the square in front of the + // pawn from a reasonable distance and the defending king is near + // the corner + if ( pawnRank == RANK_6 + && distance(strongPawn + 2 * push, weakKing) <= 1 + && (attacks_bb(weakBishop) & (strongPawn + push)) + && distance(weakBishop, strongPawn) >= 2) + return ScaleFactor(8); + } + + return SCALE_FACTOR_NONE; +} + +/// KRPP vs KRP. There is just a single rule: if the stronger side has no passed +/// pawns and the defending king is actively placed, the position is drawish. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, RookValueMg, 2)); + assert(verify_material(pos, weakSide, RookValueMg, 1)); + + Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN)); + Square strongPawn2 = msb(pos.pieces(strongSide, PAWN)); + Square weakKing = pos.square(weakSide); + + // Does the stronger side have a passed pawn? + if (pos.pawn_passed(strongSide, strongPawn1) || pos.pawn_passed(strongSide, strongPawn2)) + return SCALE_FACTOR_NONE; + + Rank pawnRank = std::max(relative_rank(strongSide, strongPawn1), relative_rank(strongSide, strongPawn2)); + + if ( distance(weakKing, strongPawn1) <= 1 + && distance(weakKing, strongPawn2) <= 1 + && relative_rank(strongSide, weakKing) > pawnRank) + { + assert(pawnRank > RANK_1 && pawnRank < RANK_7); + return ScaleFactor(7 * pawnRank); + } + return SCALE_FACTOR_NONE; +} + + +/// K and two or more pawns vs K. There is just a single rule here: if all pawns +/// are on the same rook file and are blocked by the defending king, it's a draw. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(pos.non_pawn_material(strongSide) == VALUE_ZERO); + assert(pos.count(strongSide) >= 2); + assert(verify_material(pos, weakSide, VALUE_ZERO, 0)); + + Square weakKing = pos.square(weakSide); + Bitboard strongPawns = pos.pieces(strongSide, PAWN); + + // If all pawns are ahead of the king on a single rook file, it's a draw. + if ( !(strongPawns & ~(FileABB | FileHBB)) + && !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) + return SCALE_FACTOR_DRAW; + + return SCALE_FACTOR_NONE; +} + + +/// KBP vs KB. There are two rules: if the defending king is somewhere along the +/// path of the pawn, and the square of the king is not of the same color as the +/// stronger side's bishop, it's a draw. If the two bishops have opposite color, +/// it's almost always a draw. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, BishopValueMg, 1)); + assert(verify_material(pos, weakSide, BishopValueMg, 0)); + + Square strongPawn = pos.square(strongSide); + Square strongBishop = pos.square(strongSide); + Square weakBishop = pos.square(weakSide); + Square weakKing = pos.square(weakSide); + + // Case 1: Defending king blocks the pawn, and cannot be driven away + if ( (forward_file_bb(strongSide, strongPawn) & weakKing) + && ( opposite_colors(weakKing, strongBishop) + || relative_rank(strongSide, weakKing) <= RANK_6)) + return SCALE_FACTOR_DRAW; + + // Case 2: Opposite colored bishops + if (opposite_colors(strongBishop, weakBishop)) + return SCALE_FACTOR_DRAW; + + return SCALE_FACTOR_NONE; +} + + +/// KBPP vs KB. It detects a few basic draws with opposite-colored bishops +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, BishopValueMg, 2)); + assert(verify_material(pos, weakSide, BishopValueMg, 0)); + + Square strongBishop = pos.square(strongSide); + Square weakBishop = pos.square(weakSide); + + if (!opposite_colors(strongBishop, weakBishop)) + return SCALE_FACTOR_NONE; + + Square weakKing = pos.square(weakSide); + Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN)); + Square strongPawn2 = msb(pos.pieces(strongSide, PAWN)); + Square blockSq1, blockSq2; + + if (relative_rank(strongSide, strongPawn1) > relative_rank(strongSide, strongPawn2)) + { + blockSq1 = strongPawn1 + pawn_push(strongSide); + blockSq2 = make_square(file_of(strongPawn2), rank_of(strongPawn1)); + } + else + { + blockSq1 = strongPawn2 + pawn_push(strongSide); + blockSq2 = make_square(file_of(strongPawn1), rank_of(strongPawn2)); + } + + switch (distance(strongPawn1, strongPawn2)) + { + case 0: + // Both pawns are on the same file. It's an easy draw if the defender firmly + // controls some square in the frontmost pawn's path. + if ( file_of(weakKing) == file_of(blockSq1) + && relative_rank(strongSide, weakKing) >= relative_rank(strongSide, blockSq1) + && opposite_colors(weakKing, strongBishop)) + return SCALE_FACTOR_DRAW; + else + return SCALE_FACTOR_NONE; + + case 1: + // Pawns on adjacent files. It's a draw if the defender firmly controls the + // square in front of the frontmost pawn's path, and the square diagonally + // behind this square on the file of the other pawn. + if ( weakKing == blockSq1 + && opposite_colors(weakKing, strongBishop) + && ( weakBishop == blockSq2 + || (attacks_bb(blockSq2, pos.pieces()) & pos.pieces(weakSide, BISHOP)) + || distance(strongPawn1, strongPawn2) >= 2)) + return SCALE_FACTOR_DRAW; + + else if ( weakKing == blockSq2 + && opposite_colors(weakKing, strongBishop) + && ( weakBishop == blockSq1 + || (attacks_bb(blockSq1, pos.pieces()) & pos.pieces(weakSide, BISHOP)))) + return SCALE_FACTOR_DRAW; + else + return SCALE_FACTOR_NONE; + + default: + // The pawns are not on the same file or adjacent files. No scaling. + return SCALE_FACTOR_NONE; + } +} + + +/// KBP vs KN. There is a single rule: if the defending king is somewhere along +/// the path of the pawn, and the square of the king is not of the same color as +/// the stronger side's bishop, it's a draw. +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, BishopValueMg, 1)); + assert(verify_material(pos, weakSide, KnightValueMg, 0)); + + Square strongPawn = pos.square(strongSide); + Square strongBishop = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + + if ( file_of(weakKing) == file_of(strongPawn) + && relative_rank(strongSide, strongPawn) < relative_rank(strongSide, weakKing) + && ( opposite_colors(weakKing, strongBishop) + || relative_rank(strongSide, weakKing) <= RANK_6)) + return SCALE_FACTOR_DRAW; + + return SCALE_FACTOR_NONE; +} + + +/// KP vs KP. This is done by removing the weakest side's pawn and probing the +/// KP vs K bitbase: if the weakest side has a draw without the pawn, it probably +/// has at least a draw with the pawn as well. The exception is when the stronger +/// side's pawn is far advanced and not on a rook file; in this case it is often +/// possible to win (e.g. 8/4k3/3p4/3P4/6K1/8/8/8 w - - 0 1). +template<> +ScaleFactor Endgame::operator()(const Position& pos) const { + + assert(verify_material(pos, strongSide, VALUE_ZERO, 1)); + assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); + + // Assume strongSide is white and the pawn is on files A-D + Square strongKing = normalize(pos, strongSide, pos.square(strongSide)); + Square weakKing = normalize(pos, strongSide, pos.square(weakSide)); + Square strongPawn = normalize(pos, strongSide, pos.square(strongSide)); + + Color us = strongSide == pos.side_to_move() ? WHITE : BLACK; + + // If the pawn has advanced to the fifth rank or further, and is not a + // rook pawn, it's too dangerous to assume that it's at least a draw. + if (rank_of(strongPawn) >= RANK_5 && file_of(strongPawn) != FILE_A) + return SCALE_FACTOR_NONE; + + // Probe the KPK bitbase with the weakest side's pawn removed. If it's a draw, + // it's probably at least a draw even with the pawn. + return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW; +} + +} // namespace Stockfish diff --git a/src/endgame.h b/src/endgame.h new file mode 100644 index 0000000000000000000000000000000000000000..e79f696fa3a091dd8009d78a433228c429300acc --- /dev/null +++ b/src/endgame.h @@ -0,0 +1,126 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef ENDGAME_H_INCLUDED +#define ENDGAME_H_INCLUDED + +#include +#include +#include +#include +#include + +#include "position.h" +#include "types.h" + +namespace Stockfish { + +/// EndgameCode lists all supported endgame functions by corresponding codes + +enum EndgameCode { + + EVALUATION_FUNCTIONS, + KNNK, // KNN vs K + KNNKP, // KNN vs KP + KXK, // Generic "mate lone king" eval + KBNK, // KBN vs K + KPK, // KP vs K + KRKP, // KR vs KP + KRKB, // KR vs KB + KRKN, // KR vs KN + KQKP, // KQ vs KP + KQKR, // KQ vs KR + + SCALING_FUNCTIONS, + KBPsK, // KB and pawns vs K + KQKRPs, // KQ vs KR and pawns + KRPKR, // KRP vs KR + KRPKB, // KRP vs KB + KRPPKRP, // KRPP vs KRP + KPsK, // K and pawns vs K + KBPKB, // KBP vs KB + KBPPKB, // KBPP vs KB + KBPKN, // KBP vs KN + KPKP // KP vs KP +}; + + +/// Endgame functions can be of two types depending on whether they return a +/// Value or a ScaleFactor. + +template using +eg_type = typename std::conditional<(E < SCALING_FUNCTIONS), Value, ScaleFactor>::type; + + +/// Base and derived functors for endgame evaluation and scaling functions + +template +struct EndgameBase { + + explicit EndgameBase(Color c) : strongSide(c), weakSide(~c) {} + virtual ~EndgameBase() = default; + virtual T operator()(const Position&) const = 0; + + const Color strongSide, weakSide; +}; + + +template> +struct Endgame : public EndgameBase { + + explicit Endgame(Color c) : EndgameBase(c) {} + T operator()(const Position&) const override; +}; + + +/// The Endgames namespace handles the pointers to endgame evaluation and scaling +/// base objects in two std::map. We use polymorphism to invoke the actual +/// endgame function by calling its virtual operator(). + +namespace Endgames { + + template using Ptr = std::unique_ptr>; + template using Map = std::unordered_map>; + + extern std::pair, Map> maps; + + void init(); + + template + Map& map() { + return std::get::value>(maps); + } + + template> + void add(const std::string& code) { + + StateInfo st; + map()[Position().set(code, WHITE, &st).material_key()] = Ptr(new Endgame(WHITE)); + map()[Position().set(code, BLACK, &st).material_key()] = Ptr(new Endgame(BLACK)); + } + + template + const EndgameBase* probe(Key key) { + auto it = map().find(key); + return it != map().end() ? it->second.get() : nullptr; + } +} + +} // namespace Stockfish + +#endif // #ifndef ENDGAME_H_INCLUDED diff --git a/src/evaluate.cpp b/src/evaluate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..87412b81b0b141392ebbc7d700a560303e85d20c --- /dev/null +++ b/src/evaluate.cpp @@ -0,0 +1,1171 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include // For std::memset +#include +#include +#include +#include +#include +#include + +#include "bitboard.h" +#include "evaluate.h" +#include "material.h" +#include "misc.h" +#include "pawns.h" +#include "thread.h" +#include "timeman.h" +#include "uci.h" +#include "incbin/incbin.h" + + +// Macro to embed the default efficiently updatable neural network (NNUE) file +// data in the engine binary (using incbin.h, by Dale Weiler). +// This macro invocation will declare the following three variables +// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data +// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end +// const unsigned int gEmbeddedNNUESize; // the size of the embedded file +// Note that this does not work in Microsoft Visual Studio. +#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) + INCBIN(EmbeddedNNUE, EvalFileDefaultName); +#else + const unsigned char gEmbeddedNNUEData[1] = {0x0}; + const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; + const unsigned int gEmbeddedNNUESize = 1; +#endif + + +using namespace std; + +namespace Stockfish { + +namespace Eval { + + bool useNNUE; + string currentEvalFileName = "None"; + + /// NNUE::init() tries to load a NNUE network at startup time, or when the engine + /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" + /// The name of the NNUE network is always retrieved from the EvalFile option. + /// We search the given network in three locations: internally (the default + /// network may be embedded in the binary), in the active working directory and + /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY + /// variable to have the engine search in a special directory in their distro. + + void NNUE::init() { + + useNNUE = Options["Use NNUE"]; + if (!useNNUE) + return; + + string eval_file = string(Options["EvalFile"]); + if (eval_file.empty()) + eval_file = EvalFileDefaultName; + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define stringify(x) stringify2(x) + vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (currentEvalFileName != eval_file) + { + if (directory != "") + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + currentEvalFileName = eval_file; + } + + if (directory == "" && eval_file == EvalFileDefaultName) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer : public basic_streambuf { + public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + }; + + MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + (void) gEmbeddedNNUEEnd; // Silence warning on unused variable + + istream stream(&buffer); + if (load_eval(eval_file, stream)) + currentEvalFileName = eval_file; + } + } + } + + /// NNUE::verify() verifies that the last net used was loaded successfully + void NNUE::verify() { + + string eval_file = string(Options["EvalFile"]); + if (eval_file.empty()) + eval_file = EvalFileDefaultName; + + if (useNNUE && currentEvalFileName != eval_file) + { + + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); + } + + if (useNNUE) + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; + else + sync_cout << "info string classical evaluation enabled" << sync_endl; + } +} + +namespace Trace { + + enum Tracing { NO_TRACE, TRACE }; + + enum Term { // The first 8 entries are reserved for PieceType + MATERIAL = 8, IMBALANCE, MOBILITY, THREAT, PASSED, SPACE, WINNABLE, TOTAL, TERM_NB + }; + + Score scores[TERM_NB][COLOR_NB]; + + double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; } + + void add(int idx, Color c, Score s) { + scores[idx][c] = s; + } + + void add(int idx, Score w, Score b = SCORE_ZERO) { + scores[idx][WHITE] = w; + scores[idx][BLACK] = b; + } + + std::ostream& operator<<(std::ostream& os, Score s) { + os << std::setw(5) << to_cp(mg_value(s)) << " " + << std::setw(5) << to_cp(eg_value(s)); + return os; + } + + std::ostream& operator<<(std::ostream& os, Term t) { + + if (t == MATERIAL || t == IMBALANCE || t == WINNABLE || t == TOTAL) + os << " ---- ----" << " | " << " ---- ----"; + else + os << scores[t][WHITE] << " | " << scores[t][BLACK]; + + os << " | " << scores[t][WHITE] - scores[t][BLACK] << " |\n"; + return os; + } +} + +using namespace Trace; + +namespace { + + // Threshold for lazy and space evaluation + constexpr Value LazyThreshold1 = Value(3631); + constexpr Value LazyThreshold2 = Value(2084); + constexpr Value SpaceThreshold = Value(11551); + + // KingAttackWeights[PieceType] contains king attack weights by piece type + constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 76, 46, 45, 14 }; + + // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type, + // higher if multiple safe checks are possible for that piece type. + constexpr int SafeCheck[][2] = { + {}, {}, {805, 1292}, {650, 984}, {1071, 1886}, {730, 1128} + }; + +#define S(mg, eg) make_score(mg, eg) + + // MobilityBonus[PieceType-2][attacked] contains bonuses for middle and end game, + // indexed by piece type and number of attacked squares in the mobility area. + constexpr Score MobilityBonus[][32] = { + { S(-62,-79), S(-53,-57), S(-12,-31), S( -3,-17), S( 3, 7), S( 12, 13), // Knight + S( 21, 16), S( 28, 21), S( 37, 26) }, + { S(-47,-59), S(-20,-25), S( 14, -8), S( 29, 12), S( 39, 21), S( 53, 40), // Bishop + S( 53, 56), S( 60, 58), S( 62, 65), S( 69, 72), S( 78, 78), S( 83, 87), + S( 91, 88), S( 96, 98) }, + { S(-60,-82), S(-24,-15), S( 0, 17) ,S( 3, 43), S( 4, 72), S( 14,100), // Rook + S( 20,102), S( 30,122), S( 41,133), S(41 ,139), S( 41,153), S( 45,160), + S( 57,165), S( 58,170), S( 67,175) }, + { S(-29,-49), S(-16,-29), S( -8, -8), S( -8, 17), S( 18, 39), S( 25, 54), // Queen + S( 23, 59), S( 37, 73), S( 41, 76), S( 54, 95), S( 65, 95) ,S( 68,101), + S( 69,124), S( 70,128), S( 70,132), S( 70,133) ,S( 71,136), S( 72,140), + S( 74,147), S( 76,149), S( 90,153), S(104,169), S(105,171), S(106,171), + S(112,178), S(114,185), S(114,187), S(119,221) } + }; + + // BishopPawns[distance from edge] contains a file-dependent penalty for pawns on + // squares of the same color as our bishop. + constexpr Score BishopPawns[int(FILE_NB) / 2] = { + S(3, 8), S(3, 9), S(2, 7), S(3, 7) + }; + + // KingProtector[knight/bishop] contains penalty for each distance unit to own king + constexpr Score KingProtector[] = { S(9, 9), S(7, 9) }; + + // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a + // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. + constexpr Score Outpost[] = { S(54, 34), S(31, 25) }; + + // PassedRank[Rank] contains a bonus according to the rank of a passed pawn + constexpr Score PassedRank[RANK_NB] = { + S(0, 0), S(2, 38), S(15, 36), S(22, 50), S(64, 81), S(166, 184), S(284, 269) + }; + + constexpr Score RookOnClosedFile = S(10, 5); + constexpr Score RookOnOpenFile[] = { S(18, 8), S(49, 26) }; + + // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to + // which piece type attacks which one. Attacks on lesser pieces which are + // pawn-defended are not considered. + constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { + S(0, 0), S(6, 37), S(64, 50), S(82, 57), S(103, 130), S(81, 163) + }; + + constexpr Score ThreatByRook[PIECE_TYPE_NB] = { + S(0, 0), S(3, 44), S(36, 71), S(44, 59), S(0, 39), S(60, 39) + }; + + constexpr Value CorneredBishop = Value(50); + + // Assorted bonuses and penalties + constexpr Score UncontestedOutpost = S( 0, 10); + constexpr Score BishopOnKingRing = S( 24, 0); + constexpr Score BishopXRayPawns = S( 4, 5); + constexpr Score FlankAttacks = S( 8, 0); + constexpr Score Hanging = S( 72, 40); + constexpr Score KnightOnQueen = S( 16, 11); + constexpr Score LongDiagonalBishop = S( 45, 0); + constexpr Score MinorBehindPawn = S( 18, 3); + constexpr Score PassedFile = S( 13, 8); + constexpr Score PawnlessFlank = S( 19, 97); + constexpr Score ReachableOutpost = S( 33, 19); + constexpr Score RestrictedPiece = S( 6, 7); + constexpr Score RookOnKingRing = S( 16, 0); + constexpr Score SliderOnQueen = S( 62, 21); + constexpr Score ThreatByKing = S( 24, 87); + constexpr Score ThreatByPawnPush = S( 48, 39); + constexpr Score ThreatBySafePawn = S(167, 99); + constexpr Score TrappedRook = S( 55, 13); + constexpr Score WeakQueenProtection = S( 14, 0); + constexpr Score WeakQueen = S( 57, 19); + + +#undef S + + // Evaluation class computes and stores attacks tables and other working data + template + class Evaluation { + + public: + Evaluation() = delete; + explicit Evaluation(const Position& p) : pos(p) {} + Evaluation& operator=(const Evaluation&) = delete; + Value value(); + + private: + template void initialize(); + template Score pieces(); + template Score king() const; + template Score threats() const; + template Score passed() const; + template Score space() const; + Value winnable(Score score) const; + + const Position& pos; + Material::Entry* me; + Pawns::Entry* pe; + Bitboard mobilityArea[COLOR_NB]; + Score mobility[COLOR_NB] = { SCORE_ZERO, SCORE_ZERO }; + + // attackedBy[color][piece type] is a bitboard representing all squares + // attacked by a given color and piece type. Special "piece types" which + // is also calculated is ALL_PIECES. + Bitboard attackedBy[COLOR_NB][PIECE_TYPE_NB]; + + // attackedBy2[color] are the squares attacked by at least 2 units of a given + // color, including x-rays. But diagonal x-rays through pawns are not computed. + Bitboard attackedBy2[COLOR_NB]; + + // kingRing[color] are the squares adjacent to the king plus some other + // very near squares, depending on king position. + Bitboard kingRing[COLOR_NB]; + + // kingAttackersCount[color] is the number of pieces of the given color + // which attack a square in the kingRing of the enemy king. + int kingAttackersCount[COLOR_NB]; + + // kingAttackersWeight[color] is the sum of the "weights" of the pieces of + // the given color which attack a square in the kingRing of the enemy king. + // The weights of the individual piece types are given by the elements in + // the KingAttackWeights array. + int kingAttackersWeight[COLOR_NB]; + + // kingAttacksCount[color] is the number of attacks by the given color to + // squares directly adjacent to the enemy king. Pieces which attack more + // than one square are counted multiple times. For instance, if there is + // a white knight on g5 and black's king is on g8, this white knight adds 2 + // to kingAttacksCount[WHITE]. + int kingAttacksCount[COLOR_NB]; + }; + + + // Evaluation::initialize() computes king and pawn attacks, and the king ring + // bitboard for a given color. This is done at the beginning of the evaluation. + + template template + void Evaluation::initialize() { + + constexpr Color Them = ~Us; + constexpr Direction Up = pawn_push(Us); + constexpr Direction Down = -Up; + constexpr Bitboard LowRanks = (Us == WHITE ? Rank2BB | Rank3BB : Rank7BB | Rank6BB); + + const Square ksq = pos.square(Us); + + Bitboard dblAttackByPawn = pawn_double_attacks_bb(pos.pieces(Us, PAWN)); + + // Find our pawns that are blocked or on the first two ranks + Bitboard b = pos.pieces(Us, PAWN) & (shift(pos.pieces()) | LowRanks); + + // Squares occupied by those pawns, by our king or queen, by blockers to attacks on our king + // or controlled by enemy pawns are excluded from the mobility area. + mobilityArea[Us] = ~(b | pos.pieces(Us, KING, QUEEN) | pos.blockers_for_king(Us) | pe->pawn_attacks(Them)); + + // Initialize attackedBy[] for king and pawns + attackedBy[Us][KING] = attacks_bb(ksq); + attackedBy[Us][PAWN] = pe->pawn_attacks(Us); + attackedBy[Us][ALL_PIECES] = attackedBy[Us][KING] | attackedBy[Us][PAWN]; + attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); + + // Init our king safety tables + Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G), + std::clamp(rank_of(ksq), RANK_2, RANK_7)); + kingRing[Us] = attacks_bb(s) | s; + + kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); + kingAttacksCount[Them] = kingAttackersWeight[Them] = 0; + + // Remove from kingRing[] the squares defended by two pawns + kingRing[Us] &= ~dblAttackByPawn; + } + + + // Evaluation::pieces() scores pieces of a given color and type + + template template + Score Evaluation::pieces() { + + constexpr Color Them = ~Us; + constexpr Direction Down = -pawn_push(Us); + constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB + : Rank5BB | Rank4BB | Rank3BB); + Bitboard b1 = pos.pieces(Us, Pt); + Bitboard b, bb; + Score score = SCORE_ZERO; + + attackedBy[Us][Pt] = 0; + + while (b1) + { + Square s = pop_lsb(b1); + + // Find attacked squares, including x-ray attacks for bishops and rooks + b = Pt == BISHOP ? attacks_bb(s, pos.pieces() ^ pos.pieces(QUEEN)) + : Pt == ROOK ? attacks_bb< ROOK>(s, pos.pieces() ^ pos.pieces(QUEEN) ^ pos.pieces(Us, ROOK)) + : attacks_bb(s, pos.pieces()); + + if (pos.blockers_for_king(Us) & s) + b &= line_bb(pos.square(Us), s); + + attackedBy2[Us] |= attackedBy[Us][ALL_PIECES] & b; + attackedBy[Us][Pt] |= b; + attackedBy[Us][ALL_PIECES] |= b; + + if (b & kingRing[Them]) + { + kingAttackersCount[Us]++; + kingAttackersWeight[Us] += KingAttackWeights[Pt]; + kingAttacksCount[Us] += popcount(b & attackedBy[Them][KING]); + } + + else if (Pt == ROOK && (file_bb(s) & kingRing[Them])) + score += RookOnKingRing; + + else if (Pt == BISHOP && (attacks_bb(s, pos.pieces(PAWN)) & kingRing[Them])) + score += BishopOnKingRing; + + int mob = popcount(b & mobilityArea[Us]); + mobility[Us] += MobilityBonus[Pt - 2][mob]; + + if (Pt == BISHOP || Pt == KNIGHT) + { + // Bonus if the piece is on an outpost square or can reach one + // Bonus for knights (UncontestedOutpost) if few relevant targets + bb = OutpostRanks & (attackedBy[Us][PAWN] | shift(pos.pieces(PAWN))) + & ~pe->pawn_attacks_span(Them); + Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN); + + if ( Pt == KNIGHT + && bb & s & ~CenterFiles // on a side outpost + && !(b & targets) // no relevant attacks + && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide)))) + score += UncontestedOutpost * popcount(pos.pieces(PAWN) & (s & QueenSide ? QueenSide : KingSide)); + else if (bb & s) + score += Outpost[Pt == BISHOP]; + else if (Pt == KNIGHT && bb & b & ~pos.pieces(Us)) + score += ReachableOutpost; + + // Bonus for a knight or bishop shielded by pawn + if (shift(pos.pieces(PAWN)) & s) + score += MinorBehindPawn; + + // Penalty if the piece is far from the king + score -= KingProtector[Pt == BISHOP] * distance(pos.square(Us), s); + + if constexpr (Pt == BISHOP) + { + // Penalty according to the number of our pawns on the same color square as the + // bishop, bigger when the center files are blocked with pawns and smaller + // when the bishop is outside the pawn chain. + Bitboard blocked = pos.pieces(Us, PAWN) & shift(pos.pieces()); + + score -= BishopPawns[edge_distance(file_of(s))] * pos.pawns_on_same_color_squares(Us, s) + * (!(attackedBy[Us][PAWN] & s) + popcount(blocked & CenterFiles)); + + // Penalty for all enemy pawns x-rayed + score -= BishopXRayPawns * popcount(attacks_bb(s) & pos.pieces(Them, PAWN)); + + // Bonus for bishop on a long diagonal which can "see" both center squares + if (more_than_one(attacks_bb(s, pos.pieces(PAWN)) & Center)) + score += LongDiagonalBishop; + + // An important Chess960 pattern: a cornered bishop blocked by a friendly + // pawn diagonally in front of it is a very serious problem, especially + // when that pawn is also blocked. + if ( pos.is_chess960() + && (s == relative_square(Us, SQ_A1) || s == relative_square(Us, SQ_H1))) + { + Direction d = pawn_push(Us) + (file_of(s) == FILE_A ? EAST : WEST); + if (pos.piece_on(s + d) == make_piece(Us, PAWN)) + score -= !pos.empty(s + d + pawn_push(Us)) ? 4 * make_score(CorneredBishop, CorneredBishop) + : 3 * make_score(CorneredBishop, CorneredBishop); + } + } + } + + if constexpr (Pt == ROOK) + { + // Bonuses for rook on a (semi-)open or closed file + if (pos.is_on_semiopen_file(Us, s)) + { + score += RookOnOpenFile[pos.is_on_semiopen_file(Them, s)]; + } + else + { + // If our pawn on this file is blocked, increase penalty + if ( pos.pieces(Us, PAWN) + & shift(pos.pieces()) + & file_bb(s)) + { + score -= RookOnClosedFile; + } + + // Penalty when trapped by the king, even more if the king cannot castle + if (mob <= 3) + { + File kf = file_of(pos.square(Us)); + if ((kf < FILE_E) == (file_of(s) < kf)) + score -= TrappedRook * (1 + !pos.castling_rights(Us)); + } + } + } + + if constexpr (Pt == QUEEN) + { + // Penalty if any relative pin or discovered attack against the queen + Bitboard queenPinners; + if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners)) + score -= WeakQueen; + } + } + if constexpr (T) + Trace::add(Pt, Us, score); + + return score; + } + + + // Evaluation::king() assigns bonuses and penalties to a king of a given color + + template template + Score Evaluation::king() const { + + constexpr Color Them = ~Us; + constexpr Bitboard Camp = (Us == WHITE ? AllSquares ^ Rank6BB ^ Rank7BB ^ Rank8BB + : AllSquares ^ Rank1BB ^ Rank2BB ^ Rank3BB); + + Bitboard weak, b1, b2, b3, safe, unsafeChecks = 0; + Bitboard rookChecks, queenChecks, bishopChecks, knightChecks; + int kingDanger = 0; + const Square ksq = pos.square(Us); + + // Init the score with king shelter and enemy pawns storm + Score score = pe->king_safety(pos); + + // Attacked squares defended at most once by our queen or king + weak = attackedBy[Them][ALL_PIECES] + & ~attackedBy2[Us] + & (~attackedBy[Us][ALL_PIECES] | attackedBy[Us][KING] | attackedBy[Us][QUEEN]); + + // Analyse the safe enemy's checks which are possible on next move + safe = ~pos.pieces(Them); + safe &= ~attackedBy[Us][ALL_PIECES] | (weak & attackedBy2[Them]); + + b1 = attacks_bb(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN)); + b2 = attacks_bb(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN)); + + // Enemy rooks checks + rookChecks = b1 & attackedBy[Them][ROOK] & safe; + if (rookChecks) + kingDanger += SafeCheck[ROOK][more_than_one(rookChecks)]; + else + unsafeChecks |= b1 & attackedBy[Them][ROOK]; + + // Enemy queen safe checks: count them only if the checks are from squares from + // which opponent cannot give a rook check, because rook checks are more valuable. + queenChecks = (b1 | b2) & attackedBy[Them][QUEEN] & safe + & ~(attackedBy[Us][QUEEN] | rookChecks); + if (queenChecks) + kingDanger += SafeCheck[QUEEN][more_than_one(queenChecks)]; + + // Enemy bishops checks: count them only if they are from squares from which + // opponent cannot give a queen check, because queen checks are more valuable. + bishopChecks = b2 & attackedBy[Them][BISHOP] & safe + & ~queenChecks; + if (bishopChecks) + kingDanger += SafeCheck[BISHOP][more_than_one(bishopChecks)]; + + else + unsafeChecks |= b2 & attackedBy[Them][BISHOP]; + + // Enemy knights checks + knightChecks = attacks_bb(ksq) & attackedBy[Them][KNIGHT]; + if (knightChecks & safe) + kingDanger += SafeCheck[KNIGHT][more_than_one(knightChecks & safe)]; + else + unsafeChecks |= knightChecks; + + // Find the squares that opponent attacks in our king flank, the squares + // which they attack twice in that flank, and the squares that we defend. + b1 = attackedBy[Them][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp; + b2 = b1 & attackedBy2[Them]; + b3 = attackedBy[Us][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp; + + int kingFlankAttack = popcount(b1) + popcount(b2); + int kingFlankDefense = popcount(b3); + + kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them] // (~10 Elo) + + 183 * popcount(kingRing[Us] & weak) // (~15 Elo) + + 148 * popcount(unsafeChecks) // (~4 Elo) + + 98 * popcount(pos.blockers_for_king(Us)) // (~2 Elo) + + 69 * kingAttacksCount[Them] // (~0.5 Elo) + + 3 * kingFlankAttack * kingFlankAttack / 8 // (~0.5 Elo) + + mg_value(mobility[Them] - mobility[Us]) // (~0.5 Elo) + - 873 * !pos.count(Them) // (~24 Elo) + - 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING]) // (~5 Elo) + - 6 * mg_value(score) / 8 // (~8 Elo) + - 4 * kingFlankDefense // (~5 Elo) + + 37; // (~0.5 Elo) + + // Transform the kingDanger units into a Score, and subtract it from the evaluation + if (kingDanger > 100) + score -= make_score(kingDanger * kingDanger / 4096, kingDanger / 16); + + // Penalty when our king is on a pawnless flank + if (!(pos.pieces(PAWN) & KingFlank[file_of(ksq)])) + score -= PawnlessFlank; + + // Penalty if king flank is under attack, potentially moving toward the king + score -= FlankAttacks * kingFlankAttack; + + if constexpr (T) + Trace::add(KING, Us, score); + + return score; + } + + + // Evaluation::threats() assigns bonuses according to the types of the + // attacking and the attacked pieces. + + template template + Score Evaluation::threats() const { + + constexpr Color Them = ~Us; + constexpr Direction Up = pawn_push(Us); + constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); + + Bitboard b, weak, defended, nonPawnEnemies, stronglyProtected, safe; + Score score = SCORE_ZERO; + + // Non-pawn enemies + nonPawnEnemies = pos.pieces(Them) & ~pos.pieces(PAWN); + + // Squares strongly protected by the enemy, either because they defend the + // square with a pawn, or because they defend the square twice and we don't. + stronglyProtected = attackedBy[Them][PAWN] + | (attackedBy2[Them] & ~attackedBy2[Us]); + + // Non-pawn enemies, strongly protected + defended = nonPawnEnemies & stronglyProtected; + + // Enemies not strongly protected and under our attack + weak = pos.pieces(Them) & ~stronglyProtected & attackedBy[Us][ALL_PIECES]; + + // Bonus according to the kind of attacking pieces + if (defended | weak) + { + b = (defended | weak) & (attackedBy[Us][KNIGHT] | attackedBy[Us][BISHOP]); + while (b) + score += ThreatByMinor[type_of(pos.piece_on(pop_lsb(b)))]; + + b = weak & attackedBy[Us][ROOK]; + while (b) + score += ThreatByRook[type_of(pos.piece_on(pop_lsb(b)))]; + + if (weak & attackedBy[Us][KING]) + score += ThreatByKing; + + b = ~attackedBy[Them][ALL_PIECES] + | (nonPawnEnemies & attackedBy2[Us]); + score += Hanging * popcount(weak & b); + + // Additional bonus if weak piece is only protected by a queen + score += WeakQueenProtection * popcount(weak & attackedBy[Them][QUEEN]); + } + + // Bonus for restricting their piece moves + b = attackedBy[Them][ALL_PIECES] + & ~stronglyProtected + & attackedBy[Us][ALL_PIECES]; + score += RestrictedPiece * popcount(b); + + // Protected or unattacked squares + safe = ~attackedBy[Them][ALL_PIECES] | attackedBy[Us][ALL_PIECES]; + + // Bonus for attacking enemy pieces with our relatively safe pawns + b = pos.pieces(Us, PAWN) & safe; + b = pawn_attacks_bb(b) & nonPawnEnemies; + score += ThreatBySafePawn * popcount(b); + + // Find squares where our pawns can push on the next move + b = shift(pos.pieces(Us, PAWN)) & ~pos.pieces(); + b |= shift(b & TRank3BB) & ~pos.pieces(); + + // Keep only the squares which are relatively safe + b &= ~attackedBy[Them][PAWN] & safe; + + // Bonus for safe pawn threats on the next move + b = pawn_attacks_bb(b) & nonPawnEnemies; + score += ThreatByPawnPush * popcount(b); + + // Bonus for threats on the next moves against enemy queen + if (pos.count(Them) == 1) + { + bool queenImbalance = pos.count() == 1; + + Square s = pos.square(Them); + safe = mobilityArea[Us] + & ~pos.pieces(Us, PAWN) + & ~stronglyProtected; + + b = attackedBy[Us][KNIGHT] & attacks_bb(s); + + score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance); + + b = (attackedBy[Us][BISHOP] & attacks_bb(s, pos.pieces())) + | (attackedBy[Us][ROOK ] & attacks_bb(s, pos.pieces())); + + score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance); + } + + if constexpr (T) + Trace::add(THREAT, Us, score); + + return score; + } + + // Evaluation::passed() evaluates the passed pawns and candidate passed + // pawns of the given color. + + template template + Score Evaluation::passed() const { + + constexpr Color Them = ~Us; + constexpr Direction Up = pawn_push(Us); + constexpr Direction Down = -Up; + + auto king_proximity = [&](Color c, Square s) { + return std::min(distance(pos.square(c), s), 5); + }; + + Bitboard b, bb, squaresToQueen, unsafeSquares, blockedPassers, helpers; + Score score = SCORE_ZERO; + + b = pe->passed_pawns(Us); + + blockedPassers = b & shift(pos.pieces(Them, PAWN)); + if (blockedPassers) + { + helpers = shift(pos.pieces(Us, PAWN)) + & ~pos.pieces(Them) + & (~attackedBy2[Them] | attackedBy[Us][ALL_PIECES]); + + // Remove blocked candidate passers that don't have help to pass + b &= ~blockedPassers + | shift(helpers) + | shift(helpers); + } + + while (b) + { + Square s = pop_lsb(b); + + assert(!(pos.pieces(Them, PAWN) & forward_file_bb(Us, s + Up))); + + int r = relative_rank(Us, s); + + Score bonus = PassedRank[r]; + + if (r > RANK_3) + { + int w = 5 * r - 13; + Square blockSq = s + Up; + + // Adjust bonus based on the king's proximity + bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4 + - king_proximity(Us, blockSq) * 2) * w); + + // If blockSq is not the queening square then consider also a second push + if (r != RANK_7) + bonus -= make_score(0, king_proximity(Us, blockSq + Up) * w); + + // If the pawn is free to advance, then increase the bonus + if (pos.empty(blockSq)) + { + squaresToQueen = forward_file_bb(Us, s); + unsafeSquares = passed_pawn_span(Us, s); + + bb = forward_file_bb(Them, s) & pos.pieces(ROOK, QUEEN); + + if (!(pos.pieces(Them) & bb)) + unsafeSquares &= attackedBy[Them][ALL_PIECES] | pos.pieces(Them); + + // If there are no enemy pieces or attacks on passed pawn span, assign a big bonus. + // Or if there is some, but they are all attacked by our pawns, assign a bit smaller bonus. + // Otherwise assign a smaller bonus if the path to queen is not attacked + // and even smaller bonus if it is attacked but block square is not. + int k = !unsafeSquares ? 36 : + !(unsafeSquares & ~attackedBy[Us][PAWN]) ? 30 : + !(unsafeSquares & squaresToQueen) ? 17 : + !(unsafeSquares & blockSq) ? 7 : + 0 ; + + // Assign a larger bonus if the block square is defended + if ((pos.pieces(Us) & bb) || (attackedBy[Us][ALL_PIECES] & blockSq)) + k += 5; + + bonus += make_score(k * w, k * w); + } + } // r > RANK_3 + + score += bonus - PassedFile * edge_distance(file_of(s)); + } + + if constexpr (T) + Trace::add(PASSED, Us, score); + + return score; + } + + + // Evaluation::space() computes a space evaluation for a given side, aiming to improve game + // play in the opening. It is based on the number of safe squares on the four central files + // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. + // Finally, the space bonus is multiplied by a weight which decreases according to occupancy. + + template template + Score Evaluation::space() const { + + // Early exit if, for example, both queens or 6 minor pieces have been exchanged + if (pos.non_pawn_material() < SpaceThreshold) + return SCORE_ZERO; + + constexpr Color Them = ~Us; + constexpr Direction Down = -pawn_push(Us); + constexpr Bitboard SpaceMask = + Us == WHITE ? CenterFiles & (Rank2BB | Rank3BB | Rank4BB) + : CenterFiles & (Rank7BB | Rank6BB | Rank5BB); + + // Find the available squares for our pieces inside the area defined by SpaceMask + Bitboard safe = SpaceMask + & ~pos.pieces(Us, PAWN) + & ~attackedBy[Them][PAWN]; + + // Find all squares which are at most three squares behind some friendly pawn + Bitboard behind = pos.pieces(Us, PAWN); + behind |= shift(behind); + behind |= shift(behind); + + // Compute space score based on the number of safe squares and number of our pieces + // increased with number of total blocked pawns in position. + int bonus = popcount(safe) + popcount(behind & safe & ~attackedBy[Them][ALL_PIECES]); + int weight = pos.count(Us) - 3 + std::min(pe->blocked_count(), 9); + Score score = make_score(bonus * weight * weight / 16, 0); + + if constexpr (T) + Trace::add(SPACE, Us, score); + + return score; + } + + + // Evaluation::winnable() adjusts the midgame and endgame score components, based on + // the known attacking/defending status of the players. The final value is derived + // by interpolation from the midgame and endgame values. + + template + Value Evaluation::winnable(Score score) const { + + int outflanking = distance(pos.square(WHITE), pos.square(BLACK)) + + int(rank_of(pos.square(WHITE)) - rank_of(pos.square(BLACK))); + + bool pawnsOnBothFlanks = (pos.pieces(PAWN) & QueenSide) + && (pos.pieces(PAWN) & KingSide); + + bool almostUnwinnable = outflanking < 0 + && !pawnsOnBothFlanks; + + bool infiltration = rank_of(pos.square(WHITE)) > RANK_4 + || rank_of(pos.square(BLACK)) < RANK_5; + + // Compute the initiative bonus for the attacking side + int complexity = 9 * pe->passed_count() + + 12 * pos.count() + + 9 * outflanking + + 21 * pawnsOnBothFlanks + + 24 * infiltration + + 51 * !pos.non_pawn_material() + - 43 * almostUnwinnable + -110 ; + + Value mg = mg_value(score); + Value eg = eg_value(score); + + // Now apply the bonus: note that we find the attacking side by extracting the + // sign of the midgame or endgame values, and that we carefully cap the bonus + // so that the midgame and endgame scores do not change sign after the bonus. + int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0); + int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); + + mg += u; + eg += v; + + // Compute the scale factor for the winning side + Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK; + int sf = me->scale_factor(pos, strongSide); + + // If scale factor is not already specific, scale up/down via general heuristics + if (sf == SCALE_FACTOR_NORMAL) + { + if (pos.opposite_bishops()) + { + // For pure opposite colored bishops endgames use scale factor + // based on the number of passed pawns of the strong side. + if ( pos.non_pawn_material(WHITE) == BishopValueMg + && pos.non_pawn_material(BLACK) == BishopValueMg) + sf = 18 + 4 * popcount(pe->passed_pawns(strongSide)); + // For every other opposite colored bishops endgames use scale factor + // based on the number of all pieces of the strong side. + else + sf = 22 + 3 * pos.count(strongSide); + } + // For rook endgames with strong side not having overwhelming pawn number advantage + // and its pawns being on one flank and weak side protecting its pieces with a king + // use lower scale factor. + else if ( pos.non_pawn_material(WHITE) == RookValueMg + && pos.non_pawn_material(BLACK) == RookValueMg + && pos.count(strongSide) - pos.count(~strongSide) <= 1 + && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) + && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) + sf = 36; + // For queen vs no queen endgames use scale factor + // based on number of minors of side that doesn't have queen. + else if (pos.count() == 1) + sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) + : pos.count(WHITE) + pos.count(WHITE)); + // In every other case use scale factor based on + // the number of pawns of the strong side reduced if pawns are on a single flank. + else + sf = std::min(sf, 36 + 7 * pos.count(strongSide)) - 4 * !pawnsOnBothFlanks; + + // Reduce scale factor in case of pawns being on a single flank + sf -= 4 * !pawnsOnBothFlanks; + } + + // Interpolate between the middlegame and (scaled by 'sf') endgame score + v = mg * int(me->game_phase()) + + eg * int(PHASE_MIDGAME - me->game_phase()) * ScaleFactor(sf) / SCALE_FACTOR_NORMAL; + v /= PHASE_MIDGAME; + + if constexpr (T) + { + Trace::add(WINNABLE, make_score(u, eg * ScaleFactor(sf) / SCALE_FACTOR_NORMAL - eg_value(score))); + Trace::add(TOTAL, make_score(mg, eg * ScaleFactor(sf) / SCALE_FACTOR_NORMAL)); + } + + return Value(v); + } + + + // Evaluation::value() is the main function of the class. It computes the various + // parts of the evaluation and returns the value of the position from the point + // of view of the side to move. + + template + Value Evaluation::value() { + + assert(!pos.checkers()); + + // Probe the material hash table + me = Material::probe(pos); + + // If we have a specialized evaluation function for the current material + // configuration, call it and return. + if (me->specialized_eval_exists()) + return me->evaluate(pos); + + // Initialize score by reading the incrementally updated scores included in + // the position object (material + piece square tables) and the material + // imbalance. Score is computed internally from the white point of view. + Score score = pos.psq_score() + me->imbalance(); + + // Probe the pawn hash table + pe = Pawns::probe(pos); + score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK); + + // Early exit if score is high + auto lazy_skip = [&](Value lazyThreshold) { + return abs(mg_value(score) + eg_value(score)) > lazyThreshold + + std::abs(pos.this_thread()->bestValue) * 5 / 4 + + pos.non_pawn_material() / 32; + }; + + if (lazy_skip(LazyThreshold1)) + goto make_v; + + // Main evaluation begins here + initialize(); + initialize(); + + // Pieces evaluated first (also populates attackedBy, attackedBy2). + // Note that the order of evaluation of the terms is left unspecified. + score += pieces() - pieces() + + pieces() - pieces() + + pieces() - pieces() + + pieces() - pieces(); + + score += mobility[WHITE] - mobility[BLACK]; + + // More complex interactions that require fully populated attack bitboards + score += king< WHITE>() - king< BLACK>() + + passed< WHITE>() - passed< BLACK>(); + + if (lazy_skip(LazyThreshold2)) + goto make_v; + + score += threats() - threats() + + space< WHITE>() - space< BLACK>(); + +make_v: + // Derive single value from mg and eg parts of score + Value v = winnable(score); + + // In case of tracing add all remaining individual evaluation terms + if constexpr (T) + { + Trace::add(MATERIAL, pos.psq_score()); + Trace::add(IMBALANCE, me->imbalance()); + Trace::add(PAWN, pe->pawn_score(WHITE), pe->pawn_score(BLACK)); + Trace::add(MOBILITY, mobility[WHITE], mobility[BLACK]); + } + + // Evaluation grain + v = (v / 16) * 16; + + // Side to move point of view + v = (pos.side_to_move() == WHITE ? v : -v); + + return v; + } + +} // namespace Eval + + +/// evaluate() is the evaluator for the outer world. It returns a static +/// evaluation of the position from the point of view of the side to move. + +Value Eval::evaluate(const Position& pos, int* complexity) { + + Value v; + Value psq = pos.psq_eg_stm(); + + // We use the much less accurate but faster Classical eval when the NNUE + // option is set to false. Otherwise we use the NNUE eval unless the + // PSQ advantage is decisive and several pieces remain. (~3 Elo) + bool useClassical = !useNNUE || (pos.count() > 7 && abs(psq) > 1760); + + if (useClassical) + v = Evaluation(pos).value(); + else + { + int nnueComplexity; + int scale = 1064 + 106 * pos.non_pawn_material() / 5120; + + Color stm = pos.side_to_move(); + Value optimism = pos.this_thread()->optimism[stm]; + + Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); + + // Blend nnue complexity with (semi)classical complexity + nnueComplexity = ( 416 * nnueComplexity + + 424 * abs(psq - nnue) + + (optimism > 0 ? int(optimism) * int(psq - nnue) : 0) + ) / 1024; + + // Return hybrid NNUE complexity to caller + if (complexity) + *complexity = nnueComplexity; + + optimism = optimism * (269 + nnueComplexity) / 256; + v = (nnue * scale + optimism * (scale - 754)) / 1024; + } + + // Damp down the evaluation linearly when shuffling + v = v * (195 - pos.rule50_count()) / 211; + + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + + // When not using NNUE, return classical complexity to caller + if (complexity && (!useNNUE || useClassical)) + *complexity = abs(v - psq); + + return v; +} + +/// trace() is like evaluate(), but instead of returning a value, it returns +/// a string (suitable for outputting to stdout) that contains the detailed +/// descriptions and values of each evaluation term. Useful for debugging. +/// Trace scores are from white's point of view + +std::string Eval::trace(Position& pos) { + + if (pos.checkers()) + return "Final evaluation: none (in check)"; + + std::stringstream ss; + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); + + Value v; + + std::memset(scores, 0, sizeof(scores)); + + // Reset any global variable used in eval + pos.this_thread()->bestValue = VALUE_ZERO; + pos.this_thread()->optimism[WHITE] = VALUE_ZERO; + pos.this_thread()->optimism[BLACK] = VALUE_ZERO; + + v = Evaluation(pos).value(); + + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) + << " Contributing terms for the classical eval:\n" + << "+------------+-------------+-------------+-------------+\n" + << "| Term | White | Black | Total |\n" + << "| | MG EG | MG EG | MG EG |\n" + << "+------------+-------------+-------------+-------------+\n" + << "| Material | " << Term(MATERIAL) + << "| Imbalance | " << Term(IMBALANCE) + << "| Pawns | " << Term(PAWN) + << "| Knights | " << Term(KNIGHT) + << "| Bishops | " << Term(BISHOP) + << "| Rooks | " << Term(ROOK) + << "| Queens | " << Term(QUEEN) + << "| Mobility | " << Term(MOBILITY) + << "|King safety | " << Term(KING) + << "| Threats | " << Term(THREAT) + << "| Passed | " << Term(PASSED) + << "| Space | " << Term(SPACE) + << "| Winnable | " << Term(WINNABLE) + << "+------------+-------------+-------------+-------------+\n" + << "| Total | " << Term(TOTAL) + << "+------------+-------------+-------------+-------------+\n"; + + if (Eval::useNNUE) + ss << '\n' << NNUE::trace(pos) << '\n'; + + ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); + + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nClassical evaluation " << to_cp(v) << " (white side)\n"; + if (Eval::useNNUE) + { + v = NNUE::evaluate(pos, false); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "NNUE evaluation " << to_cp(v) << " (white side)\n"; + } + + v = evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "Final evaluation " << to_cp(v) << " (white side)"; + if (Eval::useNNUE) + ss << " [with scaled NNUE, hybrid, ...]"; + ss << "\n"; + + return ss.str(); +} + +} // namespace Stockfish diff --git a/src/evaluate.h b/src/evaluate.h new file mode 100644 index 0000000000000000000000000000000000000000..f5ac3263c3380267cc7c9ce15bc66830e6ce4d56 --- /dev/null +++ b/src/evaluate.h @@ -0,0 +1,62 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef EVALUATE_H_INCLUDED +#define EVALUATE_H_INCLUDED + +#include +#include + +#include "types.h" + +namespace Stockfish { + +class Position; + +namespace Eval { + + std::string trace(Position& pos); + Value evaluate(const Position& pos, int* complexity = nullptr); + + extern bool useNNUE; + extern std::string currentEvalFileName; + + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. + #define EvalFileDefaultName "nn-ad9b42354671.nnue" + + namespace NNUE { + + std::string trace(Position& pos); + Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); + + void init(); + void verify(); + + bool load_eval(std::string name, std::istream& stream); + bool save_eval(std::ostream& stream); + bool save_eval(const std::optional& filename); + + } // namespace NNUE + +} // namespace Eval + +} // namespace Stockfish + +#endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 0000000000000000000000000000000000000000..32484ab5e7026f9a1f15c2f8c08b1418802e02a8 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100644 index 0000000000000000000000000000000000000000..c19684d72420ed0b326dd857fbe39d100713e746 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) \ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. + * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. + * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fad0ef8449b6fcc04f53e44a5854d12a10011679 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,53 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "bitboard.h" +#include "endgame.h" +#include "position.h" +#include "psqt.h" +#include "search.h" +#include "syzygy/tbprobe.h" +#include "thread.h" +#include "tt.h" +#include "uci.h" + +using namespace Stockfish; + +int main(int argc, char* argv[]) { + + std::cout << engine_info() << std::endl; + + CommandLine::init(argc, argv); + UCI::init(Options); + Tune::init(); + PSQT::init(); + Bitboards::init(); + Position::init(); + Bitbases::init(); + Endgames::init(); + Threads.set(size_t(Options["Threads"])); + Search::clear(); // After threads are up + Eval::NNUE::init(); + + UCI::loop(argc, argv); + + Threads.set(0); + return 0; +} diff --git a/src/material.cpp b/src/material.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1567358af488143e5befb36add633be9a5f67903 --- /dev/null +++ b/src/material.cpp @@ -0,0 +1,229 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include // For std::memset + +#include "material.h" +#include "thread.h" + +using namespace std; + +namespace Stockfish { + +namespace { + #define S(mg, eg) make_score(mg, eg) + + // Polynomial material imbalance parameters + + // One Score parameter for each pair (our piece, another of our pieces) + constexpr Score QuadraticOurs[][PIECE_TYPE_NB] = { + // OUR PIECE 2 + // bishop pair pawn knight bishop rook queen + {S(1419, 1455) }, // Bishop pair + {S( 101, 28), S( 37, 39) }, // Pawn + {S( 57, 64), S(249, 187), S(-49, -62) }, // Knight OUR PIECE 1 + {S( 0, 0), S(118, 137), S( 10, 27), S( 0, 0) }, // Bishop + {S( -63, -68), S( -5, 3), S(100, 81), S(132, 118), S(-246, -244) }, // Rook + {S(-210, -211), S( 37, 14), S(147, 141), S(161, 105), S(-158, -174), S(-9,-31) } // Queen + }; + + // One Score parameter for each pair (our piece, their piece) + constexpr Score QuadraticTheirs[][PIECE_TYPE_NB] = { + // THEIR PIECE + // bishop pair pawn knight bishop rook queen + { }, // Bishop pair + {S( 33, 30) }, // Pawn + {S( 46, 18), S(106, 84) }, // Knight OUR PIECE + {S( 75, 35), S( 59, 44), S( 60, 15) }, // Bishop + {S( 26, 35), S( 6, 22), S( 38, 39), S(-12, -2) }, // Rook + {S( 97, 93), S(100, 163), S(-58, -91), S(112, 192), S(276, 225) } // Queen + }; + + #undef S + + // Endgame evaluation and scaling functions are accessed directly and not through + // the function maps because they correspond to more than one material hash key. + Endgame EvaluateKXK[] = { Endgame(WHITE), Endgame(BLACK) }; + + Endgame ScaleKBPsK[] = { Endgame(WHITE), Endgame(BLACK) }; + Endgame ScaleKQKRPs[] = { Endgame(WHITE), Endgame(BLACK) }; + Endgame ScaleKPsK[] = { Endgame(WHITE), Endgame(BLACK) }; + Endgame ScaleKPKP[] = { Endgame(WHITE), Endgame(BLACK) }; + + // Helper used to detect a given material distribution + bool is_KXK(const Position& pos, Color us) { + return !more_than_one(pos.pieces(~us)) + && pos.non_pawn_material(us) >= RookValueMg; + } + + bool is_KBPsK(const Position& pos, Color us) { + return pos.non_pawn_material(us) == BishopValueMg + && pos.count(us) >= 1; + } + + bool is_KQKRPs(const Position& pos, Color us) { + return !pos.count(us) + && pos.non_pawn_material(us) == QueenValueMg + && pos.count(~us) == 1 + && pos.count(~us) >= 1; + } + + + /// imbalance() calculates the imbalance by comparing the piece count of each + /// piece type for both colors. + + template + Score imbalance(const int pieceCount[][PIECE_TYPE_NB]) { + + constexpr Color Them = ~Us; + + Score bonus = SCORE_ZERO; + + // Second-degree polynomial material imbalance, by Tord Romstad + for (int pt1 = NO_PIECE_TYPE; pt1 <= QUEEN; ++pt1) + { + if (!pieceCount[Us][pt1]) + continue; + + int v = QuadraticOurs[pt1][pt1] * pieceCount[Us][pt1]; + + for (int pt2 = NO_PIECE_TYPE; pt2 < pt1; ++pt2) + v += QuadraticOurs[pt1][pt2] * pieceCount[Us][pt2] + + QuadraticTheirs[pt1][pt2] * pieceCount[Them][pt2]; + + bonus += pieceCount[Us][pt1] * v; + } + + return bonus; + } + +} // namespace + +namespace Material { + + +/// Material::probe() looks up the current position's material configuration in +/// the material hash table. It returns a pointer to the Entry if the position +/// is found. Otherwise a new Entry is computed and stored there, so we don't +/// have to recompute all when the same material configuration occurs again. + +Entry* probe(const Position& pos) { + + Key key = pos.material_key(); + Entry* e = pos.this_thread()->materialTable[key]; + + if (e->key == key) + return e; + + std::memset(e, 0, sizeof(Entry)); + e->key = key; + e->factor[WHITE] = e->factor[BLACK] = (uint8_t)SCALE_FACTOR_NORMAL; + + Value npm_w = pos.non_pawn_material(WHITE); + Value npm_b = pos.non_pawn_material(BLACK); + Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); + + // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] + e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); + + // Let's look if we have a specialized evaluation function for this particular + // material configuration. Firstly we look for a fixed configuration one, then + // for a generic one if the previous search failed. + if ((e->evaluationFunction = Endgames::probe(key)) != nullptr) + return e; + + for (Color c : { WHITE, BLACK }) + if (is_KXK(pos, c)) + { + e->evaluationFunction = &EvaluateKXK[c]; + return e; + } + + // OK, we didn't find any special evaluation function for the current material + // configuration. Is there a suitable specialized scaling function? + const auto* sf = Endgames::probe(key); + + if (sf) + { + e->scalingFunction[sf->strongSide] = sf; // Only strong color assigned + return e; + } + + // We didn't find any specialized scaling function, so fall back on generic + // ones that refer to more than one material distribution. Note that in this + // case we don't return after setting the function. + for (Color c : { WHITE, BLACK }) + { + if (is_KBPsK(pos, c)) + e->scalingFunction[c] = &ScaleKBPsK[c]; + + else if (is_KQKRPs(pos, c)) + e->scalingFunction[c] = &ScaleKQKRPs[c]; + } + + if (npm_w + npm_b == VALUE_ZERO && pos.pieces(PAWN)) // Only pawns on the board + { + if (!pos.count(BLACK)) + { + assert(pos.count(WHITE) >= 2); + + e->scalingFunction[WHITE] = &ScaleKPsK[WHITE]; + } + else if (!pos.count(WHITE)) + { + assert(pos.count(BLACK) >= 2); + + e->scalingFunction[BLACK] = &ScaleKPsK[BLACK]; + } + else if (pos.count(WHITE) == 1 && pos.count(BLACK) == 1) + { + // This is a special case because we set scaling functions + // for both colors instead of only one. + e->scalingFunction[WHITE] = &ScaleKPKP[WHITE]; + e->scalingFunction[BLACK] = &ScaleKPKP[BLACK]; + } + } + + // Zero or just one pawn makes it difficult to win, even with a small material + // advantage. This catches some trivial draws like KK, KBK and KNK and gives a + // drawish scale factor for cases such as KRKBP and KmmKm (except for KBBKN). + if (!pos.count(WHITE) && npm_w - npm_b <= BishopValueMg) + e->factor[WHITE] = uint8_t(npm_w < RookValueMg ? SCALE_FACTOR_DRAW : + npm_b <= BishopValueMg ? 4 : 14); + + if (!pos.count(BLACK) && npm_b - npm_w <= BishopValueMg) + e->factor[BLACK] = uint8_t(npm_b < RookValueMg ? SCALE_FACTOR_DRAW : + npm_w <= BishopValueMg ? 4 : 14); + + // Evaluate the material imbalance. We use PIECE_TYPE_NONE as a place holder + // for the bishop pair "extended piece", which allows us to be more flexible + // in defining bishop pair bonuses. + const int pieceCount[COLOR_NB][PIECE_TYPE_NB] = { + { pos.count(WHITE) > 1, pos.count(WHITE), pos.count(WHITE), + pos.count(WHITE) , pos.count(WHITE), pos.count(WHITE) }, + { pos.count(BLACK) > 1, pos.count(BLACK), pos.count(BLACK), + pos.count(BLACK) , pos.count(BLACK), pos.count(BLACK) } }; + + e->score = (imbalance(pieceCount) - imbalance(pieceCount)) / 16; + return e; +} + +} // namespace Material + +} // namespace Stockfish diff --git a/src/material.h b/src/material.h new file mode 100644 index 0000000000000000000000000000000000000000..3ca169ce0b93b6ff5500552f0d9a8c4319ed7157 --- /dev/null +++ b/src/material.h @@ -0,0 +1,71 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef MATERIAL_H_INCLUDED +#define MATERIAL_H_INCLUDED + +#include "endgame.h" +#include "misc.h" +#include "position.h" +#include "types.h" + +namespace Stockfish::Material { + +/// Material::Entry contains various information about a material configuration. +/// It contains a material imbalance evaluation, a function pointer to a special +/// endgame evaluation function (which in most cases is NULL, meaning that the +/// standard evaluation function will be used), and scale factors. +/// +/// The scale factors are used to scale the evaluation score up or down. For +/// instance, in KRB vs KR endgames, the score is scaled down by a factor of 4, +/// which will result in scores of absolute value less than one pawn. + +struct Entry { + + Score imbalance() const { return score; } + Phase game_phase() const { return (Phase)gamePhase; } + bool specialized_eval_exists() const { return evaluationFunction != nullptr; } + Value evaluate(const Position& pos) const { return (*evaluationFunction)(pos); } + + // scale_factor() takes a position and a color as input and returns a scale factor + // for the given color. We have to provide the position in addition to the color + // because the scale factor may also be a function which should be applied to + // the position. For instance, in KBP vs K endgames, the scaling function looks + // for rook pawns and wrong-colored bishops. + ScaleFactor scale_factor(const Position& pos, Color c) const { + ScaleFactor sf = scalingFunction[c] ? (*scalingFunction[c])(pos) + : SCALE_FACTOR_NONE; + return sf != SCALE_FACTOR_NONE ? sf : ScaleFactor(factor[c]); + } + + Key key; + const EndgameBase* evaluationFunction; + const EndgameBase* scalingFunction[COLOR_NB]; // Could be one for each + // side (e.g. KPKP, KBPsK) + Score score; + int16_t gamePhase; + uint8_t factor[COLOR_NB]; +}; + +typedef HashTable Table; + +Entry* probe(const Position& pos); + +} // namespace Stockfish::Material + +#endif // #ifndef MATERIAL_H_INCLUDED diff --git a/src/misc.cpp b/src/misc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2d86969f5361a41ef5d97047ffe0e436387a71f4 --- /dev/null +++ b/src/misc.cpp @@ -0,0 +1,687 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifdef _WIN32 +#if _WIN32_WINNT < 0x0601 +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include +// The needed Windows API for processor groups could be missed from old Windows +// versions, so instead of calling them directly (forcing the linker to resolve +// the calls at compile time), try to load them at runtime. To do this we need +// first to define the corresponding function pointers. +extern "C" { +typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); +typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); +typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +typedef WORD(*fun5_t)(); +} +#endif + +#include +#include +#include +#include +#include +#include + +#if defined(__linux__) && !defined(__ANDROID__) +#include +#include +#endif + +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__) +#define POSIXALIGNEDALLOC +#include +#endif + +#include "misc.h" +#include "thread.h" + +using namespace std; + +namespace Stockfish { + +namespace { + +/// Version number or dev. +const string version = "15.1"; + +/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and +/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We +/// can toggle the logging of std::cout and std:cin at runtime whilst preserving +/// usual I/O functionality, all without changing a single line of code! +/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81 + +struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout + + Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {} + + int sync() override { return logBuf->pubsync(), buf->pubsync(); } + int overflow(int c) override { return log(buf->sputc((char)c), "<< "); } + int underflow() override { return buf->sgetc(); } + int uflow() override { return log(buf->sbumpc(), ">> "); } + + streambuf *buf, *logBuf; + + int log(int c, const char* prefix) { + + static int last = '\n'; // Single log file + + if (last == '\n') + logBuf->sputn(prefix, 3); + + return last = logBuf->sputc((char)c); + } +}; + +class Logger { + + Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {} + ~Logger() { start(""); } + + ofstream file; + Tie in, out; + +public: + static void start(const std::string& fname) { + + static Logger l; + + if (l.file.is_open()) + { + cout.rdbuf(l.out.buf); + cin.rdbuf(l.in.buf); + l.file.close(); + } + + if (!fname.empty()) + { + l.file.open(fname, ifstream::out); + + if (!l.file.is_open()) + { + cerr << "Unable to open debug log file " << fname << endl; + exit(EXIT_FAILURE); + } + + cin.rdbuf(&l.in); + cout.rdbuf(&l.out); + } + } +}; + +} // namespace + + +/// engine_info() returns the full name of the current Stockfish version. +/// For local dev compiles we try to append the commit sha and commit date +/// from git if that fails only the local compilation date is set and "nogit" is specified: +/// Stockfish dev-YYYYMMDD-SHA +/// or +/// Stockfish dev-YYYYMMDD-nogit +/// +/// For releases (non dev builds) we only include the version number: +/// Stockfish version + +string engine_info(bool to_uci) { + stringstream ss; + ss << "Stockfish " << version << setfill('0'); + + if (version == "dev") + { + ss << "-"; + #ifdef GIT_DATE + ss << GIT_DATE; + #else + const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); + string month, day, year; + stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" + + date >> month >> day >> year; + ss << year << setw(2) << setfill('0') << (1 + months.find(month) / 4) << setw(2) << setfill('0') << day; + #endif + + ss << "-"; + + #ifdef GIT_SHA + ss << GIT_SHA; + #else + ss << "nogit"; + #endif + } + + ss << (to_uci ? "\nid author ": " by ") + << "the Stockfish developers (see AUTHORS file)"; + + return ss.str(); +} + + +/// compiler_info() returns a string trying to describe the compiler we use + +std::string compiler_info() { + + #define stringify2(x) #x + #define stringify(x) stringify2(x) + #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch) + +/// Predefined macros hell: +/// +/// __GNUC__ Compiler is gcc, Clang or Intel on Linux +/// __INTEL_COMPILER Compiler is Intel +/// _MSC_VER Compiler is MSVC or Intel on Windows +/// _WIN32 Building on Windows (any) +/// _WIN64 Building on Windows 64 bit + + std::string compiler = "\nCompiled by "; + + #ifdef __clang__ + compiler += "clang++ "; + compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); + #elif __INTEL_COMPILER + compiler += "Intel compiler "; + compiler += "(version "; + compiler += stringify(__INTEL_COMPILER) " update " stringify(__INTEL_COMPILER_UPDATE); + compiler += ")"; + #elif _MSC_VER + compiler += "MSVC "; + compiler += "(version "; + compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); + compiler += ")"; + #elif defined(__e2k__) && defined(__LCC__) + #define dot_ver2(n) \ + compiler += (char)'.'; \ + compiler += (char)('0' + (n) / 10); \ + compiler += (char)('0' + (n) % 10); + + compiler += "MCST LCC "; + compiler += "(version "; + compiler += std::to_string(__LCC__ / 100); + dot_ver2(__LCC__ % 100) + dot_ver2(__LCC_MINOR__) + compiler += ")"; + #elif __GNUC__ + compiler += "g++ (GNUC) "; + compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + #else + compiler += "Unknown compiler "; + compiler += "(unknown version)"; + #endif + + #if defined(__APPLE__) + compiler += " on Apple"; + #elif defined(__CYGWIN__) + compiler += " on Cygwin"; + #elif defined(__MINGW64__) + compiler += " on MinGW64"; + #elif defined(__MINGW32__) + compiler += " on MinGW32"; + #elif defined(__ANDROID__) + compiler += " on Android"; + #elif defined(__linux__) + compiler += " on Linux"; + #elif defined(_WIN64) + compiler += " on Microsoft Windows 64-bit"; + #elif defined(_WIN32) + compiler += " on Microsoft Windows 32-bit"; + #else + compiler += " on unknown system"; + #endif + + compiler += "\nCompilation settings include: "; + compiler += (Is64Bit ? " 64bit" : " 32bit"); + #if defined(USE_VNNI) + compiler += " VNNI"; + #endif + #if defined(USE_AVX512) + compiler += " AVX512"; + #endif + compiler += (HasPext ? " BMI2" : ""); + #if defined(USE_AVX2) + compiler += " AVX2"; + #endif + #if defined(USE_SSE41) + compiler += " SSE41"; + #endif + #if defined(USE_SSSE3) + compiler += " SSSE3"; + #endif + #if defined(USE_SSE2) + compiler += " SSE2"; + #endif + compiler += (HasPopCnt ? " POPCNT" : ""); + #if defined(USE_MMX) + compiler += " MMX"; + #endif + #if defined(USE_NEON) + compiler += " NEON"; + #endif + + #if !defined(NDEBUG) + compiler += " DEBUG"; + #endif + + compiler += "\n__VERSION__ macro expands to: "; + #ifdef __VERSION__ + compiler += __VERSION__; + #else + compiler += "(undefined macro)"; + #endif + compiler += "\n"; + + return compiler; +} + + +/// Debug functions used mainly to collect run-time statistics +static std::atomic hits[2], means[2]; + +void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; } +void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); } +void dbg_mean_of(int v) { ++means[0]; means[1] += v; } + +void dbg_print() { + + if (hits[0]) + cerr << "Total " << hits[0] << " Hits " << hits[1] + << " hit rate (%) " << 100 * hits[1] / hits[0] << endl; + + if (means[0]) + cerr << "Total " << means[0] << " Mean " + << (double)means[1] / means[0] << endl; +} + + +/// Used to serialize access to std::cout to avoid multiple threads writing at +/// the same time. + +std::ostream& operator<<(std::ostream& os, SyncCout sc) { + + static std::mutex m; + + if (sc == IO_LOCK) + m.lock(); + + if (sc == IO_UNLOCK) + m.unlock(); + + return os; +} + + +/// Trampoline helper to avoid moving Logger to misc.h +void start_logger(const std::string& fname) { Logger::start(fname); } + + +/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking +/// function that doesn't stall the CPU waiting for data to be loaded from memory, +/// which can be quite slow. +#ifdef NO_PREFETCH + +void prefetch(void*) {} + +#else + +void prefetch(void* addr) { + +# if defined(__INTEL_COMPILER) + // This hack prevents prefetches from being optimized away by + // Intel compiler. Both MSVC and gcc seem not be affected by this. + __asm__ (""); +# endif + +# if defined(__INTEL_COMPILER) || defined(_MSC_VER) + _mm_prefetch((char*)addr, _MM_HINT_T0); +# else + __builtin_prefetch(addr); +# endif +} + +#endif + + +/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation +/// does not guarantee the availability of aligned_alloc(). Memory allocated with +/// std_aligned_alloc() must be freed with std_aligned_free(). + +void* std_aligned_alloc(size_t alignment, size_t size) { + +#if defined(POSIXALIGNEDALLOC) + void *mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; +#elif defined(_WIN32) + return _mm_malloc(size, alignment); +#else + return std::aligned_alloc(alignment, size); +#endif +} + +void std_aligned_free(void* ptr) { + +#if defined(POSIXALIGNEDALLOC) + free(ptr); +#elif defined(_WIN32) + _mm_free(ptr); +#else + free(ptr); +#endif +} + +/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. + +#if defined(_WIN32) + +static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { + + #if !defined(_WIN64) + return nullptr; + #else + + HANDLE hProcessToken { }; + LUID luid { }; + void* mem = nullptr; + + const size_t largePageSize = GetLargePageMinimum(); + if (!largePageSize) + return nullptr; + + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; + + if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid)) + { + TOKEN_PRIVILEGES tp { }; + TOKEN_PRIVILEGES prevTp { }; + DWORD prevTpLen = 0; + + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, + // we still need to query GetLastError() to ensure that the privileges were actually obtained. + if (AdjustTokenPrivileges( + hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && + GetLastError() == ERROR_SUCCESS) + { + // Round up size to full pages and allocate + allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); + mem = VirtualAlloc( + NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + + // Privilege no longer needed, restore previous state + AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL); + } + } + + CloseHandle(hProcessToken); + + return mem; + + #endif +} + +void* aligned_large_pages_alloc(size_t allocSize) { + + // Try to allocate large pages + void* mem = aligned_large_pages_alloc_windows(allocSize); + + // Fall back to regular, page aligned, allocation if necessary + if (!mem) + mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + + return mem; +} + +#else + +void* aligned_large_pages_alloc(size_t allocSize) { + +#if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size +#else + constexpr size_t alignment = 4096; // assumed small page size +#endif + + // round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void *mem = std_aligned_alloc(alignment, size); +#if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); +#endif + return mem; +} + +#endif + + +/// aligned_large_pages_free() will free the previously allocated ttmem + +#if defined(_WIN32) + +void aligned_large_pages_free(void* mem) { + + if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) + { + DWORD err = GetLastError(); + std::cerr << "Failed to free large page memory. Error code: 0x" + << std::hex << err + << std::dec << std::endl; + exit(EXIT_FAILURE); + } +} + +#else + +void aligned_large_pages_free(void *mem) { + std_aligned_free(mem); +} + +#endif + + +namespace WinProcGroup { + +#ifndef _WIN32 + +void bindThisThread(size_t) {} + +#else + +/// best_node() retrieves logical processor information using Windows specific +/// API and returns the best node id for the thread with index idx. Original +/// code from Texel by Peter Österlund. + +int best_node(size_t idx) { + + int threads = 0; + int nodes = 0; + int cores = 0; + DWORD returnLength = 0; + DWORD byteOffset = 0; + + // Early exit if the needed API is not available at runtime + HMODULE k32 = GetModuleHandle("Kernel32.dll"); + auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); + if (!fun1) + return -1; + + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. + if (fun1(RelationAll, nullptr, &returnLength)) + return -1; + + // Once we know returnLength, allocate the buffer + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; + ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); + + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed + if (!fun1(RelationAll, buffer, &returnLength)) + { + free(buffer); + return -1; + } + + while (byteOffset < returnLength) + { + if (ptr->Relationship == RelationNumaNode) + nodes++; + + else if (ptr->Relationship == RelationProcessorCore) + { + cores++; + threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; + } + + assert(ptr->Size); + byteOffset += ptr->Size; + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + } + + free(buffer); + + std::vector groups; + + // Run as many threads as possible on the same node until core limit is + // reached, then move on filling the next node. + for (int n = 0; n < nodes; n++) + for (int i = 0; i < cores / nodes; i++) + groups.push_back(n); + + // In case a core has more than one logical processor (we assume 2) and we + // have still threads to allocate, then spread them evenly across available + // nodes. + for (int t = 0; t < threads - cores; t++) + groups.push_back(t % nodes); + + // If we still have more threads than the total number of logical processors + // then return -1 and let the OS to decide what to do. + return idx < groups.size() ? groups[idx] : -1; +} + + +/// bindThisThread() set the group affinity of the current thread + +void bindThisThread(size_t idx) { + + // Use only local variables to be thread-safe + int node = best_node(idx); + + if (node == -1) + return; + + // Early exit if the needed API are not available at runtime + HMODULE k32 = GetModuleHandle("Kernel32.dll"); + auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); + auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); + auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); + auto fun5 = (fun5_t)(void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount"); + + if (!fun2 || !fun3) + return; + + if (!fun4 || !fun5) + { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } + else + { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity + free(affinity); + } +} + +#endif + +} // namespace WinProcGroup + +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd +#endif + +namespace CommandLine { + +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory + +void init([[maybe_unused]] int argc, char* argv[]) { + string pathSeparator; + + // extract the path+name of the executable binary + argv0 = argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." + pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); +} + + +} // namespace CommandLine + +} // namespace Stockfish diff --git a/src/misc.h b/src/misc.h new file mode 100644 index 0000000000000000000000000000000000000000..77b81d50fc00e02465413273e398a083f41de6c8 --- /dev/null +++ b/src/misc.h @@ -0,0 +1,198 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef MISC_H_INCLUDED +#define MISC_H_INCLUDED + +#include +#include +#include +#include +#include +#include + +#include "types.h" + +namespace Stockfish { + +std::string engine_info(bool to_uci = false); +std::string compiler_info(); +void prefetch(void* addr); +void start_logger(const std::string& fname); +void* std_aligned_alloc(size_t alignment, size_t size); +void std_aligned_free(void* ptr); +void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes +void aligned_large_pages_free(void* mem); // nop if mem == nullptr + +void dbg_hit_on(bool b); +void dbg_hit_on(bool c, bool b); +void dbg_mean_of(int v); +void dbg_print(); + +typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds +static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); +inline TimePoint now() { + return std::chrono::duration_cast + (std::chrono::steady_clock::now().time_since_epoch()).count(); +} + +template +struct HashTable { + Entry* operator[](Key key) { return &table[(uint32_t)key & (Size - 1)]; } + +private: + std::vector table = std::vector(Size); // Allocate on the heap +}; + + +enum SyncCout { IO_LOCK, IO_UNLOCK }; +std::ostream& operator<<(std::ostream&, SyncCout); + +#define sync_cout std::cout << IO_LOCK +#define sync_endl std::endl << IO_UNLOCK + + +// align_ptr_up() : get the first aligned element of an array. +// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, +// where N is the number of elements in the array. +template +T* align_ptr_up(T* ptr) +{ + static_assert(alignof(T) < Alignment); + + const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); + return reinterpret_cast(reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); +} + + +// IsLittleEndian : true if and only if the binary is compiled on a little endian machine +static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; +static inline const bool IsLittleEndian = (Le.c[0] == 4); + + +// RunningAverage : a class to calculate a running average of a series of values. +// For efficiency, all computations are done with integers. +class RunningAverage { + public: + + // Reset the running average to rational value p / q + void set(int64_t p, int64_t q) + { average = p * PERIOD * RESOLUTION / q; } + + // Update average with value v + void update(int64_t v) + { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } + + // Test if average is strictly greater than rational a / b + bool is_greater(int64_t a, int64_t b) const + { return b * average > a * (PERIOD * RESOLUTION); } + + int64_t value() const + { return average / (PERIOD * RESOLUTION); } + + private : + static constexpr int64_t PERIOD = 4096; + static constexpr int64_t RESOLUTION = 1024; + int64_t average; +}; + +template +class ValueList { + +public: + std::size_t size() const { return size_; } + void push_back(const T& value) { values_[size_++] = value; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + +private: + T values_[MaxSize]; + std::size_t size_ = 0; +}; + + +/// xorshift64star Pseudo-Random Number Generator +/// This class is based on original code written and dedicated +/// to the public domain by Sebastiano Vigna (2014). +/// It has the following characteristics: +/// +/// - Outputs 64-bit numbers +/// - Passes Dieharder and SmallCrush test batteries +/// - Does not require warm-up, no zeroland to escape +/// - Internal state is a single 64-bit integer +/// - Period is 2^64 - 1 +/// - Speed: 1.60 ns/call (Core i7 @3.40GHz) +/// +/// For further analysis see +/// + +class PRNG { + + uint64_t s; + + uint64_t rand64() { + + s ^= s >> 12, s ^= s << 25, s ^= s >> 27; + return s * 2685821657736338717LL; + } + +public: + PRNG(uint64_t seed) : s(seed) { assert(seed); } + + template T rand() { return T(rand64()); } + + /// Special generator used to fast init magic numbers. + /// Output values only have 1/8th of their bits set on average. + template T sparse_rand() + { return T(rand64() & rand64() & rand64()); } +}; + +inline uint64_t mul_hi64(uint64_t a, uint64_t b) { +#if defined(__GNUC__) && defined(IS_64BIT) + __extension__ typedef unsigned __int128 uint128; + return ((uint128)a * (uint128)b) >> 64; +#else + uint64_t aL = (uint32_t)a, aH = a >> 32; + uint64_t bL = (uint32_t)b, bH = b >> 32; + uint64_t c1 = (aL * bL) >> 32; + uint64_t c2 = aH * bL + c1; + uint64_t c3 = aL * bH + (uint32_t)c2; + return aH * bH + (c2 >> 32) + (c3 >> 32); +#endif +} + +/// Under Windows it is not possible for a process to run on more than one +/// logical processor group. This usually means to be limited to use max 64 +/// cores. To overcome this, some special platform specific API should be +/// called to set group affinity for each thread. Original code from Texel by +/// Peter Österlund. + +namespace WinProcGroup { + void bindThisThread(size_t idx); +} + +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + +} // namespace Stockfish + +#endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.cpp b/src/movegen.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7a3c29bc04f60816c7867a57f28f0a1d3d28ec2 --- /dev/null +++ b/src/movegen.cpp @@ -0,0 +1,276 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "movegen.h" +#include "position.h" + +namespace Stockfish { + +namespace { + + template + ExtMove* make_promotions(ExtMove* moveList, Square to) { + + if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + *moveList++ = make(to - D, to, QUEEN); + + if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) + { + *moveList++ = make(to - D, to, ROOK); + *moveList++ = make(to - D, to, BISHOP); + *moveList++ = make(to - D, to, KNIGHT); + } + + return moveList; + } + + + template + ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) { + + constexpr Color Them = ~Us; + constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); + constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); + constexpr Direction Up = pawn_push(Us); + constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); + constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); + + const Bitboard emptySquares = ~pos.pieces(); + const Bitboard enemies = Type == EVASIONS ? pos.checkers() + : pos.pieces(Them); + + Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; + Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; + + // Single and double pawn pushes, no promotions + if (Type != CAPTURES) + { + Bitboard b1 = shift(pawnsNotOn7) & emptySquares; + Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; + + if (Type == EVASIONS) // Consider only blocking squares + { + b1 &= target; + b2 &= target; + } + + if (Type == QUIET_CHECKS) + { + // To make a quiet check, you either make a direct check by pushing a pawn + // or push a blocker pawn that is not on the same file as the enemy king. + // Discovered check promotion has been already generated amongst the captures. + Square ksq = pos.square(Them); + Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq); + b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns); + b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); + } + + while (b1) + { + Square to = pop_lsb(b1); + *moveList++ = make_move(to - Up, to); + } + + while (b2) + { + Square to = pop_lsb(b2); + *moveList++ = make_move(to - Up - Up, to); + } + } + + // Promotions and underpromotions + if (pawnsOn7) + { + Bitboard b1 = shift(pawnsOn7) & enemies; + Bitboard b2 = shift(pawnsOn7) & enemies; + Bitboard b3 = shift(pawnsOn7) & emptySquares; + + if (Type == EVASIONS) + b3 &= target; + + while (b1) + moveList = make_promotions(moveList, pop_lsb(b1)); + + while (b2) + moveList = make_promotions(moveList, pop_lsb(b2)); + + while (b3) + moveList = make_promotions(moveList, pop_lsb(b3)); + } + + // Standard and en passant captures + if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + { + Bitboard b1 = shift(pawnsNotOn7) & enemies; + Bitboard b2 = shift(pawnsNotOn7) & enemies; + + while (b1) + { + Square to = pop_lsb(b1); + *moveList++ = make_move(to - UpRight, to); + } + + while (b2) + { + Square to = pop_lsb(b2); + *moveList++ = make_move(to - UpLeft, to); + } + + if (pos.ep_square() != SQ_NONE) + { + assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6)); + + // An en passant capture cannot resolve a discovered check + if (Type == EVASIONS && (target & (pos.ep_square() + Up))) + return moveList; + + b1 = pawnsNotOn7 & pawn_attacks_bb(Them, pos.ep_square()); + + assert(b1); + + while (b1) + *moveList++ = make(pop_lsb(b1), pos.ep_square()); + } + } + + return moveList; + } + + + template + ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { + + static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); + + Bitboard bb = pos.pieces(Us, Pt); + + while (bb) + { + Square from = pop_lsb(bb); + Bitboard b = attacks_bb(from, pos.pieces()) & target; + + // To check, you either move freely a blocker or make a direct check. + if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from))) + b &= pos.check_squares(Pt); + + while (b) + *moveList++ = make_move(from, pop_lsb(b)); + } + + return moveList; + } + + + template + ExtMove* generate_all(const Position& pos, ExtMove* moveList) { + + static_assert(Type != LEGAL, "Unsupported type in generate_all()"); + + constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations + const Square ksq = pos.square(Us); + Bitboard target; + + // Skip generating non-king moves when in double check + if (Type != EVASIONS || !more_than_one(pos.checkers())) + { + target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) + : Type == NON_EVASIONS ? ~pos.pieces( Us) + : Type == CAPTURES ? pos.pieces(~Us) + : ~pos.pieces( ); // QUIETS || QUIET_CHECKS + + moveList = generate_pawn_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + } + + if (!Checks || pos.blockers_for_king(~Us) & ksq) + { + Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); + if (Checks) + b &= ~attacks_bb(pos.square(~Us)); + + while (b) + *moveList++ = make_move(ksq, pop_lsb(b)); + + if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) + for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + if (!pos.castling_impeded(cr) && pos.can_castle(cr)) + *moveList++ = make(ksq, pos.castling_rook_square(cr)); + } + + return moveList; + } + +} // namespace + + +/// Generates all pseudo-legal captures plus queen promotions +/// Generates all pseudo-legal non-captures and underpromotions +/// Generates all pseudo-legal check evasions when the side to move is in check +/// Generates all pseudo-legal non-captures giving check, except castling and promotions +/// Generates all pseudo-legal captures and non-captures +/// +/// Returns a pointer to the end of the move list. + +template +ExtMove* generate(const Position& pos, ExtMove* moveList) { + + static_assert(Type != LEGAL, "Unsupported type in generate()"); + assert((Type == EVASIONS) == (bool)pos.checkers()); + + Color us = pos.side_to_move(); + + return us == WHITE ? generate_all(pos, moveList) + : generate_all(pos, moveList); +} + +// Explicit template instantiations +template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); +template ExtMove* generate(const Position&, ExtMove*); + + +/// generate generates all the legal moves in the given position + +template<> +ExtMove* generate(const Position& pos, ExtMove* moveList) { + + Color us = pos.side_to_move(); + Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); + Square ksq = pos.square(us); + ExtMove* cur = moveList; + + moveList = pos.checkers() ? generate(pos, moveList) + : generate(pos, moveList); + while (cur != moveList) + if ( ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) + && !pos.legal(*cur)) + *cur = (--moveList)->move; + else + ++cur; + + return moveList; +} + +} // namespace Stockfish diff --git a/src/movegen.h b/src/movegen.h new file mode 100644 index 0000000000000000000000000000000000000000..bbb35b39159ab20ee4348644eed8e234f7963de2 --- /dev/null +++ b/src/movegen.h @@ -0,0 +1,77 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef MOVEGEN_H_INCLUDED +#define MOVEGEN_H_INCLUDED + +#include + +#include "types.h" + +namespace Stockfish { + +class Position; + +enum GenType { + CAPTURES, + QUIETS, + QUIET_CHECKS, + EVASIONS, + NON_EVASIONS, + LEGAL +}; + +struct ExtMove { + Move move; + int value; + + operator Move() const { return move; } + void operator=(Move m) { move = m; } + + // Inhibit unwanted implicit conversions to Move + // with an ambiguity that yields to a compile error. + operator float() const = delete; +}; + +inline bool operator<(const ExtMove& f, const ExtMove& s) { + return f.value < s.value; +} + +template +ExtMove* generate(const Position& pos, ExtMove* moveList); + +/// The MoveList struct is a simple wrapper around generate(). It sometimes comes +/// in handy to use this class instead of the low level generate() function. +template +struct MoveList { + + explicit MoveList(const Position& pos) : last(generate(pos, moveList)) {} + const ExtMove* begin() const { return moveList; } + const ExtMove* end() const { return last; } + size_t size() const { return last - moveList; } + bool contains(Move move) const { + return std::find(begin(), end(), move) != end(); + } + +private: + ExtMove moveList[MAX_MOVES], *last; +}; + +} // namespace Stockfish + +#endif // #ifndef MOVEGEN_H_INCLUDED diff --git a/src/movepick.cpp b/src/movepick.cpp new file mode 100644 index 0000000000000000000000000000000000000000..188d6bd81723bf97d826484b793b0cbec3b5d885 --- /dev/null +++ b/src/movepick.cpp @@ -0,0 +1,296 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "bitboard.h" +#include "movepick.h" + +namespace Stockfish { + +namespace { + + enum Stages { + MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, REFUTATION, QUIET_INIT, QUIET, BAD_CAPTURE, + EVASION_TT, EVASION_INIT, EVASION, + PROBCUT_TT, PROBCUT_INIT, PROBCUT, + QSEARCH_TT, QCAPTURE_INIT, QCAPTURE, QCHECK_INIT, QCHECK + }; + + // partial_insertion_sort() sorts moves in descending order up to and including + // a given limit. The order of moves smaller than the limit is left unspecified. + void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { + + for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) + if (p->value >= limit) + { + ExtMove tmp = *p, *q; + *p = *++sortedEnd; + for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q) + *q = *(q - 1); + *q = tmp; + } + } + +} // namespace + + +/// Constructors of the MovePicker class. As arguments we pass information +/// to help it to return the (presumably) good moves first, to decide which +/// moves to return (in the quiescence search, for instance, we only want to +/// search captures, promotions, and some checks) and how important good move +/// ordering is at the current node. + +/// MovePicker constructor for the main search +MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Move cm, + const Move* killers) + : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), + ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d) +{ + assert(d > 0); + + stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + + !(ttm && pos.pseudo_legal(ttm)); + threatenedPieces = 0; +} + +/// MovePicker constructor for quiescence search +MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Square rs) + : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) +{ + assert(d <= 0); + + stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + + !( ttm + && pos.pseudo_legal(ttm)); +} + +/// MovePicker constructor for ProbCut: we generate captures with SEE greater +/// than or equal to the given threshold. +MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph) + : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) +{ + assert(!pos.checkers()); + + stage = PROBCUT_TT + !(ttm && pos.capture(ttm) + && pos.pseudo_legal(ttm) + && pos.see_ge(ttm, threshold)); +} + +/// MovePicker::score() assigns a numerical value to each move in a list, used +/// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring +/// captures with a good history. Quiets moves are ordered using the histories. +template +void MovePicker::score() { + + static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); + + [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook; + if constexpr (Type == QUIETS) + { + Color us = pos.side_to_move(); + + threatenedByPawn = pos.attacks_by(~us); + threatenedByMinor = pos.attacks_by(~us) | pos.attacks_by(~us) | threatenedByPawn; + threatenedByRook = pos.attacks_by(~us) | threatenedByMinor; + + // Pieces threatened by pieces of lesser material value + threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook) + | (pos.pieces(us, ROOK) & threatenedByMinor) + | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn); + } + + for (auto& m : *this) + if constexpr (Type == CAPTURES) + m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) + + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; + + else if constexpr (Type == QUIETS) + m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)] + + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)] + + (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)] + + (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)] + + (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)] + + (threatenedPieces & from_sq(m) ? + (type_of(pos.moved_piece(m)) == QUEEN && !(to_sq(m) & threatenedByRook) ? 50000 + : type_of(pos.moved_piece(m)) == ROOK && !(to_sq(m) & threatenedByMinor) ? 25000 + : !(to_sq(m) & threatenedByPawn) ? 15000 + : 0) + : 0) + + bool(pos.check_squares(type_of(pos.moved_piece(m))) & to_sq(m)) * 16384; + else // Type == EVASIONS + { + if (pos.capture(m)) + m.value = PieceValue[MG][pos.piece_on(to_sq(m))] + - Value(type_of(pos.moved_piece(m))) + + (1 << 28); + else + m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] + + (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]; + } +} + +/// MovePicker::select() returns the next move satisfying a predicate function. +/// It never returns the TT move. +template +Move MovePicker::select(Pred filter) { + + while (cur < endMoves) + { + if (T == Best) + std::swap(*cur, *std::max_element(cur, endMoves)); + + if (*cur != ttMove && filter()) + return *cur++; + + cur++; + } + return MOVE_NONE; +} + +/// MovePicker::next_move() is the most important method of the MovePicker class. It +/// returns a new pseudo-legal move every time it is called until there are no more +/// moves left, picking the move with the highest score from a list of generated moves. +Move MovePicker::next_move(bool skipQuiets) { + +top: + switch (stage) { + + case MAIN_TT: + case EVASION_TT: + case QSEARCH_TT: + case PROBCUT_TT: + ++stage; + return ttMove; + + case CAPTURE_INIT: + case PROBCUT_INIT: + case QCAPTURE_INIT: + cur = endBadCaptures = moves; + endMoves = generate(pos, cur); + + score(); + partial_insertion_sort(cur, endMoves, std::numeric_limits::min()); + ++stage; + goto top; + + case GOOD_CAPTURE: + if (select([&](){ + return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ? + // Move losing capture to endBadCaptures to be tried later + true : (*endBadCaptures++ = *cur, false); })) + return *(cur - 1); + + // Prepare the pointers to loop over the refutations array + cur = std::begin(refutations); + endMoves = std::end(refutations); + + // If the countermove is the same as a killer, skip it + if ( refutations[0].move == refutations[2].move + || refutations[1].move == refutations[2].move) + --endMoves; + + ++stage; + [[fallthrough]]; + + case REFUTATION: + if (select([&](){ return *cur != MOVE_NONE + && !pos.capture(*cur) + && pos.pseudo_legal(*cur); })) + return *(cur - 1); + ++stage; + [[fallthrough]]; + + case QUIET_INIT: + if (!skipQuiets) + { + cur = endBadCaptures; + endMoves = generate(pos, cur); + + score(); + partial_insertion_sort(cur, endMoves, -3000 * depth); + } + + ++stage; + [[fallthrough]]; + + case QUIET: + if ( !skipQuiets + && select([&](){return *cur != refutations[0].move + && *cur != refutations[1].move + && *cur != refutations[2].move;})) + return *(cur - 1); + + // Prepare the pointers to loop over the bad captures + cur = moves; + endMoves = endBadCaptures; + + ++stage; + [[fallthrough]]; + + case BAD_CAPTURE: + return select([](){ return true; }); + + case EVASION_INIT: + cur = moves; + endMoves = generate(pos, cur); + + score(); + ++stage; + [[fallthrough]]; + + case EVASION: + return select([](){ return true; }); + + case PROBCUT: + return select([&](){ return pos.see_ge(*cur, threshold); }); + + case QCAPTURE: + if (select([&](){ return depth > DEPTH_QS_RECAPTURES + || to_sq(*cur) == recaptureSquare; })) + return *(cur - 1); + + // If we did not find any move and we do not try checks, we have finished + if (depth != DEPTH_QS_CHECKS) + return MOVE_NONE; + + ++stage; + [[fallthrough]]; + + case QCHECK_INIT: + cur = moves; + endMoves = generate(pos, cur); + + ++stage; + [[fallthrough]]; + + case QCHECK: + return select([](){ return true; }); + } + + assert(false); + return MOVE_NONE; // Silence warning +} + +} // namespace Stockfish diff --git a/src/movepick.h b/src/movepick.h new file mode 100644 index 0000000000000000000000000000000000000000..e4c4a5bfdeaa6008db744a9e527bb822aca1f60c --- /dev/null +++ b/src/movepick.h @@ -0,0 +1,157 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef MOVEPICK_H_INCLUDED +#define MOVEPICK_H_INCLUDED + +#include +#include +#include + +#include "movegen.h" +#include "position.h" +#include "types.h" + +namespace Stockfish { + +/// StatsEntry stores the stat table value. It is usually a number but could +/// be a move or even a nested history. We use a class instead of naked value +/// to directly call history update operator<<() on the entry so to use stats +/// tables at caller sites as simple multi-dim arrays. +template +class StatsEntry { + + T entry; + +public: + void operator=(const T& v) { entry = v; } + T* operator&() { return &entry; } + T* operator->() { return &entry; } + operator const T&() const { return entry; } + + void operator<<(int bonus) { + assert(abs(bonus) <= D); // Ensure range is [-D, D] + static_assert(D <= std::numeric_limits::max(), "D overflows T"); + + entry += bonus - entry * abs(bonus) / D; + + assert(abs(entry) <= D); + } +}; + +/// Stats is a generic N-dimensional array used to store various statistics. +/// The first template parameter T is the base type of the array, the second +/// template parameter D limits the range of updates in [-D, D] when we update +/// values with the << operator, while the last parameters (Size and Sizes) +/// encode the dimensions of the array. +template +struct Stats : public std::array, Size> +{ + typedef Stats stats; + + void fill(const T& v) { + + // For standard-layout 'this' points to first struct member + assert(std::is_standard_layout::value); + + typedef StatsEntry entry; + entry* p = reinterpret_cast(this); + std::fill(p, p + sizeof(*this) / sizeof(entry), v); + } +}; + +template +struct Stats : public std::array, Size> {}; + +/// In stats table, D=0 means that the template parameter is not used +enum StatsParams { NOT_USED = 0 }; +enum StatsType { NoCaptures, Captures }; + +/// ButterflyHistory records how often quiet moves have been successful or +/// unsuccessful during the current search, and is used for reduction and move +/// ordering decisions. It uses 2 tables (one for each color) indexed by +/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards +/// (~11 elo) +typedef Stats ButterflyHistory; + +/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous +/// move, see www.chessprogramming.org/Countermove_Heuristic +typedef Stats CounterMoveHistory; + +/// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] +typedef Stats CapturePieceToHistory; + +/// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] +typedef Stats PieceToHistory; + +/// ContinuationHistory is the combined history of a given pair of moves, usually +/// the current one given a previous one. The nested history table is based on +/// PieceToHistory instead of ButterflyBoards. +/// (~63 elo) +typedef Stats ContinuationHistory; + + +/// MovePicker class is used to pick one pseudo-legal move at a time from the +/// current position. The most important method is next_move(), which returns a +/// new pseudo-legal move each time it is called, until there are no moves left, +/// when MOVE_NONE is returned. In order to improve the efficiency of the +/// alpha-beta algorithm, MovePicker attempts to return the moves which are most +/// likely to get a cut-off first. +class MovePicker { + + enum PickType { Next, Best }; + +public: + MovePicker(const MovePicker&) = delete; + MovePicker& operator=(const MovePicker&) = delete; + MovePicker(const Position&, Move, Depth, const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + Move, + const Move*); + MovePicker(const Position&, Move, Depth, const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + Square); + MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); + Move next_move(bool skipQuiets = false); + + Bitboard threatenedPieces; + +private: + template Move select(Pred); + template void score(); + ExtMove* begin() { return cur; } + ExtMove* end() { return endMoves; } + + const Position& pos; + const ButterflyHistory* mainHistory; + const CapturePieceToHistory* captureHistory; + const PieceToHistory** continuationHistory; + Move ttMove; + ExtMove refutations[3], *cur, *endMoves, *endBadCaptures; + int stage; + Square recaptureSquare; + Value threshold; + Depth depth; + ExtMove moves[MAX_MOVES]; +}; + +} // namespace Stockfish + +#endif // #ifndef MOVEPICK_H_INCLUDED diff --git a/src/nnue/.DS_Store b/src/nnue/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8bf8dd5d2296f616b0a51e071024851dd2509461 Binary files /dev/null and b/src/nnue/.DS_Store differ diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4715fed07d6c71e27e0d1fa69169f12f9f853017 --- /dev/null +++ b/src/nnue/evaluate_nnue.cpp @@ -0,0 +1,406 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Code for calculating NNUE evaluation function + +#include +#include +#include +#include +#include + +#include "../evaluate.h" +#include "../position.h" +#include "../misc.h" +#include "../uci.h" +#include "../types.h" + +#include "evaluate_nnue.h" + +namespace Stockfish::Eval::NNUE { + + // Input feature converter + LargePagePtr featureTransformer; + + // Evaluation function + AlignedPtr network[LayerStacks]; + + // Evaluation function file name + std::string fileName; + std::string netDescription; + + namespace Detail { + + // Initialize the evaluation function parameters + template + void initialize(AlignedPtr& pointer) { + + pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + + template + void initialize(LargePagePtr& pointer) { + + static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + + // Read evaluation function parameters + template + bool read_parameters(std::istream& stream, T& reference) { + + std::uint32_t header; + header = read_little_endian(stream); + if (!stream || header != T::get_hash_value()) return false; + return reference.read_parameters(stream); + } + + // Write evaluation function parameters + template + bool write_parameters(std::ostream& stream, const T& reference) { + + write_little_endian(stream, T::get_hash_value()); + return reference.write_parameters(stream); + } + + } // namespace Detail + + // Initialize the evaluation function parameters + void initialize() { + + Detail::initialize(featureTransformer); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(network[i]); + } + + // Read network header + bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) + { + std::uint32_t version, size; + + version = read_little_endian(stream); + *hashValue = read_little_endian(stream); + size = read_little_endian(stream); + if (!stream || version != Version) return false; + desc->resize(size); + stream.read(&(*desc)[0], size); + return !stream.fail(); + } + + // Write network header + bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) + { + write_little_endian(stream, Version); + write_little_endian(stream, hashValue); + write_little_endian(stream, (std::uint32_t)desc.size()); + stream.write(&desc[0], desc.size()); + return !stream.fail(); + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + + std::uint32_t hashValue; + if (!read_header(stream, &hashValue, &netDescription)) return false; + if (hashValue != HashValue) return false; + if (!Detail::read_parameters(stream, *featureTransformer)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::read_parameters(stream, *(network[i]))) return false; + return stream && stream.peek() == std::ios::traits_type::eof(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) { + + if (!write_header(stream, HashValue, netDescription)) return false; + if (!Detail::write_parameters(stream, *featureTransformer)) return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::write_parameters(stream, *(network[i]))) return false; + return (bool)stream; + } + + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos, bool adjusted, int* complexity) { + + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + + constexpr uint64_t alignment = CacheLineSize; + int delta = 24 - pos.non_pawn_material() / 9560; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType transformedFeaturesUnaligned[ + FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) + TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + + const int bucket = (pos.count() - 1) / 4; + const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + if (complexity) + *complexity = abs(psqt - positional) / OutputScale; + + // Give more value to positional evaluation when adjusted flag is set + if (adjusted) + return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) / (1024 * OutputScale)); + else + return static_cast((psqt + positional) / OutputScale); + } + + struct NnueEvalTrace { + static_assert(LayerStacks == PSQTBuckets); + + Value psqt[LayerStacks]; + Value positional[LayerStacks]; + std::size_t correctBucket; + }; + + static NnueEvalTrace trace_evaluate(const Position& pos) { + + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + + constexpr uint64_t alignment = CacheLineSize; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType transformedFeaturesUnaligned[ + FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) + TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + + NnueEvalTrace t{}; + t.correctBucket = (pos.count() - 1) / 4; + for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) { + const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + t.psqt[bucket] = static_cast( materialist / OutputScale ); + t.positional[bucket] = static_cast( positional / OutputScale ); + } + + return t; + } + + static const std::string PieceToChar(" PNBRQK pnbrqk"); + + + // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. + // The buffer must have capacity for at least 5 chars. + static void format_cp_compact(Value v, char* buffer) { + + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); + + int cp = std::abs(100 * v / UCI::NormalizeToPawnValue); + if (cp >= 10000) + { + buffer[1] = '0' + cp / 10000; cp %= 10000; + buffer[2] = '0' + cp / 1000; cp %= 1000; + buffer[3] = '0' + cp / 100; + buffer[4] = ' '; + } + else if (cp >= 1000) + { + buffer[1] = '0' + cp / 1000; cp %= 1000; + buffer[2] = '0' + cp / 100; cp %= 100; + buffer[3] = '.'; + buffer[4] = '0' + cp / 10; + } + else + { + buffer[1] = '0' + cp / 100; cp %= 100; + buffer[2] = '.'; + buffer[3] = '0' + cp / 10; cp %= 10; + buffer[4] = '0' + cp / 1; + } + } + + + // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer, + // always keeping two decimals. The buffer must have capacity for at least 7 chars. + static void format_cp_aligned_dot(Value v, char* buffer) { + + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); + + double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; + sprintf(&buffer[1], "%6.2f", cp); + } + + + // trace() returns a string with the value of each piece on a board, + // and a table for (PSQT, Layers) values bucket by bucket. + + std::string trace(Position& pos) { + + std::stringstream ss; + + char board[3*8+1][8*8+2]; + std::memset(board, ' ', sizeof(board)); + for (int row = 0; row < 3*8+1; ++row) + board[row][8*8+1] = '\0'; + + // A lambda to output one box of the board + auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) { + + const int x = ((int)file) * 8; + const int y = (7 - (int)rank) * 3; + for (int i = 1; i < 8; ++i) + board[y][x+i] = board[y+3][x+i] = '-'; + for (int i = 1; i < 3; ++i) + board[y+i][x] = board[y+i][x+8] = '|'; + board[y][x] = board[y][x+8] = board[y+3][x+8] = board[y+3][x] = '+'; + if (pc != NO_PIECE) + board[y+1][x+4] = PieceToChar[pc]; + if (value != VALUE_NONE) + format_cp_compact(value, &board[y+2][x+2]); + }; + + // We estimate the value of each piece by doing a differential evaluation from + // the current base eval, simulating the removal of the piece from its square. + Value base = evaluate(pos); + base = pos.side_to_move() == WHITE ? base : -base; + + for (File f = FILE_A; f <= FILE_H; ++f) + for (Rank r = RANK_1; r <= RANK_8; ++r) + { + Square sq = make_square(f, r); + Piece pc = pos.piece_on(sq); + Value v = VALUE_NONE; + + if (pc != NO_PIECE && type_of(pc) != KING) + { + auto st = pos.state(); + + pos.remove_piece(sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + + Value eval = evaluate(pos); + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; + + pos.put_piece(pc, sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + } + + writeSquare(f, r, pc, v); + } + + ss << " NNUE derived piece values:\n"; + for (int row = 0; row < 3*8+1; ++row) + ss << board[row] << '\n'; + ss << '\n'; + + auto t = trace_evaluate(pos); + + ss << " NNUE network contributions " + << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl + << "+------------+------------+------------+------------+\n" + << "| Bucket | Material | Positional | Total |\n" + << "| | (PSQT) | (Layers) | |\n" + << "+------------+------------+------------+------------+\n"; + + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) + { + char buffer[3][8]; + std::memset(buffer, '\0', sizeof(buffer)); + + format_cp_aligned_dot(t.psqt[bucket], buffer[0]); + format_cp_aligned_dot(t.positional[bucket], buffer[1]); + format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]); + + ss << "| " << bucket << " " + << " | " << buffer[0] << " " + << " | " << buffer[1] << " " + << " | " << buffer[2] << " " + << " |"; + if (bucket == t.correctBucket) + ss << " <-- this bucket is used"; + ss << '\n'; + } + + ss << "+------------+------------+------------+------------+\n"; + + return ss.str(); + } + + + // Load eval, from a file stream or a memory stream + bool load_eval(std::string name, std::istream& stream) { + + initialize(); + fileName = name; + return read_parameters(stream); + } + + // Save eval, to a file stream or a memory stream + bool save_eval(std::ostream& stream) { + + if (fileName.empty()) + return false; + + return write_parameters(stream); + } + + /// Save eval, to a file given by its name + bool save_eval(const std::optional& filename) { + + std::string actualFilename; + std::string msg; + + if (filename.has_value()) + actualFilename = filename.value(); + else + { + if (currentEvalFileName != EvalFileDefaultName) + { + msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; + + sync_cout << msg << sync_endl; + return false; + } + actualFilename = EvalFileDefaultName; + } + + std::ofstream stream(actualFilename, std::ios_base::binary); + bool saved = save_eval(stream); + + msg = saved ? "Network saved successfully to " + actualFilename + : "Failed to export a net"; + + sync_cout << msg << sync_endl; + return saved; + } + + +} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h new file mode 100644 index 0000000000000000000000000000000000000000..2e4f1f5098db640edfcfc680dc75ff8afeac83ad --- /dev/null +++ b/src/nnue/evaluate_nnue.h @@ -0,0 +1,59 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// header used in NNUE evaluation function + +#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED +#define NNUE_EVALUATE_NNUE_H_INCLUDED + +#include "nnue_feature_transformer.h" + +#include + +namespace Stockfish::Eval::NNUE { + + // Hash value of evaluation function structure + constexpr std::uint32_t HashValue = + FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); + + // Deleter for automating release of memory area + template + struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + std_aligned_free(ptr); + } + }; + + template + struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; + + template + using AlignedPtr = std::unique_ptr>; + + template + using LargePagePtr = std::unique_ptr>; + +} // namespace Stockfish::Eval::NNUE + +#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7dbd3415624c973d47727d5f6f0d92ebfe140766 --- /dev/null +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -0,0 +1,84 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +//Definition of input features HalfKAv2_hm of NNUE evaluation function + +#include "half_ka_v2_hm.h" + +#include "../../position.h" + +namespace Stockfish::Eval::NNUE::Features { + + // Index of a feature for a given king position and another piece on some square + template + inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) { + return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc] + KingBuckets[Perspective][ksq]); + } + + // Get a list of indices for active features + template + void HalfKAv2_hm::append_active_indices( + const Position& pos, + IndexList& active + ) { + Square ksq = pos.square(Perspective); + Bitboard bb = pos.pieces(); + while (bb) + { + Square s = pop_lsb(bb); + active.push_back(make_index(s, pos.piece_on(s), ksq)); + } + } + + // Explicit template instantiations + template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); + template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); + + // append_changed_indices() : get a list of indices for recently changed features + template + void HalfKAv2_hm::append_changed_indices( + Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added + ) { + for (int i = 0; i < dp.dirty_num; ++i) { + if (dp.from[i] != SQ_NONE) + removed.push_back(make_index(dp.from[i], dp.piece[i], ksq)); + if (dp.to[i] != SQ_NONE) + added.push_back(make_index(dp.to[i], dp.piece[i], ksq)); + } + } + + // Explicit template instantiations + template void HalfKAv2_hm::append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); + template void HalfKAv2_hm::append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); + + int HalfKAv2_hm::update_cost(const StateInfo* st) { + return st->dirtyPiece.dirty_num; + } + + int HalfKAv2_hm::refresh_cost(const Position& pos) { + return pos.count(); + } + + bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { + return st->dirtyPiece.piece[0] == make_piece(perspective, KING); + } + +} // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h new file mode 100644 index 0000000000000000000000000000000000000000..a95d4328bb4f55f25e618a740abfd6a9a72a7c11 --- /dev/null +++ b/src/nnue/features/half_ka_v2_hm.h @@ -0,0 +1,152 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +//Definition of input features HalfKP of NNUE evaluation function + +#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED + +#include "../nnue_common.h" + +#include "../../evaluate.h" +#include "../../misc.h" + +namespace Stockfish { + struct StateInfo; +} + +namespace Stockfish::Eval::NNUE::Features { + + // Feature HalfKAv2_hm: Combination of the position of own king + // and the position of pieces. Position mirrored such that king always on e..h files. + class HalfKAv2_hm { + + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 0, + PS_B_PAWN = 1 * SQUARE_NB, + PS_W_KNIGHT = 2 * SQUARE_NB, + PS_B_KNIGHT = 3 * SQUARE_NB, + PS_W_BISHOP = 4 * SQUARE_NB, + PS_B_BISHOP = 5 * SQUARE_NB, + PS_W_ROOK = 6 * SQUARE_NB, + PS_B_ROOK = 7 * SQUARE_NB, + PS_W_QUEEN = 8 * SQUARE_NB, + PS_B_QUEEN = 9 * SQUARE_NB, + PS_KING = 10 * SQUARE_NB, + PS_NB = 11 * SQUARE_NB + }; + + static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE }, + { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE } + }; + + // Index of a feature for a given king position and another piece on some square + template + static IndexType make_index(Square s, Piece pc, Square ksq); + + public: + // Feature name + static constexpr const char* Name = "HalfKAv2_hm(Friend)"; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t HashValue = 0x7f234cb8u; + + // Number of feature dimensions + static constexpr IndexType Dimensions = + static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; + +#define B(v) (v * PS_NB) + static constexpr int KingBuckets[COLOR_NB][SQUARE_NB] = { + { B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28), + B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), + B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20), + B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16), + B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12), + B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8), + B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4), + B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0) }, + { B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0), + B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4), + B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8), + B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12), + B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16), + B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20), + B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), + B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28) } + }; +#undef B + + // Orient a square according to perspective (rotates by 180 for black) + static constexpr int OrientTBL[COLOR_NB][SQUARE_NB] = { + { SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, + SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 }, + { SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, + SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8 } + }; + + // Maximum number of simultaneously active features. + static constexpr IndexType MaxActiveDimensions = 32; + using IndexList = ValueList; + + // Get a list of indices for active features + template + static void append_active_indices( + const Position& pos, + IndexList& active); + + // Get a list of indices for recently changed features + template + static void append_changed_indices( + Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added + ); + + // Returns the cost of updating one perspective, the most costly one. + // Assumes no refresh needed. + static int update_cost(const StateInfo* st); + static int refresh_cost(const Position& pos); + + // Returns whether the change stored in this StateInfo means that + // a full accumulator refresh is required. + static bool requires_refresh(const StateInfo* st, Color perspective); + }; + +} // namespace Stockfish::Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h new file mode 100644 index 0000000000000000000000000000000000000000..461a7b83eca829a9a45c5d215e11dd5991280af6 --- /dev/null +++ b/src/nnue/layers/affine_transform.h @@ -0,0 +1,545 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer AffineTransform of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED + +#include +#include +#include +#include "../nnue_common.h" +#include "simd.h" + +/* + This file contains the definition for a fully connected layer (aka affine transform). + Two approaches are employed, depending on the sizes of the transform. + + Approach 1: + - used when the PaddedInputDimensions >= 128 + - uses AVX512 if possible + - processes inputs in batches of 2*InputSimdWidth + - so in batches of 128 for AVX512 + - the weight blocks of size InputSimdWidth are transposed such that + access is sequential + - N columns of the weight matrix are processed a time, where N + depends on the architecture (the amount of registers) + - accumulate + hadd is used + + Approach 2: + - used when the PaddedInputDimensions < 128 + - does not use AVX512 + - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. + - that's why AVX512 is hard to implement + - expected use-case is small layers + - not optimized as well as the approach 1 + - inputs are processed in chunks of 4, weights are respectively transposed + - accumulation happens directly to int32s +*/ + +namespace Stockfish::Eval::NNUE::Layers { + +// Fallback implementation for older/other architectures. +// Identical for both approaches. Requires the input to be padded to at least 16 values. +#if !defined(USE_SSSE3) + template + static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) + { +# if defined(USE_SSE2) + // At least a multiple of 16, with SSE2. + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const __m128i Zeros = _mm_setzero_si128(); + const auto inputVector = reinterpret_cast(input); + +# elif defined(USE_MMX) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 8; + const __m64 Zeros = _mm_setzero_si64(); + const auto inputVector = reinterpret_cast(input); + +# elif defined(USE_NEON) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); +# endif + + for (IndexType i = 0; i < OutputDimensions; ++i) { + const IndexType offset = i * PaddedInputDimensions; + +# if defined(USE_SSE2) + __m128i sumLo = _mm_cvtsi32_si128(biases[i]); + __m128i sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&inputVector[j]); + __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); + __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); + __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); + __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); + __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); + __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_epi32(sumLo, productLo); + sumHi = _mm_add_epi32(sumHi, productHi); + } + __m128i sum = _mm_add_epi32(sumLo, sumHi); + __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sumHigh_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); + output[i] = _mm_cvtsi128_si32(sum); + +# elif defined(USE_MMX) + __m64 sumLo = _mm_cvtsi32_si64(biases[i]); + __m64 sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + __m64 row_j = row[j]; + __m64 input_j = inputVector[j]; + __m64 extendedRowLo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8); + __m64 extendedRowHi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8); + __m64 extendedInputLo = _mm_unpacklo_pi8(input_j, Zeros); + __m64 extendedInputHi = _mm_unpackhi_pi8(input_j, Zeros); + __m64 productLo = _mm_madd_pi16(extendedRowLo, extendedInputLo); + __m64 productHi = _mm_madd_pi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_pi32(sumLo, productLo); + sumHi = _mm_add_pi32(sumHi, productHi); + } + __m64 sum = _mm_add_pi32(sumLo, sumHi); + sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); + output[i] = _mm_cvtsi64_si32(sum); + +# elif defined(USE_NEON) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); + product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + +# else + std::int32_t sum = biases[i]; + for (IndexType j = 0; j < InputDimensions; ++j) { + sum += weights[offset + j] * input[j]; + } + output[i] = sum; +# endif + } + +# if defined(USE_MMX) + _mm_empty(); +# endif + } +#endif + + template + class AffineTransform; + +#if defined (USE_AVX512) + constexpr IndexType LargeInputSize = 2 * 64; +#else + constexpr IndexType LargeInputSize = std::numeric_limits::max(); +#endif + + // A specialization for large inputs. + template + class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> { + public: + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen."); + +#if defined (USE_AVX512) + static constexpr const IndexType InputSimdWidth = 64; + static constexpr const IndexType MaxNumOutputRegs = 16; +#elif defined (USE_AVX2) + static constexpr const IndexType InputSimdWidth = 32; + static constexpr const IndexType MaxNumOutputRegs = 8; +#elif defined (USE_SSSE3) + static constexpr const IndexType InputSimdWidth = 16; + static constexpr const IndexType MaxNumOutputRegs = 8; +#elif defined (USE_NEON) + static constexpr const IndexType InputSimdWidth = 8; + static constexpr const IndexType MaxNumOutputRegs = 8; +#else + // The fallback implementation will not have permuted weights. + // We define these to avoid a lot of ifdefs later. + static constexpr const IndexType InputSimdWidth = 1; + static constexpr const IndexType MaxNumOutputRegs = 1; +#endif + + // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs]. + // A small block is a region of size [InputSimdWidth, 1] + + static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); + static constexpr const IndexType SmallBlockSize = InputSimdWidth; + static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; + static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; + static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; + static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; + + static_assert(OutputDimensions % NumOutputRegs == 0); + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + /* + Transposes the small blocks within a block. + Effectively means that weights can be traversed sequentially during inference. + */ + static IndexType get_weight_index(IndexType i) + { + const IndexType smallBlock = (i / SmallBlockSize) % NumSmallBlocksInBigBlock; + const IndexType smallBlockCol = smallBlock / NumSmallBlocksPerOutput; + const IndexType smallBlockRow = smallBlock % NumSmallBlocksPerOutput; + const IndexType bigBlock = i / BigBlockSize; + const IndexType rest = i % SmallBlockSize; + + const IndexType idx = + bigBlock * BigBlockSize + + smallBlockRow * SmallBlockSize * NumOutputRegs + + smallBlockCol * SmallBlockSize + + rest; + + return idx; + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + for (IndexType i = 0; i < OutputDimensions; ++i) + biases[i] = read_little_endian(stream); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + for (IndexType i = 0; i < OutputDimensions; ++i) + write_little_endian(stream, biases[i]); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + +#if defined (USE_AVX512) + using acc_vec_t = __m512i; + using bias_vec_t = __m128i; + using weight_vec_t = __m512i; + using in_vec_t = __m512i; + #define vec_zero _mm512_setzero_si512() + #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 + #define vec_hadd Simd::m512_hadd + #define vec_haddx4 Simd::m512_haddx4 +#elif defined (USE_AVX2) + using acc_vec_t = __m256i; + using bias_vec_t = __m128i; + using weight_vec_t = __m256i; + using in_vec_t = __m256i; + #define vec_zero _mm256_setzero_si256() + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_hadd Simd::m256_hadd + #define vec_haddx4 Simd::m256_haddx4 +#elif defined (USE_SSSE3) + using acc_vec_t = __m128i; + using bias_vec_t = __m128i; + using weight_vec_t = __m128i; + using in_vec_t = __m128i; + #define vec_zero _mm_setzero_si128() + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::m128_hadd + #define vec_haddx4 Simd::m128_haddx4 +#elif defined (USE_NEON) + using acc_vec_t = int32x4_t; + using bias_vec_t = int32x4_t; + using weight_vec_t = int8x8_t; + using in_vec_t = int8x8_t; + #define vec_zero {0} + #define vec_add_dpbusd_32x2 Simd::neon_m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::neon_m128_hadd + #define vec_haddx4 Simd::neon_m128_haddx4 +#endif + +#if defined (USE_SSSE3) || defined (USE_NEON) + const in_vec_t* invec = reinterpret_cast(input); + + // Perform accumulation to registers for each big block + for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock) + { + acc_vec_t acc[NumOutputRegs] = { vec_zero }; + + // Each big block has NumOutputRegs small blocks in each "row", one per register. + // We process two small blocks at a time to save on one addition without VNNI. + for (IndexType smallBlock = 0; smallBlock < NumSmallBlocksPerOutput; smallBlock += 2) + { + const weight_vec_t* weightvec = + reinterpret_cast( + weights + + bigBlock * BigBlockSize + + smallBlock * SmallBlockSize * NumOutputRegs); + + const in_vec_t in0 = invec[smallBlock + 0]; + const in_vec_t in1 = invec[smallBlock + 1]; + + for (IndexType k = 0; k < NumOutputRegs; ++k) + vec_add_dpbusd_32x2(acc[k], in0, weightvec[k], in1, weightvec[k + NumOutputRegs]); + } + + // Horizontally add all accumulators. + if constexpr (NumOutputRegs % 4 == 0) + { + bias_vec_t* outputvec = reinterpret_cast(output); + const bias_vec_t* biasvec = reinterpret_cast(biases); + + for (IndexType k = 0; k < NumOutputRegs; k += 4) + { + const IndexType idx = (bigBlock * NumOutputRegs + k) / 4; + outputvec[idx] = vec_haddx4(acc[k+0], acc[k+1], acc[k+2], acc[k+3], biasvec[idx]); + } + } + else + { + for (IndexType k = 0; k < NumOutputRegs; ++k) + { + const IndexType idx = (bigBlock * NumOutputRegs + k); + output[idx] = vec_hadd(acc[k], biases[idx]); + } + } + } + +# undef vec_zero +# undef vec_add_dpbusd_32x2 +# undef vec_hadd +# undef vec_haddx4 +#else + // Use old implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); + +#endif + + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; + }; + + template + class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> { + public: + // Input/output type + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen."); + +#if defined (USE_SSSE3) + static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; + static constexpr const IndexType InputSimdWidth = SimdWidth; +#endif + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + static IndexType get_weight_index_scrambled(IndexType i) + { + return + (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + + i % 4; + } + + static IndexType get_weight_index(IndexType i) + { +#if defined (USE_SSSE3) + return get_weight_index_scrambled(i); +#else + return i; +#endif + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + for (IndexType i = 0; i < OutputDimensions; ++i) + biases[i] = read_little_endian(stream); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + for (IndexType i = 0; i < OutputDimensions; ++i) + write_little_endian(stream, biases[i]); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + +#if defined (USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4 + #define vec_hadd Simd::m256_hadd + #define vec_haddx4 Simd::m256_haddx4 +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4 + #define vec_hadd Simd::m128_hadd + #define vec_haddx4 Simd::m128_haddx4 +#endif + +#if defined (USE_SSSE3) + const auto inputVector = reinterpret_cast(input); + + static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); + + if constexpr (OutputDimensions % OutputSimdWidth == 0) + { + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + + const auto input32 = reinterpret_cast(input); + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType i = 0; i < NumChunks; i += 2) + { + const vec_t in0 = vec_set_32(input32[i + 0]); + const vec_t in1 = vec_set_32(input32[i + 1]); + const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); + const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + } + else if constexpr (OutputDimensions == 1) + { + constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth; + vec_t sum0 = vec_setzero(); + const auto row0 = reinterpret_cast(&weights[0]); + + for (int j = 0; j < (int)NumChunks; ++j) + { + const vec_t in = inputVector[j]; + vec_add_dpbusd_32(sum0, in, row0[j]); + } + output[0] = vec_hadd(sum0, biases[0]); + } + +# undef vec_setzero +# undef vec_set_32 +# undef vec_add_dpbusd_32 +# undef vec_add_dpbusd_32x2 +# undef vec_add_dpbusd_32x4 +# undef vec_hadd +# undef vec_haddx4 +#else + // Use old implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); +#endif + + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; + }; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h new file mode 100644 index 0000000000000000000000000000000000000000..f94d30828d73ad7ce71b56be3c475071b92de257 --- /dev/null +++ b/src/nnue/layers/clipped_relu.h @@ -0,0 +1,180 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer ClippedReLU of NNUE evaluation function + +#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED + +#include "../nnue_common.h" + +namespace Stockfish::Eval::NNUE::Layers { + + // Clipped ReLU + template + class ClippedReLU { + public: + // Input/output type + using InputType = std::int32_t; + using OutputType = std::uint8_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = InputDimensions; + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, 32); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream&) { + return true; + } + + // Write network parameters + bool write_parameters(std::ostream&) const { + return true; + } + + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + + #if defined(USE_AVX2) + if constexpr (InputDimensions % SimdWidth == 0) { + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m256i Zero = _mm256_setzero_si256(); + const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); + const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( + _mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); + _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_packs_epi16(words0, words1), Zero), Offsets)); + } + } else { + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const __m128i Zero = _mm_setzero_si128(); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + } + } + constexpr IndexType Start = + InputDimensions % SimdWidth == 0 + ? InputDimensions / SimdWidth * SimdWidth + : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); + + #elif defined(USE_SSE2) + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + + #ifdef USE_SSE41 + const __m128i Zero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, Zero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + constexpr IndexType Start = NumChunks * SimdWidth; + + #elif defined(USE_MMX) + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m64*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m64 words0 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), + WeightScaleBits); + const __m64 words1 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), + WeightScaleBits); + const __m64 packedbytes = _mm_packs_pi16(words0, words1); + out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + _mm_empty(); + constexpr IndexType Start = NumChunks * SimdWidth; + + #elif defined(USE_NEON) + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const int8x8_t Zero = {0}; + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast(output); + for (IndexType i = 0; i < NumChunks; ++i) { + int16x8_t shifted; + const auto pack = reinterpret_cast(&shifted); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), Zero); + } + constexpr IndexType Start = NumChunks * (SimdWidth / 2); + #else + constexpr IndexType Start = 0; + #endif + + for (IndexType i = Start; i < InputDimensions; ++i) { + output[i] = static_cast( + std::max(0, std::min(127, input[i] >> WeightScaleBits))); + } + + return output; + } + }; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h new file mode 100644 index 0000000000000000000000000000000000000000..7b9e8fb23b7754d470ed8a34b2103186ae1fa818 --- /dev/null +++ b/src/nnue/layers/simd.h @@ -0,0 +1,387 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef STOCKFISH_SIMD_H_INCLUDED +#define STOCKFISH_SIMD_H_INCLUDED + +#if defined(USE_AVX2) +# include + +#elif defined(USE_SSE41) +# include + +#elif defined(USE_SSSE3) +# include + +#elif defined(USE_SSE2) +# include + +#elif defined(USE_MMX) +# include + +#elif defined(USE_NEON) +# include +#endif + +// The inline asm is only safe for GCC, where it is necessary to get good codegen. +// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693 +// Clang does fine without it. +// Play around here: https://godbolt.org/z/7EWqrYq51 +#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)) +#define USE_INLINE_ASM +#endif + +// Use either the AVX512 or AVX-VNNI version of the VNNI instructions. +#if defined(USE_AVXVNNI) +#define VNNI_PREFIX "%{vex%} " +#else +#define VNNI_PREFIX "" +#endif + +namespace Stockfish::Simd { + +#if defined (USE_AVX512) + + [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { + return _mm512_reduce_add_epi32(sum) + bias; + } + + /* + Parameters: + sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] + sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] + sum2 = [zmm2.i128[0], zmm2.i128[1], zmm2.i128[2], zmm2.i128[3]] + sum3 = [zmm3.i128[0], zmm3.i128[1], zmm3.i128[2], zmm3.i128[3]] + + Returns: + ret = [ + reduce_add_epi32(zmm0.i128[0]), reduce_add_epi32(zmm1.i128[0]), reduce_add_epi32(zmm2.i128[0]), reduce_add_epi32(zmm3.i128[0]), + reduce_add_epi32(zmm0.i128[1]), reduce_add_epi32(zmm1.i128[1]), reduce_add_epi32(zmm2.i128[1]), reduce_add_epi32(zmm3.i128[1]), + reduce_add_epi32(zmm0.i128[2]), reduce_add_epi32(zmm1.i128[2]), reduce_add_epi32(zmm2.i128[2]), reduce_add_epi32(zmm3.i128[2]), + reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) + ] + */ + [[maybe_unused]] static __m512i m512_hadd128x16_interleave( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { + + __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); + __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); + + __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); + __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); + + __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); + __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); + + __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); + __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); + + return _mm512_add_epi32(sum0123a, sum0123b); + } + + [[maybe_unused]] static __m128i m512_haddx4( + __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, + __m128i bias) { + + __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); + + __m256i sum256lo = _mm512_castsi512_si256(sum); + __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); + + sum256lo = _mm256_add_epi32(sum256lo, sum256hi); + + __m128i sum128lo = _mm256_castsi256_si128(sum256lo); + __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + } + + [[maybe_unused]] static void m512_add_dpbusd_epi32( + __m512i& acc, + __m512i a, + __m512i b) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + "vpdpbusd %[b], %[a], %[acc]\n\t" + : [acc]"+v"(acc) + : [a]"v"(a), [b]"vm"(b) + ); +# else + acc = _mm512_dpbusd_epi32(acc, a, b); +# endif +# else +# if defined (USE_INLINE_ASM) + __m512i tmp = _mm512_maddubs_epi16(a, b); + asm( + "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" + "vpaddd %[acc], %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm512_set1_epi16(1)) + ); +# else + __m512i product0 = _mm512_maddubs_epi16(a, b); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); +# endif +# endif + } + + [[maybe_unused]] static void m512_add_dpbusd_epi32x2( + __m512i& acc, + __m512i a0, __m512i b0, + __m512i a1, __m512i b1) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + "vpdpbusd %[b0], %[a0], %[acc]\n\t" + "vpdpbusd %[b1], %[a1], %[acc]\n\t" + : [acc]"+v"(acc) + : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) + ); +# else + acc = _mm512_dpbusd_epi32(acc, a0, b0); + acc = _mm512_dpbusd_epi32(acc, a1, b1); +# endif +# else +# if defined (USE_INLINE_ASM) + __m512i tmp0 = _mm512_maddubs_epi16(a0, b0); + __m512i tmp1 = _mm512_maddubs_epi16(a1, b1); + asm( + "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" + "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpaddd %[acc], %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1)) + ); +# else + __m512i product0 = _mm512_maddubs_epi16(a0, b0); + __m512i product1 = _mm512_maddubs_epi16(a1, b1); + product0 = _mm512_adds_epi16(product0, product1); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); +# endif +# endif + } + +#endif + +#if defined (USE_AVX2) + + [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + return _mm_cvtsi128_si32(sum128) + bias; + } + + [[maybe_unused]] static __m128i m256_haddx4( + __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, + __m128i bias) { + + sum0 = _mm256_hadd_epi32(sum0, sum1); + sum2 = _mm256_hadd_epi32(sum2, sum3); + + sum0 = _mm256_hadd_epi32(sum0, sum2); + + __m128i sum128lo = _mm256_castsi256_si128(sum0); + __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); + + return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); + } + + [[maybe_unused]] static void m256_add_dpbusd_epi32( + __m256i& acc, + __m256i a, + __m256i b) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t" + : [acc]"+v"(acc) + : [a]"v"(a), [b]"vm"(b) + ); +# else + acc = _mm256_dpbusd_epi32(acc, a, b); +# endif +# else +# if defined (USE_INLINE_ASM) + __m256i tmp = _mm256_maddubs_epi16(a, b); + asm( + "vpmaddwd %[tmp], %[ones], %[tmp]\n\t" + "vpaddd %[acc], %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm256_set1_epi16(1)) + ); +# else + __m256i product0 = _mm256_maddubs_epi16(a, b); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); +# endif +# endif + } + + [[maybe_unused]] static void m256_add_dpbusd_epi32x2( + __m256i& acc, + __m256i a0, __m256i b0, + __m256i a1, __m256i b1) { + +# if defined (USE_VNNI) +# if defined (USE_INLINE_ASM) + asm( + VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t" + VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t" + : [acc]"+v"(acc) + : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) + ); +# else + acc = _mm256_dpbusd_epi32(acc, a0, b0); + acc = _mm256_dpbusd_epi32(acc, a1, b1); +# endif +# else +# if defined (USE_INLINE_ASM) + __m256i tmp0 = _mm256_maddubs_epi16(a0, b0); + __m256i tmp1 = _mm256_maddubs_epi16(a1, b1); + asm( + "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" + "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpaddd %[acc], %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1)) + ); +# else + __m256i product0 = _mm256_maddubs_epi16(a0, b0); + __m256i product1 = _mm256_maddubs_epi16(a1, b1); + product0 = _mm256_adds_epi16(product0, product1); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); +# endif +# endif + } + +#endif + +#if defined (USE_SSSE3) + + [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + return _mm_cvtsi128_si32(sum) + bias; + } + + [[maybe_unused]] static __m128i m128_haddx4( + __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, + __m128i bias) { + + sum0 = _mm_hadd_epi32(sum0, sum1); + sum2 = _mm_hadd_epi32(sum2, sum3); + sum0 = _mm_hadd_epi32(sum0, sum2); + return _mm_add_epi32(sum0, bias); + } + + [[maybe_unused]] static void m128_add_dpbusd_epi32( + __m128i& acc, + __m128i a, + __m128i b) { + +# if defined (USE_INLINE_ASM) + __m128i tmp = _mm_maddubs_epi16(a, b); + asm( + "pmaddwd %[ones], %[tmp]\n\t" + "paddd %[tmp], %[acc]\n\t" + : [acc]"+v"(acc), [tmp]"+&v"(tmp) + : [ones]"v"(_mm_set1_epi16(1)) + ); +# else + __m128i product0 = _mm_maddubs_epi16(a, b); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +# endif + } + + [[maybe_unused]] static void m128_add_dpbusd_epi32x2( + __m128i& acc, + __m128i a0, __m128i b0, + __m128i a1, __m128i b1) { + +# if defined (USE_INLINE_ASM) + __m128i tmp0 = _mm_maddubs_epi16(a0, b0); + __m128i tmp1 = _mm_maddubs_epi16(a1, b1); + asm( + "paddsw %[tmp1], %[tmp0]\n\t" + "pmaddwd %[ones], %[tmp0]\n\t" + "paddd %[tmp0], %[acc]\n\t" + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) + : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1)) + ); +# else + __m128i product0 = _mm_maddubs_epi16(a0, b0); + __m128i product1 = _mm_maddubs_epi16(a1, b1); + product0 = _mm_adds_epi16(product0, product1); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +# endif + } + +#endif + +#if defined (USE_NEON) + + [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { +# if USE_NEON >= 8 + return vaddvq_s32(s); +# else + return s[0] + s[1] + s[2] + s[3]; +# endif + } + + [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { + return neon_m128_reduce_add_epi32(sum) + bias; + } + + [[maybe_unused]] static int32x4_t neon_m128_haddx4( + int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3, + int32x4_t bias) { + + int32x4_t hsums { + neon_m128_reduce_add_epi32(sum0), + neon_m128_reduce_add_epi32(sum1), + neon_m128_reduce_add_epi32(sum2), + neon_m128_reduce_add_epi32(sum3) + }; + return vaddq_s32(hsums, bias); + } + + [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( + int32x4_t& acc, + int8x8_t a0, int8x8_t b0, + int8x8_t a1, int8x8_t b1) { + + int16x8_t product = vmull_s8(a0, b0); + product = vmlal_s8(product, a1, b1); + acc = vpadalq_s16(acc, product); + } + +#endif + +} + +#endif // STOCKFISH_SIMD_H_INCLUDED diff --git a/src/nnue/layers/sqr_clipped_relu.h b/src/nnue/layers/sqr_clipped_relu.h new file mode 100644 index 0000000000000000000000000000000000000000..b603a277fa2536770fcbe65400643afc4e0763f8 --- /dev/null +++ b/src/nnue/layers/sqr_clipped_relu.h @@ -0,0 +1,120 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer ClippedReLU of NNUE evaluation function + +#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED +#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED + +#include "../nnue_common.h" + +namespace Stockfish::Eval::NNUE::Layers { + + // Clipped ReLU + template + class SqrClippedReLU { + public: + // Input/output type + using InputType = std::int32_t; + using OutputType = std::uint8_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = InputDimensions; + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, 32); + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream&) { + return true; + } + + // Write network parameters + bool write_parameters(std::ostream&) const { + return true; + } + + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + + #if defined(USE_SSE2) + constexpr IndexType NumChunks = InputDimensions / 16; + + #ifdef USE_SSE41 + const __m128i Zero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + static_assert(WeightScaleBits == 6); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + __m128i words0 = _mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])); + __m128i words1 = _mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])); + + // Not sure if + words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); + words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); + + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, Zero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + constexpr IndexType Start = NumChunks * 16; + + #else + constexpr IndexType Start = 0; + #endif + + for (IndexType i = Start; i < InputDimensions; ++i) { + output[i] = static_cast( + // realy should be /127 but we need to make it fast + // needs to be accounted for in the trainer + std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128))); + } + + return output; + } + }; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h new file mode 100644 index 0000000000000000000000000000000000000000..600483b5cfdc2c0875280d4db9df706562077e69 --- /dev/null +++ b/src/nnue/nnue_accumulator.h @@ -0,0 +1,37 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Class for difference calculation of NNUE evaluation function + +#ifndef NNUE_ACCUMULATOR_H_INCLUDED +#define NNUE_ACCUMULATOR_H_INCLUDED + +#include "nnue_architecture.h" + +namespace Stockfish::Eval::NNUE { + + // Class that holds the result of affine transformation of input features + struct alignas(CacheLineSize) Accumulator { + std::int16_t accumulation[2][TransformedFeatureDimensions]; + std::int32_t psqtAccumulation[2][PSQTBuckets]; + bool computed[2]; + }; + +} // namespace Stockfish::Eval::NNUE + +#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h new file mode 100644 index 0000000000000000000000000000000000000000..cac8373075e368b37e3c72493490c7e23920afcc --- /dev/null +++ b/src/nnue/nnue_architecture.h @@ -0,0 +1,138 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Input features and network structure used in NNUE evaluation function + +#ifndef NNUE_ARCHITECTURE_H_INCLUDED +#define NNUE_ARCHITECTURE_H_INCLUDED + +#include + +#include "nnue_common.h" + +#include "features/half_ka_v2_hm.h" + +#include "layers/affine_transform.h" +#include "layers/clipped_relu.h" +#include "layers/sqr_clipped_relu.h" + +#include "../misc.h" + +namespace Stockfish::Eval::NNUE { + +// Input features used in evaluation function +using FeatureSet = Features::HalfKAv2_hm; + +// Number of input feature dimensions after conversion +constexpr IndexType TransformedFeatureDimensions = 1024; +constexpr IndexType PSQTBuckets = 8; +constexpr IndexType LayerStacks = 8; + +struct Network +{ + static constexpr int FC_0_OUTPUTS = 15; + static constexpr int FC_1_OUTPUTS = 32; + + Layers::AffineTransform fc_0; + Layers::SqrClippedReLU ac_sqr_0; + Layers::ClippedReLU ac_0; + Layers::AffineTransform fc_1; + Layers::ClippedReLU ac_1; + Layers::AffineTransform fc_2; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value() { + // input slice hash + std::uint32_t hashValue = 0xEC42E90Du; + hashValue ^= TransformedFeatureDimensions * 2; + + hashValue = decltype(fc_0)::get_hash_value(hashValue); + hashValue = decltype(ac_0)::get_hash_value(hashValue); + hashValue = decltype(fc_1)::get_hash_value(hashValue); + hashValue = decltype(ac_1)::get_hash_value(hashValue); + hashValue = decltype(fc_2)::get_hash_value(hashValue); + + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + if (!fc_0.read_parameters(stream)) return false; + if (!ac_0.read_parameters(stream)) return false; + if (!fc_1.read_parameters(stream)) return false; + if (!ac_1.read_parameters(stream)) return false; + if (!fc_2.read_parameters(stream)) return false; + return true; + } + + // Read network parameters + bool write_parameters(std::ostream& stream) const { + if (!fc_0.write_parameters(stream)) return false; + if (!ac_0.write_parameters(stream)) return false; + if (!fc_1.write_parameters(stream)) return false; + if (!ac_1.write_parameters(stream)) return false; + if (!fc_2.write_parameters(stream)) return false; + return true; + } + + std::int32_t propagate(const TransformedFeatureType* transformedFeatures) + { + struct alignas(CacheLineSize) Buffer + { + alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; + alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; + alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; + alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; + alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; + alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; + + Buffer() + { + std::memset(this, 0, sizeof(*this)); + } + }; + +#if defined(__clang__) && (__APPLE__) + // workaround for a bug reported with xcode 12 + static thread_local auto tlsBuffer = std::make_unique(); + // Access TLS only once, cache result. + Buffer& buffer = *tlsBuffer; +#else + alignas(CacheLineSize) static thread_local Buffer buffer; +#endif + + fc_0.propagate(transformedFeatures, buffer.fc_0_out); + ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); + ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); + std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, FC_0_OUTPUTS * sizeof(decltype(ac_0)::OutputType)); + fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); + ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); + fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); + + // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<. +*/ + +// Constants used in NNUE evaluation function + +#ifndef NNUE_COMMON_H_INCLUDED +#define NNUE_COMMON_H_INCLUDED + +#include +#include + +#include "../misc.h" // for IsLittleEndian + +#if defined(USE_AVX2) +#include + +#elif defined(USE_SSE41) +#include + +#elif defined(USE_SSSE3) +#include + +#elif defined(USE_SSE2) +#include + +#elif defined(USE_MMX) +#include + +#elif defined(USE_NEON) +#include +#endif + +namespace Stockfish::Eval::NNUE { + + // Version of the evaluation file + constexpr std::uint32_t Version = 0x7AF32F20u; + + // Constant used in evaluation value calculation + constexpr int OutputScale = 16; + constexpr int WeightScaleBits = 6; + + // Size of cache line (in bytes) + constexpr std::size_t CacheLineSize = 64; + + // SIMD width (in bytes) + #if defined(USE_AVX2) + constexpr std::size_t SimdWidth = 32; + + #elif defined(USE_SSE2) + constexpr std::size_t SimdWidth = 16; + + #elif defined(USE_MMX) + constexpr std::size_t SimdWidth = 8; + + #elif defined(USE_NEON) + constexpr std::size_t SimdWidth = 16; + #endif + + constexpr std::size_t MaxSimdWidth = 32; + + // Type of input feature after conversion + using TransformedFeatureType = std::uint8_t; + using IndexType = std::uint32_t; + + // Round n up to be a multiple of base + template + constexpr IntType ceil_to_multiple(IntType n, IntType base) { + return (n + base - 1) / base * base; + } + + // read_little_endian() is our utility to read an integer (signed or unsigned, any size) + // from a stream in little-endian order. We swap the byte order after the read if + // necessary to return a result with the byte ordering of the compiling machine. + template + inline IntType read_little_endian(std::istream& stream) { + IntType result; + + if (IsLittleEndian) + stream.read(reinterpret_cast(&result), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + } + + return result; + } + + // write_little_endian() is our utility to write an integer (signed or unsigned, any size) + // to a stream in little-endian order. We swap the byte order before the write if + // necessary to always write in little endian order, independently of the byte + // ordering of the compiling machine. + template + inline void write_little_endian(std::ostream& stream, IntType value) { + + if (IsLittleEndian) + stream.write(reinterpret_cast(&value), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = value; + + std::size_t i = 0; + // if constexpr to silence the warning about shift by 8 + if constexpr (sizeof(IntType) > 1) + { + for (; i + 1 < sizeof(IntType); ++i) + { + u[i] = (std::uint8_t)v; + v >>= 8; + } + } + u[i] = (std::uint8_t)v; + + stream.write(reinterpret_cast(u), sizeof(IntType)); + } + } + + // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. + // This reads N integers from stream s and put them in array out. + template + inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { + if (IsLittleEndian) + stream.read(reinterpret_cast(out), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + out[i] = read_little_endian(stream); + } + + // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. + // This takes N integers from array values and writes them on stream s. + template + inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { + if (IsLittleEndian) + stream.write(reinterpret_cast(values), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + write_little_endian(stream, values[i]); + } + +} // namespace Stockfish::Eval::NNUE + +#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h new file mode 100644 index 0000000000000000000000000000000000000000..b6dd54d3378909ea44c8a1934f3037df2ff97043 --- /dev/null +++ b/src/nnue/nnue_feature_transformer.h @@ -0,0 +1,589 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// A class that converts the input features of the NNUE evaluation function + +#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED + +#include "nnue_common.h" +#include "nnue_architecture.h" + +#include // std::memset() + +namespace Stockfish::Eval::NNUE { + + using BiasType = std::int16_t; + using WeightType = std::int16_t; + using PSQTWeightType = std::int32_t; + + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. + #define VECTOR + + static_assert(PSQTBuckets % 8 == 0, + "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); + + #ifdef USE_AVX512 + typedef __m512i vec_t; + typedef __m256i psqt_vec_t; + #define vec_load(a) _mm512_load_si512(a) + #define vec_store(a,b) _mm512_store_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm512_mullo_epi16(a,b) + #define vec_zero() _mm512_setzero_epi32() + #define vec_set_16(a) _mm512_set1_epi16(a) + #define vec_max_16(a,b) _mm512_max_epi16(a,b) + #define vec_min_16(a,b) _mm512_min_epi16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7)); + return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted); + } + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 32 + #define MaxChunkSize 64 + + #elif USE_AVX2 + typedef __m256i vec_t; + typedef __m256i psqt_vec_t; + #define vec_load(a) _mm256_load_si256(a) + #define vec_store(a,b) _mm256_store_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm256_mullo_epi16(a,b) + #define vec_zero() _mm256_setzero_si256() + #define vec_set_16(a) _mm256_set1_epi16(a) + #define vec_max_16(a,b) _mm256_max_epi16(a,b) + #define vec_min_16(a,b) _mm256_min_epi16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7)); + return _mm256_permute4x64_epi64(compacted, 0b11011000); + } + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a,b) _mm256_store_si256(a,b) + #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 + #define MaxChunkSize 32 + + #elif USE_SSE2 + typedef __m128i vec_t; + typedef __m128i psqt_vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + #define vec_mul_16(a,b) _mm_mullo_epi16(a,b) + #define vec_zero() _mm_setzero_si128() + #define vec_set_16(a) _mm_set1_epi16(a) + #define vec_max_16(a,b) _mm_max_epi16(a,b) + #define vec_min_16(a,b) _mm_min_epi16(a,b) + #define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7)) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) + #define vec_zero_psqt() _mm_setzero_si128() + #define NumRegistersSIMD (Is64Bit ? 16 : 8) + #define MaxChunkSize 16 + + #elif USE_MMX + typedef __m64 vec_t; + typedef __m64 psqt_vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + #define vec_mul_16(a,b) _mm_mullo_pi16(a,b) + #define vec_zero() _mm_setzero_si64() + #define vec_set_16(a) _mm_set1_pi16(a) + inline vec_t vec_max_16(vec_t a,vec_t b){ + vec_t comparison = _mm_cmpgt_pi16(a,b); + return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b)); + } + inline vec_t vec_min_16(vec_t a,vec_t b){ + vec_t comparison = _mm_cmpgt_pi16(a,b); + return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a)); + } + #define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7)) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) + #define vec_zero_psqt() _mm_setzero_si64() + #define vec_cleanup() _mm_empty() + #define NumRegistersSIMD 8 + #define MaxChunkSize 8 + + #elif USE_NEON + typedef int16x8_t vec_t; + typedef int32x4_t psqt_vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + #define vec_mul_16(a,b) vmulq_s16(a,b) + #define vec_zero() vec_t{0} + #define vec_set_16(a) vdupq_n_s16(a) + #define vec_max_16(a,b) vmaxq_s16(a,b) + #define vec_min_16(a,b) vminq_s16(a,b) + inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ + const int8x8_t shifta = vshrn_n_s16(a, 7); + const int8x8_t shiftb = vshrn_n_s16(b, 7); + const int8x16_t compacted = vcombine_s8(shifta,shiftb); + return *reinterpret_cast (&compacted); + } + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a,b) *(a)=(b) + #define vec_add_psqt_32(a,b) vaddq_s32(a,b) + #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) + #define vec_zero_psqt() psqt_vec_t{0} + #define NumRegistersSIMD 16 + #define MaxChunkSize 16 + + #else + #undef VECTOR + + #endif + + + #ifdef VECTOR + + // Compute optimal SIMD register count for feature transformer accumulation. + + // We use __m* types as template arguments, which causes GCC to emit warnings + // about losing some attribute information. This is irrelevant to us as we + // only take their size, so the following pragma are harmless. + #if defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif + + template + static constexpr int BestRegisterCount() + { + #define RegisterSize sizeof(SIMDRegisterType) + #define LaneSize sizeof(LaneType) + + static_assert(RegisterSize >= LaneSize); + static_assert(MaxRegisters <= NumRegistersSIMD); + static_assert(MaxRegisters > 0); + static_assert(NumRegistersSIMD > 0); + static_assert(RegisterSize % LaneSize == 0); + static_assert((NumLanes * LaneSize) % RegisterSize == 0); + + const int ideal = (NumLanes * LaneSize) / RegisterSize; + if (ideal <= MaxRegisters) + return ideal; + + // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters + for (int divisor = MaxRegisters; divisor > 1; --divisor) + if (ideal % divisor == 0) + return divisor; + + return 1; + } + + static constexpr int NumRegs = BestRegisterCount(); + static constexpr int NumPsqtRegs = BestRegisterCount(); + #if defined(__GNUC__) + #pragma GCC diagnostic pop + #endif + #endif + + + + // Input feature converter + class FeatureTransformer { + + private: + // Number of output dimensions for one side + static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; + + #ifdef VECTOR + static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; + static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; + static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); + static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); + #endif + + public: + // Output type + using OutputType = TransformedFeatureType; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = FeatureSet::Dimensions; + static constexpr IndexType OutputDimensions = HalfDimensions; + + // Size of forward propagation buffer + static constexpr std::size_t BufferSize = + OutputDimensions * sizeof(OutputType); + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value() { + return FeatureSet::HashValue ^ (OutputDimensions * 2); + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + + read_little_endian(stream, biases , HalfDimensions ); + read_little_endian(stream, weights , HalfDimensions * InputDimensions); + read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + + write_little_endian(stream, biases , HalfDimensions ); + write_little_endian(stream, weights , HalfDimensions * InputDimensions); + write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + + return !stream.fail(); + } + + // Convert input features + std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { + update_accumulator(pos); + update_accumulator(pos); + + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + const auto& accumulation = pos.state()->accumulator.accumulation; + const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + + const auto psqt = ( + psqtAccumulation[perspectives[0]][bucket] + - psqtAccumulation[perspectives[1]][bucket] + ) / 2; + + + for (IndexType p = 0; p < 2; ++p) + { + const IndexType offset = (HalfDimensions / 2) * p; + +#if defined(VECTOR) + + constexpr IndexType OutputChunkSize = MaxChunkSize; + static_assert((HalfDimensions / 2) % OutputChunkSize == 0); + constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; + + vec_t Zero = vec_zero(); + vec_t One = vec_set_16(127); + + const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); + const vec_t* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); + vec_t* out = reinterpret_cast< vec_t*>(output + offset); + + for (IndexType j = 0; j < NumOutputChunks; j += 1) + { + const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); + const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); + const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); + const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); + + const vec_t pa = vec_mul_16(sum0a, sum1a); + const vec_t pb = vec_mul_16(sum0b, sum1b); + + out[j] = vec_msb_pack_16(pa, pb); + } + +#else + + for (IndexType j = 0; j < HalfDimensions / 2; ++j) { + BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; + BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; + sum0 = std::max(0, std::min(127, sum0)); + sum1 = std::max(0, std::min(127, sum1)); + output[offset + j] = static_cast(sum0 * sum1 / 128); + } + +#endif + } + +#if defined(vec_cleanup) + vec_cleanup(); +#endif + + return psqt; + + } // end of function transform() + + + + private: + template + void update_accumulator(const Position& pos) const { + + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + #endif + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + StateInfo *st = pos.state(), *next = nullptr; + int gain = FeatureSet::refresh_cost(pos); + while (st->previous && !st->accumulator.computed[Perspective]) + { + // This governs when a full feature refresh is needed and how many + // updates are better than just one full refresh. + if ( FeatureSet::requires_refresh(st, Perspective) + || (gain -= FeatureSet::update_cost(st) + 1) < 0) + break; + next = st; + st = st->previous; + } + + if (st->accumulator.computed[Perspective]) + { + if (next == nullptr) + return; + + // Update incrementally in two steps. First, we update the "next" + // accumulator. Then, we update the current accumulator (pos.state()). + + // Gather all features to be updated. + const Square ksq = pos.square(Perspective); + FeatureSet::IndexList removed[2], added[2]; + FeatureSet::append_changed_indices( + ksq, next->dirtyPiece, removed[0], added[0]); + for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) + FeatureSet::append_changed_indices( + ksq, st2->dirtyPiece, removed[1], added[1]); + + // Mark the accumulators as computed. + next->accumulator.computed[Perspective] = true; + pos.state()->accumulator.computed[Perspective] = true; + + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + StateInfo *states_to_update[3] = + { next, next == pos.state() ? nullptr : pos.state(), nullptr }; + #ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + // Load accumulator + auto accTile = reinterpret_cast( + &st->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_load(&accTile[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + // Store accumulator + accTile = reinterpret_cast( + &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_store(&accTile[k], acc[k]); + } + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + // Load accumulator + auto accTilePsqt = reinterpret_cast( + &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqt[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) + { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + // Store accumulator + accTilePsqt = reinterpret_cast( + &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + } + + #else + for (IndexType i = 0; states_to_update[i]; ++i) + { + std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], + st->accumulator.accumulation[Perspective], + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; + + st = states_to_update[i]; + + // Difference calculation for the deactivated features + for (const auto index : removed[i]) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + } + } + #endif + } + else + { + // Refresh the accumulator + auto& accumulator = pos.state()->accumulator; + accumulator.computed[Perspective] = true; + FeatureSet::IndexList active; + FeatureSet::append_active_indices(pos, active); + + #ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + auto biasesTile = reinterpret_cast( + &biases[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + auto accTile = reinterpret_cast( + &accumulator.accumulation[Perspective][j * TileHeight]); + for (unsigned k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); + + for (const auto index : active) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + + #else + std::memcpy(accumulator.accumulation[Perspective], biases, + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] = 0; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + } + #endif + } + + #if defined(USE_MMX) + _mm_empty(); + #endif + } + + alignas(CacheLineSize) BiasType biases[HalfDimensions]; + alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; + alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; + }; + +} // namespace Stockfish::Eval::NNUE + +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/pawns.cpp b/src/pawns.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fdcfa0224e9d2b559daac24892d7726a00f99ac3 --- /dev/null +++ b/src/pawns.cpp @@ -0,0 +1,305 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include + +#include "bitboard.h" +#include "pawns.h" +#include "position.h" +#include "thread.h" + +namespace Stockfish { + +namespace { + + #define V Value + #define S(mg, eg) make_score(mg, eg) + + // Pawn penalties + constexpr Score Backward = S( 6, 19); + constexpr Score Doubled = S(11, 51); + constexpr Score DoubledEarly = S(17, 7); + constexpr Score Isolated = S( 1, 20); + constexpr Score WeakLever = S( 2, 57); + constexpr Score WeakUnopposed = S(15, 18); + + // Bonus for blocked pawns at 5th or 6th rank + constexpr Score BlockedPawn[2] = { S(-19, -8), S(-7, 3) }; + + constexpr Score BlockedStorm[RANK_NB] = { + S(0, 0), S(0, 0), S(64, 75), S(-3, 14), S(-12, 19), S(-7, 4), S(-10, 5) + }; + + // Connected pawn bonus + constexpr int Connected[RANK_NB] = { 0, 3, 7, 7, 15, 54, 86 }; + + // Strength of pawn shelter for our king by [distance from edge][rank]. + // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king. + constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = { + { V(-2), V(85), V(95), V(53), V(39), V(23), V(25) }, + { V(-55), V(64), V(32), V(-55), V(-30), V(-11), V(-61) }, + { V(-11), V(75), V(19), V(-6), V(26), V(9), V(-47) }, + { V(-41), V(-11), V(-27), V(-58), V(-42), V(-66), V(-163) } + }; + + // Danger of enemy pawns moving toward our king by [distance from edge][rank]. + // RANK_1 = 0 is used for files where the enemy has no pawn, or their pawn + // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn + // on edge, likely blocked by our king. + constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = { + { V(94), V(-280), V(-170), V(90), V(59), V(47), V(53) }, + { V(43), V(-17), V(128), V(39), V(26), V(-17), V(15) }, + { V(-9), V(62), V(170), V(34), V(-5), V(-20), V(-11) }, + { V(-27), V(-19), V(106), V(10), V(2), V(-13), V(-24) } + }; + + + // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties + // for king when the king is on a semi-open or open file. + constexpr Score KingOnFile[2][2] = {{ S(-18,11), S(-6,-3) }, + { S( 0, 0), S( 5,-4) }}; + + #undef S + #undef V + + + /// evaluate() calculates a score for the static pawn structure of the given position. + /// We cannot use the location of pieces or king in this function, as the evaluation + /// of the pawn structure will be stored in a small cache for speed reasons, and will + /// be re-used even when the pieces have moved. + + template + Score evaluate(const Position& pos, Pawns::Entry* e) { + + constexpr Color Them = ~Us; + constexpr Direction Up = pawn_push(Us); + constexpr Direction Down = -Up; + + Bitboard neighbours, stoppers, support, phalanx, opposed; + Bitboard lever, leverPush, blocked; + Square s; + bool backward, passed, doubled; + Score score = SCORE_ZERO; + Bitboard b = pos.pieces(Us, PAWN); + + Bitboard ourPawns = pos.pieces( Us, PAWN); + Bitboard theirPawns = pos.pieces(Them, PAWN); + + Bitboard doubleAttackThem = pawn_double_attacks_bb(theirPawns); + + e->passedPawns[Us] = 0; + e->kingSquares[Us] = SQ_NONE; + e->pawnAttacks[Us] = e->pawnAttacksSpan[Us] = pawn_attacks_bb(ourPawns); + e->blockedCount += popcount(shift(ourPawns) & (theirPawns | doubleAttackThem)); + + // Loop through all pawns of the current color and score each pawn + while (b) + { + s = pop_lsb(b); + + assert(pos.piece_on(s) == make_piece(Us, PAWN)); + + Rank r = relative_rank(Us, s); + + // Flag the pawn + opposed = theirPawns & forward_file_bb(Us, s); + blocked = theirPawns & (s + Up); + stoppers = theirPawns & passed_pawn_span(Us, s); + lever = theirPawns & pawn_attacks_bb(Us, s); + leverPush = theirPawns & pawn_attacks_bb(Us, s + Up); + doubled = ourPawns & (s - Up); + neighbours = ourPawns & adjacent_files_bb(s); + phalanx = neighbours & rank_bb(s); + support = neighbours & rank_bb(s - Up); + + if (doubled) + { + // Additional doubled penalty if none of their pawns is fixed + if (!(ourPawns & shift(theirPawns | pawn_attacks_bb(theirPawns)))) + score -= DoubledEarly; + } + + // A pawn is backward when it is behind all pawns of the same color on + // the adjacent files and cannot safely advance. + backward = !(neighbours & forward_ranks_bb(Them, s + Up)) + && (leverPush | blocked); + + // Compute additional span if pawn is not backward nor blocked + if (!backward && !blocked) + e->pawnAttacksSpan[Us] |= pawn_attack_span(Us, s); + + // A pawn is passed if one of the three following conditions is true: + // (a) there is no stoppers except some levers + // (b) the only stoppers are the leverPush, but we outnumber them + // (c) there is only one front stopper which can be levered. + // (Refined in Evaluation::passed) + passed = !(stoppers ^ lever) + || ( !(stoppers ^ leverPush) + && popcount(phalanx) >= popcount(leverPush)) + || ( stoppers == blocked && r >= RANK_5 + && (shift(support) & ~(theirPawns | doubleAttackThem))); + + passed &= !(forward_file_bb(Us, s) & ourPawns); + + // Passed pawns will be properly scored later in evaluation when we have + // full attack info. + if (passed) + e->passedPawns[Us] |= s; + + // Score this pawn + if (support | phalanx) + { + int v = Connected[r] * (2 + bool(phalanx) - bool(opposed)) + + 22 * popcount(support); + + score += make_score(v, v * (r - 2) / 4); + } + + else if (!neighbours) + { + if ( opposed + && (ourPawns & forward_file_bb(Them, s)) + && !(theirPawns & adjacent_files_bb(s))) + score -= Doubled; + else + score -= Isolated + + WeakUnopposed * !opposed; + } + + else if (backward) + score -= Backward + + WeakUnopposed * !opposed * bool(~(FileABB | FileHBB) & s); + + if (!support) + score -= Doubled * doubled + + WeakLever * more_than_one(lever); + + if (blocked && r >= RANK_5) + score += BlockedPawn[r - RANK_5]; + } + + return score; + } + +} // namespace + +namespace Pawns { + + +/// Pawns::probe() looks up the current position's pawns configuration in +/// the pawns hash table. It returns a pointer to the Entry if the position +/// is found. Otherwise a new Entry is computed and stored there, so we don't +/// have to recompute all when the same pawns configuration occurs again. + +Entry* probe(const Position& pos) { + + Key key = pos.pawn_key(); + Entry* e = pos.this_thread()->pawnsTable[key]; + + if (e->key == key) + return e; + + e->key = key; + e->blockedCount = 0; + e->scores[WHITE] = evaluate(pos, e); + e->scores[BLACK] = evaluate(pos, e); + + return e; +} + + +/// Entry::evaluate_shelter() calculates the shelter bonus and the storm +/// penalty for a king, looking at the king file and the two closest files. + +template +Score Entry::evaluate_shelter(const Position& pos, Square ksq) const { + + constexpr Color Them = ~Us; + + Bitboard b = pos.pieces(PAWN) & ~forward_ranks_bb(Them, ksq); + Bitboard ourPawns = b & pos.pieces(Us) & ~pawnAttacks[Them]; + Bitboard theirPawns = b & pos.pieces(Them); + + Score bonus = make_score(5, 5); + + File center = std::clamp(file_of(ksq), FILE_B, FILE_G); + for (File f = File(center - 1); f <= File(center + 1); ++f) + { + b = ourPawns & file_bb(f); + int ourRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0; + + b = theirPawns & file_bb(f); + int theirRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0; + + int d = edge_distance(f); + bonus += make_score(ShelterStrength[d][ourRank], 0); + + if (ourRank && (ourRank == theirRank - 1)) + bonus -= BlockedStorm[theirRank]; + else + bonus -= make_score(UnblockedStorm[d][theirRank], 0); + } + + // King On File + bonus -= KingOnFile[pos.is_on_semiopen_file(Us, ksq)][pos.is_on_semiopen_file(Them, ksq)]; + + return bonus; +} + + +/// Entry::do_king_safety() calculates a bonus for king safety. It is called only +/// when king square changes, which is about 20% of total king_safety() calls. + +template +Score Entry::do_king_safety(const Position& pos) { + + Square ksq = pos.square(Us); + kingSquares[Us] = ksq; + castlingRights[Us] = pos.castling_rights(Us); + auto compare = [](Score a, Score b) { return mg_value(a) < mg_value(b); }; + + Score shelter = evaluate_shelter(pos, ksq); + + // If we can castle use the bonus after castling if it is bigger + + if (pos.can_castle(Us & KING_SIDE)) + shelter = std::max(shelter, evaluate_shelter(pos, relative_square(Us, SQ_G1)), compare); + + if (pos.can_castle(Us & QUEEN_SIDE)) + shelter = std::max(shelter, evaluate_shelter(pos, relative_square(Us, SQ_C1)), compare); + + // In endgame we like to bring our king near our closest pawn + Bitboard pawns = pos.pieces(Us, PAWN); + int minPawnDist = 6; + + if (pawns & attacks_bb(ksq)) + minPawnDist = 1; + else while (pawns) + minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(pawns))); + + return shelter - make_score(0, 16 * minPawnDist); +} + +// Explicit template instantiation +template Score Entry::do_king_safety(const Position& pos); +template Score Entry::do_king_safety(const Position& pos); + +} // namespace Pawns + +} // namespace Stockfish diff --git a/src/pawns.h b/src/pawns.h new file mode 100644 index 0000000000000000000000000000000000000000..af0370fc41aa76abd70ab2889a6741ca3aed0f7f --- /dev/null +++ b/src/pawns.h @@ -0,0 +1,70 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef PAWNS_H_INCLUDED +#define PAWNS_H_INCLUDED + +#include "misc.h" +#include "position.h" +#include "types.h" + +namespace Stockfish::Pawns { + +/// Pawns::Entry contains various information about a pawn structure. A lookup +/// to the pawn hash table (performed by calling the probe function) returns a +/// pointer to an Entry object. + +struct Entry { + + Score pawn_score(Color c) const { return scores[c]; } + Bitboard pawn_attacks(Color c) const { return pawnAttacks[c]; } + Bitboard passed_pawns(Color c) const { return passedPawns[c]; } + Bitboard pawn_attacks_span(Color c) const { return pawnAttacksSpan[c]; } + int passed_count() const { return popcount(passedPawns[WHITE] | passedPawns[BLACK]); } + int blocked_count() const { return blockedCount; } + + template + Score king_safety(const Position& pos) { + return kingSquares[Us] == pos.square(Us) && castlingRights[Us] == pos.castling_rights(Us) + ? kingSafety[Us] : (kingSafety[Us] = do_king_safety(pos)); + } + + template + Score do_king_safety(const Position& pos); + + template + Score evaluate_shelter(const Position& pos, Square ksq) const; + + Key key; + Score scores[COLOR_NB]; + Bitboard passedPawns[COLOR_NB]; + Bitboard pawnAttacks[COLOR_NB]; + Bitboard pawnAttacksSpan[COLOR_NB]; + Square kingSquares[COLOR_NB]; + Score kingSafety[COLOR_NB]; + int castlingRights[COLOR_NB]; + int blockedCount; +}; + +typedef HashTable Table; + +Entry* probe(const Position& pos); + +} // namespace Stockfish::Pawns + +#endif // #ifndef PAWNS_H_INCLUDED diff --git a/src/position.cpp b/src/position.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5befcaf2cfcbd311034db63fee2a9ce934879d50 --- /dev/null +++ b/src/position.cpp @@ -0,0 +1,1353 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include // For offsetof() +#include // For std::memset, std::memcmp +#include +#include + +#include "bitboard.h" +#include "misc.h" +#include "movegen.h" +#include "position.h" +#include "thread.h" +#include "tt.h" +#include "uci.h" +#include "syzygy/tbprobe.h" + +using std::string; + +namespace Stockfish { + +namespace Zobrist { + + Key psq[PIECE_NB][SQUARE_NB]; + Key enpassant[FILE_NB]; + Key castling[CASTLING_RIGHT_NB]; + Key side, noPawns; +} + +namespace { + +const string PieceToChar(" PNBRQK pnbrqk"); + +constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING }; +} // namespace + + +/// operator<<(Position) returns an ASCII representation of the position + +std::ostream& operator<<(std::ostream& os, const Position& pos) { + + os << "\n +---+---+---+---+---+---+---+---+\n"; + + for (Rank r = RANK_8; r >= RANK_1; --r) + { + for (File f = FILE_A; f <= FILE_H; ++f) + os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; + + os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; + } + + os << " a b c d e f g h\n" + << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase + << std::setfill('0') << std::setw(16) << pos.key() + << std::setfill(' ') << std::dec << "\nCheckers: "; + + for (Bitboard b = pos.checkers(); b; ) + os << UCI::square(pop_lsb(b)) << " "; + + if ( int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) + && !pos.can_castle(ANY_CASTLING)) + { + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + Position p; + p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); + Tablebases::ProbeState s1, s2; + Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); + int dtz = Tablebases::probe_dtz(p, &s2); + os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" + << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; + } + + return os; +} + + +// Marcel van Kervinck's cuckoo algorithm for fast detection of "upcoming repetition" +// situations. Description of the algorithm in the following paper: +// https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf + +// First and second hash functions for indexing the cuckoo tables +inline int H1(Key h) { return h & 0x1fff; } +inline int H2(Key h) { return (h >> 16) & 0x1fff; } + +// Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves +Key cuckoo[8192]; +Move cuckooMove[8192]; + + +/// Position::init() initializes at startup the various arrays used to compute hash keys + +void Position::init() { + + PRNG rng(1070372); + + for (Piece pc : Pieces) + for (Square s = SQ_A1; s <= SQ_H8; ++s) + Zobrist::psq[pc][s] = rng.rand(); + + for (File f = FILE_A; f <= FILE_H; ++f) + Zobrist::enpassant[f] = rng.rand(); + + for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) + Zobrist::castling[cr] = rng.rand(); + + Zobrist::side = rng.rand(); + Zobrist::noPawns = rng.rand(); + + // Prepare the cuckoo tables + std::memset(cuckoo, 0, sizeof(cuckoo)); + std::memset(cuckooMove, 0, sizeof(cuckooMove)); + [[maybe_unused]] int count = 0; + for (Piece pc : Pieces) + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) + if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) + { + Move move = make_move(s1, s2); + Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; + int i = H1(key); + while (true) + { + std::swap(cuckoo[i], key); + std::swap(cuckooMove[i], move); + if (move == MOVE_NONE) // Arrived at empty slot? + break; + i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot + } + count++; + } + assert(count == 3668); +} + + +/// Position::set() initializes the position object with the given FEN string. +/// This function is not very robust - make sure that input FENs are correct, +/// this is assumed to be the responsibility of the GUI. + +Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Thread* th) { +/* + A FEN string defines a particular position using only the ASCII character set. + + A FEN string contains six fields separated by a space. The fields are: + + 1) Piece placement (from white's perspective). Each rank is described, starting + with rank 8 and ending with rank 1. Within each rank, the contents of each + square are described from file A through file H. Following the Standard + Algebraic Notation (SAN), each piece is identified by a single letter taken + from the standard English names. White pieces are designated using upper-case + letters ("PNBRQK") whilst Black uses lowercase ("pnbrqk"). Blank squares are + noted using digits 1 through 8 (the number of blank squares), and "/" + separates ranks. + + 2) Active color. "w" means white moves next, "b" means black. + + 3) Castling availability. If neither side can castle, this is "-". Otherwise, + this has one or more letters: "K" (White can castle kingside), "Q" (White + can castle queenside), "k" (Black can castle kingside), and/or "q" (Black + can castle queenside). + + 4) En passant target square (in algebraic notation). If there's no en passant + target square, this is "-". If a pawn has just made a 2-square move, this + is the position "behind" the pawn. Following X-FEN standard, this is recorded only + if there is a pawn in position to make an en passant capture, and if there really + is a pawn that might have advanced two squares. + + 5) Halfmove clock. This is the number of halfmoves since the last pawn advance + or capture. This is used to determine if a draw can be claimed under the + fifty-move rule. + + 6) Fullmove number. The number of the full move. It starts at 1, and is + incremented after Black's move. +*/ + + unsigned char col, row, token; + size_t idx; + Square sq = SQ_A8; + std::istringstream ss(fenStr); + + std::memset(this, 0, sizeof(Position)); + std::memset(si, 0, sizeof(StateInfo)); + st = si; + + ss >> std::noskipws; + + // 1. Piece placement + while ((ss >> token) && !isspace(token)) + { + if (isdigit(token)) + sq += (token - '0') * EAST; // Advance the given number of files + + else if (token == '/') + sq += 2 * SOUTH; + + else if ((idx = PieceToChar.find(token)) != string::npos) { + put_piece(Piece(idx), sq); + ++sq; + } + } + + // 2. Active color + ss >> token; + sideToMove = (token == 'w' ? WHITE : BLACK); + ss >> token; + + // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, + // Shredder-FEN that uses the letters of the columns on which the rooks began + // the game instead of KQkq and also X-FEN standard that, in case of Chess960, + // if an inner rook is associated with the castling right, the castling tag is + // replaced by the file letter of the involved rook, as for the Shredder-FEN. + while ((ss >> token) && !isspace(token)) + { + Square rsq; + Color c = islower(token) ? BLACK : WHITE; + Piece rook = make_piece(c, ROOK); + + token = char(toupper(token)); + + if (token == 'K') + for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) {} + + else if (token == 'Q') + for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) {} + + else if (token >= 'A' && token <= 'H') + rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); + + else + continue; + + set_castling_right(c, rsq); + } + + // 4. En passant square. + // Ignore if square is invalid or not on side to move relative rank 6. + bool enpassant = false; + + if ( ((ss >> col) && (col >= 'a' && col <= 'h')) + && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) + { + st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); + + // En passant square will be considered only if + // a) side to move have a pawn threatening epSquare + // b) there is an enemy pawn in front of epSquare + // c) there is no piece on epSquare or behind epSquare + enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) + && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) + && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); + } + + if (!enpassant) + st->epSquare = SQ_NONE; + + // 5-6. Halfmove clock and fullmove number + ss >> std::skipws >> st->rule50 >> gamePly; + + // Convert from fullmove starting from 1 to gamePly starting from 0, + // handle also common incorrect FEN with fullmove = 0. + gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); + + chess960 = isChess960; + thisThread = th; + set_state(st); + + assert(pos_is_ok()); + + return *this; +} + + +/// Position::set_castling_right() is a helper function used to set castling +/// rights given the corresponding color and the rook starting square. + +void Position::set_castling_right(Color c, Square rfrom) { + + Square kfrom = square(c); + CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE: QUEEN_SIDE); + + st->castlingRights |= cr; + castlingRightsMask[kfrom] |= cr; + castlingRightsMask[rfrom] |= cr; + castlingRookSquare[cr] = rfrom; + + Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); + Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); + + castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) + & ~(kfrom | rfrom); +} + + +/// Position::set_check_info() sets king attacks to detect if a move gives check + +void Position::set_check_info(StateInfo* si) const { + + si->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), si->pinners[BLACK]); + si->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), si->pinners[WHITE]); + + Square ksq = square(~sideToMove); + + si->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); + si->checkSquares[KNIGHT] = attacks_bb(ksq); + si->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); + si->checkSquares[ROOK] = attacks_bb(ksq, pieces()); + si->checkSquares[QUEEN] = si->checkSquares[BISHOP] | si->checkSquares[ROOK]; + si->checkSquares[KING] = 0; +} + + +/// Position::set_state() computes the hash keys of the position, and other +/// data that once computed is updated incrementally as moves are made. +/// The function is only used when a new position is set up, and to verify +/// the correctness of the StateInfo data when running in debug mode. + +void Position::set_state(StateInfo* si) const { + + si->key = si->materialKey = 0; + si->pawnKey = Zobrist::noPawns; + si->nonPawnMaterial[WHITE] = si->nonPawnMaterial[BLACK] = VALUE_ZERO; + si->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); + + set_check_info(si); + + for (Bitboard b = pieces(); b; ) + { + Square s = pop_lsb(b); + Piece pc = piece_on(s); + si->key ^= Zobrist::psq[pc][s]; + + if (type_of(pc) == PAWN) + si->pawnKey ^= Zobrist::psq[pc][s]; + + else if (type_of(pc) != KING) + si->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; + } + + if (si->epSquare != SQ_NONE) + si->key ^= Zobrist::enpassant[file_of(si->epSquare)]; + + if (sideToMove == BLACK) + si->key ^= Zobrist::side; + + si->key ^= Zobrist::castling[si->castlingRights]; + + for (Piece pc : Pieces) + for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) + si->materialKey ^= Zobrist::psq[pc][cnt]; +} + + +/// Position::set() is an overload to initialize the position object with +/// the given endgame code string like "KBPKN". It is mainly a helper to +/// get the material key out of an endgame code. + +Position& Position::set(const string& code, Color c, StateInfo* si) { + + assert(code[0] == 'K'); + + string sides[] = { code.substr(code.find('K', 1)), // Weak + code.substr(0, std::min(code.find('v'), code.find('K', 1))) }; // Strong + + assert(sides[0].length() > 0 && sides[0].length() < 8); + assert(sides[1].length() > 0 && sides[1].length() < 8); + + std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); + + string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + + sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; + + return set(fenStr, false, si, nullptr); +} + + +/// Position::fen() returns a FEN representation of the position. In case of +/// Chess960 the Shredder-FEN notation is used. This is mainly a debugging function. + +string Position::fen() const { + + int emptyCnt; + std::ostringstream ss; + + for (Rank r = RANK_8; r >= RANK_1; --r) + { + for (File f = FILE_A; f <= FILE_H; ++f) + { + for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) + ++emptyCnt; + + if (emptyCnt) + ss << emptyCnt; + + if (f <= FILE_H) + ss << PieceToChar[piece_on(make_square(f, r))]; + } + + if (r > RANK_1) + ss << '/'; + } + + ss << (sideToMove == WHITE ? " w " : " b "); + + if (can_castle(WHITE_OO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO ))) : 'K'); + + if (can_castle(WHITE_OOO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); + + if (can_castle(BLACK_OO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO ))) : 'k'); + + if (can_castle(BLACK_OOO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); + + if (!can_castle(ANY_CASTLING)) + ss << '-'; + + ss << (ep_square() == SQ_NONE ? " - " : " " + UCI::square(ep_square()) + " ") + << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; + + return ss.str(); +} + + +/// Position::slider_blockers() returns a bitboard of all the pieces (both colors) +/// that are blocking attacks on the square 's' from 'sliders'. A piece blocks a +/// slider if removing that piece from the board would result in a position where +/// square 's' is attacked. For example, a king-attack blocking piece can be either +/// a pinned or a discovered check piece, according if its color is the opposite +/// or the same of the color of the slider. + +Bitboard Position::slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const { + + Bitboard blockers = 0; + pinners = 0; + + // Snipers are sliders that attack 's' when a piece and other snipers are removed + Bitboard snipers = ( (attacks_bb< ROOK>(s) & pieces(QUEEN, ROOK)) + | (attacks_bb(s) & pieces(QUEEN, BISHOP))) & sliders; + Bitboard occupancy = pieces() ^ snipers; + + while (snipers) + { + Square sniperSq = pop_lsb(snipers); + Bitboard b = between_bb(s, sniperSq) & occupancy; + + if (b && !more_than_one(b)) + { + blockers |= b; + if (b & pieces(color_of(piece_on(s)))) + pinners |= sniperSq; + } + } + return blockers; +} + + +/// Position::attackers_to() computes a bitboard of all pieces which attack a +/// given square. Slider attacks use the occupied bitboard to indicate occupancy. + +Bitboard Position::attackers_to(Square s, Bitboard occupied) const { + + return (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN)) + | (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN)) + | (attacks_bb(s) & pieces(KNIGHT)) + | (attacks_bb< ROOK>(s, occupied) & pieces( ROOK, QUEEN)) + | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(s) & pieces(KING)); +} + + +/// Position::legal() tests whether a pseudo-legal move is legal + +bool Position::legal(Move m) const { + + assert(is_ok(m)); + + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); + + assert(color_of(moved_piece(m)) == us); + assert(piece_on(square(us)) == make_piece(us, KING)); + + // En passant captures are a tricky special case. Because they are rather + // uncommon, we do it simply by testing whether the king is attacked after + // the move is made. + if (type_of(m) == EN_PASSANT) + { + Square ksq = square(us); + Square capsq = to - pawn_push(us); + Bitboard occupied = (pieces() ^ from ^ capsq) | to; + + assert(to == ep_square()); + assert(moved_piece(m) == make_piece(us, PAWN)); + assert(piece_on(capsq) == make_piece(~us, PAWN)); + assert(piece_on(to) == NO_PIECE); + + return !(attacks_bb< ROOK>(ksq, occupied) & pieces(~us, QUEEN, ROOK)) + && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP)); + } + + // Castling moves generation does not check if the castling path is clear of + // enemy attacks, it is delayed at a later time: now! + if (type_of(m) == CASTLING) + { + // After castling, the rook and king final positions are the same in + // Chess960 as they would be in standard chess. + to = relative_square(us, to > from ? SQ_G1 : SQ_C1); + Direction step = to > from ? WEST : EAST; + + for (Square s = to; s != from; s += step) + if (attackers_to(s) & pieces(~us)) + return false; + + // In case of Chess960, verify if the Rook blocks some checks + // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. + return !chess960 || !(blockers_for_king(us) & to_sq(m)); + } + + // If the moving piece is a king, check whether the destination square is + // attacked by the opponent. + if (type_of(piece_on(from)) == KING) + return !(attackers_to(to, pieces() ^ from) & pieces(~us)); + + // A non-king move is legal if and only if it is not pinned or it + // is moving along the ray towards or away from the king. + return !(blockers_for_king(us) & from) + || aligned(from, to, square(us)); +} + + +/// Position::pseudo_legal() takes a random move and tests whether the move is +/// pseudo legal. It is used to validate moves from TT that can be corrupted +/// due to SMP concurrent access or hash position key aliasing. + +bool Position::pseudo_legal(const Move m) const { + + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = moved_piece(m); + + // Use a slower but simpler function for uncommon cases + // yet we skip the legality check of MoveList(). + if (type_of(m) != NORMAL) + return checkers() ? MoveList< EVASIONS>(*this).contains(m) + : MoveList(*this).contains(m); + + // Is not a promotion, so promotion piece must be empty + if (promotion_type(m) - KNIGHT != NO_PIECE_TYPE) + return false; + + // If the 'from' square is not occupied by a piece belonging to the side to + // move, the move is obviously not legal. + if (pc == NO_PIECE || color_of(pc) != us) + return false; + + // The destination square cannot be occupied by a friendly piece + if (pieces(us) & to) + return false; + + // Handle the special case of a pawn move + if (type_of(pc) == PAWN) + { + // We have already handled promotion moves, so destination + // cannot be on the 8th/1st rank. + if ((Rank8BB | Rank1BB) & to) + return false; + + if ( !(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture + && !((from + pawn_push(us) == to) && empty(to)) // Not a single push + && !( (from + 2 * pawn_push(us) == to) // Not a double push + && (relative_rank(us, from) == RANK_2) + && empty(to) + && empty(to - pawn_push(us)))) + return false; + } + else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) + return false; + + // Evasions generator already takes care to avoid some kind of illegal moves + // and legal() relies on this. We therefore have to take care that the same + // kind of moves are filtered out here. + if (checkers()) + { + if (type_of(pc) != KING) + { + // Double check? In this case a king move is required + if (more_than_one(checkers())) + return false; + + // Our move must be a blocking interposition or a capture of the checking piece + if (!(between_bb(square(us), lsb(checkers())) & to)) + return false; + } + // In case of king moves under check we have to remove king so as to catch + // invalid moves like b1a1 when opposite queen is on c1. + else if (attackers_to(to, pieces() ^ from) & pieces(~us)) + return false; + } + + return true; +} + + +/// Position::gives_check() tests whether a pseudo-legal move gives a check + +bool Position::gives_check(Move m) const { + + assert(is_ok(m)); + assert(color_of(moved_piece(m)) == sideToMove); + + Square from = from_sq(m); + Square to = to_sq(m); + + // Is there a direct check? + if (check_squares(type_of(piece_on(from))) & to) + return true; + + // Is there a discovered check? + if ( (blockers_for_king(~sideToMove) & from) + && !aligned(from, to, square(~sideToMove))) + return true; + + switch (type_of(m)) + { + case NORMAL: + return false; + + case PROMOTION: + return attacks_bb(promotion_type(m), to, pieces() ^ from) & square(~sideToMove); + + // En passant capture with check? We have already handled the case + // of direct checks and ordinary discovered check, so the only case we + // need to handle is the unusual case of a discovered check through + // the captured pawn. + case EN_PASSANT: + { + Square capsq = make_square(file_of(to), rank_of(from)); + Bitboard b = (pieces() ^ from ^ capsq) | to; + + return (attacks_bb< ROOK>(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) + | (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP)); + } + default: //CASTLING + { + // Castling is encoded as 'king captures the rook' + Square ksq = square(~sideToMove); + Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); + + return (attacks_bb(rto) & ksq) + && (attacks_bb(rto, pieces() ^ from ^ to) & ksq); + } + } +} + + +/// Position::do_move() makes a move, and saves all information necessary +/// to a StateInfo object. The move is assumed to be legal. Pseudo-legal +/// moves should be filtered out before this function is called. + +void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { + + assert(is_ok(m)); + assert(&newSt != st); + + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); + Key k = st->key ^ Zobrist::side; + + // Copy some fields of the old state to our new StateInfo object except the + // ones which are going to be recalculated from scratch anyway and then switch + // our state pointer to point to the new (ready to be updated) state. + std::memcpy(&newSt, st, offsetof(StateInfo, key)); + newSt.previous = st; + st = &newSt; + + // Increment ply counters. In particular, rule50 will be reset to zero later on + // in case of a capture or a pawn move. + ++gamePly; + ++st->rule50; + ++st->pliesFromNull; + + // Used by NNUE + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + auto& dp = st->dirtyPiece; + dp.dirty_num = 1; + + Color us = sideToMove; + Color them = ~us; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(from); + Piece captured = type_of(m) == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); + + assert(color_of(pc) == us); + assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us)); + assert(type_of(captured) != KING); + + if (type_of(m) == CASTLING) + { + assert(pc == make_piece(us, KING)); + assert(captured == make_piece(us, ROOK)); + + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); + + k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; + captured = NO_PIECE; + } + + if (captured) + { + Square capsq = to; + + // If the captured piece is a pawn, update pawn hash key, otherwise + // update non-pawn material. + if (type_of(captured) == PAWN) + { + if (type_of(m) == EN_PASSANT) + { + capsq -= pawn_push(us); + + assert(pc == make_piece(us, PAWN)); + assert(to == st->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(to) == NO_PIECE); + assert(piece_on(capsq) == make_piece(them, PAWN)); + } + + st->pawnKey ^= Zobrist::psq[captured][capsq]; + } + else + st->nonPawnMaterial[them] -= PieceValue[MG][captured]; + + if (Eval::useNNUE) + { + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; + } + + // Update board and piece lists + remove_piece(capsq); + + if (type_of(m) == EN_PASSANT) + board[capsq] = NO_PIECE; + + // Update material hash key and prefetch access to materialTable + k ^= Zobrist::psq[captured][capsq]; + st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; + prefetch(thisThread->materialTable[st->materialKey]); + + // Reset rule 50 counter + st->rule50 = 0; + } + + // Update hash key + k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + + // Reset en passant square + if (st->epSquare != SQ_NONE) + { + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } + + // Update castling rights if needed + if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) + { + k ^= Zobrist::castling[st->castlingRights]; + st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); + k ^= Zobrist::castling[st->castlingRights]; + } + + // Move the piece. The tricky Chess960 castling is handled earlier + if (type_of(m) != CASTLING) + { + if (Eval::useNNUE) + { + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; + } + + move_piece(from, to); + } + + // If the moving piece is a pawn do some special extra work + if (type_of(pc) == PAWN) + { + // Set en passant square if the moved pawn can be captured + if ( (int(to) ^ int(from)) == 16 + && (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN))) + { + st->epSquare = to - pawn_push(us); + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + } + + else if (type_of(m) == PROMOTION) + { + Piece promotion = make_piece(us, promotion_type(m)); + + assert(relative_rank(us, to) == RANK_8); + assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); + + remove_piece(to); + put_piece(promotion, to); + + if (Eval::useNNUE) + { + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; + } + + // Update hash keys + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; + st->pawnKey ^= Zobrist::psq[pc][to]; + st->materialKey ^= Zobrist::psq[promotion][pieceCount[promotion]-1] + ^ Zobrist::psq[pc][pieceCount[pc]]; + + // Update material + st->nonPawnMaterial[us] += PieceValue[MG][promotion]; + } + + // Update pawn hash key + st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + + // Reset rule 50 draw counter + st->rule50 = 0; + } + + // Set capture piece + st->capturedPiece = captured; + + // Update the key with the final value + st->key = k; + + // Calculate checkers bitboard (if move gives check) + st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; + + sideToMove = ~sideToMove; + + // Update king attacks used for fast check detection + set_check_info(st); + + // Calculate the repetition info. It is the ply distance from the previous + // occurrence of the same position, negative in the 3-fold case, or zero + // if the position was not repeated. + st->repetition = 0; + int end = std::min(st->rule50, st->pliesFromNull); + if (end >= 4) + { + StateInfo* stp = st->previous->previous; + for (int i = 4; i <= end; i += 2) + { + stp = stp->previous->previous; + if (stp->key == st->key) + { + st->repetition = stp->repetition ? -i : i; + break; + } + } + } + + assert(pos_is_ok()); +} + + +/// Position::undo_move() unmakes a move. When it returns, the position should +/// be restored to exactly the same state as before the move was made. + +void Position::undo_move(Move m) { + + assert(is_ok(m)); + + sideToMove = ~sideToMove; + + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(to); + + assert(empty(from) || type_of(m) == CASTLING); + assert(type_of(st->capturedPiece) != KING); + + if (type_of(m) == PROMOTION) + { + assert(relative_rank(us, to) == RANK_8); + assert(type_of(pc) == promotion_type(m)); + assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); + + remove_piece(to); + pc = make_piece(us, PAWN); + put_piece(pc, to); + } + + if (type_of(m) == CASTLING) + { + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); + } + else + { + move_piece(to, from); // Put the piece back at the source square + + if (st->capturedPiece) + { + Square capsq = to; + + if (type_of(m) == EN_PASSANT) + { + capsq -= pawn_push(us); + + assert(type_of(pc) == PAWN); + assert(to == st->previous->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(capsq) == NO_PIECE); + assert(st->capturedPiece == make_piece(~us, PAWN)); + } + + put_piece(st->capturedPiece, capsq); // Restore the captured piece + } + } + + // Finally point our state pointer back to the previous state + st = st->previous; + --gamePly; + + assert(pos_is_ok()); +} + + +/// Position::do_castling() is a helper used to do/undo a castling move. This +/// is a bit tricky in Chess960 where from/to squares can overlap. +template +void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) { + + bool kingSide = to > from; + rfrom = to; // Castling is encoded as "king captures friendly rook" + rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); + to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); + + if (Do && Eval::useNNUE) + { + auto& dp = st->dirtyPiece; + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; + } + + // Remove both pieces first since squares could overlap in Chess960 + remove_piece(Do ? from : to); + remove_piece(Do ? rfrom : rto); + board[Do ? from : to] = board[Do ? rfrom : rto] = NO_PIECE; // Since remove_piece doesn't do this for us + put_piece(make_piece(us, KING), Do ? to : from); + put_piece(make_piece(us, ROOK), Do ? rto : rfrom); +} + + +/// Position::do_null_move() is used to do a "null move": it flips +/// the side to move without executing any move on the board. + +void Position::do_null_move(StateInfo& newSt) { + + assert(!checkers()); + assert(&newSt != st); + + std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + + newSt.previous = st; + st = &newSt; + + st->dirtyPiece.dirty_num = 0; + st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + + if (st->epSquare != SQ_NONE) + { + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } + + st->key ^= Zobrist::side; + ++st->rule50; + prefetch(TT.first_entry(key())); + + st->pliesFromNull = 0; + + sideToMove = ~sideToMove; + + set_check_info(st); + + st->repetition = 0; + + assert(pos_is_ok()); +} + + +/// Position::undo_null_move() must be used to undo a "null move" + +void Position::undo_null_move() { + + assert(!checkers()); + + st = st->previous; + sideToMove = ~sideToMove; +} + + +/// Position::key_after() computes the new hash key after the given move. Needed +/// for speculative prefetch. It doesn't recognize special moves like castling, +/// en passant and promotions. + +Key Position::key_after(Move m) const { + + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(from); + Piece captured = piece_on(to); + Key k = st->key ^ Zobrist::side; + + if (captured) + k ^= Zobrist::psq[captured][to]; + + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[pc][from]; + + return (captured || type_of(pc) == PAWN) + ? k : adjust_key50(k); +} + + +/// Position::see_ge (Static Exchange Evaluation Greater or Equal) tests if the +/// SEE value of move is greater or equal to the given threshold. We'll use an +/// algorithm similar to alpha-beta pruning with a null window. + +bool Position::see_ge(Move m, Value threshold) const { + + assert(is_ok(m)); + + // Only deal with normal moves, assume others pass a simple SEE + if (type_of(m) != NORMAL) + return VALUE_ZERO >= threshold; + + Square from = from_sq(m), to = to_sq(m); + + int swap = PieceValue[MG][piece_on(to)] - threshold; + if (swap < 0) + return false; + + swap = PieceValue[MG][piece_on(from)] - swap; + if (swap <= 0) + return true; + + assert(color_of(piece_on(from)) == sideToMove); + Bitboard occupied = pieces() ^ from ^ to; + Color stm = sideToMove; + Bitboard attackers = attackers_to(to, occupied); + Bitboard stmAttackers, bb; + int res = 1; + + while (true) + { + stm = ~stm; + attackers &= occupied; + + // If stm has no more attackers then give up: stm loses + if (!(stmAttackers = attackers & pieces(stm))) + break; + + // Don't allow pinned pieces to attack as long as there are + // pinners on their original square. + if (pinners(~stm) & occupied) + { + stmAttackers &= ~blockers_for_king(stm); + + if (!stmAttackers) + break; + } + + res ^= 1; + + // Locate and remove the next least valuable attacker, and add to + // the bitboard 'attackers' any X-ray attackers behind it. + if ((bb = stmAttackers & pieces(PAWN))) + { + if ((swap = PawnValueMg - swap) < res) + break; + + occupied ^= least_significant_square_bb(bb); + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } + + else if ((bb = stmAttackers & pieces(KNIGHT))) + { + if ((swap = KnightValueMg - swap) < res) + break; + + occupied ^= least_significant_square_bb(bb); + } + + else if ((bb = stmAttackers & pieces(BISHOP))) + { + if ((swap = BishopValueMg - swap) < res) + break; + + occupied ^= least_significant_square_bb(bb); + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } + + else if ((bb = stmAttackers & pieces(ROOK))) + { + if ((swap = RookValueMg - swap) < res) + break; + + occupied ^= least_significant_square_bb(bb); + attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); + } + + else if ((bb = stmAttackers & pieces(QUEEN))) + { + if ((swap = QueenValueMg - swap) < res) + break; + + occupied ^= least_significant_square_bb(bb); + attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); + } + + else // KING + // If we "capture" with the king but opponent still has attackers, + // reverse the result. + return (attackers & ~pieces(stm)) ? res ^ 1 : res; + } + + return bool(res); +} + + +/// Position::is_draw() tests whether the position is drawn by 50-move rule +/// or by repetition. It does not detect stalemates. + +bool Position::is_draw(int ply) const { + + if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) + return true; + + // Return a draw score if a position repeats once earlier but strictly + // after the root, or repeats twice before or at the root. + return st->repetition && st->repetition < ply; +} + + +// Position::has_repeated() tests whether there has been at least one repetition +// of positions since the last capture or pawn move. + +bool Position::has_repeated() const { + + StateInfo* stc = st; + int end = std::min(st->rule50, st->pliesFromNull); + while (end-- >= 4) + { + if (stc->repetition) + return true; + + stc = stc->previous; + } + return false; +} + + +/// Position::has_game_cycle() tests if the position has a move which draws by repetition, +/// or an earlier position has a move that directly reaches the current position. + +bool Position::has_game_cycle(int ply) const { + + int j; + + int end = std::min(st->rule50, st->pliesFromNull); + + if (end < 3) + return false; + + Key originalKey = st->key; + StateInfo* stp = st->previous; + + for (int i = 3; i <= end; i += 2) + { + stp = stp->previous->previous; + + Key moveKey = originalKey ^ stp->key; + if ( (j = H1(moveKey), cuckoo[j] == moveKey) + || (j = H2(moveKey), cuckoo[j] == moveKey)) + { + Move move = cuckooMove[j]; + Square s1 = from_sq(move); + Square s2 = to_sq(move); + + if (!((between_bb(s1, s2) ^ s2) & pieces())) + { + if (ply > i) + return true; + + // For nodes before or at the root, check that the move is a + // repetition rather than a move to the current position. + // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in + // the same location, so we have to select which square to check. + if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) + continue; + + // For repetitions before or at the root, require one more + if (stp->repetition) + return true; + } + } + } + return false; +} + + +/// Position::flip() flips position with the white and black sides reversed. This +/// is only useful for debugging e.g. for finding evaluation symmetry bugs. + +void Position::flip() { + + string f, token; + std::stringstream ss(fen()); + + for (Rank r = RANK_8; r >= RANK_1; --r) // Piece placement + { + std::getline(ss, token, r > RANK_1 ? '/' : ' '); + f.insert(0, token + (f.empty() ? " " : "/")); + } + + ss >> token; // Active color + f += (token == "w" ? "B " : "W "); // Will be lowercased later + + ss >> token; // Castling availability + f += token + " "; + + std::transform(f.begin(), f.end(), f.begin(), + [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); + + ss >> token; // En passant square + f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); + + std::getline(ss, token); // Half and full moves + f += token; + + set(f, is_chess960(), st, this_thread()); + + assert(pos_is_ok()); +} + + +/// Position::pos_is_ok() performs some consistency checks for the +/// position object and raises an asserts if something wrong is detected. +/// This is meant to be helpful when debugging. + +bool Position::pos_is_ok() const { + + constexpr bool Fast = true; // Quick (default) or full check? + + if ( (sideToMove != WHITE && sideToMove != BLACK) + || piece_on(square(WHITE)) != W_KING + || piece_on(square(BLACK)) != B_KING + || ( ep_square() != SQ_NONE + && relative_rank(sideToMove, ep_square()) != RANK_6)) + assert(0 && "pos_is_ok: Default"); + + if (Fast) + return true; + + if ( pieceCount[W_KING] != 1 + || pieceCount[B_KING] != 1 + || attackers_to(square(~sideToMove)) & pieces(sideToMove)) + assert(0 && "pos_is_ok: Kings"); + + if ( (pieces(PAWN) & (Rank1BB | Rank8BB)) + || pieceCount[W_PAWN] > 8 + || pieceCount[B_PAWN] > 8) + assert(0 && "pos_is_ok: Pawns"); + + if ( (pieces(WHITE) & pieces(BLACK)) + || (pieces(WHITE) | pieces(BLACK)) != pieces() + || popcount(pieces(WHITE)) > 16 + || popcount(pieces(BLACK)) > 16) + assert(0 && "pos_is_ok: Bitboards"); + + for (PieceType p1 = PAWN; p1 <= KING; ++p1) + for (PieceType p2 = PAWN; p2 <= KING; ++p2) + if (p1 != p2 && (pieces(p1) & pieces(p2))) + assert(0 && "pos_is_ok: Bitboards"); + + StateInfo si = *st; + ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize); + + set_state(&si); + if (std::memcmp(&si, st, sizeof(StateInfo))) + assert(0 && "pos_is_ok: State"); + + for (Piece pc : Pieces) + if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) + || pieceCount[pc] != std::count(board, board + SQUARE_NB, pc)) + assert(0 && "pos_is_ok: Pieces"); + + for (Color c : { WHITE, BLACK }) + for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) + { + if (!can_castle(cr)) + continue; + + if ( piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) + || castlingRightsMask[castlingRookSquare[cr]] != cr + || (castlingRightsMask[square(c)] & cr) != cr) + assert(0 && "pos_is_ok: Castling"); + } + + return true; +} + +} // namespace Stockfish diff --git a/src/position.h b/src/position.h new file mode 100644 index 0000000000000000000000000000000000000000..078ff5b79f23e9e2d85b943734f2c4713bbbb3bd --- /dev/null +++ b/src/position.h @@ -0,0 +1,443 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef POSITION_H_INCLUDED +#define POSITION_H_INCLUDED + +#include +#include +#include // For std::unique_ptr +#include + +#include "bitboard.h" +#include "evaluate.h" +#include "psqt.h" +#include "types.h" + +#include "nnue/nnue_accumulator.h" + +namespace Stockfish { + +/// StateInfo struct stores information needed to restore a Position object to +/// its previous state when we retract a move. Whenever a move is made on the +/// board (by calling Position::do_move), a StateInfo object must be passed. + +struct StateInfo { + + // Copied when making a move + Key pawnKey; + Key materialKey; + Value nonPawnMaterial[COLOR_NB]; + int castlingRights; + int rule50; + int pliesFromNull; + Square epSquare; + + // Not copied when making a move (will be recomputed anyhow) + Key key; + Bitboard checkersBB; + StateInfo* previous; + Bitboard blockersForKing[COLOR_NB]; + Bitboard pinners[COLOR_NB]; + Bitboard checkSquares[PIECE_TYPE_NB]; + Piece capturedPiece; + int repetition; + + // Used by NNUE + Eval::NNUE::Accumulator accumulator; + DirtyPiece dirtyPiece; +}; + + +/// A list to keep track of the position states along the setup moves (from the +/// start position to the position just before the search starts). Needed by +/// 'draw by repetition' detection. Use a std::deque because pointers to +/// elements are not invalidated upon list resizing. +typedef std::unique_ptr> StateListPtr; + + +/// Position class stores information regarding the board representation as +/// pieces, side to move, hash keys, castling info, etc. Important methods are +/// do_move() and undo_move(), used by the search to update node info when +/// traversing the search tree. +class Thread; + +class Position { +public: + static void init(); + + Position() = default; + Position(const Position&) = delete; + Position& operator=(const Position&) = delete; + + // FEN string input/output + Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); + Position& set(const std::string& code, Color c, StateInfo* si); + std::string fen() const; + + // Position representation + Bitboard pieces(PieceType pt) const; + Bitboard pieces(PieceType pt1, PieceType pt2) const; + Bitboard pieces(Color c) const; + Bitboard pieces(Color c, PieceType pt) const; + Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const; + Piece piece_on(Square s) const; + Square ep_square() const; + bool empty(Square s) const; + template int count(Color c) const; + template int count() const; + template Square square(Color c) const; + bool is_on_semiopen_file(Color c, Square s) const; + + // Castling + CastlingRights castling_rights(Color c) const; + bool can_castle(CastlingRights cr) const; + bool castling_impeded(CastlingRights cr) const; + Square castling_rook_square(CastlingRights cr) const; + + // Checking + Bitboard checkers() const; + Bitboard blockers_for_king(Color c) const; + Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; + + // Attacks to/from a given square + Bitboard attackers_to(Square s) const; + Bitboard attackers_to(Square s, Bitboard occupied) const; + Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const; + template Bitboard attacks_by(Color c) const; + + // Properties of moves + bool legal(Move m) const; + bool pseudo_legal(const Move m) const; + bool capture(Move m) const; + bool gives_check(Move m) const; + Piece moved_piece(Move m) const; + Piece captured_piece() const; + + // Piece specific + bool pawn_passed(Color c, Square s) const; + bool opposite_bishops() const; + int pawns_on_same_color_squares(Color c, Square s) const; + + // Doing and undoing moves + void do_move(Move m, StateInfo& newSt); + void do_move(Move m, StateInfo& newSt, bool givesCheck); + void undo_move(Move m); + void do_null_move(StateInfo& newSt); + void undo_null_move(); + + // Static Exchange Evaluation + bool see_ge(Move m, Value threshold = VALUE_ZERO) const; + + // Accessing hash keys + Key key() const; + Key key_after(Move m) const; + Key material_key() const; + Key pawn_key() const; + + // Other properties of the position + Color side_to_move() const; + int game_ply() const; + bool is_chess960() const; + Thread* this_thread() const; + bool is_draw(int ply) const; + bool has_game_cycle(int ply) const; + bool has_repeated() const; + int rule50_count() const; + Score psq_score() const; + Value psq_eg_stm() const; + Value non_pawn_material(Color c) const; + Value non_pawn_material() const; + + // Position consistency check, for debugging + bool pos_is_ok() const; + void flip(); + + // Used by NNUE + StateInfo* state() const; + + void put_piece(Piece pc, Square s); + void remove_piece(Square s); + +private: + // Initialization helpers (used while setting up a position) + void set_castling_right(Color c, Square rfrom); + void set_state(StateInfo* si) const; + void set_check_info(StateInfo* si) const; + + // Other helpers + void move_piece(Square from, Square to); + template + void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); + template + Key adjust_key50(Key k) const; + + // Data members + Piece board[SQUARE_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; + Bitboard byColorBB[COLOR_NB]; + int pieceCount[PIECE_NB]; + int castlingRightsMask[SQUARE_NB]; + Square castlingRookSquare[CASTLING_RIGHT_NB]; + Bitboard castlingPath[CASTLING_RIGHT_NB]; + Thread* thisThread; + StateInfo* st; + int gamePly; + Color sideToMove; + Score psq; + bool chess960; +}; + +extern std::ostream& operator<<(std::ostream& os, const Position& pos); + +inline Color Position::side_to_move() const { + return sideToMove; +} + +inline Piece Position::piece_on(Square s) const { + assert(is_ok(s)); + return board[s]; +} + +inline bool Position::empty(Square s) const { + return piece_on(s) == NO_PIECE; +} + +inline Piece Position::moved_piece(Move m) const { + return piece_on(from_sq(m)); +} + +inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const { + return byTypeBB[pt]; +} + +inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const { + return pieces(pt1) | pieces(pt2); +} + +inline Bitboard Position::pieces(Color c) const { + return byColorBB[c]; +} + +inline Bitboard Position::pieces(Color c, PieceType pt) const { + return pieces(c) & pieces(pt); +} + +inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const { + return pieces(c) & (pieces(pt1) | pieces(pt2)); +} + +template inline int Position::count(Color c) const { + return pieceCount[make_piece(c, Pt)]; +} + +template inline int Position::count() const { + return count(WHITE) + count(BLACK); +} + +template inline Square Position::square(Color c) const { + assert(count(c) == 1); + return lsb(pieces(c, Pt)); +} + +inline Square Position::ep_square() const { + return st->epSquare; +} + +inline bool Position::is_on_semiopen_file(Color c, Square s) const { + return !(pieces(c, PAWN) & file_bb(s)); +} + +inline bool Position::can_castle(CastlingRights cr) const { + return st->castlingRights & cr; +} + +inline CastlingRights Position::castling_rights(Color c) const { + return c & CastlingRights(st->castlingRights); +} + +inline bool Position::castling_impeded(CastlingRights cr) const { + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + + return pieces() & castlingPath[cr]; +} + +inline Square Position::castling_rook_square(CastlingRights cr) const { + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + + return castlingRookSquare[cr]; +} + +inline Bitboard Position::attackers_to(Square s) const { + return attackers_to(s, pieces()); +} + +template +inline Bitboard Position::attacks_by(Color c) const { + + if constexpr (Pt == PAWN) + return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN)) + : pawn_attacks_bb(pieces(BLACK, PAWN)); + else + { + Bitboard threats = 0; + Bitboard attackers = pieces(c, Pt); + while (attackers) + threats |= attacks_bb(pop_lsb(attackers), pieces()); + return threats; + } +} + +inline Bitboard Position::checkers() const { + return st->checkersBB; +} + +inline Bitboard Position::blockers_for_king(Color c) const { + return st->blockersForKing[c]; +} + +inline Bitboard Position::pinners(Color c) const { + return st->pinners[c]; +} + +inline Bitboard Position::check_squares(PieceType pt) const { + return st->checkSquares[pt]; +} + +inline bool Position::pawn_passed(Color c, Square s) const { + return !(pieces(~c, PAWN) & passed_pawn_span(c, s)); +} + +inline int Position::pawns_on_same_color_squares(Color c, Square s) const { + return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares)); +} + +inline Key Position::key() const { + return adjust_key50(st->key); +} + +template +inline Key Position::adjust_key50(Key k) const +{ + return st->rule50 < 14 - AfterMove + ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8); +} + +inline Key Position::pawn_key() const { + return st->pawnKey; +} + +inline Key Position::material_key() const { + return st->materialKey; +} + +inline Score Position::psq_score() const { + return psq; +} + +inline Value Position::psq_eg_stm() const { + return (sideToMove == WHITE ? 1 : -1) * eg_value(psq); +} + +inline Value Position::non_pawn_material(Color c) const { + return st->nonPawnMaterial[c]; +} + +inline Value Position::non_pawn_material() const { + return non_pawn_material(WHITE) + non_pawn_material(BLACK); +} + +inline int Position::game_ply() const { + return gamePly; +} + +inline int Position::rule50_count() const { + return st->rule50; +} + +inline bool Position::opposite_bishops() const { + return count(WHITE) == 1 + && count(BLACK) == 1 + && opposite_colors(square(WHITE), square(BLACK)); +} + +inline bool Position::is_chess960() const { + return chess960; +} + +inline bool Position::capture(Move m) const { + assert(is_ok(m)); + // Castling is encoded as "king captures rook" + return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT; +} + +inline Piece Position::captured_piece() const { + return st->capturedPiece; +} + +inline Thread* Position::this_thread() const { + return thisThread; +} + +inline void Position::put_piece(Piece pc, Square s) { + + board[s] = pc; + byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; + byColorBB[color_of(pc)] |= s; + pieceCount[pc]++; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; + psq += PSQT::psq[pc][s]; +} + +inline void Position::remove_piece(Square s) { + + Piece pc = board[s]; + byTypeBB[ALL_PIECES] ^= s; + byTypeBB[type_of(pc)] ^= s; + byColorBB[color_of(pc)] ^= s; + board[s] = NO_PIECE; + pieceCount[pc]--; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; + psq -= PSQT::psq[pc][s]; +} + +inline void Position::move_piece(Square from, Square to) { + + Piece pc = board[from]; + Bitboard fromTo = from | to; + byTypeBB[ALL_PIECES] ^= fromTo; + byTypeBB[type_of(pc)] ^= fromTo; + byColorBB[color_of(pc)] ^= fromTo; + board[from] = NO_PIECE; + board[to] = pc; + psq += PSQT::psq[pc][to] - PSQT::psq[pc][from]; +} + +inline void Position::do_move(Move m, StateInfo& newSt) { + do_move(m, newSt, gives_check(m)); +} + +inline StateInfo* Position::state() const { + + return st; +} + +} // namespace Stockfish + +#endif // #ifndef POSITION_H_INCLUDED diff --git a/src/psqt.cpp b/src/psqt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca5664c259ff5191589ec369974de69efe44ca44 --- /dev/null +++ b/src/psqt.cpp @@ -0,0 +1,131 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + + +#include "psqt.h" + +#include + +#include "bitboard.h" +#include "types.h" + +namespace Stockfish { + +namespace +{ + +auto constexpr S = make_score; + +// 'Bonus' contains Piece-Square parameters. +// Scores are explicit for files A to D, implicitly mirrored for E to H. +constexpr Score Bonus[][RANK_NB][int(FILE_NB) / 2] = { + { }, + { }, + { // Knight + { S(-175, -96), S(-92,-65), S(-74,-49), S(-73,-21) }, + { S( -77, -67), S(-41,-54), S(-27,-18), S(-15, 8) }, + { S( -61, -40), S(-17,-27), S( 6, -8), S( 12, 29) }, + { S( -35, -35), S( 8, -2), S( 40, 13), S( 49, 28) }, + { S( -34, -45), S( 13,-16), S( 44, 9), S( 51, 39) }, + { S( -9, -51), S( 22,-44), S( 58,-16), S( 53, 17) }, + { S( -67, -69), S(-27,-50), S( 4,-51), S( 37, 12) }, + { S(-201,-100), S(-83,-88), S(-56,-56), S(-26,-17) } + }, + { // Bishop + { S(-37,-40), S(-4 ,-21), S( -6,-26), S(-16, -8) }, + { S(-11,-26), S( 6, -9), S( 13,-12), S( 3, 1) }, + { S(-5 ,-11), S( 15, -1), S( -4, -1), S( 12, 7) }, + { S(-4 ,-14), S( 8, -4), S( 18, 0), S( 27, 12) }, + { S(-8 ,-12), S( 20, -1), S( 15,-10), S( 22, 11) }, + { S(-11,-21), S( 4, 4), S( 1, 3), S( 8, 4) }, + { S(-12,-22), S(-10,-14), S( 4, -1), S( 0, 1) }, + { S(-34,-32), S( 1,-29), S(-10,-26), S(-16,-17) } + }, + { // Rook + { S(-31, -9), S(-20,-13), S(-14,-10), S(-5, -9) }, + { S(-21,-12), S(-13, -9), S( -8, -1), S( 6, -2) }, + { S(-25, 6), S(-11, -8), S( -1, -2), S( 3, -6) }, + { S(-13, -6), S( -5, 1), S( -4, -9), S(-6, 7) }, + { S(-27, -5), S(-15, 8), S( -4, 7), S( 3, -6) }, + { S(-22, 6), S( -2, 1), S( 6, -7), S(12, 10) }, + { S( -2, 4), S( 12, 5), S( 16, 20), S(18, -5) }, + { S(-17, 18), S(-19, 0), S( -1, 19), S( 9, 13) } + }, + { // Queen + { S( 3,-69), S(-5,-57), S(-5,-47), S( 4,-26) }, + { S(-3,-54), S( 5,-31), S( 8,-22), S(12, -4) }, + { S(-3,-39), S( 6,-18), S(13, -9), S( 7, 3) }, + { S( 4,-23), S( 5, -3), S( 9, 13), S( 8, 24) }, + { S( 0,-29), S(14, -6), S(12, 9), S( 5, 21) }, + { S(-4,-38), S(10,-18), S( 6,-11), S( 8, 1) }, + { S(-5,-50), S( 6,-27), S(10,-24), S( 8, -8) }, + { S(-2,-74), S(-2,-52), S( 1,-43), S(-2,-34) } + }, + { // King + { S(271, 1), S(327, 45), S(271, 85), S(198, 76) }, + { S(278, 53), S(303,100), S(234,133), S(179,135) }, + { S(195, 88), S(258,130), S(169,169), S(120,175) }, + { S(164,103), S(190,156), S(138,172), S( 98,172) }, + { S(154, 96), S(179,166), S(105,199), S( 70,199) }, + { S(123, 92), S(145,172), S( 81,184), S( 31,191) }, + { S( 88, 47), S(120,121), S( 65,116), S( 33,131) }, + { S( 59, 11), S( 89, 59), S( 45, 73), S( -1, 78) } + } +}; + +constexpr Score PBonus[RANK_NB][FILE_NB] = + { // Pawn (asymmetric distribution) + { }, + { S( 2, -8), S( 4, -6), S( 11, 9), S( 18, 5), S( 16, 16), S( 21, 6), S( 9, -6), S( -3,-18) }, + { S( -9, -9), S(-15, -7), S( 11,-10), S( 15, 5), S( 31, 2), S( 23, 3), S( 6, -8), S(-20, -5) }, + { S( -3, 7), S(-20, 1), S( 8, -8), S( 19, -2), S( 39,-14), S( 17,-13), S( 2,-11), S( -5, -6) }, + { S( 11, 12), S( -4, 6), S(-11, 2), S( 2, -6), S( 11, -5), S( 0, -4), S(-12, 14), S( 5, 9) }, + { S( 3, 27), S(-11, 18), S( -6, 19), S( 22, 29), S( -8, 30), S( -5, 9), S(-14, 8), S(-11, 14) }, + { S( -7, -1), S( 6,-14), S( -2, 13), S(-11, 22), S( 4, 24), S(-14, 17), S( 10, 7), S( -9, 7) } + }; + +} // namespace + + +namespace PSQT +{ + +Score psq[PIECE_NB][SQUARE_NB]; + +// PSQT::init() initializes piece-square tables: the white halves of the tables are +// copied from Bonus[] and PBonus[], adding the piece value, then the black halves of +// the tables are initialized by flipping and changing the sign of the white scores. +void init() { + + for (Piece pc : {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING}) + { + Score score = make_score(PieceValue[MG][pc], PieceValue[EG][pc]); + + for (Square s = SQ_A1; s <= SQ_H8; ++s) + { + File f = File(edge_distance(file_of(s))); + psq[ pc][s] = score + (type_of(pc) == PAWN ? PBonus[rank_of(s)][file_of(s)] + : Bonus[pc][rank_of(s)][f]); + psq[~pc][flip_rank(s)] = -psq[pc][s]; + } + } +} + +} // namespace PSQT + +} // namespace Stockfish diff --git a/src/psqt.h b/src/psqt.h new file mode 100644 index 0000000000000000000000000000000000000000..4ee0e379b145b5262ff53b413904813b229ecad2 --- /dev/null +++ b/src/psqt.h @@ -0,0 +1,38 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + + +#ifndef PSQT_H_INCLUDED +#define PSQT_H_INCLUDED + + +#include "types.h" + + +namespace Stockfish::PSQT +{ + +extern Score psq[PIECE_NB][SQUARE_NB]; + +// Fill psqt array from a set of internally linked parameters +extern void init(); + +} // namespace Stockfish::PSQT + + +#endif // PSQT_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c8163d1fa214d702ea744a98df8373140f1bd390 --- /dev/null +++ b/src/search.cpp @@ -0,0 +1,1959 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include // For std::memset +#include +#include + +#include "evaluate.h" +#include "misc.h" +#include "movegen.h" +#include "movepick.h" +#include "position.h" +#include "search.h" +#include "thread.h" +#include "timeman.h" +#include "tt.h" +#include "uci.h" +#include "syzygy/tbprobe.h" + +namespace Stockfish { + +namespace Search { + + LimitsType Limits; +} + +namespace Tablebases { + + int Cardinality; + bool RootInTB; + bool UseRule50; + Depth ProbeDepth; +} + +namespace TB = Tablebases; + +using std::string; +using Eval::evaluate; +using namespace Search; + +namespace { + + // Different node types, used as a template parameter + enum NodeType { NonPV, PV, Root }; + + // Futility margin + Value futility_margin(Depth d, bool improving) { + return Value(165 * (d - improving)); + } + + // Reductions lookup table, initialized at startup + int Reductions[MAX_MOVES]; // [depth or moveNumber] + + Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { + int r = Reductions[d] * Reductions[mn]; + return (r + 1642 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 916); + } + + constexpr int futility_move_count(bool improving, Depth depth) { + return improving ? (3 + depth * depth) + : (3 + depth * depth) / 2; + } + + // History and stats update bonus, based on depth + int stat_bonus(Depth d) { + return std::min((12 * d + 282) * d - 349 , 1594); + } + + // Add a small random component to draw evaluations to avoid 3-fold blindness + Value value_draw(const Thread* thisThread) { + return VALUE_DRAW - 1 + Value(thisThread->nodes & 0x2); + } + + // Skill structure is used to implement strength limit. If we have an uci_elo then + // we convert it to a suitable fractional skill level using anchoring to CCRL Elo + // (goldfish 1.13 = 2000) and a fit through Ordo derived Elo for match (TC 60+0.6) + // results spanning a wide range of k values. + struct Skill { + Skill(int skill_level, int uci_elo) { + if (uci_elo) + level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + else + level = double(skill_level); + } + bool enabled() const { return level < 20.0; } + bool time_to_pick(Depth depth) const { return depth == 1 + int(level); } + Move pick_best(size_t multiPV); + + double level; + Move best = MOVE_NONE; + }; + + template + Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); + + template + Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); + + Value value_to_tt(Value v, int ply); + Value value_from_tt(Value v, int ply, int r50c); + void update_pv(Move* pv, Move move, const Move* childPv); + void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); + void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus); + void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, + Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth); + + // perft() is our utility to verify move generation. All the leaf nodes up + // to the given depth are generated and counted, and the sum is returned. + template + uint64_t perft(Position& pos, Depth depth) { + + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + uint64_t cnt, nodes = 0; + const bool leaf = (depth == 2); + + for (const auto& m : MoveList(pos)) + { + if (Root && depth <= 1) + cnt = 1, nodes++; + else + { + pos.do_move(m, st); + cnt = leaf ? MoveList(pos).size() : perft(pos, depth - 1); + nodes += cnt; + pos.undo_move(m); + } + if (Root) + sync_cout << UCI::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; + } + return nodes; + } + +} // namespace + + +/// Search::init() is called at startup to initialize various lookup tables + +void Search::init() { + + for (int i = 1; i < MAX_MOVES; ++i) + Reductions[i] = int((20.26 + std::log(Threads.size()) / 2) * std::log(i)); +} + + +/// Search::clear() resets search state to its initial value + +void Search::clear() { + + Threads.main()->wait_for_search_finished(); + + Time.availableNodes = 0; + TT.clear(); + Threads.clear(); + Tablebases::init(Options["SyzygyPath"]); // Free mapped files +} + + +/// MainThread::search() is started when the program receives the UCI 'go' +/// command. It searches from the root position and outputs the "bestmove". + +void MainThread::search() { + + if (Limits.perft) + { + nodes = perft(rootPos, Limits.perft); + sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; + return; + } + + Color us = rootPos.side_to_move(); + Time.init(Limits, us, rootPos.game_ply()); + TT.new_search(); + + Eval::NNUE::verify(); + + if (rootMoves.empty()) + { + rootMoves.emplace_back(MOVE_NONE); + sync_cout << "info depth 0 score " + << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << sync_endl; + } + else + { + Threads.start_searching(); // start non-main threads + Thread::search(); // main thread start searching + } + + // When we reach the maximum depth, we can arrive here without a raise of + // Threads.stop. However, if we are pondering or in an infinite search, + // the UCI protocol states that we shouldn't print the best move before the + // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here + // until the GUI sends one of those commands. + + while (!Threads.stop && (ponder || Limits.infinite)) + {} // Busy wait for a stop or a ponder reset + + // Stop the threads if not already stopped (also raise the stop if + // "ponderhit" just reset Threads.ponder). + Threads.stop = true; + + // Wait until all threads have finished + Threads.wait_for_search_finished(); + + // When playing in 'nodes as time' mode, subtract the searched nodes from + // the available ones before exiting. + if (Limits.npmsec) + Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); + + Thread* bestThread = this; + Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); + + if ( int(Options["MultiPV"]) == 1 + && !Limits.depth + && !skill.enabled() + && rootMoves[0].pv[0] != MOVE_NONE) + bestThread = Threads.get_best_thread(); + + bestPreviousScore = bestThread->rootMoves[0].score; + bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; + + for (Thread* th : Threads) + th->previousDepth = bestThread->completedDepth; + + // Send again PV info if we have a new best thread + if (bestThread != this) + sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth) << sync_endl; + + sync_cout << "bestmove " << UCI::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); + + if (bestThread->rootMoves[0].pv.size() > 1 || bestThread->rootMoves[0].extract_ponder_from_tt(rootPos)) + std::cout << " ponder " << UCI::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); + + std::cout << sync_endl; +} + + +/// Thread::search() is the main iterative deepening loop. It calls search() +/// repeatedly with increasing depth until the allocated thinking time has been +/// consumed, the user stops the search, or the maximum search depth is reached. + +void Thread::search() { + + // To allow access to (ss-7) up to (ss+2), the stack must be oversized. + // The former is needed to allow update_continuation_histories(ss-1, ...), + // which accesses its argument at ss-6, also near the root. + // The latter is needed for statScore and killer initialization. + Stack stack[MAX_PLY+10], *ss = stack+7; + Move pv[MAX_PLY+1]; + Value alpha, beta, delta; + Move lastBestMove = MOVE_NONE; + Depth lastBestMoveDepth = 0; + MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); + double timeReduction = 1, totBestMoveChanges = 0; + Color us = rootPos.side_to_move(); + int iterIdx = 0; + + std::memset(ss-7, 0, 10 * sizeof(Stack)); + for (int i = 7; i > 0; i--) + (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + + for (int i = 0; i <= MAX_PLY + 2; ++i) + (ss+i)->ply = i; + + ss->pv = pv; + + bestValue = delta = alpha = -VALUE_INFINITE; + beta = VALUE_INFINITE; + + if (mainThread) + { + if (mainThread->bestPreviousScore == VALUE_INFINITE) + for (int i = 0; i < 4; ++i) + mainThread->iterValue[i] = VALUE_ZERO; + else + for (int i = 0; i < 4; ++i) + mainThread->iterValue[i] = mainThread->bestPreviousScore; + } + + size_t multiPV = size_t(Options["MultiPV"]); + Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); + + // When playing with strength handicap enable MultiPV search that we will + // use behind the scenes to retrieve a set of possible moves. + if (skill.enabled()) + multiPV = std::max(multiPV, (size_t)4); + + multiPV = std::min(multiPV, rootMoves.size()); + + complexityAverage.set(155, 1); + + optimism[us] = optimism[~us] = VALUE_ZERO; + + int searchAgainCounter = 0; + + // Iterative deepening loop until requested to stop or the target depth is reached + while ( ++rootDepth < MAX_PLY + && !Threads.stop + && !(Limits.depth && mainThread && rootDepth > Limits.depth)) + { + // Age out PV variability metric + if (mainThread) + totBestMoveChanges /= 2; + + // Save the last iteration's scores before first PV line is searched and + // all the move scores except the (new) PV are set to -VALUE_INFINITE. + for (RootMove& rm : rootMoves) + rm.previousScore = rm.score; + + size_t pvFirst = 0; + pvLast = 0; + + if (!Threads.increaseDepth) + searchAgainCounter++; + + // MultiPV loop. We perform a full root search for each PV line + for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) + { + if (pvIdx == pvLast) + { + pvFirst = pvLast; + for (pvLast++; pvLast < rootMoves.size(); pvLast++) + if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) + break; + } + + // Reset UCI info selDepth for each depth and each PV line + selDepth = 0; + + // Reset aspiration window starting size + if (rootDepth >= 4) + { + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(10) + int(prev) * prev / 15620; + alpha = std::max(prev - delta,-VALUE_INFINITE); + beta = std::min(prev + delta, VALUE_INFINITE); + + // Adjust optimism based on root move's previousScore + int opt = 118 * prev / (std::abs(prev) + 169); + optimism[ us] = Value(opt); + optimism[~us] = -optimism[us]; + } + + // Start with a small aspiration window and, in the case of a fail + // high/low, re-search with a bigger window until we don't fail + // high/low anymore. + int failedHighCnt = 0; + while (true) + { + // Adjust the effective depth searched, but ensuring at least one effective increment for every + // four searchAgain steps (see issue #2717). + Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); + bestValue = Stockfish::search(rootPos, ss, alpha, beta, adjustedDepth, false); + + // Bring the best move to the front. It is critical that sorting + // is done with a stable algorithm because all the values but the + // first and eventually the new best one are set to -VALUE_INFINITE + // and we want to keep the same order for all the moves except the + // new PV that goes to the front. Note that in case of MultiPV + // search the already searched PV lines are preserved. + std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); + + // If search has been stopped, we break immediately. Sorting is + // safe because RootMoves is still valid, although it refers to + // the previous iteration. + if (Threads.stop) + break; + + // When failing high/low give some update (without cluttering + // the UI) before a re-search. + if ( mainThread + && multiPV == 1 + && (bestValue <= alpha || bestValue >= beta) + && Time.elapsed() > 3000) + sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; + + // In case of failing low/high increase aspiration window and + // re-search, otherwise exit the loop. + if (bestValue <= alpha) + { + beta = (alpha + beta) / 2; + alpha = std::max(bestValue - delta, -VALUE_INFINITE); + + failedHighCnt = 0; + if (mainThread) + mainThread->stopOnPonderhit = false; + } + else if (bestValue >= beta) + { + beta = std::min(bestValue + delta, VALUE_INFINITE); + ++failedHighCnt; + } + else + break; + + delta += delta / 4 + 2; + + assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); + } + + // Sort the PV lines searched so far and update the GUI + std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); + + if ( mainThread + && (Threads.stop || pvIdx + 1 == multiPV || Time.elapsed() > 3000)) + sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; + } + + if (!Threads.stop) + completedDepth = rootDepth; + + if (rootMoves[0].pv[0] != lastBestMove) { + lastBestMove = rootMoves[0].pv[0]; + lastBestMoveDepth = rootDepth; + } + + // Have we found a "mate in x"? + if ( Limits.mate + && bestValue >= VALUE_MATE_IN_MAX_PLY + && VALUE_MATE - bestValue <= 2 * Limits.mate) + Threads.stop = true; + + if (!mainThread) + continue; + + // If skill level is enabled and time is up, pick a sub-optimal best move + if (skill.enabled() && skill.time_to_pick(rootDepth)) + skill.pick_best(multiPV); + + // Use part of the gained time from a previous stable move for the current move + for (Thread* th : Threads) + { + totBestMoveChanges += th->bestMoveChanges; + th->bestMoveChanges = 0; + } + + // Do we have time for the next iteration? Can we stop searching now? + if ( Limits.use_time_management() + && !Threads.stop + && !mainThread->stopOnPonderhit) + { + double fallingEval = (71 + 12 * (mainThread->bestPreviousAverageScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 656.7; + fallingEval = std::clamp(fallingEval, 0.5, 1.5); + + // If the bestMove is stable over several iterations, reduce time accordingly + timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.37 : 0.65; + double reduction = (1.4 + mainThread->previousTimeReduction) / (2.15 * timeReduction); + double bestMoveInstability = 1 + 1.7 * totBestMoveChanges / Threads.size(); + int complexity = mainThread->complexityAverage.value(); + double complexPosition = std::min(1.0 + (complexity - 261) / 1738.7, 1.5); + + double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition; + + // Cap used time in case of a single legal move for a better viewer experience in tournaments + // yielding correct scores and sufficiently fast moves. + if (rootMoves.size() == 1) + totalTime = std::min(500.0, totalTime); + + // Stop the search if we have exceeded the totalTime + if (Time.elapsed() > totalTime) + { + // If we are allowed to ponder do not stop the search now but + // keep pondering until the GUI sends "ponderhit" or "stop". + if (mainThread->ponder) + mainThread->stopOnPonderhit = true; + else + Threads.stop = true; + } + else if ( Threads.increaseDepth + && !mainThread->ponder + && Time.elapsed() > totalTime * 0.53) + Threads.increaseDepth = false; + else + Threads.increaseDepth = true; + } + + mainThread->iterValue[iterIdx] = bestValue; + iterIdx = (iterIdx + 1) & 3; + } + + if (!mainThread) + return; + + mainThread->previousTimeReduction = timeReduction; + + // If skill level is enabled, swap best PV line with the sub-optimal one + if (skill.enabled()) + std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(), + skill.best ? skill.best : skill.pick_best(multiPV))); +} + + +namespace { + + // search<>() is the main search function for both PV and non-PV nodes + + template + Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { + + constexpr bool PvNode = nodeType != NonPV; + constexpr bool rootNode = nodeType == Root; + const Depth maxNextDepth = rootNode ? depth : depth + 1; + + // Check if we have an upcoming move which draws by repetition, or + // if the opponent had an alternative move earlier to this position. + if ( !rootNode + && pos.rule50_count() >= 3 + && alpha < VALUE_DRAW + && pos.has_game_cycle(ss->ply)) + { + alpha = value_draw(pos.this_thread()); + if (alpha >= beta) + return alpha; + } + + // Dive into quiescence search when the depth reaches zero + if (depth <= 0) + return qsearch(pos, ss, alpha, beta); + + assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE); + assert(PvNode || (alpha == beta - 1)); + assert(0 < depth && depth < MAX_PLY); + assert(!(PvNode && cutNode)); + + Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[64]; + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + TTEntry* tte; + Key posKey; + Move ttMove, move, excludedMove, bestMove; + Depth extension, newDepth; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta; + bool givesCheck, improving, priorCapture, singularQuietLMR; + bool capture, moveCountPruning, ttCapture; + Piece movedPiece; + int moveCount, captureCount, quietCount, improvement, complexity; + + // Step 1. Initialize node + Thread* thisThread = pos.this_thread(); + ss->inCheck = pos.checkers(); + priorCapture = pos.captured_piece(); + Color us = pos.side_to_move(); + moveCount = captureCount = quietCount = ss->moveCount = 0; + bestValue = -VALUE_INFINITE; + maxValue = VALUE_INFINITE; + + // Check for the available remaining time + if (thisThread == Threads.main()) + static_cast(thisThread)->check_time(); + + // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) + if (PvNode && thisThread->selDepth < ss->ply + 1) + thisThread->selDepth = ss->ply + 1; + + if (!rootNode) + { + // Step 2. Check for aborted search and immediate draw + if ( Threads.stop.load(std::memory_order_relaxed) + || pos.is_draw(ss->ply) + || ss->ply >= MAX_PLY) + return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) + : value_draw(pos.this_thread()); + + // Step 3. Mate distance pruning. Even if we mate at the next move our score + // would be at best mate_in(ss->ply+1), but if alpha is already bigger because + // a shorter mate was found upward in the tree then there is no need to search + // because we will never beat the current alpha. Same logic but with reversed + // signs applies also in the opposite condition of being mated instead of giving + // mate. In this case return a fail-high score. + alpha = std::max(mated_in(ss->ply), alpha); + beta = std::min(mate_in(ss->ply+1), beta); + if (alpha >= beta) + return alpha; + } + else + thisThread->rootDelta = beta - alpha; + + assert(0 <= ss->ply && ss->ply < MAX_PLY); + + (ss+1)->ttPv = false; + (ss+1)->excludedMove = bestMove = MOVE_NONE; + (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; + (ss+2)->cutoffCnt = 0; + ss->doubleExtensions = (ss-1)->doubleExtensions; + Square prevSq = to_sq((ss-1)->currentMove); + + // Initialize statScore to zero for the grandchildren of the current position. + // So statScore is shared between all grandchildren and only the first grandchild + // starts with statScore = 0. Later grandchildren start with the last calculated + // statScore of the previous grandchild. This influences the reduction rules in + // LMR which are based on the statScore of parent position. + if (!rootNode) + (ss+2)->statScore = 0; + + // Step 4. Transposition table lookup. We don't want the score of a partial + // search to overwrite a previous full search TT value, so we use a different + // position key in case of an excluded move. + excludedMove = ss->excludedMove; + posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] + : ss->ttHit ? tte->move() : MOVE_NONE; + ttCapture = ttMove && pos.capture(ttMove); + if (!excludedMove) + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); + + // At non-PV nodes we check for an early TT cutoff + if ( !PvNode + && ss->ttHit + && tte->depth() > depth - (tte->bound() == BOUND_EXACT) + && ttValue != VALUE_NONE // Possible in case of TT access race + && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) + { + // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo) + if (ttMove) + { + if (ttValue >= beta) + { + // Bonus for a quiet ttMove that fails high (~3 Elo) + if (!ttCapture) + update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); + + // Extra penalty for early quiet moves of the previous ply (~0 Elo) + if ((ss-1)->moveCount <= 2 && !priorCapture) + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); + } + // Penalty for a quiet ttMove that fails low (~1 Elo) + else if (!ttCapture) + { + int penalty = -stat_bonus(depth); + thisThread->mainHistory[us][from_to(ttMove)] << penalty; + update_continuation_histories(ss, pos.moved_piece(ttMove), to_sq(ttMove), penalty); + } + } + + // Partial workaround for the graph history interaction problem + // For high rule50 counts don't produce transposition table cutoffs. + if (pos.rule50_count() < 90) + return ttValue; + } + + // Step 5. Tablebases probe + if (!rootNode && TB::Cardinality) + { + int piecesCount = pos.count(); + + if ( piecesCount <= TB::Cardinality + && (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth) + && pos.rule50_count() == 0 + && !pos.can_castle(ANY_CASTLING)) + { + TB::ProbeState err; + TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); + + // Force check of time on the next occasion + if (thisThread == Threads.main()) + static_cast(thisThread)->callsCnt = 0; + + if (err != TB::ProbeState::FAIL) + { + thisThread->tbHits.fetch_add(1, std::memory_order_relaxed); + + int drawScore = TB::UseRule50 ? 1 : 0; + + // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score + value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1 + : wdl > drawScore ? VALUE_MATE_IN_MAX_PLY - ss->ply - 1 + : VALUE_DRAW + 2 * wdl * drawScore; + + Bound b = wdl < -drawScore ? BOUND_UPPER + : wdl > drawScore ? BOUND_LOWER : BOUND_EXACT; + + if ( b == BOUND_EXACT + || (b == BOUND_LOWER ? value >= beta : value <= alpha)) + { + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, + std::min(MAX_PLY - 1, depth + 6), + MOVE_NONE, VALUE_NONE); + + return value; + } + + if (PvNode) + { + if (b == BOUND_LOWER) + bestValue = value, alpha = std::max(alpha, bestValue); + else + maxValue = value; + } + } + } + } + + CapturePieceToHistory& captureHistory = thisThread->captureHistory; + + // Step 6. Static evaluation of the position + if (ss->inCheck) + { + // Skip early pruning when in check + ss->staticEval = eval = VALUE_NONE; + improving = false; + improvement = 0; + complexity = 0; + goto moves_loop; + } + else if (ss->ttHit) + { + // Never assume anything about values stored in TT + ss->staticEval = eval = tte->eval(); + if (eval == VALUE_NONE) + ss->staticEval = eval = evaluate(pos, &complexity); + else // Fall back to (semi)classical complexity for TT hits, the NNUE complexity is lost + complexity = abs(ss->staticEval - pos.psq_eg_stm()); + + // ttValue can be used as a better position evaluation (~4 Elo) + if ( ttValue != VALUE_NONE + && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) + eval = ttValue; + } + else + { + ss->staticEval = eval = evaluate(pos, &complexity); + + // Save static evaluation into transposition table + if (!excludedMove) + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + } + + thisThread->complexityAverage.update(complexity); + + // Use static evaluation difference to improve quiet move ordering (~3 Elo) + if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) + { + int bonus = std::clamp(-19 * int((ss-1)->staticEval + ss->staticEval), -1914, 1914); + thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; + } + + // Set up the improvement variable, which is the difference between the current + // static evaluation and the previous static evaluation at our turn (if we were + // in check at our previous move we look at the move prior to it). The improvement + // margin and the improving flag are used in various pruning heuristics. + improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval + : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval + : 168; + improving = improvement > 0; + + // Step 7. Razoring. + // If eval is really low check with qsearch if it can exceed alpha, if it can't, + // return a fail low. + if (eval < alpha - 369 - 254 * depth * depth) + { + value = qsearch(pos, ss, alpha - 1, alpha); + if (value < alpha) + return value; + } + + // Step 8. Futility pruning: child node (~25 Elo). + // The depth condition is important for mate finding. + if ( !ss->ttPv + && depth < 8 + && eval - futility_margin(depth, improving) - (ss-1)->statScore / 303 >= beta + && eval >= beta + && eval < 28031) // larger than VALUE_KNOWN_WIN, but smaller than TB wins + return eval; + + // Step 9. Null move search with verification search (~22 Elo) + if ( !PvNode + && (ss-1)->currentMove != MOVE_NULL + && (ss-1)->statScore < 17139 + && eval >= beta + && eval >= ss->staticEval + && ss->staticEval >= beta - 20 * depth - improvement / 13 + 233 + complexity / 25 + && !excludedMove + && pos.non_pawn_material(us) + && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) + { + assert(eval - beta >= 0); + + // Null move dynamic reduction based on depth, eval and complexity of position + Depth R = std::min(int(eval - beta) / 168, 7) + depth / 3 + 4 - (complexity > 861); + + ss->currentMove = MOVE_NULL; + ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; + + pos.do_null_move(st); + + Value nullValue = -search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); + + pos.undo_null_move(); + + if (nullValue >= beta) + { + // Do not return unproven mate or TB scores + if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY) + nullValue = beta; + + if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14)) + return nullValue; + + assert(!thisThread->nmpMinPly); // Recursive verification is not allowed + + // Do verification search at high depths, with null move pruning disabled + // for us, until ply exceeds nmpMinPly. + thisThread->nmpMinPly = ss->ply + 3 * (depth-R) / 4; + thisThread->nmpColor = us; + + Value v = search(pos, ss, beta-1, beta, depth-R, false); + + thisThread->nmpMinPly = 0; + + if (v >= beta) + return nullValue; + } + } + + probCutBeta = beta + 191 - 54 * improving; + + // Step 10. ProbCut (~4 Elo) + // If we have a good enough capture and a reduced search returns a value + // much above beta, we can (almost) safely prune the previous move. + if ( !PvNode + && depth > 4 + && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + // if value from transposition table is lower than probCutBeta, don't attempt probCut + // there and in further interactions with transposition table cutoff depth is set to depth - 3 + // because probCut search has depth set to depth - 4 but we also do a move before it + // so effective depth is equal to depth - 3 + && !( ss->ttHit + && tte->depth() >= depth - 3 + && ttValue != VALUE_NONE + && ttValue < probCutBeta)) + { + assert(probCutBeta < VALUE_INFINITE); + + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); + + while ((move = mp.next_move()) != MOVE_NONE) + if (move != excludedMove && pos.legal(move)) + { + assert(pos.capture(move) || promotion_type(move) == QUEEN); + + ss->currentMove = move; + ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] + [true] + [pos.moved_piece(move)] + [to_sq(move)]; + + pos.do_move(move, st); + + // Perform a preliminary qsearch to verify that the move holds + value = -qsearch(pos, ss+1, -probCutBeta, -probCutBeta+1); + + // If the qsearch held, perform the regular search + if (value >= probCutBeta) + value = -search(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode); + + pos.undo_move(move); + + if (value >= probCutBeta) + { + // Save ProbCut data into transposition table + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, move, ss->staticEval); + return value; + } + } + } + + // Step 11. If the position is not in TT, decrease depth by 3. + // Use qsearch if depth is equal or below zero (~4 Elo) + if ( PvNode + && !ttMove) + depth -= 3; + + if (depth <= 0) + return qsearch(pos, ss, alpha, beta); + + if ( cutNode + && depth >= 9 + && !ttMove) + depth -= 2; + +moves_loop: // When in check, search starts here + + // Step 12. A small Probcut idea, when we are in check (~0 Elo) + probCutBeta = beta + 417; + if ( ss->inCheck + && !PvNode + && depth >= 2 + && ttCapture + && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3 + && ttValue >= probCutBeta + && abs(ttValue) <= VALUE_KNOWN_WIN + && abs(beta) <= VALUE_KNOWN_WIN + ) + return probCutBeta; + + + const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, + nullptr , (ss-4)->continuationHistory, + nullptr , (ss-6)->continuationHistory }; + + Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq]; + + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, + &captureHistory, + contHist, + countermove, + ss->killers); + + value = bestValue; + moveCountPruning = singularQuietLMR = false; + + // Indicate PvNodes that will probably fail low if the node was searched + // at a depth equal or greater than the current depth, and the result of this search was a fail low. + bool likelyFailLow = PvNode + && ttMove + && (tte->bound() & BOUND_UPPER) + && tte->depth() >= depth; + + // Step 13. Loop through all pseudo-legal moves until no moves remain + // or a beta cutoff occurs. + while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) + { + assert(is_ok(move)); + + if (move == excludedMove) + continue; + + // At root obey the "searchmoves" option and skip moves not listed in Root + // Move List. As a consequence any illegal move is also skipped. In MultiPV + // mode we also skip PV moves which have been already searched and those + // of lower "TB rank" if we are in a TB root position. + if (rootNode && !std::count(thisThread->rootMoves.begin() + thisThread->pvIdx, + thisThread->rootMoves.begin() + thisThread->pvLast, move)) + continue; + + // Check for legality + if (!rootNode && !pos.legal(move)) + continue; + + ss->moveCount = ++moveCount; + + if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) + sync_cout << "info depth " << depth + << " currmove " << UCI::move(move, pos.is_chess960()) + << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl; + if (PvNode) + (ss+1)->pv = nullptr; + + extension = 0; + capture = pos.capture(move); + movedPiece = pos.moved_piece(move); + givesCheck = pos.gives_check(move); + + // Calculate new depth for this move + newDepth = depth - 1; + + Value delta = beta - alpha; + + // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding. + if ( !rootNode + && pos.non_pawn_material(us) + && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) + { + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo) + moveCountPruning = moveCount >= futility_move_count(improving, depth); + + // Reduced depth of the next LMR search + int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0); + + if ( capture + || givesCheck) + { + // Futility pruning for captures (~0 Elo) + if ( !givesCheck + && !PvNode + && lmrDepth < 7 + && !ss->inCheck + && ss->staticEval + 180 + 201 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] + + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha) + continue; + + // SEE based pruning (~9 Elo) + if (!pos.see_ge(move, Value(-222) * depth)) + continue; + } + else + { + int history = (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)]; + + // Continuation history based pruning (~2 Elo) + if ( lmrDepth < 5 + && history < -3875 * (depth - 1)) + continue; + + history += 2 * thisThread->mainHistory[us][from_to(move)]; + + // Futility pruning: parent node (~9 Elo) + if ( !ss->inCheck + && lmrDepth < 13 + && ss->staticEval + 106 + 145 * lmrDepth + history / 52 <= alpha) + continue; + + // Prune moves with negative SEE (~3 Elo) + if (!pos.see_ge(move, Value(-24 * lmrDepth * lmrDepth - 15 * lmrDepth))) + continue; + } + } + + // Step 15. Extensions (~66 Elo) + // We take care to not overdo to avoid search getting stuck. + if (ss->ply < thisThread->rootDepth * 2) + { + // Singular extension search (~58 Elo). If all moves but one fail low on a + // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), + // then that move is singular and should be extended. To verify this we do + // a reduced search on all the other moves but the ttMove and if the + // result is lower than ttValue minus a margin, then we will extend the ttMove. + if ( !rootNode + && depth >= 4 - (thisThread->previousDepth > 24) + 2 * (PvNode && tte->is_pv()) + && move == ttMove + && !excludedMove // Avoid recursive singular search + /* && ttValue != VALUE_NONE Already implicit in the next condition */ + && abs(ttValue) < VALUE_KNOWN_WIN + && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3) + { + Value singularBeta = ttValue - (3 + (ss->ttPv && !PvNode)) * depth; + Depth singularDepth = (depth - 1) / 2; + + ss->excludedMove = move; + value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); + ss->excludedMove = MOVE_NONE; + + if (value < singularBeta) + { + extension = 1; + singularQuietLMR = !ttCapture; + + // Avoid search explosion by limiting the number of double extensions + if ( !PvNode + && value < singularBeta - 25 + && ss->doubleExtensions <= 9) + extension = 2; + } + + // Multi-cut pruning + // Our ttMove is assumed to fail high, and now we failed high also on a reduced + // search without the ttMove. So we assume this expected Cut-node is not singular, + // that multiple moves fail high, and we can prune the whole subtree by returning + // a soft bound. + else if (singularBeta >= beta) + return singularBeta; + + // If the eval of ttMove is greater than beta, we reduce it (negative extension) + else if (ttValue >= beta) + extension = -2; + + // If the eval of ttMove is less than alpha and value, we reduce it (negative extension) + else if (ttValue <= alpha && ttValue <= value) + extension = -1; + } + + // Check extensions (~1 Elo) + else if ( givesCheck + && depth > 9 + && abs(ss->staticEval) > 82) + extension = 1; + + // Quiet ttMove extensions (~0 Elo) + else if ( PvNode + && move == ttMove + && move == ss->killers[0] + && (*contHist[0])[movedPiece][to_sq(move)] >= 5177) + extension = 1; + } + + // Add extension to new depth + newDepth += extension; + ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2); + + // Speculative prefetch as early as possible + prefetch(TT.first_entry(pos.key_after(move))); + + // Update the current move (this must be done after singular extension search) + ss->currentMove = move; + ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] + [capture] + [movedPiece] + [to_sq(move)]; + + // Step 16. Make the move + pos.do_move(move, st, givesCheck); + + // Step 17. Late moves reduction / extension (LMR, ~98 Elo) + // We use various heuristics for the sons of a node after the first son has + // been searched. In general we would like to reduce them, but there are many + // cases where we extend a son if it has good chances to be "interesting". + if ( depth >= 2 + && moveCount > 1 + (PvNode && ss->ply <= 1) + && ( !ss->ttPv + || !capture + || (cutNode && (ss-1)->moveCount > 1))) + { + Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); + + // Decrease reduction if position is or has been on the PV + // and node is not likely to fail low. (~3 Elo) + if ( ss->ttPv + && !likelyFailLow) + r -= 2; + + // Decrease reduction if opponent's move count is high (~1 Elo) + if ((ss-1)->moveCount > 7) + r--; + + // Increase reduction for cut nodes (~3 Elo) + if (cutNode) + r += 2; + + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; + + // Decrease reduction for PvNodes based on depth + if (PvNode) + r -= 1 + 11 / (3 + depth); + + // Decrease reduction if ttMove has been singularly extended (~1 Elo) + if (singularQuietLMR) + r--; + + // Decrease reduction if we move a threatened piece (~1 Elo) + if ( depth > 9 + && (mp.threatenedPieces & from_sq(move))) + r--; + + // Increase reduction if next ply has a lot of fail high + if ((ss+1)->cutoffCnt > 3) + r++; + + ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + + (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)] + - 4433; + + // Decrease/increase reduction for moves with a good/bad history (~30 Elo) + r -= ss->statScore / (13628 + 4000 * (depth > 7 && depth < 19)); + + // In general we want to cap the LMR depth search at newDepth, but when + // reduction is negative, we allow this move a limited search extension + // beyond the first move depth. This may lead to hidden double extensions. + Depth d = std::clamp(newDepth - r, 1, newDepth + 1); + + value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); + + // Do full depth search when reduced LMR search fails high + if (value > alpha && d < newDepth) + { + // Adjust full depth search based on LMR results - if result + // was good enough search deeper, if it was bad enough search shallower + const bool doDeeperSearch = value > (alpha + 64 + 11 * (newDepth - d)); + const bool doShallowerSearch = value < bestValue + newDepth; + + newDepth += doDeeperSearch - doShallowerSearch; + + if (newDepth > d) + value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + + int bonus = value > alpha ? stat_bonus(newDepth) + : -stat_bonus(newDepth); + + if (capture) + bonus /= 6; + + update_continuation_histories(ss, movedPiece, to_sq(move), bonus); + } + } + + // Step 18. Full depth search when LMR is skipped + else if (!PvNode || moveCount > 1) + { + value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + } + + // For PV nodes only, do a full PV search on the first move or after a fail + // high (in the latter case search only if value < beta), otherwise let the + // parent node fail low with value <= alpha and try another move. + if (PvNode && (moveCount == 1 || (value > alpha && (rootNode || value < beta)))) + { + (ss+1)->pv = pv; + (ss+1)->pv[0] = MOVE_NONE; + + value = -search(pos, ss+1, -beta, -alpha, + std::min(maxNextDepth, newDepth), false); + } + + // Step 19. Undo move + pos.undo_move(move); + + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + + // Step 20. Check for a new best move + // Finished searching the move. If a stop occurred, the return value of + // the search cannot be trusted, and we return immediately without + // updating best move, PV and TT. + if (Threads.stop.load(std::memory_order_relaxed)) + return VALUE_ZERO; + + if (rootNode) + { + RootMove& rm = *std::find(thisThread->rootMoves.begin(), + thisThread->rootMoves.end(), move); + + rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + + // PV move or new best move? + if (moveCount == 1 || value > alpha) + { + rm.score = value; + rm.selDepth = thisThread->selDepth; + rm.scoreLowerbound = value >= beta; + rm.scoreUpperbound = value <= alpha; + rm.pv.resize(1); + + assert((ss+1)->pv); + + for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m) + rm.pv.push_back(*m); + + // We record how often the best move has been changed in each iteration. + // This information is used for time management. In MultiPV mode, + // we must take care to only do this for the first PV line. + if ( moveCount > 1 + && !thisThread->pvIdx) + ++thisThread->bestMoveChanges; + } + else + // All other moves but the PV are set to the lowest value: this + // is not a problem when sorting because the sort is stable and the + // move position in the list is preserved - just the PV is pushed up. + rm.score = -VALUE_INFINITE; + } + + if (value > bestValue) + { + bestValue = value; + + if (value > alpha) + { + bestMove = move; + + if (PvNode && !rootNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss+1)->pv); + + if (PvNode && value < beta) // Update alpha! Always alpha < beta + { + alpha = value; + + // Reduce other moves if we have found at least one score improvement + if ( depth > 1 + && depth < 6 + && beta < VALUE_KNOWN_WIN + && alpha > -VALUE_KNOWN_WIN) + depth -= 1; + + assert(depth > 0); + } + else + { + ss->cutoffCnt++; + assert(value >= beta); // Fail high + break; + } + } + } + + + // If the move is worse than some previously searched move, remember it to update its stats later + if (move != bestMove) + { + if (capture && captureCount < 32) + capturesSearched[captureCount++] = move; + + else if (!capture && quietCount < 64) + quietsSearched[quietCount++] = move; + } + } + + // The following condition would detect a stop only after move loop has been + // completed. But in this case bestValue is valid because we have fully + // searched our subtree, and we can anyhow save the result in TT. + /* + if (Threads.stop) + return VALUE_DRAW; + */ + + // Step 21. Check for mate and stalemate + // All legal moves have been searched and if there are no legal moves, it + // must be a mate or a stalemate. If we are in a singular extension search then + // return a fail low score. + + assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); + + if (!moveCount) + bestValue = excludedMove ? alpha : + ss->inCheck ? mated_in(ss->ply) + : VALUE_DRAW; + + // If there is a move which produces search value greater than alpha we update stats of searched moves + else if (bestMove) + update_all_stats(pos, ss, bestMove, bestValue, beta, prevSq, + quietsSearched, quietCount, capturesSearched, captureCount, depth); + + // Bonus for prior countermove that caused the fail low + else if ( (depth >= 5 || PvNode) + && !priorCapture) + { + //Assign extra bonus if current node is PvNode or cutNode + //or fail low was really bad + bool extraBonus = PvNode + || cutNode + || bestValue < alpha - 62 * depth; + + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus)); + } + + if (PvNode) + bestValue = std::min(bestValue, maxValue); + + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + + // Write gathered information in transposition table + if (!excludedMove && !(rootNode && thisThread->pvIdx)) + tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, + bestValue >= beta ? BOUND_LOWER : + PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, + depth, bestMove, ss->staticEval); + + assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); + + return bestValue; + } + + + // qsearch() is the quiescence search function, which is called by the main search + // function with zero depth, or recursively with further decreasing depth per call. + // (~155 elo) + template + Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { + + static_assert(nodeType != Root); + constexpr bool PvNode = nodeType == PV; + + assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE); + assert(PvNode || (alpha == beta - 1)); + assert(depth <= 0); + + Move pv[MAX_PLY+1]; + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + TTEntry* tte; + Key posKey; + Move ttMove, move, bestMove; + Depth ttDepth; + Value bestValue, value, ttValue, futilityValue, futilityBase; + bool pvHit, givesCheck, capture; + int moveCount; + + if (PvNode) + { + (ss+1)->pv = pv; + ss->pv[0] = MOVE_NONE; + } + + Thread* thisThread = pos.this_thread(); + bestMove = MOVE_NONE; + ss->inCheck = pos.checkers(); + moveCount = 0; + + // Check for an immediate draw or maximum ply reached + if ( pos.is_draw(ss->ply) + || ss->ply >= MAX_PLY) + return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; + + assert(0 <= ss->ply && ss->ply < MAX_PLY); + + // Decide whether or not to include checks: this fixes also the type of + // TT entry depth that we are going to use. Note that in qsearch we use + // only two types of depth in TT: DEPTH_QS_CHECKS or DEPTH_QS_NO_CHECKS. + ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS + : DEPTH_QS_NO_CHECKS; + // Transposition table lookup + posKey = pos.key(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); + + if ( !PvNode + && ss->ttHit + && tte->depth() >= ttDepth + && ttValue != VALUE_NONE // Only in case of TT access race + && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) + return ttValue; + + // Evaluate the position statically + if (ss->inCheck) + { + ss->staticEval = VALUE_NONE; + bestValue = futilityBase = -VALUE_INFINITE; + } + else + { + if (ss->ttHit) + { + // Never assume anything about values stored in TT + if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) + ss->staticEval = bestValue = evaluate(pos); + + // ttValue can be used as a better position evaluation (~7 Elo) + if ( ttValue != VALUE_NONE + && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) + bestValue = ttValue; + } + else + // In case of null move search use previous static eval with a different sign + ss->staticEval = bestValue = + (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) + : -(ss-1)->staticEval; + + // Stand pat. Return immediately if static value is at least beta + if (bestValue >= beta) + { + // Save gathered info in transposition table + if (!ss->ttHit) + tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, + DEPTH_NONE, MOVE_NONE, ss->staticEval); + + return bestValue; + } + + if (PvNode && bestValue > alpha) + alpha = bestValue; + + futilityBase = bestValue + 153; + } + + const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, + nullptr , (ss-4)->continuationHistory, + nullptr , (ss-6)->continuationHistory }; + + // Initialize a MovePicker object for the current position, and prepare + // to search the moves. Because the depth is <= 0 here, only captures, + // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) + // will be generated. + Square prevSq = to_sq((ss-1)->currentMove); + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, + &thisThread->captureHistory, + contHist, + prevSq); + + int quietCheckEvasions = 0; + + // Loop through the moves until no moves remain or a beta cutoff occurs + while ((move = mp.next_move()) != MOVE_NONE) + { + assert(is_ok(move)); + + // Check for legality + if (!pos.legal(move)) + continue; + + givesCheck = pos.gives_check(move); + capture = pos.capture(move); + + moveCount++; + + // Futility pruning and moveCount pruning (~5 Elo) + if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY + && !givesCheck + && to_sq(move) != prevSq + && futilityBase > -VALUE_KNOWN_WIN + && type_of(move) != PROMOTION) + { + + if (moveCount > 2) + continue; + + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; + + if (futilityValue <= alpha) + { + bestValue = std::max(bestValue, futilityValue); + continue; + } + + if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) + { + bestValue = std::max(bestValue, futilityBase); + continue; + } + } + + // Do not search moves with negative SEE values (~5 Elo) + if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY + && !pos.see_ge(move)) + continue; + + // Speculative prefetch as early as possible + prefetch(TT.first_entry(pos.key_after(move))); + + ss->currentMove = move; + ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] + [capture] + [pos.moved_piece(move)] + [to_sq(move)]; + + // Continuation history based pruning (~2 Elo) + if ( !capture + && bestValue > VALUE_TB_LOSS_IN_MAX_PLY + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) + continue; + + // We prune after 2nd quiet check evasion where being 'in check' is implicitly checked through the counter + // and being a 'quiet' apart from being a tt move is assumed after an increment because captures are pushed ahead. + if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY + && quietCheckEvasions > 1) + break; + + quietCheckEvasions += !capture && ss->inCheck; + + // Make and search the move + pos.do_move(move, st, givesCheck); + value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); + pos.undo_move(move); + + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + + // Check for a new best move + if (value > bestValue) + { + bestValue = value; + + if (value > alpha) + { + bestMove = move; + + if (PvNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss+1)->pv); + + if (PvNode && value < beta) // Update alpha here! + alpha = value; + else + break; // Fail high + } + } + } + + // All legal moves have been searched. A special case: if we're in check + // and no legal moves were found, it is checkmate. + if (ss->inCheck && bestValue == -VALUE_INFINITE) + { + assert(!MoveList(pos).size()); + + return mated_in(ss->ply); // Plies to mate from the root + } + + // Save gathered info in transposition table + tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, + ttDepth, bestMove, ss->staticEval); + + assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); + + return bestValue; + } + + + // value_to_tt() adjusts a mate or TB score from "plies to mate from the root" to + // "plies to mate from the current position". Standard scores are unchanged. + // The function is called before storing a value in the transposition table. + + Value value_to_tt(Value v, int ply) { + + assert(v != VALUE_NONE); + + return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply + : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; + } + + + // value_from_tt() is the inverse of value_to_tt(): it adjusts a mate or TB score + // from the transposition table (which refers to the plies to mate/be mated from + // current position) to "plies to mate/be mated (TB win/loss) from the root". However, + // for mate scores, to avoid potentially false mate scores related to the 50 moves rule + // and the graph history interaction, we return an optimal TB score instead. + + Value value_from_tt(Value v, int ply, int r50c) { + + if (v == VALUE_NONE) + return VALUE_NONE; + + if (v >= VALUE_TB_WIN_IN_MAX_PLY) // TB win or better + { + if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 99 - r50c) + return VALUE_MATE_IN_MAX_PLY - 1; // do not return a potentially false mate score + + return v - ply; + } + + if (v <= VALUE_TB_LOSS_IN_MAX_PLY) // TB loss or worse + { + if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 99 - r50c) + return VALUE_MATED_IN_MAX_PLY + 1; // do not return a potentially false mate score + + return v + ply; + } + + return v; + } + + + // update_pv() adds current move and appends child pv[] + + void update_pv(Move* pv, Move move, const Move* childPv) { + + for (*pv++ = move; childPv && *childPv != MOVE_NONE; ) + *pv++ = *childPv++; + *pv = MOVE_NONE; + } + + + // update_all_stats() updates stats at the end of search() when a bestMove is found + + void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, + Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth) { + + Color us = pos.side_to_move(); + Thread* thisThread = pos.this_thread(); + CapturePieceToHistory& captureHistory = thisThread->captureHistory; + Piece moved_piece = pos.moved_piece(bestMove); + PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); + int bonus1 = stat_bonus(depth + 1); + + if (!pos.capture(bestMove)) + { + int bonus2 = bestValue > beta + 137 ? bonus1 // larger bonus + : stat_bonus(depth); // smaller bonus + + // Increase stats for the best move in case it was a quiet move + update_quiet_stats(pos, ss, bestMove, bonus2); + + // Decrease stats for all non-best quiet moves + for (int i = 0; i < quietCount; ++i) + { + thisThread->mainHistory[us][from_to(quietsSearched[i])] << -bonus2; + update_continuation_histories(ss, pos.moved_piece(quietsSearched[i]), to_sq(quietsSearched[i]), -bonus2); + } + } + else + // Increase stats for the best move in case it was a capture move + captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; + + // Extra penalty for a quiet early move that was not a TT move or + // main killer move in previous ply when it gets refuted. + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) + && !pos.captured_piece()) + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); + + // Decrease stats for all non-best capture moves + for (int i = 0; i < captureCount; ++i) + { + moved_piece = pos.moved_piece(capturesSearched[i]); + captured = type_of(pos.piece_on(to_sq(capturesSearched[i]))); + captureHistory[moved_piece][to_sq(capturesSearched[i])][captured] << -bonus1; + } + } + + + // update_continuation_histories() updates histories of the move pairs formed + // by moves at ply -1, -2, -4, and -6 with current move. + + void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { + + for (int i : {1, 2, 4, 6}) + { + // Only update first 2 continuation histories if we are in check + if (ss->inCheck && i > 2) + break; + if (is_ok((ss-i)->currentMove)) + (*(ss-i)->continuationHistory)[pc][to] << bonus; + } + } + + + // update_quiet_stats() updates move sorting heuristics + + void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) { + + // Update killers + if (ss->killers[0] != move) + { + ss->killers[1] = ss->killers[0]; + ss->killers[0] = move; + } + + Color us = pos.side_to_move(); + Thread* thisThread = pos.this_thread(); + thisThread->mainHistory[us][from_to(move)] << bonus; + update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus); + + // Update countermove history + if (is_ok((ss-1)->currentMove)) + { + Square prevSq = to_sq((ss-1)->currentMove); + thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move; + } + } + + // When playing with strength handicap, choose best move among a set of RootMoves + // using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. + + Move Skill::pick_best(size_t multiPV) { + + const RootMoves& rootMoves = Threads.main()->rootMoves; + static PRNG rng(now()); // PRNG sequence should be non-deterministic + + // RootMoves are already sorted by score in descending order + Value topScore = rootMoves[0].score; + int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg); + int maxScore = -VALUE_INFINITE; + double weakness = 120 - 2 * level; + + // Choose best move. For each move score we add two terms, both dependent on + // weakness. One is deterministic and bigger for weaker levels, and one is + // random. Then we choose the move with the resulting highest score. + for (size_t i = 0; i < multiPV; ++i) + { + // This is our magic formula + int push = int(( weakness * int(topScore - rootMoves[i].score) + + delta * (rng.rand() % int(weakness))) / 128); + + if (rootMoves[i].score + push >= maxScore) + { + maxScore = rootMoves[i].score + push; + best = rootMoves[i].pv[0]; + } + } + + return best; + } + +} // namespace + + +/// MainThread::check_time() is used to print debug info and, more importantly, +/// to detect when we are out of available time and thus stop the search. + +void MainThread::check_time() { + + if (--callsCnt > 0) + return; + + // When using nodes, ensure checking rate is not lower than 0.1% of nodes + callsCnt = Limits.nodes ? std::min(1024, int(Limits.nodes / 1024)) : 1024; + + static TimePoint lastInfoTime = now(); + + TimePoint elapsed = Time.elapsed(); + TimePoint tick = Limits.startTime + elapsed; + + if (tick - lastInfoTime >= 1000) + { + lastInfoTime = tick; + dbg_print(); + } + + // We should not stop pondering until told so by the GUI + if (ponder) + return; + + if ( (Limits.use_time_management() && (elapsed > Time.maximum() - 10 || stopOnPonderhit)) + || (Limits.movetime && elapsed >= Limits.movetime) + || (Limits.nodes && Threads.nodes_searched() >= (uint64_t)Limits.nodes)) + Threads.stop = true; +} + + +/// UCI::pv() formats PV information according to the UCI protocol. UCI requires +/// that all (if any) unsearched PV lines are sent using a previous search score. + +string UCI::pv(const Position& pos, Depth depth) { + + std::stringstream ss; + TimePoint elapsed = Time.elapsed() + 1; + const RootMoves& rootMoves = pos.this_thread()->rootMoves; + size_t pvIdx = pos.this_thread()->pvIdx; + size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); + uint64_t nodesSearched = Threads.nodes_searched(); + uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); + + for (size_t i = 0; i < multiPV; ++i) + { + bool updated = rootMoves[i].score != -VALUE_INFINITE; + + if (depth == 1 && !updated && i > 0) + continue; + + Depth d = updated ? depth : std::max(1, depth - 1); + Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; + v = tb ? rootMoves[i].tbScore : v; + + if (ss.rdbuf()->in_avail()) // Not at first line + ss << "\n"; + + ss << "info" + << " depth " << d + << " seldepth " << rootMoves[i].selDepth + << " multipv " << i + 1 + << " score " << UCI::value(v); + + if (Options["UCI_ShowWDL"]) + ss << UCI::wdl(v, pos.game_ply()); + + if (i == pvIdx && !tb && updated) // tablebase- and previous-scores are exact + ss << (rootMoves[i].scoreLowerbound ? " lowerbound" : (rootMoves[i].scoreUpperbound ? " upperbound" : "")); + + ss << " nodes " << nodesSearched + << " nps " << nodesSearched * 1000 / elapsed + << " hashfull " << TT.hashfull() + << " tbhits " << tbHits + << " time " << elapsed + << " pv"; + + for (Move m : rootMoves[i].pv) + ss << " " << UCI::move(m, pos.is_chess960()); + } + + return ss.str(); +} + + +/// RootMove::extract_ponder_from_tt() is called in case we have no ponder move +/// before exiting the search, for instance, in case we stop the search during a +/// fail high at root. We try hard to have a ponder move to return to the GUI, +/// otherwise in case of 'ponder on' we have nothing to think on. + +bool RootMove::extract_ponder_from_tt(Position& pos) { + + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + bool ttHit; + + assert(pv.size() == 1); + + if (pv[0] == MOVE_NONE) + return false; + + pos.do_move(pv[0], st); + TTEntry* tte = TT.probe(pos.key(), ttHit); + + if (ttHit) + { + Move m = tte->move(); // Local copy to be SMP safe + if (MoveList(pos).contains(m)) + pv.push_back(m); + } + + pos.undo_move(pv[0]); + return pv.size() > 1; +} + +void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { + + RootInTB = false; + UseRule50 = bool(Options["Syzygy50MoveRule"]); + ProbeDepth = int(Options["SyzygyProbeDepth"]); + Cardinality = int(Options["SyzygyProbeLimit"]); + bool dtz_available = true; + + // Tables with fewer pieces than SyzygyProbeLimit are searched with + // ProbeDepth == DEPTH_ZERO + if (Cardinality > MaxCardinality) + { + Cardinality = MaxCardinality; + ProbeDepth = 0; + } + + if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) + { + // Rank moves using DTZ tables + RootInTB = root_probe(pos, rootMoves); + + if (!RootInTB) + { + // DTZ tables are missing; try to rank moves using WDL tables + dtz_available = false; + RootInTB = root_probe_wdl(pos, rootMoves); + } + } + + if (RootInTB) + { + // Sort moves according to TB rank + std::stable_sort(rootMoves.begin(), rootMoves.end(), + [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); + + // Probe during search only if DTZ is not available and we are winning + if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW) + Cardinality = 0; + } + else + { + // Clean up if root_probe() and root_probe_wdl() have failed + for (auto& m : rootMoves) + m.tbRank = 0; + } +} + +} // namespace Stockfish diff --git a/src/search.h b/src/search.h new file mode 100644 index 0000000000000000000000000000000000000000..60f2762a6f499044c17764910e6fd41bbf2aadd5 --- /dev/null +++ b/src/search.h @@ -0,0 +1,115 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef SEARCH_H_INCLUDED +#define SEARCH_H_INCLUDED + +#include + +#include "misc.h" +#include "movepick.h" +#include "types.h" + +namespace Stockfish { + +class Position; + +namespace Search { + + +/// Stack struct keeps track of the information we need to remember from nodes +/// shallower and deeper in the tree during the search. Each search thread has +/// its own array of Stack objects, indexed by the current ply. + +struct Stack { + Move* pv; + PieceToHistory* continuationHistory; + int ply; + Move currentMove; + Move excludedMove; + Move killers[2]; + Value staticEval; + int statScore; + int moveCount; + bool inCheck; + bool ttPv; + bool ttHit; + int doubleExtensions; + int cutoffCnt; +}; + + +/// RootMove struct is used for moves at the root of the tree. For each root move +/// we store a score and a PV (really a refutation in the case of moves which +/// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves. + +struct RootMove { + + explicit RootMove(Move m) : pv(1, m) {} + bool extract_ponder_from_tt(Position& pos); + bool operator==(const Move& m) const { return pv[0] == m; } + bool operator<(const RootMove& m) const { // Sort in descending order + return m.score != score ? m.score < score + : m.previousScore < previousScore; + } + + Value score = -VALUE_INFINITE; + Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; + bool scoreLowerbound = false; + bool scoreUpperbound = false; + int selDepth = 0; + int tbRank = 0; + Value tbScore; + std::vector pv; +}; + +typedef std::vector RootMoves; + + +/// LimitsType struct stores information sent by GUI about available time to +/// search the current move, maximum depth/time, or if we are in analysis mode. + +struct LimitsType { + + LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC + time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); + movestogo = depth = mate = perft = infinite = 0; + nodes = 0; + } + + bool use_time_management() const { + return time[WHITE] || time[BLACK]; + } + + std::vector searchmoves; + TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; + int movestogo, depth, mate, perft, infinite; + int64_t nodes; +}; + +extern LimitsType Limits; + +void init(); +void clear(); + +} // namespace Search + +} // namespace Stockfish + +#endif // #ifndef SEARCH_H_INCLUDED diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2de036df4a540efe6921fce0941bfd75d7ef22c --- /dev/null +++ b/src/syzygy/tbprobe.cpp @@ -0,0 +1,1628 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include // For std::memset and std::memcpy +#include +#include +#include +#include +#include +#include +#include + +#include "../bitboard.h" +#include "../movegen.h" +#include "../position.h" +#include "../search.h" +#include "../types.h" +#include "../uci.h" + +#include "tbprobe.h" + +#ifndef _WIN32 +#include +#include +#include +#include +#else +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX // Disable macros min() and max() +#endif +#include +#endif + +using namespace Stockfish::Tablebases; + +int Stockfish::Tablebases::MaxCardinality; + +namespace Stockfish { + +namespace { + +constexpr int TBPIECES = 7; // Max number of supported pieces +constexpr int MAX_DTZ = 1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. + +enum { BigEndian, LittleEndian }; +enum TBType { WDL, DTZ }; // Used as template parameter + +// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables +enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, SingleValue = 128 }; + +inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } +inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } + +const std::string PieceToChar = " PNBRQK pnbrqk"; + +int MapPawns[SQUARE_NB]; +int MapB1H1H7[SQUARE_NB]; +int MapA1D1D4[SQUARE_NB]; +int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] + +int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements +int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] +int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] + +// Comparison function to sort leading pawns in ascending MapPawns[] order +bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; } +int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } + +constexpr Value WDL_to_value[] = { + -VALUE_MATE + MAX_PLY + 1, + VALUE_DRAW - 2, + VALUE_DRAW, + VALUE_DRAW + 2, + VALUE_MATE - MAX_PLY - 1 +}; + +template +inline void swap_endian(T& x) +{ + static_assert(std::is_unsigned::value, "Argument of swap_endian not unsigned"); + + uint8_t tmp, *c = (uint8_t*)&x; + for (int i = 0; i < Half; ++i) + tmp = c[i], c[i] = c[End - i], c[End - i] = tmp; +} +template<> inline void swap_endian(uint8_t&) {} + +template T number(void* addr) +{ + T v; + + if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare) + std::memcpy(&v, addr, sizeof(T)); + else + v = *((T*)addr); + + if (LE != IsLittleEndian) + swap_endian(v); + return v; +} + +// DTZ tables don't store valid scores for moves that reset the rule50 counter +// like captures and pawn moves but we can easily recover the correct dtz of the +// previous move if we know the position's WDL score. +int dtz_before_zeroing(WDLScore wdl) { + return wdl == WDLWin ? 1 : + wdl == WDLCursedWin ? 101 : + wdl == WDLBlessedLoss ? -101 : + wdl == WDLLoss ? -1 : 0; +} + +// Return the sign of a number (-1, 0, 1) +template int sign_of(T val) { + return (T(0) < val) - (val < T(0)); +} + +// Numbers in little endian used by sparseIndex[] to point into blockLength[] +struct SparseEntry { + char block[4]; // Number of block + char offset[2]; // Offset within the block +}; + +static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); + +typedef uint16_t Sym; // Huffman symbol + +struct LR { + enum Side { Left, Right }; + + uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 + // bits is the right-hand symbol. If symbol has length 1, + // then the left-hand symbol is the stored value. + template + Sym get() { + return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] : + S == Right ? (lr[2] << 4) | (lr[1] >> 4) : (assert(false), Sym(-1)); + } +}; + +static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes"); + +// Tablebases data layout is structured as following: +// +// TBFile: memory maps/unmaps the physical .rtbw and .rtbz files +// TBTable: one object for each file with corresponding indexing information +// TBTables: has ownership of TBTable objects, keeping a list and a hash + +// class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are +// memory mapped for best performance. Files are mapped at first access: at init +// time only existence of the file is checked. +class TBFile : public std::ifstream { + + std::string fname; + +public: + // Look for and open the file among the Paths directories where the .rtbw + // and .rtbz files can be found. Multiple directories are separated by ";" + // on Windows and by ":" on Unix-based operating systems. + // + // Example: + // C:\tb\wdl345;C:\tb\wdl6;D:\tb\dtz345;D:\tb\dtz6 + static std::string Paths; + + TBFile(const std::string& f) { + +#ifndef _WIN32 + constexpr char SepChar = ':'; +#else + constexpr char SepChar = ';'; +#endif + std::stringstream ss(Paths); + std::string path; + + while (std::getline(ss, path, SepChar)) + { + fname = path + "/" + f; + std::ifstream::open(fname); + if (is_open()) + return; + } + } + + // Memory map the file and check it. File should be already open and will be + // closed after mapping. + uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { + + assert(is_open()); + + close(); // Need to re-open to get native file descriptor + +#ifndef _WIN32 + struct stat statbuf; + int fd = ::open(fname.c_str(), O_RDONLY); + + if (fd == -1) + return *baseAddress = nullptr, nullptr; + + fstat(fd, &statbuf); + + if (statbuf.st_size % 64 != 16) + { + std::cerr << "Corrupt tablebase file " << fname << std::endl; + exit(EXIT_FAILURE); + } + + *mapping = statbuf.st_size; + *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) + madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif + ::close(fd); + + if (*baseAddress == MAP_FAILED) + { + std::cerr << "Could not mmap() " << fname << std::endl; + exit(EXIT_FAILURE); + } +#else + // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. + HANDLE fd = CreateFile(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); + + if (fd == INVALID_HANDLE_VALUE) + return *baseAddress = nullptr, nullptr; + + DWORD size_high; + DWORD size_low = GetFileSize(fd, &size_high); + + if (size_low % 64 != 16) + { + std::cerr << "Corrupt tablebase file " << fname << std::endl; + exit(EXIT_FAILURE); + } + + HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr); + CloseHandle(fd); + + if (!mmap) + { + std::cerr << "CreateFileMapping() failed" << std::endl; + exit(EXIT_FAILURE); + } + + *mapping = (uint64_t)mmap; + *baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0); + + if (!*baseAddress) + { + std::cerr << "MapViewOfFile() failed, name = " << fname + << ", error = " << GetLastError() << std::endl; + exit(EXIT_FAILURE); + } +#endif + uint8_t* data = (uint8_t*)*baseAddress; + + constexpr uint8_t Magics[][4] = { { 0xD7, 0x66, 0x0C, 0xA5 }, + { 0x71, 0xE8, 0x23, 0x5D } }; + + if (memcmp(data, Magics[type == WDL], 4)) + { + std::cerr << "Corrupted table in file " << fname << std::endl; + unmap(*baseAddress, *mapping); + return *baseAddress = nullptr, nullptr; + } + + return data + 4; // Skip Magics's header + } + + static void unmap(void* baseAddress, uint64_t mapping) { + +#ifndef _WIN32 + munmap(baseAddress, mapping); +#else + UnmapViewOfFile(baseAddress); + CloseHandle((HANDLE)mapping); +#endif + } +}; + +std::string TBFile::Paths; + +// struct PairsData contains low level indexing information to access TB data. +// There are 8, 4 or 2 PairsData records for each TBTable, according to type of +// table and if positions have pawns or not. It is populated at first access. +struct PairsData { + uint8_t flags; // Table flags, see enum TBFlag + uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols + uint8_t minSymLen; // Minimum length in bits of the Huffman symbols + uint32_t blocksNum; // Number of blocks in the TB file + size_t sizeofBlock; // Block size in bytes + size_t span; // About every span values there is a SparseIndex[] entry + Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value + LR* btree; // btree[sym] stores the left and right symbols that expand sym + uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 + uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum + SparseEntry* sparseIndex; // Partial indices into blockLength[] + size_t sparseIndexSize; // Size of SparseIndex[] table + uint8_t* data; // Start of Huffman compressed data + std::vector base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l + std::vector symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 + Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups + uint64_t groupIdx[TBPIECES+1]; // Start index used for the encoding of the group's pieces + int groupLen[TBPIECES+1]; // Number of pieces in a given group: KRKN -> (3, 1) + uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) +}; + +// struct TBTable contains indexing information to access the corresponding TBFile. +// There are 2 types of TBTable, corresponding to a WDL or a DTZ file. TBTable +// is populated at init time but the nested PairsData records are populated at +// first access, when the corresponding file is memory mapped. +template +struct TBTable { + typedef typename std::conditional::type Ret; + + static constexpr int Sides = Type == WDL ? 2 : 1; + + std::atomic_bool ready; + void* baseAddress; + uint8_t* map; + uint64_t mapping; + Key key; + Key key2; + int pieceCount; + bool hasPawns; + bool hasUniquePieces; + uint8_t pawnCount[2]; // [Lead color / other color] + PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] + + PairsData* get(int stm, int f) { + return &items[stm % Sides][hasPawns ? f : 0]; + } + + TBTable() : ready(false), baseAddress(nullptr) {} + explicit TBTable(const std::string& code); + explicit TBTable(const TBTable& wdl); + + ~TBTable() { + if (baseAddress) + TBFile::unmap(baseAddress, mapping); + } +}; + +template<> +TBTable::TBTable(const std::string& code) : TBTable() { + + StateInfo st; + Position pos; + + key = pos.set(code, WHITE, &st).material_key(); + pieceCount = pos.count(); + hasPawns = pos.pieces(PAWN); + + hasUniquePieces = false; + for (Color c : { WHITE, BLACK }) + for (PieceType pt = PAWN; pt < KING; ++pt) + if (popcount(pos.pieces(c, pt)) == 1) + hasUniquePieces = true; + + // Set the leading color. In case both sides have pawns the leading color + // is the side with less pawns because this leads to better compression. + bool c = !pos.count(BLACK) + || ( pos.count(WHITE) + && pos.count(BLACK) >= pos.count(WHITE)); + + pawnCount[0] = pos.count(c ? WHITE : BLACK); + pawnCount[1] = pos.count(c ? BLACK : WHITE); + + key2 = pos.set(code, BLACK, &st).material_key(); +} + +template<> +TBTable::TBTable(const TBTable& wdl) : TBTable() { + + // Use the corresponding WDL table to avoid recalculating all from scratch + key = wdl.key; + key2 = wdl.key2; + pieceCount = wdl.pieceCount; + hasPawns = wdl.hasPawns; + hasUniquePieces = wdl.hasUniquePieces; + pawnCount[0] = wdl.pawnCount[0]; + pawnCount[1] = wdl.pawnCount[1]; +} + +// class TBTables creates and keeps ownership of the TBTable objects, one for +// each TB file found. It supports a fast, hash based, table lookup. Populated +// at init time, accessed at probe time. +class TBTables { + + struct Entry + { + Key key; + TBTable* wdl; + TBTable* dtz; + + template + TBTable* get() const { + return (TBTable*)(Type == WDL ? (void*)wdl : (void*)dtz); + } + }; + + static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb + static constexpr int Overflow = 1; // Number of elements allowed to map to the last bucket + + Entry hashTable[Size + Overflow]; + + std::deque> wdlTable; + std::deque> dtzTable; + + void insert(Key key, TBTable* wdl, TBTable* dtz) { + uint32_t homeBucket = (uint32_t)key & (Size - 1); + Entry entry{ key, wdl, dtz }; + + // Ensure last element is empty to avoid overflow when looking up + for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket) { + Key otherKey = hashTable[bucket].key; + if (otherKey == key || !hashTable[bucket].get()) { + hashTable[bucket] = entry; + return; + } + + // Robin Hood hashing: If we've probed for longer than this element, + // insert here and search for a new spot for the other element instead. + uint32_t otherHomeBucket = (uint32_t)otherKey & (Size - 1); + if (otherHomeBucket > homeBucket) { + std::swap(entry, hashTable[bucket]); + key = otherKey; + homeBucket = otherHomeBucket; + } + } + std::cerr << "TB hash table size too low!" << std::endl; + exit(EXIT_FAILURE); + } + +public: + template + TBTable* get(Key key) { + for (const Entry* entry = &hashTable[(uint32_t)key & (Size - 1)]; ; ++entry) { + if (entry->key == key || !entry->get()) + return entry->get(); + } + } + + void clear() { + memset(hashTable, 0, sizeof(hashTable)); + wdlTable.clear(); + dtzTable.clear(); + } + size_t size() const { return wdlTable.size(); } + void add(const std::vector& pieces); +}; + +TBTables TBTables; + +// If the corresponding file exists two new objects TBTable and TBTable +// are created and added to the lists and hash table. Called at init time. +void TBTables::add(const std::vector& pieces) { + + std::string code; + + for (PieceType pt : pieces) + code += PieceToChar[pt]; + + TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK + + if (!file.is_open()) // Only WDL file is checked + return; + + file.close(); + + MaxCardinality = std::max((int)pieces.size(), MaxCardinality); + + wdlTable.emplace_back(code); + dtzTable.emplace_back(wdlTable.back()); + + // Insert into the hash keys for both colors: KRvK with KR white and black + insert(wdlTable.back().key , &wdlTable.back(), &dtzTable.back()); + insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back()); +} + +// TB tables are compressed with canonical Huffman code. The compressed data is divided into +// blocks of size d->sizeofBlock, and each block stores a variable number of symbols. +// Each symbol represents either a WDL or a (remapped) DTZ value, or a pair of other symbols +// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536 +// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after +// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most +// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly +// of draws or mostly of wins, but such tables are actually quite common. In principle, the +// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for +// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so +// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long. +// The generator picks the size that leads to the smallest table. The "book" of symbols and +// Huffman codes is the same for all blocks in the table. A non-symmetric pawnless TB file +// will have one table for wtm and one for btm, a TB file with pawns will have tables per +// file a,b,c,d also in this case one set for wtm and one for btm. +int decompress_pairs(PairsData* d, uint64_t idx) { + + // Special case where all table positions store the same value + if (d->flags & TBFlag::SingleValue) + return d->minSymLen; + + // First we need to locate the right block that stores the value at index "idx". + // Because each block n stores blockLength[n] + 1 values, the index i of the block + // that contains the value at position idx is: + // + // for (i = -1, sum = 0; sum <= idx; i++) + // sum += blockLength[i + 1] + 1; + // + // This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that + // point to known indices into blockLength[]. Namely SparseIndex[k] is a SparseEntry + // that stores the blockLength[] index and the offset within that block of the value + // with index I(k), where: + // + // I(k) = k * d->span + d->span / 2 (1) + + // First step is to get the 'k' of the I(k) nearest to our idx, using definition (1) + uint32_t k = uint32_t(idx / d->span); + + // Then we read the corresponding SparseIndex[] entry + uint32_t block = number(&d->sparseIndex[k].block); + int offset = number(&d->sparseIndex[k].offset); + + // Now compute the difference idx - I(k). From definition of k we know that + // + // idx = k * d->span + idx % d->span (2) + // + // So from (1) and (2) we can compute idx - I(K): + int diff = idx % d->span - d->span / 2; + + // Sum the above to offset to find the offset corresponding to our idx + offset += diff; + + // Move to previous/next block, until we reach the correct block that contains idx, + // that is when 0 <= offset <= d->blockLength[block] + while (offset < 0) + offset += d->blockLength[--block] + 1; + + while (offset > d->blockLength[block]) + offset -= d->blockLength[block++] + 1; + + // Finally, we find the start address of our block of canonical Huffman symbols + uint32_t* ptr = (uint32_t*)(d->data + ((uint64_t)block * d->sizeofBlock)); + + // Read the first 64 bits in our block, this is a (truncated) sequence of + // unknown number of symbols of unknown length but we know the first one + // is at the beginning of this 64 bits sequence. + uint64_t buf64 = number(ptr); ptr += 2; + int buf64Size = 64; + Sym sym; + + while (true) + { + int len = 0; // This is the symbol length - d->min_sym_len + + // Now get the symbol length. For any symbol s64 of length l right-padded + // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we + // can find the symbol length iterating through base64[]. + while (buf64 < d->base64[len]) + ++len; + + // All the symbols of a given length are consecutive integers (numerical + // sequence property), so we can compute the offset of our symbol of + // length len, stored at the beginning of buf64. + sym = Sym((buf64 - d->base64[len]) >> (64 - len - d->minSymLen)); + + // Now add the value of the lowest symbol of length len to get our symbol + sym += number(&d->lowestSym[len]); + + // If our offset is within the number of values represented by symbol sym + // we are done... + if (offset < d->symlen[sym] + 1) + break; + + // ...otherwise update the offset and continue to iterate + offset -= d->symlen[sym] + 1; + len += d->minSymLen; // Get the real length + buf64 <<= len; // Consume the just processed symbol + buf64Size -= len; + + if (buf64Size <= 32) { // Refill the buffer + buf64Size += 32; + buf64 |= (uint64_t)number(ptr++) << (64 - buf64Size); + } + } + + // Ok, now we have our symbol that expands into d->symlen[sym] + 1 symbols. + // We binary-search for our value recursively expanding into the left and + // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1 + // that will store the value we need. + while (d->symlen[sym]) + { + Sym left = d->btree[sym].get(); + + // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and + // expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then + // we know that, for instance the ten-th value (offset = 10) will be on + // the left side because in Recursive Pairing child symbols are adjacent. + if (offset < d->symlen[left] + 1) + sym = left; + else { + offset -= d->symlen[left] + 1; + sym = d->btree[sym].get(); + } + } + + return d->btree[sym].get(); +} + +bool check_dtz_stm(TBTable*, int, File) { return true; } + +bool check_dtz_stm(TBTable* entry, int stm, File f) { + + auto flags = entry->get(stm, f)->flags; + return (flags & TBFlag::STM) == stm + || ((entry->key == entry->key2) && !entry->hasPawns); +} + +// DTZ scores are sorted by frequency of occurrence and then assigned the +// values 0, 1, 2, ... in order of decreasing frequency. This is done for each +// of the four WDLScore values. The mapping information necessary to reconstruct +// the original values is stored in the TB file and read during map[] init. +WDLScore map_score(TBTable*, File, int value, WDLScore) { return WDLScore(value - 2); } + +int map_score(TBTable* entry, File f, int value, WDLScore wdl) { + + constexpr int WDLMap[] = { 1, 3, 0, 2, 0 }; + + auto flags = entry->get(0, f)->flags; + + uint8_t* map = entry->map; + uint16_t* idx = entry->get(0, f)->map_idx; + if (flags & TBFlag::Mapped) { + if (flags & TBFlag::Wide) + value = ((uint16_t *)map)[idx[WDLMap[wdl + 2]] + value]; + else + value = map[idx[WDLMap[wdl + 2]] + value]; + } + + // DTZ tables store distance to zero in number of moves or plies. We + // want to return plies, so we have convert to plies when needed. + if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies)) + || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) + || wdl == WDLCursedWin + || wdl == WDLBlessedLoss) + value *= 2; + + return value + 1; +} + +// Compute a unique index out of a position and use it to probe the TB file. To +// encode k pieces of same type and color, first sort the pieces by square in +// ascending order s1 <= s2 <= ... <= sk then compute the unique index as: +// +// idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk] +// +template +Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) { + + Square squares[TBPIECES]; + Piece pieces[TBPIECES]; + uint64_t idx; + int next = 0, size = 0, leadPawnsCnt = 0; + PairsData* d; + Bitboard b, leadPawns = 0; + File tbFile = FILE_A; + + // A given TB entry like KRK has associated two material keys: KRvk and Kvkr. + // If both sides have the same pieces keys are equal. In this case TB tables + // only store the 'white to move' case, so if the position to lookup has black + // to move, we need to switch the color and flip the squares before to lookup. + bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move()); + + // TB files are calculated for white as stronger side. For instance we have + // KRvK, not KvKR. A position where stronger side is white will have its + // material key == entry->key, otherwise we have to switch the color and + // flip the squares before to lookup. + bool blackStronger = (pos.material_key() != entry->key); + + int flipColor = (symmetricBlackToMove || blackStronger) * 8; + int flipSquares = (symmetricBlackToMove || blackStronger) * 56; + int stm = (symmetricBlackToMove || blackStronger) ^ pos.side_to_move(); + + // For pawns, TB files store 4 separate tables according if leading pawn is on + // file a, b, c or d after reordering. The leading pawn is the one with maximum + // MapPawns[] value, that is the one most toward the edges and with lowest rank. + if (entry->hasPawns) { + + // In all the 4 tables, pawns are at the beginning of the piece sequence and + // their color is the reference one. So we just pick the first one. + Piece pc = Piece(entry->get(0, 0)->pieces[0] ^ flipColor); + + assert(type_of(pc) == PAWN); + + leadPawns = b = pos.pieces(color_of(pc), PAWN); + do + squares[size++] = pop_lsb(b) ^ flipSquares; + while (b); + + leadPawnsCnt = size; + + std::swap(squares[0], *std::max_element(squares, squares + leadPawnsCnt, pawns_comp)); + + tbFile = File(edge_distance(file_of(squares[0]))); + } + + // DTZ tables are one-sided, i.e. they store positions only for white to + // move or only for black to move, so check for side to move to be stm, + // early exit otherwise. + if (!check_dtz_stm(entry, stm, tbFile)) + return *result = CHANGE_STM, Ret(); + + // Now we are ready to get all the position pieces (but the lead pawns) and + // directly map them to the correct color and square. + b = pos.pieces() ^ leadPawns; + do { + Square s = pop_lsb(b); + squares[size] = s ^ flipSquares; + pieces[size++] = Piece(pos.piece_on(s) ^ flipColor); + } while (b); + + assert(size >= 2); + + d = entry->get(stm, tbFile); + + // Then we reorder the pieces to have the same sequence as the one stored + // in pieces[i]: the sequence that ensures the best compression. + for (int i = leadPawnsCnt; i < size - 1; ++i) + for (int j = i + 1; j < size; ++j) + if (d->pieces[i] == pieces[j]) + { + std::swap(pieces[i], pieces[j]); + std::swap(squares[i], squares[j]); + break; + } + + // Now we map again the squares so that the square of the lead piece is in + // the triangle A1-D1-D4. + if (file_of(squares[0]) > FILE_D) + for (int i = 0; i < size; ++i) + squares[i] = flip_file(squares[i]); + + // Encode leading pawns starting with the one with minimum MapPawns[] and + // proceeding in ascending order. + if (entry->hasPawns) { + idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; + + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + + for (int i = 1; i < leadPawnsCnt; ++i) + idx += Binomial[i][MapPawns[squares[i]]]; + + goto encode_remaining; // With pawns we have finished special treatments + } + + // In positions without pawns, we further flip the squares to ensure leading + // piece is below RANK_5. + if (rank_of(squares[0]) > RANK_4) + for (int i = 0; i < size; ++i) + squares[i] = flip_rank(squares[i]); + + // Look for the first piece of the leading group not on the A1-D4 diagonal + // and ensure it is mapped below the diagonal. + for (int i = 0; i < d->groupLen[0]; ++i) { + if (!off_A1H8(squares[i])) + continue; + + if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 + for (int j = i; j < size; ++j) + squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63); + break; + } + + // Encode the leading group. + // + // Suppose we have KRvK. Let's say the pieces are on square numbers wK, wR + // and bK (each 0...63). The simplest way to map this position to an index + // is like this: + // + // index = wK * 64 * 64 + wR * 64 + bK; + // + // But this way the TB is going to have 64*64*64 = 262144 positions, with + // lots of positions being equivalent (because they are mirrors of each + // other) and lots of positions being invalid (two pieces on one square, + // adjacent kings, etc.). + // Usually the first step is to take the wK and bK together. There are just + // 462 ways legal and not-mirrored ways to place the wK and bK on the board. + // Once we have placed the wK and bK, there are 62 squares left for the wR + // Mapping its square from 0..63 to available squares 0..61 can be done like: + // + // wR -= (wR > wK) + (wR > bK); + // + // In words: if wR "comes later" than wK, we deduct 1, and the same if wR + // "comes later" than bK. In case of two same pieces like KRRvK we want to + // place the two Rs "together". If we have 62 squares left, we can place two + // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be + // swapped and still get the same position.) + // + // In case we have at least 3 unique pieces (included kings) we encode them + // together. + if (entry->hasUniquePieces) { + + int adjust1 = squares[1] > squares[0]; + int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]); + + // First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3 + // triangle to 0...5. There are 63 squares for second piece and and 62 + // (mapped to 0...61) for the third. + if (off_A1H8(squares[0])) + idx = ( MapA1D1D4[squares[0]] * 63 + + (squares[1] - adjust1)) * 62 + + squares[2] - adjust2; + + // First piece is on a1-h8 diagonal, second below: map this occurrence to + // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal + // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. + else if (off_A1H8(squares[1])) + idx = ( 6 * 63 + rank_of(squares[0]) * 28 + + MapB1H1H7[squares[1]]) * 62 + + squares[2] - adjust2; + + // First two pieces are on a1-h8 diagonal, third below + else if (off_A1H8(squares[2])) + idx = 6 * 63 * 62 + 4 * 28 * 62 + + rank_of(squares[0]) * 7 * 28 + + (rank_of(squares[1]) - adjust1) * 28 + + MapB1H1H7[squares[2]]; + + // All 3 pieces on the diagonal a1-h8 + else + idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + + rank_of(squares[0]) * 7 * 6 + + (rank_of(squares[1]) - adjust1) * 6 + + (rank_of(squares[2]) - adjust2); + } else + // We don't have at least 3 unique pieces, like in KRRvKBB, just map + // the kings. + idx = MapKK[MapA1D1D4[squares[0]]][squares[1]]; + +encode_remaining: + idx *= d->groupIdx[0]; + Square* groupSq = squares + d->groupLen[0]; + + // Encode remaining pawns then pieces according to square, in ascending order + bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; + + while (d->groupLen[++next]) + { + std::stable_sort(groupSq, groupSq + d->groupLen[next]); + uint64_t n = 0; + + // Map down a square if "comes later" than a square in the previous + // groups (similar to what done earlier for leading group pieces). + for (int i = 0; i < d->groupLen[next]; ++i) + { + auto f = [&](Square s) { return groupSq[i] > s; }; + auto adjust = std::count_if(squares, groupSq, f); + n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns]; + } + + remainingPawns = false; + idx += n * d->groupIdx[next]; + groupSq += d->groupLen[next]; + } + + // Now that we have the index, decompress the pair and get the score + return map_score(entry, tbFile, decompress_pairs(d, idx), wdl); +} + +// Group together pieces that will be encoded together. The general rule is that +// a group contains pieces of same type and color. The exception is the leading +// group that, in case of positions without pawns, can be formed by 3 different +// pieces (default) or by the king pair when there is not a unique piece apart +// from the kings. When there are pawns, pawns are always first in pieces[]. +// +// As example KRKN -> KRK + N, KNNK -> KK + NN, KPPKP -> P + PP + K + K +// +// The actual grouping depends on the TB generator and can be inferred from the +// sequence of pieces in piece[] array. +template +void set_groups(T& e, PairsData* d, int order[], File f) { + + int n = 0, firstLen = e.hasPawns ? 0 : e.hasUniquePieces ? 3 : 2; + d->groupLen[n] = 1; + + // Number of pieces per group is stored in groupLen[], for instance in KRKN + // the encoder will default on '111', so groupLen[] will be (3, 1). + for (int i = 1; i < e.pieceCount; ++i) + if (--firstLen > 0 || d->pieces[i] == d->pieces[i - 1]) + d->groupLen[n]++; + else + d->groupLen[++n] = 1; + + d->groupLen[++n] = 0; // Zero-terminated + + // The sequence in pieces[] defines the groups, but not the order in which + // they are encoded. If the pieces in a group g can be combined on the board + // in N(g) different ways, then the position encoding will be of the form: + // + // g1 * N(g2) * N(g3) + g2 * N(g3) + g3 + // + // This ensures unique encoding for the whole position. The order of the + // groups is a per-table parameter and could not follow the canonical leading + // pawns/pieces -> remaining pawns -> remaining pieces. In particular the + // first group is at order[0] position and the remaining pawns, when present, + // are at order[1] position. + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + int next = pp ? 2 : 1; + int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); + uint64_t idx = 1; + + for (int k = 0; next < n || k == order[0] || k == order[1]; ++k) + if (k == order[0]) // Leading pawns or pieces + { + d->groupIdx[0] = idx; + idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] + : e.hasUniquePieces ? 31332 : 462; + } + else if (k == order[1]) // Remaining pawns + { + d->groupIdx[1] = idx; + idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; + } + else // Remaining pieces + { + d->groupIdx[next] = idx; + idx *= Binomial[d->groupLen[next]][freeSquares]; + freeSquares -= d->groupLen[next++]; + } + + d->groupIdx[n] = idx; +} + +// In Recursive Pairing each symbol represents a pair of children symbols. So +// read d->btree[] symbols data and expand each one in his left and right child +// symbol until reaching the leafs that represent the symbol value. +uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { + + visited[s] = true; // We can set it now because tree is acyclic + Sym sr = d->btree[s].get(); + + if (sr == 0xFFF) + return 0; + + Sym sl = d->btree[s].get(); + + if (!visited[sl]) + d->symlen[sl] = set_symlen(d, sl, visited); + + if (!visited[sr]) + d->symlen[sr] = set_symlen(d, sr, visited); + + return d->symlen[sl] + d->symlen[sr] + 1; +} + +uint8_t* set_sizes(PairsData* d, uint8_t* data) { + + d->flags = *data++; + + if (d->flags & TBFlag::SingleValue) { + d->blocksNum = d->blockLengthSize = 0; + d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init + d->minSymLen = *data++; // Here we store the single value + return data; + } + + // groupLen[] is a zero-terminated list of group lengths, the last groupIdx[] + // element stores the biggest index that is the tb size. + uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + 7, 0) - d->groupLen]; + + d->sizeofBlock = 1ULL << *data++; + d->span = 1ULL << *data++; + d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up + auto padding = number(data++); + d->blocksNum = number(data); data += sizeof(uint32_t); + d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] + // does not point out of range. + d->maxSymLen = *data++; + d->minSymLen = *data++; + d->lowestSym = (Sym*)data; + d->base64.resize(d->maxSymLen - d->minSymLen + 1); + + // The canonical code is ordered such that longer symbols (in terms of + // the number of bits of their Huffman code) have lower numeric value, + // so that d->lowestSym[i] >= d->lowestSym[i+1] (when read as LittleEndian). + // Starting from this we compute a base64[] table indexed by symbol length + // and containing 64 bit values so that d->base64[i] >= d->base64[i+1]. + // See https://en.wikipedia.org/wiki/Huffman_coding + for (int i = d->base64.size() - 2; i >= 0; --i) { + d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) + - number(&d->lowestSym[i + 1])) / 2; + + assert(d->base64[i] * 2 >= d->base64[i+1]); + } + + // Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more + // than d->base64[i+1] and given the above assert condition, we ensure that + // d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i + // and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i]. + for (size_t i = 0; i < d->base64.size(); ++i) + d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits + + data += d->base64.size() * sizeof(Sym); + d->symlen.resize(number(data)); data += sizeof(uint16_t); + d->btree = (LR*)data; + + // The compression scheme used is "Recursive Pairing", that replaces the most + // frequent adjacent pair of symbols in the source message by a new symbol, + // reevaluating the frequencies of all of the symbol pairs with respect to + // the extended alphabet, and then repeating the process. + // See http://www.larsson.dogma.net/dcc99.pdf + std::vector visited(d->symlen.size()); + + for (Sym sym = 0; sym < d->symlen.size(); ++sym) + if (!visited[sym]) + d->symlen[sym] = set_symlen(d, sym, visited); + + return data + d->symlen.size() * sizeof(LR) + (d->symlen.size() & 1); +} + +uint8_t* set_dtz_map(TBTable&, uint8_t* data, File) { return data; } + +uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { + + e.map = data; + + for (File f = FILE_A; f <= maxFile; ++f) { + auto flags = e.get(0, f)->flags; + if (flags & TBFlag::Mapped) { + if (flags & TBFlag::Wide) { + data += (uintptr_t)data & 1; // Word alignment, we may have a mixed table + for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x + e.get(0, f)->map_idx[i] = (uint16_t)((uint16_t *)data - (uint16_t *)e.map + 1); + data += 2 * number(data) + 2; + } + } + else { + for (int i = 0; i < 4; ++i) { + e.get(0, f)->map_idx[i] = (uint16_t)(data - e.map + 1); + data += *data + 1; + } + } + } + } + + return data += (uintptr_t)data & 1; // Word alignment +} + +// Populate entry's PairsData records with data from the just memory mapped file. +// Called at first access. +template +void set(T& e, uint8_t* data) { + + PairsData* d; + + enum { Split = 1, HasPawns = 2 }; + + assert(e.hasPawns == bool(*data & HasPawns)); + assert((e.key != e.key2) == bool(*data & Split)); + + data++; // First byte stores flags + + const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; + const File maxFile = e.hasPawns ? FILE_D : FILE_A; + + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + + assert(!pp || e.pawnCount[0]); + + for (File f = FILE_A; f <= maxFile; ++f) { + + for (int i = 0; i < sides; i++) + *e.get(i, f) = PairsData(); + + int order[][2] = { { *data & 0xF, pp ? *(data + 1) & 0xF : 0xF }, + { *data >> 4, pp ? *(data + 1) >> 4 : 0xF } }; + data += 1 + pp; + + for (int k = 0; k < e.pieceCount; ++k, ++data) + for (int i = 0; i < sides; i++) + e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); + + for (int i = 0; i < sides; ++i) + set_groups(e, e.get(i, f), order[i], f); + } + + data += (uintptr_t)data & 1; // Word alignment + + for (File f = FILE_A; f <= maxFile; ++f) + for (int i = 0; i < sides; i++) + data = set_sizes(e.get(i, f), data); + + data = set_dtz_map(e, data, maxFile); + + for (File f = FILE_A; f <= maxFile; ++f) + for (int i = 0; i < sides; i++) { + (d = e.get(i, f))->sparseIndex = (SparseEntry*)data; + data += d->sparseIndexSize * sizeof(SparseEntry); + } + + for (File f = FILE_A; f <= maxFile; ++f) + for (int i = 0; i < sides; i++) { + (d = e.get(i, f))->blockLength = (uint16_t*)data; + data += d->blockLengthSize * sizeof(uint16_t); + } + + for (File f = FILE_A; f <= maxFile; ++f) + for (int i = 0; i < sides; i++) { + data = (uint8_t*)(((uintptr_t)data + 0x3F) & ~0x3F); // 64 byte alignment + (d = e.get(i, f))->data = data; + data += d->blocksNum * d->sizeofBlock; + } +} + +// If the TB file corresponding to the given position is already memory mapped +// then return its base address, otherwise try to memory map and init it. Called +// at every probe, memory map and init only at first access. Function is thread +// safe and can be called concurrently. +template +void* mapped(TBTable& e, const Position& pos) { + + static std::mutex mutex; + + // Use 'acquire' to avoid a thread reading 'ready' == true while + // another is still working. (compiler reordering may cause this). + if (e.ready.load(std::memory_order_acquire)) + return e.baseAddress; // Could be nullptr if file does not exist + + std::scoped_lock lk(mutex); + + if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock + return e.baseAddress; + + // Pieces strings in decreasing order for each color, like ("KPP","KR") + std::string fname, w, b; + for (PieceType pt = KING; pt >= PAWN; --pt) { + w += std::string(popcount(pos.pieces(WHITE, pt)), PieceToChar[pt]); + b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]); + } + + fname = (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + + (Type == WDL ? ".rtbw" : ".rtbz"); + + uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type); + + if (data) + set(e, data); + + e.ready.store(true, std::memory_order_release); + return e.baseAddress; +} + +template::Ret> +Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) { + + if (pos.count() == 2) // KvK + return Ret(WDLDraw); + + TBTable* entry = TBTables.get(pos.material_key()); + + if (!entry || !mapped(*entry, pos)) + return *result = FAIL, Ret(); + + return do_probe_table(pos, entry, wdl, result); +} + +// For a position where the side to move has a winning capture it is not necessary +// to store a winning value so the generator treats such positions as "don't cares" +// and tries to assign to it a value that improves the compression ratio. Similarly, +// if the side to move has a drawing capture, then the position is at least drawn. +// If the position is won, then the TB needs to store a win value. But if the +// position is drawn, the TB may store a loss value if that is better for compression. +// All of this means that during probing, the engine must look at captures and probe +// their results and must probe the position itself. The "best" result of these +// probes is the correct result for the position. +// DTZ tables do not store values when a following move is a zeroing winning move +// (winning capture or winning pawn move). Also DTZ store wrong values for positions +// where the best move is an ep-move (even if losing). So in all these cases set +// the state to ZEROING_BEST_MOVE. +template +WDLScore search(Position& pos, ProbeState* result) { + + WDLScore value, bestValue = WDLLoss; + StateInfo st; + + auto moveList = MoveList(pos); + size_t totalCount = moveList.size(), moveCount = 0; + + for (const Move move : moveList) + { + if ( !pos.capture(move) + && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) + continue; + + moveCount++; + + pos.do_move(move, st); + value = -search(pos, result); + pos.undo_move(move); + + if (*result == FAIL) + return WDLDraw; + + if (value > bestValue) + { + bestValue = value; + + if (value >= WDLWin) + { + *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move + return value; + } + } + } + + // In case we have already searched all the legal moves we don't have to probe + // the TB because the stored score could be wrong. For instance TB tables + // do not contain information on position with ep rights, so in this case + // the result of probe_wdl_table is wrong. Also in case of only capture + // moves, for instance here 4K3/4q3/6p1/2k5/6p1/8/8/8 w - - 0 7, we have to + // return with ZEROING_BEST_MOVE set. + bool noMoreMoves = (moveCount && moveCount == totalCount); + + if (noMoreMoves) + value = bestValue; + else + { + value = probe_table(pos, result); + + if (*result == FAIL) + return WDLDraw; + } + + // DTZ stores a "don't care" value if bestValue is a win + if (bestValue >= value) + return *result = ( bestValue > WDLDraw + || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; + + return *result = OK, value; +} + +} // namespace + + +/// Tablebases::init() is called at startup and after every change to +/// "SyzygyPath" UCI option to (re)create the various tables. It is not thread +/// safe, nor it needs to be. +void Tablebases::init(const std::string& paths) { + + TBTables.clear(); + MaxCardinality = 0; + TBFile::Paths = paths; + + if (paths.empty() || paths == "") + return; + + // MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27 + int code = 0; + for (Square s = SQ_A1; s <= SQ_H8; ++s) + if (off_A1H8(s) < 0) + MapB1H1H7[s] = code++; + + // MapA1D1D4[] encodes a square in the a1-d1-d4 triangle to 0..9 + std::vector diagonal; + code = 0; + for (Square s = SQ_A1; s <= SQ_D4; ++s) + if (off_A1H8(s) < 0 && file_of(s) <= FILE_D) + MapA1D1D4[s] = code++; + + else if (!off_A1H8(s) && file_of(s) <= FILE_D) + diagonal.push_back(s); + + // Diagonal squares are encoded as last ones + for (auto s : diagonal) + MapA1D1D4[s] = code++; + + // MapKK[] encodes all the 462 possible legal positions of two kings where + // the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4 + // diagonal, the other one shall not to be above the a1-h8 diagonal. + std::vector> bothOnDiagonal; + code = 0; + for (int idx = 0; idx < 10; idx++) + for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1) + if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 + { + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + if ((PseudoAttacks[KING][s1] | s1) & s2) + continue; // Illegal position + + else if (!off_A1H8(s1) && off_A1H8(s2) > 0) + continue; // First on diagonal, second above + + else if (!off_A1H8(s1) && !off_A1H8(s2)) + bothOnDiagonal.emplace_back(idx, s2); + + else + MapKK[idx][s2] = code++; + } + + // Legal positions with both kings on diagonal are encoded as last ones + for (auto p : bothOnDiagonal) + MapKK[p.first][p.second] = code++; + + // Binomial[] stores the Binomial Coefficients using Pascal rule. There + // are Binomial[k][n] ways to choose k elements from a set of n elements. + Binomial[0][0] = 1; + + for (int n = 1; n < 64; n++) // Squares + for (int k = 0; k < 6 && k <= n; ++k) // Pieces + Binomial[k][n] = (k > 0 ? Binomial[k - 1][n - 1] : 0) + + (k < n ? Binomial[k ][n - 1] : 0); + + // MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible + // available squares when the leading one is in 's'. Moreover the pawn with + // highest MapPawns[] is the leading pawn, the one nearest the edge and, + // among pawns with same file, the one with lowest rank. + int availableSquares = 47; // Available squares when lead pawn is in a2 + + // Init the tables for the encoding of leading pawns group: with 7-men TB we + // can have up to 5 leading pawns (KPPPPPK). + for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) + for (File f = FILE_A; f <= FILE_D; ++f) + { + // Restart the index at every file because TB table is split + // by file, so we can reuse the same index for different files. + int idx = 0; + + // Sum all possible combinations for a given file, starting with + // the leading pawn on rank 2 and increasing the rank. + for (Rank r = RANK_2; r <= RANK_7; ++r) + { + Square sq = make_square(f, r); + + // Compute MapPawns[] at first pass. + // If sq is the leading pawn square, any other pawn cannot be + // below or more toward the edge of sq. There are 47 available + // squares when sq = a2 and reduced by 2 for any rank increase + // due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45 + if (leadPawnsCnt == 1) + { + MapPawns[sq] = availableSquares--; + MapPawns[flip_file(sq)] = availableSquares--; + } + LeadPawnIdx[leadPawnsCnt][sq] = idx; + idx += Binomial[leadPawnsCnt - 1][MapPawns[sq]]; + } + // After a file is traversed, store the cumulated per-file index + LeadPawnsSize[leadPawnsCnt][f] = idx; + } + + // Add entries in TB tables if the corresponding ".rtbw" file exists + for (PieceType p1 = PAWN; p1 < KING; ++p1) { + TBTables.add({KING, p1, KING}); + + for (PieceType p2 = PAWN; p2 <= p1; ++p2) { + TBTables.add({KING, p1, p2, KING}); + TBTables.add({KING, p1, KING, p2}); + + for (PieceType p3 = PAWN; p3 < KING; ++p3) + TBTables.add({KING, p1, p2, KING, p3}); + + for (PieceType p3 = PAWN; p3 <= p2; ++p3) { + TBTables.add({KING, p1, p2, p3, KING}); + + for (PieceType p4 = PAWN; p4 <= p3; ++p4) { + TBTables.add({KING, p1, p2, p3, p4, KING}); + + for (PieceType p5 = PAWN; p5 <= p4; ++p5) + TBTables.add({KING, p1, p2, p3, p4, p5, KING}); + + for (PieceType p5 = PAWN; p5 < KING; ++p5) + TBTables.add({KING, p1, p2, p3, p4, KING, p5}); + } + + for (PieceType p4 = PAWN; p4 < KING; ++p4) { + TBTables.add({KING, p1, p2, p3, KING, p4}); + + for (PieceType p5 = PAWN; p5 <= p4; ++p5) + TBTables.add({KING, p1, p2, p3, KING, p4, p5}); + } + } + + for (PieceType p3 = PAWN; p3 <= p1; ++p3) + for (PieceType p4 = PAWN; p4 <= (p1 == p3 ? p2 : p3); ++p4) + TBTables.add({KING, p1, p2, KING, p3, p4}); + } + } + + sync_cout << "info string Found " << TBTables.size() << " tablebases" << sync_endl; +} + +// Probe the WDL table for a particular position. +// If *result != FAIL, the probe was successful. +// The return value is from the point of view of the side to move: +// -2 : loss +// -1 : loss, but draw under 50-move rule +// 0 : draw +// 1 : win, but draw under 50-move rule +// 2 : win +WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) { + + *result = OK; + return search(pos, result); +} + +// Probe the DTZ table for a particular position. +// If *result != FAIL, the probe was successful. +// The return value is from the point of view of the side to move: +// n < -100 : loss, but draw under 50-move rule +// -100 <= n < -1 : loss in n ply (assuming 50-move counter == 0) +// -1 : loss, the side to move is mated +// 0 : draw +// 1 < n <= 100 : win in n ply (assuming 50-move counter == 0) +// 100 < n : win, but draw under 50-move rule +// +// The return value n can be off by 1: a return value -n can mean a loss +// in n+1 ply and a return value +n can mean a win in n+1 ply. This +// cannot happen for tables with positions exactly on the "edge" of +// the 50-move rule. +// +// This implies that if dtz > 0 is returned, the position is certainly +// a win if dtz + 50-move-counter <= 99. Care must be taken that the engine +// picks moves that preserve dtz + 50-move-counter <= 99. +// +// If n = 100 immediately after a capture or pawn move, then the position +// is also certainly a win, and during the whole phase until the next +// capture or pawn move, the inequality to be preserved is +// dtz + 50-move-counter <= 100. +// +// In short, if a move is available resulting in dtz + 50-move-counter <= 99, +// then do not accept moves leading to dtz + 50-move-counter == 100. +int Tablebases::probe_dtz(Position& pos, ProbeState* result) { + + *result = OK; + WDLScore wdl = search(pos, result); + + if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws + return 0; + + // DTZ stores a 'don't care' value in this case, or even a plain wrong + // one as in case the best move is a losing ep, so it cannot be probed. + if (*result == ZEROING_BEST_MOVE) + return dtz_before_zeroing(wdl); + + int dtz = probe_table(pos, result, wdl); + + if (*result == FAIL) + return 0; + + if (*result != CHANGE_STM) + return (dtz + 100 * (wdl == WDLBlessedLoss || wdl == WDLCursedWin)) * sign_of(wdl); + + // DTZ stores results for the other side, so we need to do a 1-ply search and + // find the winning move that minimizes DTZ. + StateInfo st; + int minDTZ = 0xFFFF; + + for (const Move move : MoveList(pos)) + { + bool zeroing = pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN; + + pos.do_move(move, st); + + // For zeroing moves we want the dtz of the move _before_ doing it, + // otherwise we will get the dtz of the next move sequence. Search the + // position after the move to get the score sign (because even in a + // winning position we could make a losing capture or going for a draw). + dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) + : -probe_dtz(pos, result); + + // If the move mates, force minDTZ to 1 + if (dtz == 1 && pos.checkers() && MoveList(pos).size() == 0) + minDTZ = 1; + + // Convert result from 1-ply search. Zeroing moves are already accounted + // by dtz_before_zeroing() that returns the DTZ of the previous move. + if (!zeroing) + dtz += sign_of(dtz); + + // Skip the draws and if we are winning only pick positive dtz + if (dtz < minDTZ && sign_of(dtz) == sign_of(wdl)) + minDTZ = dtz; + + pos.undo_move(move); + + if (*result == FAIL) + return 0; + } + + // When there are no legal moves, the position is mate: we return -1 + return minDTZ == 0xFFFF ? -1 : minDTZ; +} + + +// Use the DTZ tables to rank root moves. +// +// A return value false indicates that not all probes were successful. +bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { + + ProbeState result; + StateInfo st; + + // Obtain 50-move counter for the root position + int cnt50 = pos.rule50_count(); + + // Check whether a position was repeated since the last zeroing move. + bool rep = pos.has_repeated(); + + int dtz, bound = Options["Syzygy50MoveRule"] ? (MAX_DTZ - 100) : 1; + + // Probe and rank each move + for (auto& m : rootMoves) + { + pos.do_move(m.pv[0], st); + + // Calculate dtz for the current move counting from the root position + if (pos.rule50_count() == 0) + { + // In case of a zeroing move, dtz is one of -101/-1/0/1/101 + WDLScore wdl = -probe_wdl(pos, &result); + dtz = dtz_before_zeroing(wdl); + } + else if (pos.is_draw(1)) + { + // In case a root move leads to a draw by repetition or + // 50-move rule, we set dtz to zero. Note: since we are + // only 1 ply from the root, this must be a true 3-fold + // repetition inside the game history. + dtz = 0; + } + else + { + // Otherwise, take dtz for the new position and correct by 1 ply + dtz = -probe_dtz(pos, &result); + dtz = dtz > 0 ? dtz + 1 + : dtz < 0 ? dtz - 1 : dtz; + } + + // Make sure that a mating move is assigned a dtz value of 1 + if ( pos.checkers() + && dtz == 2 + && MoveList(pos).size() == 0) + dtz = 1; + + pos.undo_move(m.pv[0]); + + if (result == FAIL) + return false; + + // Better moves are ranked higher. Certain wins are ranked equally. + // Losing moves are ranked equally unless a 50-move draw is in sight. + int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) + : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) + : 0; + m.tbRank = r; + + // Determine the score to be displayed for this move. Assign at least + // 1 cp to cursed wins and let it grow to 49 cp as the positions gets + // closer to a real win. + m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 + : r > 0 ? Value((std::max( 3, r - (MAX_DTZ - 200)) * int(PawnValueEg)) / 200) + : r == 0 ? VALUE_DRAW + : r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValueEg)) / 200) + : -VALUE_MATE + MAX_PLY + 1; + } + + return true; +} + + +// Use the WDL tables to rank root moves. +// This is a fallback for the case that some or all DTZ tables are missing. +// +// A return value false indicates that not all probes were successful. +bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { + + static const int WDL_to_rank[] = { -MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ }; + + ProbeState result; + StateInfo st; + WDLScore wdl; + + bool rule50 = Options["Syzygy50MoveRule"]; + + // Probe and rank each move + for (auto& m : rootMoves) + { + pos.do_move(m.pv[0], st); + + if (pos.is_draw(1)) + wdl = WDLDraw; + else + wdl = -probe_wdl(pos, &result); + + pos.undo_move(m.pv[0]); + + if (result == FAIL) + return false; + + m.tbRank = WDL_to_rank[wdl + 2]; + + if (!rule50) + wdl = wdl > WDLDraw ? WDLWin + : wdl < WDLDraw ? WDLLoss : WDLDraw; + m.tbScore = WDL_to_value[wdl + 2]; + } + + return true; +} + +} // namespace Stockfish diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h new file mode 100644 index 0000000000000000000000000000000000000000..179c75721c2335203bdbf4d914b20d885b0b5c07 --- /dev/null +++ b/src/syzygy/tbprobe.h @@ -0,0 +1,76 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TBPROBE_H +#define TBPROBE_H + +#include + +#include "../search.h" + +namespace Stockfish::Tablebases { + +enum WDLScore { + WDLLoss = -2, // Loss + WDLBlessedLoss = -1, // Loss, but draw under 50-move rule + WDLDraw = 0, // Draw + WDLCursedWin = 1, // Win, but draw under 50-move rule + WDLWin = 2, // Win +}; + +// Possible states after a probing operation +enum ProbeState { + FAIL = 0, // Probe failed (missing file table) + OK = 1, // Probe successful + CHANGE_STM = -1, // DTZ should check the other side + ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) +}; + +extern int MaxCardinality; + +void init(const std::string& paths); +WDLScore probe_wdl(Position& pos, ProbeState* result); +int probe_dtz(Position& pos, ProbeState* result); +bool root_probe(Position& pos, Search::RootMoves& rootMoves); +bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves); +void rank_root_moves(Position& pos, Search::RootMoves& rootMoves); + +inline std::ostream& operator<<(std::ostream& os, const WDLScore v) { + + os << (v == WDLLoss ? "Loss" : + v == WDLBlessedLoss ? "Blessed loss" : + v == WDLDraw ? "Draw" : + v == WDLCursedWin ? "Cursed win" : + v == WDLWin ? "Win" : "None"); + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const ProbeState v) { + + os << (v == FAIL ? "Failed" : + v == OK ? "Success" : + v == CHANGE_STM ? "Probed opponent side" : + v == ZEROING_BEST_MOVE ? "Best move zeroes DTZ" : "None"); + + return os; +} + +} // namespace Stockfish::Tablebases + +#endif diff --git a/src/thread.cpp b/src/thread.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b7471f6026758fc7c2a0763388f9940b9dda37d7 --- /dev/null +++ b/src/thread.cpp @@ -0,0 +1,268 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include // For std::count +#include "movegen.h" +#include "search.h" +#include "thread.h" +#include "uci.h" +#include "syzygy/tbprobe.h" +#include "tt.h" + +namespace Stockfish { + +ThreadPool Threads; // Global object + + +/// Thread constructor launches the thread and waits until it goes to sleep +/// in idle_loop(). Note that 'searching' and 'exit' should be already set. + +Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) { + + wait_for_search_finished(); +} + + +/// Thread destructor wakes up the thread in idle_loop() and waits +/// for its termination. Thread should be already waiting. + +Thread::~Thread() { + + assert(!searching); + + exit = true; + start_searching(); + stdThread.join(); +} + + +/// Thread::clear() reset histories, usually before a new game + +void Thread::clear() { + + counterMoves.fill(MOVE_NONE); + mainHistory.fill(0); + captureHistory.fill(0); + previousDepth = 0; + + for (bool inCheck : { false, true }) + for (StatsType c : { NoCaptures, Captures }) + for (auto& to : continuationHistory[inCheck][c]) + for (auto& h : to) + h->fill(-71); +} + + +/// Thread::start_searching() wakes up the thread that will start the search + +void Thread::start_searching() { + + std::lock_guard lk(mutex); + searching = true; + cv.notify_one(); // Wake up the thread in idle_loop() +} + + +/// Thread::wait_for_search_finished() blocks on the condition variable +/// until the thread has finished searching. + +void Thread::wait_for_search_finished() { + + std::unique_lock lk(mutex); + cv.wait(lk, [&]{ return !searching; }); +} + + +/// Thread::idle_loop() is where the thread is parked, blocked on the +/// condition variable, when it has no work to do. + +void Thread::idle_loop() { + + // If OS already scheduled us on a different group than 0 then don't overwrite + // the choice, eventually we are one of many one-threaded processes running on + // some Windows NUMA hardware, for instance in fishtest. To make it simple, + // just check if running threads are below a threshold, in this case all this + // NUMA machinery is not needed. + if (Options["Threads"] > 8) + WinProcGroup::bindThisThread(idx); + + while (true) + { + std::unique_lock lk(mutex); + searching = false; + cv.notify_one(); // Wake up anyone waiting for search finished + cv.wait(lk, [&]{ return searching; }); + + if (exit) + return; + + lk.unlock(); + + search(); + } +} + +/// ThreadPool::set() creates/destroys threads to match the requested number. +/// Created and launched threads will immediately go to sleep in idle_loop. +/// Upon resizing, threads are recreated to allow for binding if necessary. + +void ThreadPool::set(size_t requested) { + + if (size() > 0) // destroy any existing thread(s) + { + main()->wait_for_search_finished(); + + while (size() > 0) + delete back(), pop_back(); + } + + if (requested > 0) // create new thread(s) + { + push_back(new MainThread(0)); + + while (size() < requested) + push_back(new Thread(size())); + clear(); + + // Reallocate the hash with the new threadpool size + TT.resize(size_t(Options["Hash"])); + + // Init thread number dependent search params. + Search::init(); + } +} + + +/// ThreadPool::clear() sets threadPool data to initial values + +void ThreadPool::clear() { + + for (Thread* th : *this) + th->clear(); + + main()->callsCnt = 0; + main()->bestPreviousScore = VALUE_INFINITE; + main()->bestPreviousAverageScore = VALUE_INFINITE; + main()->previousTimeReduction = 1.0; +} + + +/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and +/// returns immediately. Main thread will wake up other threads and start the search. + +void ThreadPool::start_thinking(Position& pos, StateListPtr& states, + const Search::LimitsType& limits, bool ponderMode) { + + main()->wait_for_search_finished(); + + main()->stopOnPonderhit = stop = false; + increaseDepth = true; + main()->ponder = ponderMode; + Search::Limits = limits; + Search::RootMoves rootMoves; + + for (const auto& m : MoveList(pos)) + if ( limits.searchmoves.empty() + || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) + rootMoves.emplace_back(m); + + if (!rootMoves.empty()) + Tablebases::rank_root_moves(pos, rootMoves); + + // After ownership transfer 'states' becomes empty, so if we stop the search + // and call 'go' again without setting a new position states.get() == NULL. + assert(states.get() || setupStates.get()); + + if (states.get()) + setupStates = std::move(states); // Ownership transfer, states is now empty + + // We use Position::set() to set root position across threads. But there are + // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot + // be deduced from a fen string, so set() clears them and they are set from + // setupStates->back() later. The rootState is per thread, earlier states are shared + // since they are read-only. + for (Thread* th : *this) + { + th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; + th->rootDepth = th->completedDepth = 0; + th->rootMoves = rootMoves; + th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); + th->rootState = setupStates->back(); + } + + main()->start_searching(); +} + +Thread* ThreadPool::get_best_thread() const { + + Thread* bestThread = front(); + std::map votes; + Value minScore = VALUE_NONE; + + // Find minimum score of all threads + for (Thread* th: *this) + minScore = std::min(minScore, th->rootMoves[0].score); + + // Vote according to score and depth, and select the best thread + auto thread_value = [minScore](Thread* th) { + return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); + }; + + for (Thread* th : *this) + votes[th->rootMoves[0].pv[0]] += thread_value(th); + + for (Thread* th : *this) + if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) + { + // Make sure we pick the shortest mate / TB conversion or stave off mate the longest + if (th->rootMoves[0].score > bestThread->rootMoves[0].score) + bestThread = th; + } + else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY + || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY + && ( votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]] + || ( votes[th->rootMoves[0].pv[0]] == votes[bestThread->rootMoves[0].pv[0]] + && thread_value(th) > thread_value(bestThread))))) + bestThread = th; + + return bestThread; +} + + +/// Start non-main threads + +void ThreadPool::start_searching() { + + for (Thread* th : *this) + if (th != front()) + th->start_searching(); +} + + +/// Wait for non-main threads + +void ThreadPool::wait_for_search_finished() const { + + for (Thread* th : *this) + if (th != front()) + th->wait_for_search_finished(); +} + +} // namespace Stockfish diff --git a/src/thread.h b/src/thread.h new file mode 100644 index 0000000000000000000000000000000000000000..5f0b2c3ed9cb3d86397ec8c2de309cacc033bbf3 --- /dev/null +++ b/src/thread.h @@ -0,0 +1,135 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef THREAD_H_INCLUDED +#define THREAD_H_INCLUDED + +#include +#include +#include +#include +#include + +#include "material.h" +#include "movepick.h" +#include "pawns.h" +#include "position.h" +#include "search.h" +#include "thread_win32_osx.h" + +namespace Stockfish { + +/// Thread class keeps together all the thread-related stuff. We use +/// per-thread pawn and material hash tables so that once we get a +/// pointer to an entry its life time is unlimited and we don't have +/// to care about someone changing the entry under our feet. + +class Thread { + + std::mutex mutex; + std::condition_variable cv; + size_t idx; + bool exit = false, searching = true; // Set before starting std::thread + NativeThread stdThread; + +public: + explicit Thread(size_t); + virtual ~Thread(); + virtual void search(); + void clear(); + void idle_loop(); + void start_searching(); + void wait_for_search_finished(); + size_t id() const { return idx; } + + Pawns::Table pawnsTable; + Material::Table materialTable; + size_t pvIdx, pvLast; + RunningAverage complexityAverage; + std::atomic nodes, tbHits, bestMoveChanges; + int selDepth, nmpMinPly; + Color nmpColor; + Value bestValue, optimism[COLOR_NB]; + + Position rootPos; + StateInfo rootState; + Search::RootMoves rootMoves; + Depth rootDepth, completedDepth, previousDepth; + Value rootDelta; + CounterMoveHistory counterMoves; + ButterflyHistory mainHistory; + CapturePieceToHistory captureHistory; + ContinuationHistory continuationHistory[2][2]; +}; + + +/// MainThread is a derived class specific for main thread + +struct MainThread : public Thread { + + using Thread::Thread; + + void search() override; + void check_time(); + + double previousTimeReduction; + Value bestPreviousScore; + Value bestPreviousAverageScore; + Value iterValue[4]; + int callsCnt; + bool stopOnPonderhit; + std::atomic_bool ponder; +}; + + +/// ThreadPool struct handles all the threads-related stuff like init, starting, +/// parking and, most importantly, launching a thread. All the access to threads +/// is done through this class. + +struct ThreadPool : public std::vector { + + void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); + void clear(); + void set(size_t); + + MainThread* main() const { return static_cast(front()); } + uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } + uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } + Thread* get_best_thread() const; + void start_searching(); + void wait_for_search_finished() const; + + std::atomic_bool stop, increaseDepth; + +private: + StateListPtr setupStates; + + uint64_t accumulate(std::atomic Thread::* member) const { + + uint64_t sum = 0; + for (Thread* th : *this) + sum += (th->*member).load(std::memory_order_relaxed); + return sum; + } +}; + +extern ThreadPool Threads; + +} // namespace Stockfish + +#endif // #ifndef THREAD_H_INCLUDED diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h new file mode 100644 index 0000000000000000000000000000000000000000..77d1c3c7ef392c6bdca7c75c6cd41640a51a0c5b --- /dev/null +++ b/src/thread_win32_osx.h @@ -0,0 +1,74 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef THREAD_WIN32_OSX_H_INCLUDED +#define THREAD_WIN32_OSX_H_INCLUDED + +#include + +/// On OSX threads other than the main thread are created with a reduced stack +/// size of 512KB by default, this is too low for deep searches, which require +/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE. +/// The implementation calls pthread_create() with the stack size parameter +/// equal to the linux 8MB default, on platforms that support it. + +#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) + +#include + +namespace Stockfish { + +static const size_t TH_STACK_SIZE = 8 * 1024 * 1024; + +template > +void* start_routine(void* ptr) +{ + P* p = reinterpret_cast(ptr); + (p->first->*(p->second))(); // Call member function pointer + delete p; + return NULL; +} + +class NativeThread { + + pthread_t thread; + +public: + template> + explicit NativeThread(void(T::*fun)(), T* obj) { + pthread_attr_t attr_storage, *attr = &attr_storage; + pthread_attr_init(attr); + pthread_attr_setstacksize(attr, TH_STACK_SIZE); + pthread_create(&thread, attr, start_routine, new P(obj, fun)); + } + void join() { pthread_join(thread, NULL); } +}; + +} // namespace Stockfish + +#else // Default case: use STL classes + +namespace Stockfish { + +typedef std::thread NativeThread; + +} // namespace Stockfish + +#endif + +#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED diff --git a/src/timeman.cpp b/src/timeman.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cab0d7671a2e7503c47b2d080a00db98838c5c22 --- /dev/null +++ b/src/timeman.cpp @@ -0,0 +1,105 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "search.h" +#include "timeman.h" +#include "uci.h" + +namespace Stockfish { + +TimeManagement Time; // Our global time management object + + +/// TimeManagement::init() is called at the beginning of the search and calculates +/// the bounds of time allowed for the current game ply. We currently support: +// 1) x basetime (+ z increment) +// 2) x moves in y seconds (+ z increment) + +void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { + + TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); + TimePoint slowMover = TimePoint(Options["Slow Mover"]); + TimePoint npmsec = TimePoint(Options["nodestime"]); + + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; + + // If we have to play in 'nodes as time' mode, then convert from time + // to nodes, and use resulting values in time management formulas. + // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) + // must be much lower than the real engine speed. + if (npmsec) + { + if (!availableNodes) // Only once at game start + availableNodes = npmsec * limits.time[us]; // Time is in msec + + // Convert from milliseconds to nodes + limits.time[us] = TimePoint(availableNodes); + limits.inc[us] *= npmsec; + limits.npmsec = npmsec; + } + + startTime = limits.startTime; + + // Maximum move horizon of 50 moves + int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; + + // Make sure timeLeft is > 0 since we may use it as a divisor + TimePoint timeLeft = std::max(TimePoint(1), + limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); + + // Use extra time with larger increments + double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); + + // A user may scale time usage by setting UCI option "Slow Mover" + // Default is 100 and changing this value will probably lose elo. + timeLeft = slowMover * timeLeft / 100; + + // x basetime (+ z increment) + // If there is a healthy increment, timeLeft can exceed actual available + // game time for the current move, so also cap to 20% of available game time. + if (limits.movestogo == 0) + { + optScale = std::min(0.0120 + std::pow(ply + 3.0, 0.45) * 0.0039, + 0.2 * limits.time[us] / double(timeLeft)) + * optExtra; + maxScale = std::min(7.0, 4.0 + ply / 12.0); + } + + // x moves in y seconds (+ z increment) + else + { + optScale = std::min((0.88 + ply / 116.4) / mtg, + 0.88 * limits.time[us] / double(timeLeft)); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); + } + + // Never use more than 80% of the available time for this move + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); + + if (Options["Ponder"]) + optimumTime += optimumTime / 4; +} + +} // namespace Stockfish diff --git a/src/timeman.h b/src/timeman.h new file mode 100644 index 0000000000000000000000000000000000000000..a86f07693a010ab38a2b0ca183f410970d9d54f9 --- /dev/null +++ b/src/timeman.h @@ -0,0 +1,51 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TIMEMAN_H_INCLUDED +#define TIMEMAN_H_INCLUDED + +#include "misc.h" +#include "search.h" +#include "thread.h" + +namespace Stockfish { + +/// The TimeManagement class computes the optimal time to think depending on +/// the maximum available time, the game move number and other parameters. + +class TimeManagement { +public: + void init(Search::LimitsType& limits, Color us, int ply); + TimePoint optimum() const { return optimumTime; } + TimePoint maximum() const { return maximumTime; } + TimePoint elapsed() const { return Search::Limits.npmsec ? + TimePoint(Threads.nodes_searched()) : now() - startTime; } + + int64_t availableNodes; // When in 'nodes as time' mode + +private: + TimePoint startTime; + TimePoint optimumTime; + TimePoint maximumTime; +}; + +extern TimeManagement Time; + +} // namespace Stockfish + +#endif // #ifndef TIMEMAN_H_INCLUDED diff --git a/src/tt.cpp b/src/tt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7118aea3fc672647b55f97d488ff29262d16e95 --- /dev/null +++ b/src/tt.cpp @@ -0,0 +1,162 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include // For std::memset +#include +#include + +#include "bitboard.h" +#include "misc.h" +#include "thread.h" +#include "tt.h" +#include "uci.h" + +namespace Stockfish { + +TranspositionTable TT; // Our global transposition table + +/// TTEntry::save() populates the TTEntry with a new node's data, possibly +/// overwriting an old position. Update is not atomic and can be racy. + +void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) { + + // Preserve any existing move for the same position + if (m || (uint16_t)k != key16) + move16 = (uint16_t)m; + + // Overwrite less valuable entries (cheapest checks first) + if ( b == BOUND_EXACT + || (uint16_t)k != key16 + || d - DEPTH_OFFSET + 2 * pv > depth8 - 4) + { + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); + + key16 = (uint16_t)k; + depth8 = (uint8_t)(d - DEPTH_OFFSET); + genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); + value16 = (int16_t)v; + eval16 = (int16_t)ev; + } +} + + +/// TranspositionTable::resize() sets the size of the transposition table, +/// measured in megabytes. Transposition table consists of a power of 2 number +/// of clusters and each cluster consists of ClusterSize number of TTEntry. + +void TranspositionTable::resize(size_t mbSize) { + + Threads.main()->wait_for_search_finished(); + + aligned_large_pages_free(table); + + clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); + + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) + { + std::cerr << "Failed to allocate " << mbSize + << "MB for transposition table." << std::endl; + exit(EXIT_FAILURE); + } + + clear(); +} + + +/// TranspositionTable::clear() initializes the entire transposition table to zero, +// in a multi-threaded way. + +void TranspositionTable::clear() { + + std::vector threads; + + for (size_t idx = 0; idx < Options["Threads"]; ++idx) + { + threads.emplace_back([this, idx]() { + + // Thread binding gives faster search on systems with a first-touch policy + if (Options["Threads"] > 8) + WinProcGroup::bindThisThread(idx); + + // Each thread will zero its part of the hash table + const size_t stride = size_t(clusterCount / Options["Threads"]), + start = size_t(stride * idx), + len = idx != Options["Threads"] - 1 ? + stride : clusterCount - start; + + std::memset(&table[start], 0, len * sizeof(Cluster)); + }); + } + + for (std::thread& th : threads) + th.join(); +} + + +/// TranspositionTable::probe() looks up the current position in the transposition +/// table. It returns true and a pointer to the TTEntry if the position is found. +/// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry +/// to be replaced later. The replace value of an entry is calculated as its depth +/// minus 8 times its relative age. TTEntry t1 is considered more valuable than +/// TTEntry t2 if its replace value is greater than that of t2. + +TTEntry* TranspositionTable::probe(const Key key, bool& found) const { + + TTEntry* const tte = first_entry(key); + const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster + + for (int i = 0; i < ClusterSize; ++i) + if (tte[i].key16 == key16 || !tte[i].depth8) + { + tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & (GENERATION_DELTA - 1))); // Refresh + + return found = (bool)tte[i].depth8, &tte[i]; + } + + // Find an entry to be replaced according to the replacement strategy + TTEntry* replace = tte; + for (int i = 1; i < ClusterSize; ++i) + // Due to our packed storage format for generation and its cyclic + // nature we add GENERATION_CYCLE (256 is the modulus, plus what + // is needed to keep the unrelated lowest n bits from affecting + // the result) to calculate the entry age correctly even after + // generation8 overflows into the next cycle. + if ( replace->depth8 - ((GENERATION_CYCLE + generation8 - replace->genBound8) & GENERATION_MASK) + > tte[i].depth8 - ((GENERATION_CYCLE + generation8 - tte[i].genBound8) & GENERATION_MASK)) + replace = &tte[i]; + + return found = false, replace; +} + + +/// TranspositionTable::hashfull() returns an approximation of the hashtable +/// occupation during a search. The hash is x permill full, as per UCI protocol. + +int TranspositionTable::hashfull() const { + + int cnt = 0; + for (int i = 0; i < 1000; ++i) + for (int j = 0; j < ClusterSize; ++j) + cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; + + return cnt / ClusterSize; +} + +} // namespace Stockfish diff --git a/src/tt.h b/src/tt.h new file mode 100644 index 0000000000000000000000000000000000000000..03fe3e143d1b9a46e6165772e92fa676367b8938 --- /dev/null +++ b/src/tt.h @@ -0,0 +1,107 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TT_H_INCLUDED +#define TT_H_INCLUDED + +#include "misc.h" +#include "types.h" + +namespace Stockfish { + +/// TTEntry struct is the 10 bytes transposition table entry, defined as below: +/// +/// key 16 bit +/// depth 8 bit +/// generation 5 bit +/// pv node 1 bit +/// bound type 2 bit +/// move 16 bit +/// value 16 bit +/// eval value 16 bit + +struct TTEntry { + + Move move() const { return (Move )move16; } + Value value() const { return (Value)value16; } + Value eval() const { return (Value)eval16; } + Depth depth() const { return (Depth)depth8 + DEPTH_OFFSET; } + bool is_pv() const { return (bool)(genBound8 & 0x4); } + Bound bound() const { return (Bound)(genBound8 & 0x3); } + void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev); + +private: + friend class TranspositionTable; + + uint16_t key16; + uint8_t depth8; + uint8_t genBound8; + uint16_t move16; + int16_t value16; + int16_t eval16; +}; + + +/// A TranspositionTable is an array of Cluster, of size clusterCount. Each +/// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry +/// contains information on exactly one position. The size of a Cluster should +/// divide the size of a cache line for best performance, as the cacheline is +/// prefetched when possible. + +class TranspositionTable { + + static constexpr int ClusterSize = 3; + + struct Cluster { + TTEntry entry[ClusterSize]; + char padding[2]; // Pad to 32 bytes + }; + + static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); + + // Constants used to refresh the hash table periodically + static constexpr unsigned GENERATION_BITS = 3; // nb of bits reserved for other things + static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); // increment for generation field + static constexpr int GENERATION_CYCLE = 255 + (1 << GENERATION_BITS); // cycle length + static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; // mask to pull out generation number + +public: + ~TranspositionTable() { aligned_large_pages_free(table); } + void new_search() { generation8 += GENERATION_DELTA; } // Lower bits are used for other things + TTEntry* probe(const Key key, bool& found) const; + int hashfull() const; + void resize(size_t mbSize); + void clear(); + + TTEntry* first_entry(const Key key) const { + return &table[mul_hi64(key, clusterCount)].entry[0]; + } + +private: + friend struct TTEntry; + + size_t clusterCount; + Cluster* table; + uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 +}; + +extern TranspositionTable TT; + +} // namespace Stockfish + +#endif // #ifndef TT_H_INCLUDED diff --git a/src/tune.cpp b/src/tune.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a885845f75086823c03a1b6439dcefdce39c5638 --- /dev/null +++ b/src/tune.cpp @@ -0,0 +1,133 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include + +#include "types.h" +#include "misc.h" +#include "uci.h" + +using std::string; + +namespace Stockfish { + +bool Tune::update_on_last; +const UCI::Option* LastOption = nullptr; +static std::map TuneResults; + +string Tune::next(string& names, bool pop) { + + string name; + + do { + string token = names.substr(0, names.find(',')); + + if (pop) + names.erase(0, token.size() + 1); + + std::stringstream ws(token); + name += (ws >> token, token); // Remove trailing whitespace + + } while ( std::count(name.begin(), name.end(), '(') + - std::count(name.begin(), name.end(), ')')); + + return name; +} + +static void on_tune(const UCI::Option& o) { + + if (!Tune::update_on_last || LastOption == &o) + Tune::read_options(); +} + +static void make_option(const string& n, int v, const SetRange& r) { + + // Do not generate option when there is nothing to tune (ie. min = max) + if (r(v).first == r(v).second) + return; + + if (TuneResults.count(n)) + v = TuneResults[n]; + + Options[n] << UCI::Option(v, r(v).first, r(v).second, on_tune); + LastOption = &Options[n]; + + // Print formatted parameters, ready to be copy-pasted in Fishtest + std::cout << n << "," + << v << "," + << r(v).first << "," << r(v).second << "," + << (r(v).second - r(v).first) / 20.0 << "," + << "0.0020" + << std::endl; +} + +template<> void Tune::Entry::init_option() { make_option(name, value, range); } + +template<> void Tune::Entry::read_option() { + if (Options.count(name)) + value = int(Options[name]); +} + +template<> void Tune::Entry::init_option() { make_option(name, value, range); } + +template<> void Tune::Entry::read_option() { + if (Options.count(name)) + value = Value(int(Options[name])); +} + +template<> void Tune::Entry::init_option() { + make_option("m" + name, mg_value(value), range); + make_option("e" + name, eg_value(value), range); +} + +template<> void Tune::Entry::read_option() { + if (Options.count("m" + name)) + value = make_score(int(Options["m" + name]), eg_value(value)); + + if (Options.count("e" + name)) + value = make_score(mg_value(value), int(Options["e" + name])); +} + +// Instead of a variable here we have a PostUpdate function: just call it +template<> void Tune::Entry::init_option() {} +template<> void Tune::Entry::read_option() { value(); } + +} // namespace Stockfish + + +// Init options with tuning session results instead of default values. Useful to +// get correct bench signature after a tuning session or to test tuned values. +// Just copy fishtest tuning results in a result.txt file and extract the +// values with: +// +// cat results.txt | sed 's/^param: \([^,]*\), best: \([^,]*\).*/ TuneResults["\1"] = int(round(\2));/' +// +// Then paste the output below, as the function body + +#include + +namespace Stockfish { + +void Tune::read_results() { + + /* ...insert your values here... */ +} + +} // namespace Stockfish diff --git a/src/tune.h b/src/tune.h new file mode 100644 index 0000000000000000000000000000000000000000..75ab484acf066bff951e7d40b1e79d4e28e302b2 --- /dev/null +++ b/src/tune.h @@ -0,0 +1,163 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TUNE_H_INCLUDED +#define TUNE_H_INCLUDED + +#include +#include +#include +#include + +namespace Stockfish { + +typedef std::pair Range; // Option's min-max values +typedef Range (RangeFun) (int); + +// Default Range function, to calculate Option's min-max values +inline Range default_range(int v) { + return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); +} + +struct SetRange { + explicit SetRange(RangeFun f) : fun(f) {} + SetRange(int min, int max) : fun(nullptr), range(min, max) {} + Range operator()(int v) const { return fun ? fun(v) : range; } + + RangeFun* fun; + Range range; +}; + +#define SetDefaultRange SetRange(default_range) + + +/// Tune class implements the 'magic' code that makes the setup of a fishtest +/// tuning session as easy as it can be. Mainly you have just to remove const +/// qualifiers from the variables you want to tune and flag them for tuning, so +/// if you have: +/// +/// const Score myScore = S(10, 15); +/// const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } }; +/// +/// If you have a my_post_update() function to run after values have been updated, +/// and a my_range() function to set custom Option's min-max values, then you just +/// remove the 'const' qualifiers and write somewhere below in the file: +/// +/// TUNE(SetRange(my_range), myScore, myValue, my_post_update); +/// +/// You can also set the range directly, and restore the default at the end +/// +/// TUNE(SetRange(-100, 100), myScore, SetDefaultRange); +/// +/// In case update function is slow and you have many parameters, you can add: +/// +/// UPDATE_ON_LAST(); +/// +/// And the values update, including post update function call, will be done only +/// once, after the engine receives the last UCI option, that is the one defined +/// and created as the last one, so the GUI should send the options in the same +/// order in which have been defined. + +class Tune { + + typedef void (PostUpdate) (); // Post-update function + + Tune() { read_results(); } + Tune(const Tune&) = delete; + void operator=(const Tune&) = delete; + void read_results(); + + static Tune& instance() { static Tune t; return t; } // Singleton + + // Use polymorphism to accommodate Entry of different types in the same vector + struct EntryBase { + virtual ~EntryBase() = default; + virtual void init_option() = 0; + virtual void read_option() = 0; + }; + + template + struct Entry : public EntryBase { + + static_assert(!std::is_const::value, "Parameter cannot be const!"); + + static_assert( std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value, "Parameter type not supported!"); + + Entry(const std::string& n, T& v, const SetRange& r) : name(n), value(v), range(r) {} + void operator=(const Entry&) = delete; // Because 'value' is a reference + void init_option() override; + void read_option() override; + + std::string name; + T& value; + SetRange range; + }; + + // Our facility to fill the container, each Entry corresponds to a parameter + // to tune. We use variadic templates to deal with an unspecified number of + // entries, each one of a possible different type. + static std::string next(std::string& names, bool pop = true); + + int add(const SetRange&, std::string&&) { return 0; } + + template + int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { + list.push_back(std::unique_ptr(new Entry(next(names), value, range))); + return add(range, std::move(names), args...); + } + + // Template specialization for arrays: recursively handle multi-dimensional arrays + template + int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { + for (size_t i = 0; i < N; i++) + add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); + return add(range, std::move(names), args...); + } + + // Template specialization for SetRange + template + int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { + return add(value, (next(names), std::move(names)), args...); + } + + std::vector> list; + +public: + template + static int add(const std::string& names, Args&&... args) { + return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), args...); // Remove trailing parenthesis + } + static void init() { for (auto& e : instance().list) e->init_option(); read_options(); } // Deferred, due to UCI::Options access + static void read_options() { for (auto& e : instance().list) e->read_option(); } + static bool update_on_last; +}; + +// Some macro magic :-) we define a dummy int variable that compiler initializes calling Tune::add() +#define STRINGIFY(x) #x +#define UNIQUE2(x, y) x ## y +#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ +#define TUNE(...) int UNIQUE(p, __LINE__) = Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__) + +#define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true + +} // namespace Stockfish + +#endif // #ifndef TUNE_H_INCLUDED diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000000000000000000000000000000000000..c2087c6c07b9e8fd84be7d3033c19da397f894b7 --- /dev/null +++ b/src/types.h @@ -0,0 +1,486 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TYPES_H_INCLUDED +#define TYPES_H_INCLUDED + +/// When compiling with provided Makefile (e.g. for Linux and OSX), configuration +/// is done automatically. To get started type 'make help'. +/// +/// When Makefile is not used (e.g. with Microsoft Visual Studio) some switches +/// need to be set manually: +/// +/// -DNDEBUG | Disable debugging mode. Always use this for release. +/// +/// -DNO_PREFETCH | Disable use of prefetch asm-instruction. You may need this to +/// | run on some very old machines. +/// +/// -DUSE_POPCNT | Add runtime support for use of popcnt asm-instruction. Works +/// | only in 64-bit mode and requires hardware with popcnt support. +/// +/// -DUSE_PEXT | Add runtime support for use of pext asm-instruction. Works +/// | only in 64-bit mode and requires hardware with pext support. + +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +// Disable some silly and noisy warning from MSVC compiler +#pragma warning(disable: 4127) // Conditional expression is constant +#pragma warning(disable: 4146) // Unary minus operator applied to unsigned type +#pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' +#endif + +/// Predefined macros hell: +/// +/// __GNUC__ Compiler is gcc, Clang or Intel on Linux +/// __INTEL_COMPILER Compiler is Intel +/// _MSC_VER Compiler is MSVC or Intel on Windows +/// _WIN32 Building on Windows (any) +/// _WIN64 Building on Windows 64 bit + +#if defined(__GNUC__ ) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) && defined(_WIN32) && !defined(__clang__) +#define ALIGNAS_ON_STACK_VARIABLES_BROKEN +#endif + +#define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) + +#if defined(_WIN64) && defined(_MSC_VER) // No Makefile used +# include // Microsoft header for _BitScanForward64() +# define IS_64BIT +#endif + +#if defined(USE_POPCNT) && (defined(__INTEL_COMPILER) || defined(_MSC_VER)) +# include // Intel and Microsoft header for _mm_popcnt_u64() +#endif + +#if !defined(NO_PREFETCH) && (defined(__INTEL_COMPILER) || defined(_MSC_VER)) +# include // Intel and Microsoft header for _mm_prefetch() +#endif + +#if defined(USE_PEXT) +# include // Header for _pext_u64() intrinsic +# define pext(b, m) _pext_u64(b, m) +#else +# define pext(b, m) 0 +#endif + +namespace Stockfish { + +#ifdef USE_POPCNT +constexpr bool HasPopCnt = true; +#else +constexpr bool HasPopCnt = false; +#endif + +#ifdef USE_PEXT +constexpr bool HasPext = true; +#else +constexpr bool HasPext = false; +#endif + +#ifdef IS_64BIT +constexpr bool Is64Bit = true; +#else +constexpr bool Is64Bit = false; +#endif + +typedef uint64_t Key; +typedef uint64_t Bitboard; + +constexpr int MAX_MOVES = 256; +constexpr int MAX_PLY = 246; + +/// A move needs 16 bits to be stored +/// +/// bit 0- 5: destination square (from 0 to 63) +/// bit 6-11: origin square (from 0 to 63) +/// bit 12-13: promotion piece type - 2 (from KNIGHT-2 to QUEEN-2) +/// bit 14-15: special move flag: promotion (1), en passant (2), castling (3) +/// NOTE: en passant bit is set only when a pawn can be captured +/// +/// Special cases are MOVE_NONE and MOVE_NULL. We can sneak these in because in +/// any normal move destination square is always different from origin square +/// while MOVE_NONE and MOVE_NULL have the same origin and destination square. + +enum Move : int { + MOVE_NONE, + MOVE_NULL = 65 +}; + +enum MoveType { + NORMAL, + PROMOTION = 1 << 14, + EN_PASSANT = 2 << 14, + CASTLING = 3 << 14 +}; + +enum Color { + WHITE, BLACK, COLOR_NB = 2 +}; + +enum CastlingRights { + NO_CASTLING, + WHITE_OO, + WHITE_OOO = WHITE_OO << 1, + BLACK_OO = WHITE_OO << 2, + BLACK_OOO = WHITE_OO << 3, + + KING_SIDE = WHITE_OO | BLACK_OO, + QUEEN_SIDE = WHITE_OOO | BLACK_OOO, + WHITE_CASTLING = WHITE_OO | WHITE_OOO, + BLACK_CASTLING = BLACK_OO | BLACK_OOO, + ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, + + CASTLING_RIGHT_NB = 16 +}; + +enum Phase { + PHASE_ENDGAME, + PHASE_MIDGAME = 128, + MG = 0, EG = 1, PHASE_NB = 2 +}; + +enum ScaleFactor { + SCALE_FACTOR_DRAW = 0, + SCALE_FACTOR_NORMAL = 64, + SCALE_FACTOR_MAX = 128, + SCALE_FACTOR_NONE = 255 +}; + +enum Bound { + BOUND_NONE, + BOUND_UPPER, + BOUND_LOWER, + BOUND_EXACT = BOUND_UPPER | BOUND_LOWER +}; + +enum Value : int { + VALUE_ZERO = 0, + VALUE_DRAW = 0, + VALUE_KNOWN_WIN = 10000, + VALUE_MATE = 32000, + VALUE_INFINITE = 32001, + VALUE_NONE = 32002, + + VALUE_TB_WIN_IN_MAX_PLY = VALUE_MATE - 2 * MAX_PLY, + VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY, + VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, + VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, + + PawnValueMg = 126, PawnValueEg = 208, + KnightValueMg = 781, KnightValueEg = 854, + BishopValueMg = 825, BishopValueEg = 915, + RookValueMg = 1276, RookValueEg = 1380, + QueenValueMg = 2538, QueenValueEg = 2682, + + MidgameLimit = 15258, EndgameLimit = 3915 +}; + +enum PieceType { + NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING, + ALL_PIECES = 0, + PIECE_TYPE_NB = 8 +}; + +enum Piece { + NO_PIECE, + W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, + PIECE_NB = 16 +}; + +constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { + { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, + VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, + { VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO, + VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO } +}; + +typedef int Depth; + +enum : int { + DEPTH_QS_CHECKS = 0, + DEPTH_QS_NO_CHECKS = -1, + DEPTH_QS_RECAPTURES = -5, + + DEPTH_NONE = -6, + + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check +}; + +enum Square : int { + SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, + SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, + SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, + SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, + SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, + SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, + SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, + SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, + SQ_NONE, + + SQUARE_ZERO = 0, + SQUARE_NB = 64 +}; + +enum Direction : int { + NORTH = 8, + EAST = 1, + SOUTH = -NORTH, + WEST = -EAST, + + NORTH_EAST = NORTH + EAST, + SOUTH_EAST = SOUTH + EAST, + SOUTH_WEST = SOUTH + WEST, + NORTH_WEST = NORTH + WEST +}; + +enum File : int { + FILE_A, FILE_B, FILE_C, FILE_D, FILE_E, FILE_F, FILE_G, FILE_H, FILE_NB +}; + +enum Rank : int { + RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB +}; + +// Keep track of what a move changes on the board (used by NNUE) +struct DirtyPiece { + + // Number of changed pieces + int dirty_num; + + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. + Piece piece[3]; + + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; +}; + +/// Score enum stores a middlegame and an endgame value in a single integer (enum). +/// The least significant 16 bits are used to store the middlegame value and the +/// upper 16 bits are used to store the endgame value. We have to take care to +/// avoid left-shifting a signed int to avoid undefined behavior. +enum Score : int { SCORE_ZERO }; + +constexpr Score make_score(int mg, int eg) { + return Score((int)((unsigned int)eg << 16) + mg); +} + +/// Extracting the signed lower and upper 16 bits is not so trivial because +/// according to the standard a simple cast to short is implementation defined +/// and so is a right shift of a signed integer. +inline Value eg_value(Score s) { + union { uint16_t u; int16_t s; } eg = { uint16_t(unsigned(s + 0x8000) >> 16) }; + return Value(eg.s); +} + +inline Value mg_value(Score s) { + union { uint16_t u; int16_t s; } mg = { uint16_t(unsigned(s)) }; + return Value(mg.s); +} + +#define ENABLE_BASE_OPERATORS_ON(T) \ +constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ +constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ +constexpr T operator-(T d) { return T(-int(d)); } \ +inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ +inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } + +#define ENABLE_INCR_OPERATORS_ON(T) \ +inline T& operator++(T& d) { return d = T(int(d) + 1); } \ +inline T& operator--(T& d) { return d = T(int(d) - 1); } + +#define ENABLE_FULL_OPERATORS_ON(T) \ +ENABLE_BASE_OPERATORS_ON(T) \ +constexpr T operator*(int i, T d) { return T(i * int(d)); } \ +constexpr T operator*(T d, int i) { return T(int(d) * i); } \ +constexpr T operator/(T d, int i) { return T(int(d) / i); } \ +constexpr int operator/(T d1, T d2) { return int(d1) / int(d2); } \ +inline T& operator*=(T& d, int i) { return d = T(int(d) * i); } \ +inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } + +ENABLE_FULL_OPERATORS_ON(Value) +ENABLE_FULL_OPERATORS_ON(Direction) + +ENABLE_INCR_OPERATORS_ON(Piece) +ENABLE_INCR_OPERATORS_ON(PieceType) +ENABLE_INCR_OPERATORS_ON(Square) +ENABLE_INCR_OPERATORS_ON(File) +ENABLE_INCR_OPERATORS_ON(Rank) + +ENABLE_BASE_OPERATORS_ON(Score) + +#undef ENABLE_FULL_OPERATORS_ON +#undef ENABLE_INCR_OPERATORS_ON +#undef ENABLE_BASE_OPERATORS_ON + +/// Additional operators to add a Direction to a Square +constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); } +constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); } +inline Square& operator+=(Square& s, Direction d) { return s = s + d; } +inline Square& operator-=(Square& s, Direction d) { return s = s - d; } + +/// Only declared but not defined. We don't want to multiply two scores due to +/// a very high risk of overflow. So user should explicitly convert to integer. +Score operator*(Score, Score) = delete; + +/// Division of a Score must be handled separately for each term +inline Score operator/(Score s, int i) { + return make_score(mg_value(s) / i, eg_value(s) / i); +} + +/// Multiplication of a Score by an integer. We check for overflow in debug mode. +inline Score operator*(Score s, int i) { + + Score result = Score(int(s) * i); + + assert(eg_value(result) == (i * eg_value(s))); + assert(mg_value(result) == (i * mg_value(s))); + assert((i == 0) || (result / i) == s); + + return result; +} + +/// Multiplication of a Score by a boolean +inline Score operator*(Score s, bool b) { + return b ? s : SCORE_ZERO; +} + +constexpr Color operator~(Color c) { + return Color(c ^ BLACK); // Toggle color +} + +constexpr Square flip_rank(Square s) { // Swap A1 <-> A8 + return Square(s ^ SQ_A8); +} + +constexpr Square flip_file(Square s) { // Swap A1 <-> H1 + return Square(s ^ SQ_H1); +} + +constexpr Piece operator~(Piece pc) { + return Piece(pc ^ 8); // Swap color of piece B_KNIGHT <-> W_KNIGHT +} + +constexpr CastlingRights operator&(Color c, CastlingRights cr) { + return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); +} + +constexpr Value mate_in(int ply) { + return VALUE_MATE - ply; +} + +constexpr Value mated_in(int ply) { + return -VALUE_MATE + ply; +} + +constexpr Square make_square(File f, Rank r) { + return Square((r << 3) + f); +} + +constexpr Piece make_piece(Color c, PieceType pt) { + return Piece((c << 3) + pt); +} + +constexpr PieceType type_of(Piece pc) { + return PieceType(pc & 7); +} + +inline Color color_of(Piece pc) { + assert(pc != NO_PIECE); + return Color(pc >> 3); +} + +constexpr bool is_ok(Square s) { + return s >= SQ_A1 && s <= SQ_H8; +} + +constexpr File file_of(Square s) { + return File(s & 7); +} + +constexpr Rank rank_of(Square s) { + return Rank(s >> 3); +} + +constexpr Square relative_square(Color c, Square s) { + return Square(s ^ (c * 56)); +} + +constexpr Rank relative_rank(Color c, Rank r) { + return Rank(r ^ (c * 7)); +} + +constexpr Rank relative_rank(Color c, Square s) { + return relative_rank(c, rank_of(s)); +} + +constexpr Direction pawn_push(Color c) { + return c == WHITE ? NORTH : SOUTH; +} + +constexpr Square from_sq(Move m) { + return Square((m >> 6) & 0x3F); +} + +constexpr Square to_sq(Move m) { + return Square(m & 0x3F); +} + +constexpr int from_to(Move m) { + return m & 0xFFF; +} + +constexpr MoveType type_of(Move m) { + return MoveType(m & (3 << 14)); +} + +constexpr PieceType promotion_type(Move m) { + return PieceType(((m >> 12) & 3) + KNIGHT); +} + +constexpr Move make_move(Square from, Square to) { + return Move((from << 6) + to); +} + +template +constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { + return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); +} + +constexpr bool is_ok(Move m) { + return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE +} + +/// Based on a congruential pseudo random number generator +constexpr Key make_key(uint64_t seed) { + return seed * 6364136223846793005ULL + 1442695040888963407ULL; +} + +} // namespace Stockfish + +#endif // #ifndef TYPES_H_INCLUDED + +#include "tune.h" // Global visibility to tuning setup diff --git a/src/uci.cpp b/src/uci.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d842d25e73b6635b1472d2fead5eb525f602336 --- /dev/null +++ b/src/uci.cpp @@ -0,0 +1,393 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include + +#include "evaluate.h" +#include "movegen.h" +#include "position.h" +#include "search.h" +#include "thread.h" +#include "timeman.h" +#include "tt.h" +#include "uci.h" +#include "syzygy/tbprobe.h" + +using namespace std; + +namespace Stockfish { + +extern vector setup_bench(const Position&, istream&); + +namespace { + + // FEN string for the initial position in standard chess + const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; + + + // position() is called when the engine receives the "position" UCI command. + // It sets up the position that is described in the given FEN string ("fen") or + // the initial position ("startpos") and then makes the moves given in the following + // move list ("moves"). + + void position(Position& pos, istringstream& is, StateListPtr& states) { + + Move m; + string token, fen; + + is >> token; + + if (token == "startpos") + { + fen = StartFEN; + is >> token; // Consume the "moves" token, if any + } + else if (token == "fen") + while (is >> token && token != "moves") + fen += token + " "; + else + return; + + states = StateListPtr(new std::deque(1)); // Drop the old state and create a new one + pos.set(fen, Options["UCI_Chess960"], &states->back(), Threads.main()); + + // Parse the move list, if any + while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE) + { + states->emplace_back(); + pos.do_move(m, states->back()); + } + } + + // trace_eval() prints the evaluation of the current position, consistent with + // the UCI options set so far. + + void trace_eval(Position& pos) { + + StateListPtr states(new std::deque(1)); + Position p; + p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); + + Eval::NNUE::verify(); + + sync_cout << "\n" << Eval::trace(p) << sync_endl; + } + + + // setoption() is called when the engine receives the "setoption" UCI command. + // The function updates the UCI option ("name") to the given value ("value"). + + void setoption(istringstream& is) { + + string token, name, value; + + is >> token; // Consume the "name" token + + // Read the option name (can contain spaces) + while (is >> token && token != "value") + name += (name.empty() ? "" : " ") + token; + + // Read the option value (can contain spaces) + while (is >> token) + value += (value.empty() ? "" : " ") + token; + + if (Options.count(name)) + Options[name] = value; + else + sync_cout << "No such option: " << name << sync_endl; + } + + + // go() is called when the engine receives the "go" UCI command. The function + // sets the thinking time and other parameters from the input string, then starts + // with a search. + + void go(Position& pos, istringstream& is, StateListPtr& states) { + + Search::LimitsType limits; + string token; + bool ponderMode = false; + + limits.startTime = now(); // The search starts as early as possible + + while (is >> token) + if (token == "searchmoves") // Needs to be the last command on the line + while (is >> token) + limits.searchmoves.push_back(UCI::to_move(pos, token)); + + else if (token == "wtime") is >> limits.time[WHITE]; + else if (token == "btime") is >> limits.time[BLACK]; + else if (token == "winc") is >> limits.inc[WHITE]; + else if (token == "binc") is >> limits.inc[BLACK]; + else if (token == "movestogo") is >> limits.movestogo; + else if (token == "depth") is >> limits.depth; + else if (token == "nodes") is >> limits.nodes; + else if (token == "movetime") is >> limits.movetime; + else if (token == "mate") is >> limits.mate; + else if (token == "perft") is >> limits.perft; + else if (token == "infinite") limits.infinite = 1; + else if (token == "ponder") ponderMode = true; + + Threads.start_thinking(pos, states, limits, ponderMode); + } + + + // bench() is called when the engine receives the "bench" command. + // Firstly, a list of UCI commands is set up according to the bench + // parameters, then it is run one by one, printing a summary at the end. + + void bench(Position& pos, istream& args, StateListPtr& states) { + + string token; + uint64_t num, nodes = 0, cnt = 1; + + vector list = setup_bench(pos, args); + num = count_if(list.begin(), list.end(), [](string s) { return s.find("go ") == 0 || s.find("eval") == 0; }); + + TimePoint elapsed = now(); + + for (const auto& cmd : list) + { + istringstream is(cmd); + is >> skipws >> token; + + if (token == "go" || token == "eval") + { + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; + if (token == "go") + { + go(pos, is, states); + Threads.main()->wait_for_search_finished(); + nodes += Threads.nodes_searched(); + } + else + trace_eval(pos); + } + else if (token == "setoption") setoption(is); + else if (token == "position") position(pos, is, states); + else if (token == "ucinewgame") { Search::clear(); elapsed = now(); } // Search::clear() may take a while + } + + elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' + + dbg_print(); + + cerr << "\n===========================" + << "\nTotal time (ms) : " << elapsed + << "\nNodes searched : " << nodes + << "\nNodes/second : " << 1000 * nodes / elapsed << endl; + } + + // The win rate model returns the probability of winning (in per mille units) given an + // eval and a game ply. It fits the LTC fishtest statistics rather accurately. + int win_rate_model(Value v, int ply) { + + // The model only captures up to 240 plies, so limit the input and then rescale + double m = std::min(240, ply) / 64.0; + + // The coefficients of a third-order polynomial fit is based on the fishtest data + // for two parameters that need to transform eval to the argument of a logistic + // function. + constexpr double as[] = { -0.58270499, 2.68512549, 15.24638015, 344.49745382}; + constexpr double bs[] = { -2.65734562, 15.96509799, -20.69040836, 73.61029937 }; + + // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64 + static_assert(UCI::NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3])); + + double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; + double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; + + // Transform the eval to centipawns with limited range + double x = std::clamp(double(v), -4000.0, 4000.0); + + // Return the win rate in per mille units rounded to the nearest value + return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); + } + +} // namespace + + +/// UCI::loop() waits for a command from the stdin, parses it and then calls the appropriate +/// function. It also intercepts an end-of-file (EOF) indication from the stdin to ensure a +/// graceful exit if the GUI dies unexpectedly. When called with some command-line arguments, +/// like running 'bench', the function returns immediately after the command is executed. +/// In addition to the UCI ones, some additional debug commands are also supported. + +void UCI::loop(int argc, char* argv[]) { + + Position pos; + string token, cmd; + StateListPtr states(new std::deque(1)); + + pos.set(StartFEN, false, &states->back(), Threads.main()); + + for (int i = 1; i < argc; ++i) + cmd += std::string(argv[i]) + " "; + + do { + if (argc == 1 && !getline(cin, cmd)) // Wait for an input or an end-of-file (EOF) indication + cmd = "quit"; + + istringstream is(cmd); + + token.clear(); // Avoid a stale if getline() returns nothing or a blank line + is >> skipws >> token; + + if ( token == "quit" + || token == "stop") + Threads.stop = true; + + // The GUI sends 'ponderhit' to tell that the user has played the expected move. + // So, 'ponderhit' is sent if pondering was done on the same move that the user + // has played. The search should continue, but should also switch from pondering + // to the normal search. + else if (token == "ponderhit") + Threads.main()->ponder = false; // Switch to the normal search + + else if (token == "uci") + sync_cout << "id name " << engine_info(true) + << "\n" << Options + << "\nuciok" << sync_endl; + + else if (token == "setoption") setoption(is); + else if (token == "go") go(pos, is, states); + else if (token == "position") position(pos, is, states); + else if (token == "ucinewgame") Search::clear(); + else if (token == "isready") sync_cout << "readyok" << sync_endl; + + // Add custom non-UCI commands, mainly for debugging purposes. + // These commands must not be used during a search! + else if (token == "flip") pos.flip(); + else if (token == "bench") bench(pos, is, states); + else if (token == "d") sync_cout << pos << sync_endl; + else if (token == "eval") trace_eval(pos); + else if (token == "compiler") sync_cout << compiler_info() << sync_endl; + else if (token == "export_net") + { + std::optional filename; + std::string f; + if (is >> skipws >> f) + filename = f; + Eval::NNUE::save_eval(filename); + } + else if (token == "--help" || token == "help" || token == "--license" || token == "license") + sync_cout << "\nStockfish is a powerful chess engine for playing and analyzing." + "\nIt is released as free software licensed under the GNU GPLv3 License." + "\nStockfish is normally used with a graphical user interface (GUI) and implements" + "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." + "\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" + "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" << sync_endl; + else if (!token.empty() && token[0] != '#') + sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." << sync_endl; + + } while (token != "quit" && argc == 1); // The command-line arguments are one-shot +} + + +/// UCI::value() converts a Value to a string by adhering to the UCI protocol specification: +/// +/// cp The score from the engine's point of view in centipawns. +/// mate Mate in 'y' moves (not plies). If the engine is getting mated, +/// uses negative values for 'y'. + +string UCI::value(Value v) { + + assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); + + stringstream ss; + + if (abs(v) < VALUE_MATE_IN_MAX_PLY) + ss << "cp " << v * 100 / NormalizeToPawnValue; + else + ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; + + return ss.str(); +} + + +/// UCI::wdl() reports the win-draw-loss (WDL) statistics given an evaluation +/// and a game ply based on the data gathered for fishtest LTC games. + +string UCI::wdl(Value v, int ply) { + + stringstream ss; + + int wdl_w = win_rate_model( v, ply); + int wdl_l = win_rate_model(-v, ply); + int wdl_d = 1000 - wdl_w - wdl_l; + ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; + + return ss.str(); +} + + +/// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) + +std::string UCI::square(Square s) { + return std::string{ char('a' + file_of(s)), char('1' + rank_of(s)) }; +} + + +/// UCI::move() converts a Move to a string in coordinate notation (g1f3, a7a8q). +/// The only special case is castling where the e1g1 notation is printed in +/// standard chess mode and in e1h1 notation it is printed in Chess960 mode. +/// Internally, all castling moves are always encoded as 'king captures rook'. + +string UCI::move(Move m, bool chess960) { + + Square from = from_sq(m); + Square to = to_sq(m); + + if (m == MOVE_NONE) + return "(none)"; + + if (m == MOVE_NULL) + return "0000"; + + if (type_of(m) == CASTLING && !chess960) + to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); + + string move = UCI::square(from) + UCI::square(to); + + if (type_of(m) == PROMOTION) + move += " pnbrqk"[promotion_type(m)]; + + return move; +} + + +/// UCI::to_move() converts a string representing a move in coordinate notation +/// (g1f3, a7a8q) to the corresponding legal Move, if any. + +Move UCI::to_move(const Position& pos, string& str) { + + if (str.length() == 5) + str[4] = char(tolower(str[4])); // The promotion piece character must be lowercased + + for (const auto& m : MoveList(pos)) + if (str == UCI::move(m, pos.is_chess960())) + return m; + + return MOVE_NONE; +} + +} // namespace Stockfish diff --git a/src/uci.h b/src/uci.h new file mode 100644 index 0000000000000000000000000000000000000000..3b5a6764b48c8897cf0392689dae131bc81dfa31 --- /dev/null +++ b/src/uci.h @@ -0,0 +1,92 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef UCI_H_INCLUDED +#define UCI_H_INCLUDED + +#include +#include + +#include "types.h" + +namespace Stockfish { + +class Position; + +namespace UCI { + +// Normalizes the internal value as reported by evaluate or search +// to the UCI centipawn result used in output. This value is derived from +// the win_rate_model() such that Stockfish outputs an advantage of +// "100 centipawns" for a position if the engine has a 50% probability to win +// from this position in selfplay at fishtest LTC time control. +const int NormalizeToPawnValue = 361; + +class Option; + +/// Define a custom comparator, because the UCI options should be case-insensitive +struct CaseInsensitiveLess { + bool operator() (const std::string&, const std::string&) const; +}; + +/// The options container is defined as a std::map +typedef std::map OptionsMap; + +/// The Option class implements each option as specified by the UCI protocol +class Option { + + typedef void (*OnChange)(const Option&); + +public: + Option(OnChange = nullptr); + Option(bool v, OnChange = nullptr); + Option(const char* v, OnChange = nullptr); + Option(double v, int minv, int maxv, OnChange = nullptr); + Option(const char* v, const char* cur, OnChange = nullptr); + + Option& operator=(const std::string&); + void operator<<(const Option&); + operator double() const; + operator std::string() const; + bool operator==(const char*) const; + +private: + friend std::ostream& operator<<(std::ostream&, const OptionsMap&); + + std::string defaultValue, currentValue, type; + int min, max; + size_t idx; + OnChange on_change; +}; + +void init(OptionsMap&); +void loop(int argc, char* argv[]); +std::string value(Value v); +std::string square(Square s); +std::string move(Move m, bool chess960); +std::string pv(const Position& pos, Depth depth); +std::string wdl(Value v, int ply); +Move to_move(const Position& pos, std::string& str); + +} // namespace UCI + +extern UCI::OptionsMap Options; + +} // namespace Stockfish + +#endif // #ifndef UCI_H_INCLUDED diff --git a/src/ucioption.cpp b/src/ucioption.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9fb48345a06044b99ab17f56c2873c9ed3886a1d --- /dev/null +++ b/src/ucioption.cpp @@ -0,0 +1,194 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include + +#include "evaluate.h" +#include "misc.h" +#include "search.h" +#include "thread.h" +#include "tt.h" +#include "uci.h" +#include "syzygy/tbprobe.h" + +using std::string; + +namespace Stockfish { + +UCI::OptionsMap Options; // Global object + +namespace UCI { + +/// 'On change' actions, triggered by an option's value change +void on_clear_hash(const Option&) { Search::clear(); } +void on_hash_size(const Option& o) { TT.resize(size_t(o)); } +void on_logger(const Option& o) { start_logger(o); } +void on_threads(const Option& o) { Threads.set(size_t(o)); } +void on_tb_path(const Option& o) { Tablebases::init(o); } +void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } +void on_eval_file(const Option& ) { Eval::NNUE::init(); } + +/// Our case insensitive less() function as required by UCI protocol +bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { + + return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), + [](char c1, char c2) { return tolower(c1) < tolower(c2); }); +} + + +/// UCI::init() initializes the UCI options to their hard-coded default values + +void init(OptionsMap& o) { + + constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; + + o["Debug Log File"] << Option("", on_logger); + o["Threads"] << Option(1, 1, 1024, on_threads); + o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size); + o["Clear Hash"] << Option(on_clear_hash); + o["Ponder"] << Option(false); + o["MultiPV"] << Option(1, 1, 500); + o["Skill Level"] << Option(20, 0, 20); + o["Move Overhead"] << Option(10, 0, 5000); + o["Slow Mover"] << Option(100, 10, 1000); + o["nodestime"] << Option(0, 0, 10000); + o["UCI_Chess960"] << Option(false); + o["UCI_AnalyseMode"] << Option(false); + o["UCI_LimitStrength"] << Option(false); + o["UCI_Elo"] << Option(1350, 1350, 2850); + o["UCI_ShowWDL"] << Option(false); + o["SyzygyPath"] << Option("", on_tb_path); + o["SyzygyProbeDepth"] << Option(1, 1, 100); + o["Syzygy50MoveRule"] << Option(true); + o["SyzygyProbeLimit"] << Option(7, 0, 7); + o["Use NNUE"] << Option(true, on_use_NNUE); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); +} + + +/// operator<<() is used to print all the options default values in chronological +/// insertion order (the idx field) and in the format defined by the UCI protocol. + +std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { + + for (size_t idx = 0; idx < om.size(); ++idx) + for (const auto& it : om) + if (it.second.idx == idx) + { + const Option& o = it.second; + os << "\noption name " << it.first << " type " << o.type; + + if (o.type == "string" || o.type == "check" || o.type == "combo") + os << " default " << o.defaultValue; + + if (o.type == "spin") + os << " default " << int(stof(o.defaultValue)) + << " min " << o.min + << " max " << o.max; + + break; + } + + return os; +} + + +/// Option class constructors and conversion operators + +Option::Option(const char* v, OnChange f) : type("string"), min(0), max(0), on_change(f) +{ defaultValue = currentValue = v; } + +Option::Option(bool v, OnChange f) : type("check"), min(0), max(0), on_change(f) +{ defaultValue = currentValue = (v ? "true" : "false"); } + +Option::Option(OnChange f) : type("button"), min(0), max(0), on_change(f) +{} + +Option::Option(double v, int minv, int maxv, OnChange f) : type("spin"), min(minv), max(maxv), on_change(f) +{ defaultValue = currentValue = std::to_string(v); } + +Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(f) +{ defaultValue = v; currentValue = cur; } + +Option::operator double() const { + assert(type == "check" || type == "spin"); + return (type == "spin" ? stof(currentValue) : currentValue == "true"); +} + +Option::operator std::string() const { + assert(type == "string"); + return currentValue; +} + +bool Option::operator==(const char* s) const { + assert(type == "combo"); + return !CaseInsensitiveLess()(currentValue, s) + && !CaseInsensitiveLess()(s, currentValue); +} + + +/// operator<<() inits options and assigns idx in the correct printing order + +void Option::operator<<(const Option& o) { + + static size_t insert_order = 0; + + *this = o; + idx = insert_order++; +} + + +/// operator=() updates currentValue and triggers on_change() action. It's up to +/// the GUI to check for option's limits, but we could receive the new value +/// from the user by console window, so let's check the bounds anyway. + +Option& Option::operator=(const string& v) { + + assert(!type.empty()); + + if ( (type != "button" && type != "string" && v.empty()) + || (type == "check" && v != "true" && v != "false") + || (type == "spin" && (stof(v) < min || stof(v) > max))) + return *this; + + if (type == "combo") + { + OptionsMap comboMap; // To have case insensitive compare + string token; + std::istringstream ss(defaultValue); + while (ss >> token) + comboMap[token] << Option(); + if (!comboMap.count(v) || v == "var") + return *this; + } + + if (type != "button") + currentValue = v; + + if (on_change) + on_change(*this); + + return *this; +} + +} // namespace UCI + +} // namespace Stockfish