Update reference and arxiv paper link
Browse files- src/display/about.py +9 -0
src/display/about.py
CHANGED
@@ -4,6 +4,9 @@ TITLE = """
|
|
4 |
<h1 id="space-title">GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via<br> Game-Theoretic Evaluations</h1>"""
|
5 |
|
6 |
INTRODUCTION_TEXT = """
|
|
|
|
|
|
|
7 |
GTBench aims to evaluate and rank LLMs’ reasoning abilities in competitive environments through game-theoretic tasks, e.g., board and card games.
|
8 |
It utilizes 10 widely recognized games supported by <a href="https://github.com/google-deepmind/open_spiel">OpenSpiel</a> and evaluates well-recognized LLM agents in a language-driven manner. The evaluation code and prompt templates can be found in <a href="https://github.com/jinhaoduan/GTBench" target="_blank" >GTBench</a>.
|
9 |
|
@@ -58,4 +61,10 @@ EVALUATION_QUEUE_TEXT = """
|
|
58 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
59 |
|
60 |
CITATION_BUTTON_TEXT = r"""
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
"""
|
|
|
4 |
<h1 id="space-title">GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via<br> Game-Theoretic Evaluations</h1>"""
|
5 |
|
6 |
INTRODUCTION_TEXT = """
|
7 |
+
|
8 |
+
paper: https://arxiv.org/abs/2402.12348
|
9 |
+
|
10 |
GTBench aims to evaluate and rank LLMs’ reasoning abilities in competitive environments through game-theoretic tasks, e.g., board and card games.
|
11 |
It utilizes 10 widely recognized games supported by <a href="https://github.com/google-deepmind/open_spiel">OpenSpiel</a> and evaluates well-recognized LLM agents in a language-driven manner. The evaluation code and prompt templates can be found in <a href="https://github.com/jinhaoduan/GTBench" target="_blank" >GTBench</a>.
|
12 |
|
|
|
61 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
62 |
|
63 |
CITATION_BUTTON_TEXT = r"""
|
64 |
+
@article{duan2024gtbench,
|
65 |
+
title = {GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via Game-Theoretic Evaluations},
|
66 |
+
author = {Duan, Jinhao and Zhang, Renming and Diffenderfer, James and Kailkhura, Bhavya and Sun, Lichao and Stengel-Eskin, Elias and Bansal, Mohit and Chen, Tianlong and Xu, Kaidi},
|
67 |
+
year = {2024},
|
68 |
+
journal = {arXiv preprint arXiv:2402.12348}
|
69 |
+
}
|
70 |
"""
|