jhao committed on
Commit
2314beb
1 Parent(s): 9c2a184

Update reference and arxiv paper link

Browse files
Files changed (1) hide show
  1. src/display/about.py +9 -0
src/display/about.py CHANGED
@@ -4,6 +4,9 @@ TITLE = """
4
  <h1 id="space-title">GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via<br> Game-Theoretic Evaluations</h1>"""
5
 
6
  INTRODUCTION_TEXT = """
 
 
 
7
  GTBench aims to evaluate and rank LLMs’ reasoning abilities in competitive environments through game-theoretic tasks, e.g., board and card games.
8
  It utilizes 10 widely recognized games supported by <a href="https://github.com/google-deepmind/open_spiel">OpenSpiel</a> and evaluates well-recognized LLM agents in a language-driven manner. The evaluation code and prompt templates can be found in <a href="https://github.com/jinhaoduan/GTBench" target="_blank" >GTBench</a>.
9
 
@@ -58,4 +61,10 @@ EVALUATION_QUEUE_TEXT = """
58
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
59
 
60
  CITATION_BUTTON_TEXT = r"""
 
 
 
 
 
 
61
  """
 
4
  <h1 id="space-title">GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via<br> Game-Theoretic Evaluations</h1>"""
5
 
6
  INTRODUCTION_TEXT = """
7
+
8
+ paper: https://arxiv.org/abs/2402.12348
9
+
10
  GTBench aims to evaluate and rank LLMs’ reasoning abilities in competitive environments through game-theoretic tasks, e.g., board and card games.
11
  It utilizes 10 widely recognized games supported by <a href="https://github.com/google-deepmind/open_spiel">OpenSpiel</a> and evaluates well-recognized LLM agents in a language-driven manner. The evaluation code and prompt templates can be found in <a href="https://github.com/jinhaoduan/GTBench" target="_blank" >GTBench</a>.
12
 
 
61
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
62
 
63
  CITATION_BUTTON_TEXT = r"""
64
+ @article{duan2024gtbench,
65
+ title = {GTBench: Uncovering the Strategic Reasoning Limitations of LLMs via Game-Theoretic Evaluations},
66
+ author = {Duan, Jinhao and Zhang, Renming and Diffenderfer, James and Kailkhura, Bhavya and Sun, Lichao and Stengel-Eskin, Elias and Bansal, Mohit and Chen, Tianlong and Xu, Kaidi},
67
+ year = {2024},
68
+ journal={arXiv preprint 2402.12348}
69
+ }
70
  """