de-arena

Sleeping

yzabc007 committed on Oct 7, 2024

Commit

2a7d8c1

1 Parent(s): 5f0ee8c

Update space

Files changed (2) hide show

app.py CHANGED Viewed

@@ -139,6 +139,7 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=5):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         '''
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():

         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=5):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         '''
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():

src/about.py CHANGED Viewed

@@ -25,7 +25,10 @@ TITLE = """<h1 align="center" id="space-title">Decentralized Arena</h1>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-[Brief description of the leaderboard]: Decentralized Arena is an automatic LLM evaluation learderboard that leverages collective intelligence to evaluate LLMs ...
 """
 # Which evaluations are you running? how can people reproduce what you have?

 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
+TL;DR: We release Decentralized Arena that automates and scales “Chatbot Arena” for LLM evaluation across various
+fine-grained dimensions (e.g., math – algebra, geometry, probability; logical reasoning, social reasoning,
+biology, chemistry, …). The evaluation is decentralized and democratic, with all LLMs participating
+in evaluating others. It achieves a 97\% correlation with Chatbot Arena's overall rankings, while being fully transparent and reproducible.
 """
 # Which evaluations are you running? how can people reproduce what you have?