PLB committed on
Commit
f65f671
1 Parent(s): 351b8ad

Initial commit

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +2 -2
  3. app.py +22 -0
  4. elo-20240326.csv +9 -0
  5. requirements.txt +65 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Llm Colosseum
3
- emoji: 🐢
4
  colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.23.0
8
  app_file: app.py
 
1
  ---
2
  title: Llm Colosseum
3
+ emoji: 🤼
4
  colorFrom: yellow
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 4.23.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ # Load the results
5
+ elo_df = pd.read_csv("elo-20240326.csv")
6
+
7
+ text_description = """
8
+ # 🤼 LLM Colosseum Leaderboard
9
+
10
+ LLM Colosseum is a new way to assess the relative performance of LLMs. We have them play Street Fighter III against each other, and we use the results to calculate their Elo ratings.
11
+
12
+ Watch a demo of LLMs playing Street Fighter III [here](https://youtu.be/Kk8foX3dm2I).
13
+ More info in the LLM Colosseum GitHub [repository](https://github.com/OpenGenerativeAI/llm-colosseum).
14
+ """
15
+
16
+ with gr.Blocks(
17
+ title="LLM Colosseum Leaderboard",
18
+ ) as demo:
19
+ gr.Markdown(text_description)
20
+ gr.Dataframe(value=elo_df, interactive=False)
21
+
22
+ demo.launch()
elo-20240326.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Model,Organization,Colosseum Elo
2
+ gpt-3.5-turbo-0125,openai,1776
3
+ mistral-small-latest,mistral,1586
4
+ gpt-4-1106-preview,openai,1585
5
+ gpt-4,openai,1517
6
+ gpt-4-turbo-preview,openai,1509
7
+ gpt-4-0125-preview,openai,1439
8
+ mistral-medium-latest,mistral,1356
9
+ mistral-large-latest,mistral,1231
requirements.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.2.0
3
+ annotated-types==0.6.0
4
+ anyio==4.3.0
5
+ attrs==23.2.0
6
+ certifi==2024.2.2
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ colorama==0.4.6
10
+ contourpy==1.2.0
11
+ cycler==0.12.1
12
+ fastapi==0.110.0
13
+ ffmpy==0.3.2
14
+ filelock==3.13.3
15
+ fonttools==4.50.0
16
+ fsspec==2024.3.1
17
+ gradio==4.23.0
18
+ gradio_client==0.14.0
19
+ h11==0.14.0
20
+ httpcore==1.0.4
21
+ httpx==0.27.0
22
+ huggingface-hub==0.22.1
23
+ idna==3.6
24
+ importlib_resources==6.4.0
25
+ Jinja2==3.1.3
26
+ jsonschema==4.21.1
27
+ jsonschema-specifications==2023.12.1
28
+ kiwisolver==1.4.5
29
+ markdown-it-py==3.0.0
30
+ MarkupSafe==2.1.5
31
+ matplotlib==3.8.3
32
+ mdurl==0.1.2
33
+ numpy==1.26.4
34
+ orjson==3.9.15
35
+ packaging==24.0
36
+ pandas==2.2.1
37
+ pillow==10.2.0
38
+ pydantic==2.6.4
39
+ pydantic_core==2.16.3
40
+ pydub==0.25.1
41
+ Pygments==2.17.2
42
+ pyparsing==3.1.2
43
+ python-dateutil==2.9.0.post0
44
+ python-multipart==0.0.9
45
+ pytz==2024.1
46
+ PyYAML==6.0.1
47
+ referencing==0.34.0
48
+ requests==2.31.0
49
+ rich==13.7.1
50
+ rpds-py==0.18.0
51
+ ruff==0.3.4
52
+ semantic-version==2.10.0
53
+ shellingham==1.5.4
54
+ six==1.16.0
55
+ sniffio==1.3.1
56
+ starlette==0.36.3
57
+ tomlkit==0.12.0
58
+ toolz==0.12.1
59
+ tqdm==4.66.2
60
+ typer==0.10.0
61
+ typing_extensions==4.10.0
62
+ tzdata==2024.1
63
+ urllib3==2.2.1
64
+ uvicorn==0.29.0
65
+ websockets==11.0.3