andrewrreed (HF staff) committed
Commit 311dc3a
1 parent: c237b5a
Files changed (5)
  1. .gitignore +1 -0
  2. app.py +114 -0
  3. dev.ipynb +1340 -0
  4. release_date_mapping.json +457 -0
  5. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .venv/
app.py ADDED
@@ -0,0 +1,114 @@
+ import pickle
+
+ import pandas as pd
+ import gradio as gr
+ from huggingface_hub import HfFileSystem, hf_hub_download
+
+ if gr.NO_RELOAD:
+     ###################
+     ### Load Data
+     ###################
+
+     key_to_category_name = {
+         "full": "Overall",
+         "coding": "Coding",
+         "long_user": "Longer Query",
+         "english": "English",
+         "chinese": "Chinese",
+         "french": "French",
+         "no_tie": "Exclude Ties",
+         "no_short": "Exclude Short Query (< 5 tokens)",
+         "no_refusal": "Exclude Refusal",
+     }
+     cat_name_to_explanation = {
+         "Overall": "Overall Questions",
+         "Coding": "Coding: whether conversation contains code snippets",
+         "Longer Query": "Longer Query (>= 500 tokens)",
+         "English": "English Prompts",
+         "Chinese": "Chinese Prompts",
+         "French": "French Prompts",
+         "Exclude Ties": "Exclude Ties and Bothbad",
+         "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
+         "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
+     }
+
+     fs = HfFileSystem()
+
+     def extract_date(filename):
+         return filename.split("/")[-1].split(".")[0].split("_")[-1]
+
+     # gather ELO data
+     ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
+     elo_files = fs.glob(ELO_DATA_FILES)
+     latest_elo_file = sorted(elo_files, key=extract_date, reverse=True)[0]
+
+     latest_elo_file_local = hf_hub_download(
+         repo_id="lmsys/chatbot-arena-leaderboard",
+         filename=latest_elo_file.split("/")[-1],
+         repo_type="space",
+     )
+
+     with open(latest_elo_file_local, "rb") as fin:
+         elo_results = pickle.load(fin)
+
+     arena_dfs = {}
+     for k in key_to_category_name.keys():
+         if k not in elo_results:
+             continue
+         arena_dfs[key_to_category_name[k]] = elo_results[k]["leaderboard_table_df"]
+
+     # gather open llm leaderboard data
+     LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
+     leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
+     latest_leaderboard_file = sorted(leaderboard_files, key=extract_date, reverse=True)[
+         0
+     ]
+
+     latest_leaderboard_file_local = hf_hub_download(
+         repo_id="lmsys/chatbot-arena-leaderboard",
+         filename=latest_leaderboard_file.split("/")[-1],
+         repo_type="space",
+     )
+     leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
+
+     ###################
+     ### Prepare Data
+     ###################
+
+     # merge leaderboard data with ELO data
+     merged_dfs = {}
+     for k, v in arena_dfs.items():
+         merged_dfs[k] = (
+             pd.merge(arena_dfs[k], leaderboard_df, left_index=True, right_on="key")
+             .sort_values("rating", ascending=False)
+             .reset_index(drop=True)
+         )
+
+     # add release dates into the merged data
+     release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
+     for k, v in merged_dfs.items():
+         merged_dfs[k] = pd.merge(
+             merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
+         )
+     df = merged_dfs["Overall"]
+     y_min = df["rating"].min()
+     y_max = df["rating"].max()
+     y_buffer = (y_max - y_min) * 0.1
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Chatbot Arena Leaderboard")
+     with gr.Row():
+         gr.ScatterPlot(
+             df,
+             title="hello",
+             x="Release Date",
+             y="rating",
+             tooltip=["Model", "rating", "num_battles", "Organization", "License"],
+             width=1000,
+             height=700,
+             x_label_angle=-45,
+             y_lim=[y_min - y_buffer, y_max + y_buffer],
+         )
+
+ if __name__ == "__main__":
+     demo.launch()
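The file-selection logic above leans entirely on the upstream Space naming its artifacts with a trailing `YYYYMMDD` stamp (e.g. `elo_results_20240426.pkl`), so a plain lexicographic sort of the extracted suffix picks the newest file. A minimal, self-contained sketch of that behavior (the paths below are hypothetical examples of the same naming pattern, not files actually read from the Hub):

```python
# Illustrative sketch only: how extract_date() plus sorted(reverse=True)
# select the newest artifact. The paths are hypothetical examples of the
# "<name>_<YYYYMMDD>.<ext>" convention used by the lmsys Space.
def extract_date(filename):
    return filename.split("/")[-1].split(".")[0].split("_")[-1]

files = [
    "spaces/lmsys/chatbot-arena-leaderboard/elo_results_20240327.pkl",
    "spaces/lmsys/chatbot-arena-leaderboard/elo_results_20240426.pkl",
]

latest = sorted(files, key=extract_date, reverse=True)[0]
print(extract_date(latest))  # "20240426" (YYYYMMDD strings sort chronologically)
print(latest)                # path of elo_results_20240426.pkl
```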
dev.ipynb ADDED
@@ -0,0 +1,1340 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 47,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import pickle\n",
11
+ "\n",
12
+ "import pandas as pd\n",
13
+ "from huggingface_hub import HfFileSystem, hf_hub_download"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "markdown",
18
+ "metadata": {},
19
+ "source": [
20
+ "## Prepare data"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 72,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "fs = HfFileSystem()\n",
30
+ "\n",
31
+ "\n",
32
+ "def extract_date(filename):\n",
33
+ " return filename.split(\"/\")[-1].split(\".\")[0].split(\"_\")[-1]\n",
34
+ "\n",
35
+ "\n",
36
+ "ELO_DATA_FILES = \"spaces/lmsys/chatbot-arena-leaderboard/*.pkl\"\n",
37
+ "elo_files = fs.glob(ELO_DATA_FILES)\n",
38
+ "latest_elo_file = sorted(elo_files, key=extract_date, reverse=True)[0]\n",
39
+ "\n",
40
+ "LEADERBOARD_DATA_FILES = \"spaces/lmsys/chatbot-arena-leaderboard/*.csv\"\n",
41
+ "leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)\n",
42
+ "latest_leaderboard_file = sorted(leaderboard_files, key=extract_date, reverse=True)[0]"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 73,
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "data": {
52
+ "text/plain": [
53
+ "('leaderboard_table_20240426.csv', 'elo_results_20240426.pkl')"
54
+ ]
55
+ },
56
+ "execution_count": 73,
57
+ "metadata": {},
58
+ "output_type": "execute_result"
59
+ }
60
+ ],
61
+ "source": [
62
+ "latest_leaderboard_file.split(\"/\")[-1], latest_elo_file.split(\"/\")[-1]"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 74,
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "latest_elo_file_local = hf_hub_download(\n",
72
+ " repo_id=\"lmsys/chatbot-arena-leaderboard\",\n",
73
+ " filename=latest_elo_file.split(\"/\")[-1],\n",
74
+ " repo_type=\"space\",\n",
75
+ ")\n",
76
+ "latest_leaderboard_file_local = hf_hub_download(\n",
77
+ " repo_id=\"lmsys/chatbot-arena-leaderboard\",\n",
78
+ " filename=latest_leaderboard_file.split(\"/\")[-1],\n",
79
+ " repo_type=\"space\",\n",
80
+ ")"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 76,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "# load and prepare ELO data\n",
90
+ "key_to_category_name = {\n",
91
+ " \"full\": \"Overall\",\n",
92
+ " \"coding\": \"Coding\",\n",
93
+ " \"long_user\": \"Longer Query\",\n",
94
+ " \"english\": \"English\",\n",
95
+ " \"chinese\": \"Chinese\",\n",
96
+ " \"french\": \"French\",\n",
97
+ " \"no_tie\": \"Exclude Ties\",\n",
98
+ " \"no_short\": \"Exclude Short Query (< 5 tokens)\",\n",
99
+ " \"no_refusal\": \"Exclude Refusal\",\n",
100
+ "}\n",
101
+ "cat_name_to_explanation = {\n",
102
+ " \"Overall\": \"Overall Questions\",\n",
103
+ " \"Coding\": \"Coding: whether conversation contains code snippets\",\n",
104
+ " \"Longer Query\": \"Longer Query (>= 500 tokens)\",\n",
105
+ " \"English\": \"English Prompts\",\n",
106
+ " \"Chinese\": \"Chinese Prompts\",\n",
107
+ " \"French\": \"French Prompts\",\n",
108
+ " \"Exclude Ties\": \"Exclude Ties and Bothbad\",\n",
109
+ " \"Exclude Short Query (< 5 tokens)\": \"Exclude Short User Query (< 5 tokens)\",\n",
110
+ " \"Exclude Refusal\": 'Exclude model responses with refusal (e.g., \"I cannot answer\")',\n",
111
+ "}\n",
112
+ "\n",
113
+ "with open(latest_elo_file_local, \"rb\") as fin:\n",
114
+ " elo_results = pickle.load(fin)\n",
115
+ "\n",
116
+ "arena_dfs = {}\n",
117
+ "for k in key_to_category_name.keys():\n",
118
+ " if k not in elo_results:\n",
119
+ " continue\n",
120
+ " arena_dfs[key_to_category_name[k]] = elo_results[k][\"leaderboard_table_df\"]"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 77,
126
+ "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "data": {
130
+ "text/plain": [
131
+ "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
132
+ ]
133
+ },
134
+ "execution_count": 77,
135
+ "metadata": {},
136
+ "output_type": "execute_result"
137
+ }
138
+ ],
139
+ "source": [
140
+ "arena_dfs.keys()"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": 78,
146
+ "metadata": {},
147
+ "outputs": [
148
+ {
149
+ "data": {
150
+ "text/html": [
151
+ "<div>\n",
152
+ "<style scoped>\n",
153
+ " .dataframe tbody tr th:only-of-type {\n",
154
+ " vertical-align: middle;\n",
155
+ " }\n",
156
+ "\n",
157
+ " .dataframe tbody tr th {\n",
158
+ " vertical-align: top;\n",
159
+ " }\n",
160
+ "\n",
161
+ " .dataframe thead th {\n",
162
+ " text-align: right;\n",
163
+ " }\n",
164
+ "</style>\n",
165
+ "<table border=\"1\" class=\"dataframe\">\n",
166
+ " <thead>\n",
167
+ " <tr style=\"text-align: right;\">\n",
168
+ " <th></th>\n",
169
+ " <th>rating</th>\n",
170
+ " <th>variance</th>\n",
171
+ " <th>rating_q975</th>\n",
172
+ " <th>rating_q025</th>\n",
173
+ " <th>num_battles</th>\n",
174
+ " <th>final_ranking</th>\n",
175
+ " </tr>\n",
176
+ " </thead>\n",
177
+ " <tbody>\n",
178
+ " <tr>\n",
179
+ " <th>RWKV-4-Raven-14B</th>\n",
180
+ " <td>927.710294</td>\n",
181
+ " <td>27.143015</td>\n",
182
+ " <td>935.717850</td>\n",
183
+ " <td>916.546369</td>\n",
184
+ " <td>5129</td>\n",
185
+ " <td>81</td>\n",
186
+ " </tr>\n",
187
+ " <tr>\n",
188
+ " <th>alpaca-13b</th>\n",
189
+ " <td>907.324482</td>\n",
190
+ " <td>20.736682</td>\n",
191
+ " <td>915.536856</td>\n",
192
+ " <td>899.330070</td>\n",
193
+ " <td>6111</td>\n",
194
+ " <td>85</td>\n",
195
+ " </tr>\n",
196
+ " <tr>\n",
197
+ " <th>bard-jan-24-gemini-pro</th>\n",
198
+ " <td>1208.505408</td>\n",
199
+ " <td>6.679087</td>\n",
200
+ " <td>1213.291358</td>\n",
201
+ " <td>1203.926901</td>\n",
202
+ " <td>12388</td>\n",
203
+ " <td>6</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>chatglm-6b</th>\n",
207
+ " <td>886.107553</td>\n",
208
+ " <td>17.110417</td>\n",
209
+ " <td>894.034333</td>\n",
210
+ " <td>878.094776</td>\n",
211
+ " <td>5195</td>\n",
212
+ " <td>86</td>\n",
213
+ " </tr>\n",
214
+ " <tr>\n",
215
+ " <th>chatglm2-6b</th>\n",
216
+ " <td>932.678460</td>\n",
217
+ " <td>33.530570</td>\n",
218
+ " <td>943.455598</td>\n",
219
+ " <td>921.346322</td>\n",
220
+ " <td>2880</td>\n",
221
+ " <td>81</td>\n",
222
+ " </tr>\n",
223
+ " <tr>\n",
224
+ " <th>...</th>\n",
225
+ " <td>...</td>\n",
226
+ " <td>...</td>\n",
227
+ " <td>...</td>\n",
228
+ " <td>...</td>\n",
229
+ " <td>...</td>\n",
230
+ " <td>...</td>\n",
231
+ " </tr>\n",
232
+ " <tr>\n",
233
+ " <th>wizardlm-70b</th>\n",
234
+ " <td>1107.992552</td>\n",
235
+ " <td>9.385887</td>\n",
236
+ " <td>1114.218223</td>\n",
237
+ " <td>1102.655575</td>\n",
238
+ " <td>8868</td>\n",
239
+ " <td>29</td>\n",
240
+ " </tr>\n",
241
+ " <tr>\n",
242
+ " <th>yi-34b-chat</th>\n",
243
+ " <td>1109.722447</td>\n",
244
+ " <td>8.596908</td>\n",
245
+ " <td>1115.182579</td>\n",
246
+ " <td>1103.991095</td>\n",
247
+ " <td>12252</td>\n",
248
+ " <td>29</td>\n",
249
+ " </tr>\n",
250
+ " <tr>\n",
251
+ " <th>zephyr-7b-alpha</th>\n",
252
+ " <td>1042.108710</td>\n",
253
+ " <td>43.900714</td>\n",
254
+ " <td>1052.991768</td>\n",
255
+ " <td>1027.160917</td>\n",
256
+ " <td>1901</td>\n",
257
+ " <td>58</td>\n",
258
+ " </tr>\n",
259
+ " <tr>\n",
260
+ " <th>zephyr-7b-beta</th>\n",
261
+ " <td>1053.655680</td>\n",
262
+ " <td>10.297607</td>\n",
263
+ " <td>1059.923254</td>\n",
264
+ " <td>1047.601629</td>\n",
265
+ " <td>11924</td>\n",
266
+ " <td>54</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>zephyr-orpo-141b-A35b-v0.1</th>\n",
270
+ " <td>1124.677515</td>\n",
271
+ " <td>22.288515</td>\n",
272
+ " <td>1132.728887</td>\n",
273
+ " <td>1113.848432</td>\n",
274
+ " <td>4276</td>\n",
275
+ " <td>22</td>\n",
276
+ " </tr>\n",
277
+ " </tbody>\n",
278
+ "</table>\n",
279
+ "<p>91 rows × 6 columns</p>\n",
280
+ "</div>"
281
+ ],
282
+ "text/plain": [
283
+ " rating variance rating_q975 rating_q025 \\\n",
284
+ "RWKV-4-Raven-14B 927.710294 27.143015 935.717850 916.546369 \n",
285
+ "alpaca-13b 907.324482 20.736682 915.536856 899.330070 \n",
286
+ "bard-jan-24-gemini-pro 1208.505408 6.679087 1213.291358 1203.926901 \n",
287
+ "chatglm-6b 886.107553 17.110417 894.034333 878.094776 \n",
288
+ "chatglm2-6b 932.678460 33.530570 943.455598 921.346322 \n",
289
+ "... ... ... ... ... \n",
290
+ "wizardlm-70b 1107.992552 9.385887 1114.218223 1102.655575 \n",
291
+ "yi-34b-chat 1109.722447 8.596908 1115.182579 1103.991095 \n",
292
+ "zephyr-7b-alpha 1042.108710 43.900714 1052.991768 1027.160917 \n",
293
+ "zephyr-7b-beta 1053.655680 10.297607 1059.923254 1047.601629 \n",
294
+ "zephyr-orpo-141b-A35b-v0.1 1124.677515 22.288515 1132.728887 1113.848432 \n",
295
+ "\n",
296
+ " num_battles final_ranking \n",
297
+ "RWKV-4-Raven-14B 5129 81 \n",
298
+ "alpaca-13b 6111 85 \n",
299
+ "bard-jan-24-gemini-pro 12388 6 \n",
300
+ "chatglm-6b 5195 86 \n",
301
+ "chatglm2-6b 2880 81 \n",
302
+ "... ... ... \n",
303
+ "wizardlm-70b 8868 29 \n",
304
+ "yi-34b-chat 12252 29 \n",
305
+ "zephyr-7b-alpha 1901 58 \n",
306
+ "zephyr-7b-beta 11924 54 \n",
307
+ "zephyr-orpo-141b-A35b-v0.1 4276 22 \n",
308
+ "\n",
309
+ "[91 rows x 6 columns]"
310
+ ]
311
+ },
312
+ "execution_count": 78,
313
+ "metadata": {},
314
+ "output_type": "execute_result"
315
+ }
316
+ ],
317
+ "source": [
318
+ "arena_dfs[\"Overall\"]"
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": 79,
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": [
327
+ "# load and prepare Leaderboard data\n",
328
+ "leaderboard_df = pd.read_csv(latest_leaderboard_file_local)"
329
+ ]
330
+ },
331
+ {
332
+ "cell_type": "code",
333
+ "execution_count": 80,
334
+ "metadata": {},
335
+ "outputs": [
336
+ {
337
+ "data": {
338
+ "text/html": [
339
+ "<div>\n",
340
+ "<style scoped>\n",
341
+ " .dataframe tbody tr th:only-of-type {\n",
342
+ " vertical-align: middle;\n",
343
+ " }\n",
344
+ "\n",
345
+ " .dataframe tbody tr th {\n",
346
+ " vertical-align: top;\n",
347
+ " }\n",
348
+ "\n",
349
+ " .dataframe thead th {\n",
350
+ " text-align: right;\n",
351
+ " }\n",
352
+ "</style>\n",
353
+ "<table border=\"1\" class=\"dataframe\">\n",
354
+ " <thead>\n",
355
+ " <tr style=\"text-align: right;\">\n",
356
+ " <th></th>\n",
357
+ " <th>key</th>\n",
358
+ " <th>Model</th>\n",
359
+ " <th>MT-bench (score)</th>\n",
360
+ " <th>MMLU</th>\n",
361
+ " <th>Knowledge cutoff date</th>\n",
362
+ " <th>License</th>\n",
363
+ " <th>Organization</th>\n",
364
+ " <th>Link</th>\n",
365
+ " </tr>\n",
366
+ " </thead>\n",
367
+ " <tbody>\n",
368
+ " <tr>\n",
369
+ " <th>0</th>\n",
370
+ " <td>wizardlm-30b</td>\n",
371
+ " <td>WizardLM-30B</td>\n",
372
+ " <td>7.01</td>\n",
373
+ " <td>0.587</td>\n",
374
+ " <td>2023/6</td>\n",
375
+ " <td>Non-commercial</td>\n",
376
+ " <td>Microsoft</td>\n",
377
+ " <td>https://huggingface.co/WizardLM/WizardLM-30B-V1.0</td>\n",
378
+ " </tr>\n",
379
+ " <tr>\n",
380
+ " <th>1</th>\n",
381
+ " <td>vicuna-13b-16k</td>\n",
382
+ " <td>Vicuna-13B-16k</td>\n",
383
+ " <td>6.92</td>\n",
384
+ " <td>0.545</td>\n",
385
+ " <td>2023/7</td>\n",
386
+ " <td>Llama 2 Community</td>\n",
387
+ " <td>LMSYS</td>\n",
388
+ " <td>https://huggingface.co/lmsys/vicuna-13b-v1.5-16k</td>\n",
389
+ " </tr>\n",
390
+ " <tr>\n",
391
+ " <th>2</th>\n",
392
+ " <td>wizardlm-13b-v1.1</td>\n",
393
+ " <td>WizardLM-13B-v1.1</td>\n",
394
+ " <td>6.76</td>\n",
395
+ " <td>0.500</td>\n",
396
+ " <td>2023/7</td>\n",
397
+ " <td>Non-commercial</td>\n",
398
+ " <td>Microsoft</td>\n",
399
+ " <td>https://huggingface.co/WizardLM/WizardLM-13B-V1.1</td>\n",
400
+ " </tr>\n",
401
+ " <tr>\n",
402
+ " <th>3</th>\n",
403
+ " <td>tulu-30b</td>\n",
404
+ " <td>Tulu-30B</td>\n",
405
+ " <td>6.43</td>\n",
406
+ " <td>0.581</td>\n",
407
+ " <td>2023/6</td>\n",
408
+ " <td>Non-commercial</td>\n",
409
+ " <td>AllenAI/UW</td>\n",
410
+ " <td>https://huggingface.co/allenai/tulu-30b</td>\n",
411
+ " </tr>\n",
412
+ " <tr>\n",
413
+ " <th>4</th>\n",
414
+ " <td>guanaco-65b</td>\n",
415
+ " <td>Guanaco-65B</td>\n",
416
+ " <td>6.41</td>\n",
417
+ " <td>0.621</td>\n",
418
+ " <td>2023/5</td>\n",
419
+ " <td>Non-commercial</td>\n",
420
+ " <td>UW</td>\n",
421
+ " <td>https://huggingface.co/timdettmers/guanaco-65b...</td>\n",
422
+ " </tr>\n",
423
+ " <tr>\n",
424
+ " <th>...</th>\n",
425
+ " <td>...</td>\n",
426
+ " <td>...</td>\n",
427
+ " <td>...</td>\n",
428
+ " <td>...</td>\n",
429
+ " <td>...</td>\n",
430
+ " <td>...</td>\n",
431
+ " <td>...</td>\n",
432
+ " <td>...</td>\n",
433
+ " </tr>\n",
434
+ " <tr>\n",
435
+ " <th>100</th>\n",
436
+ " <td>mixtral-8x22b-instruct-v0.1</td>\n",
437
+ " <td>Mixtral-8x22b-Instruct-v0.1</td>\n",
438
+ " <td>-</td>\n",
439
+ " <td>0.778</td>\n",
440
+ " <td>2024/4</td>\n",
441
+ " <td>Apache 2.0</td>\n",
442
+ " <td>Mistral</td>\n",
443
+ " <td>https://mistral.ai/news/mixtral-8x22b/</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>101</th>\n",
447
+ " <td>llama-3-70b-instruct</td>\n",
448
+ " <td>Llama-3-70b-Instruct</td>\n",
449
+ " <td>-</td>\n",
450
+ " <td>0.820</td>\n",
451
+ " <td>2023/12</td>\n",
452
+ " <td>Llama 3 Community</td>\n",
453
+ " <td>Meta</td>\n",
454
+ " <td>https://llama.meta.com/llama3/</td>\n",
455
+ " </tr>\n",
456
+ " <tr>\n",
457
+ " <th>102</th>\n",
458
+ " <td>llama-3-8b-instruct</td>\n",
459
+ " <td>Llama-3-8b-Instruct</td>\n",
460
+ " <td>-</td>\n",
461
+ " <td>0.684</td>\n",
462
+ " <td>2023/3</td>\n",
463
+ " <td>Llama 3 Community</td>\n",
464
+ " <td>Meta</td>\n",
465
+ " <td>https://llama.meta.com/llama3/</td>\n",
466
+ " </tr>\n",
467
+ " <tr>\n",
468
+ " <th>103</th>\n",
469
+ " <td>gemini-1.5-pro-api-0409-preview</td>\n",
470
+ " <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
471
+ " <td>-</td>\n",
472
+ " <td>0.819</td>\n",
473
+ " <td>2023/11</td>\n",
474
+ " <td>Proprietary</td>\n",
475
+ " <td>Google</td>\n",
476
+ " <td>https://blog.google/technology/ai/google-gemin...</td>\n",
477
+ " </tr>\n",
478
+ " <tr>\n",
479
+ " <th>104</th>\n",
480
+ " <td>phi-3-mini-128k-instruct</td>\n",
481
+ " <td>Phi-3-Mini-128k-Instruct</td>\n",
482
+ " <td>-</td>\n",
483
+ " <td>0.681</td>\n",
484
+ " <td>2023/10</td>\n",
485
+ " <td>MIT</td>\n",
486
+ " <td>Microsoft</td>\n",
487
+ " <td>https://azure.microsoft.com/en-us/blog/introdu...</td>\n",
488
+ " </tr>\n",
489
+ " </tbody>\n",
490
+ "</table>\n",
491
+ "<p>105 rows × 8 columns</p>\n",
492
+ "</div>"
493
+ ],
494
+ "text/plain": [
495
+ " key Model \\\n",
496
+ "0 wizardlm-30b WizardLM-30B \n",
497
+ "1 vicuna-13b-16k Vicuna-13B-16k \n",
498
+ "2 wizardlm-13b-v1.1 WizardLM-13B-v1.1 \n",
499
+ "3 tulu-30b Tulu-30B \n",
500
+ "4 guanaco-65b Guanaco-65B \n",
501
+ ".. ... ... \n",
502
+ "100 mixtral-8x22b-instruct-v0.1 Mixtral-8x22b-Instruct-v0.1 \n",
503
+ "101 llama-3-70b-instruct Llama-3-70b-Instruct \n",
504
+ "102 llama-3-8b-instruct Llama-3-8b-Instruct \n",
505
+ "103 gemini-1.5-pro-api-0409-preview Gemini 1.5 Pro API-0409-Preview \n",
506
+ "104 phi-3-mini-128k-instruct Phi-3-Mini-128k-Instruct \n",
507
+ "\n",
508
+ " MT-bench (score) MMLU Knowledge cutoff date License \\\n",
509
+ "0 7.01 0.587 2023/6 Non-commercial \n",
510
+ "1 6.92 0.545 2023/7 Llama 2 Community \n",
511
+ "2 6.76 0.500 2023/7 Non-commercial \n",
512
+ "3 6.43 0.581 2023/6 Non-commercial \n",
513
+ "4 6.41 0.621 2023/5 Non-commercial \n",
514
+ ".. ... ... ... ... \n",
515
+ "100 - 0.778 2024/4 Apache 2.0 \n",
516
+ "101 - 0.820 2023/12 Llama 3 Community \n",
517
+ "102 - 0.684 2023/3 Llama 3 Community \n",
518
+ "103 - 0.819 2023/11 Proprietary \n",
519
+ "104 - 0.681 2023/10 MIT \n",
520
+ "\n",
521
+ " Organization Link \n",
522
+ "0 Microsoft https://huggingface.co/WizardLM/WizardLM-30B-V1.0 \n",
523
+ "1 LMSYS https://huggingface.co/lmsys/vicuna-13b-v1.5-16k \n",
524
+ "2 Microsoft https://huggingface.co/WizardLM/WizardLM-13B-V1.1 \n",
525
+ "3 AllenAI/UW https://huggingface.co/allenai/tulu-30b \n",
526
+ "4 UW https://huggingface.co/timdettmers/guanaco-65b... \n",
527
+ ".. ... ... \n",
528
+ "100 Mistral https://mistral.ai/news/mixtral-8x22b/ \n",
529
+ "101 Meta https://llama.meta.com/llama3/ \n",
530
+ "102 Meta https://llama.meta.com/llama3/ \n",
531
+ "103 Google https://blog.google/technology/ai/google-gemin... \n",
532
+ "104 Microsoft https://azure.microsoft.com/en-us/blog/introdu... \n",
533
+ "\n",
534
+ "[105 rows x 8 columns]"
535
+ ]
536
+ },
537
+ "execution_count": 80,
538
+ "metadata": {},
539
+ "output_type": "execute_result"
540
+ }
541
+ ],
542
+ "source": [
543
+ "leaderboard_df"
544
+ ]
545
+ },
546
+ {
547
+ "cell_type": "code",
548
+ "execution_count": 82,
549
+ "metadata": {},
550
+ "outputs": [
551
+ {
552
+ "data": {
553
+ "text/plain": [
554
+ "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
555
+ ]
556
+ },
557
+ "execution_count": 82,
558
+ "metadata": {},
559
+ "output_type": "execute_result"
560
+ }
561
+ ],
562
+ "source": [
563
+ "arena_dfs.keys()"
564
+ ]
565
+ },
566
+ {
567
+ "cell_type": "code",
568
+ "execution_count": 86,
569
+ "metadata": {},
570
+ "outputs": [],
571
+ "source": [
572
+ "# merge ELO and Leaderboard data\n",
573
+ "merged_dfs = {}\n",
574
+ "for k, v in arena_dfs.items():\n",
575
+ " merged_dfs[k] = (\n",
576
+ " pd.merge(arena_dfs[k], leaderboard_df, left_index=True, right_on=\"key\")\n",
577
+ " .sort_values(\"rating\", ascending=False)\n",
578
+ " .reset_index(drop=True)\n",
579
+ " )"
580
+ ]
581
+ },
582
+ {
583
+ "cell_type": "code",
584
+ "execution_count": 101,
585
+ "metadata": {},
586
+ "outputs": [
587
+ {
588
+ "data": {
589
+ "text/html": [
590
+ "<div>\n",
591
+ "<style scoped>\n",
592
+ " .dataframe tbody tr th:only-of-type {\n",
593
+ " vertical-align: middle;\n",
594
+ " }\n",
595
+ "\n",
596
+ " .dataframe tbody tr th {\n",
597
+ " vertical-align: top;\n",
598
+ " }\n",
599
+ "\n",
600
+ " .dataframe thead th {\n",
601
+ " text-align: right;\n",
602
+ " }\n",
603
+ "</style>\n",
604
+ "<table border=\"1\" class=\"dataframe\">\n",
605
+ " <thead>\n",
606
+ " <tr style=\"text-align: right;\">\n",
607
+ " <th></th>\n",
608
+ " <th>rating</th>\n",
609
+ " <th>variance</th>\n",
610
+ " <th>rating_q975</th>\n",
611
+ " <th>rating_q025</th>\n",
612
+ " <th>num_battles</th>\n",
613
+ " <th>final_ranking</th>\n",
614
+ " <th>key</th>\n",
615
+ " <th>Model</th>\n",
616
+ " <th>MT-bench (score)</th>\n",
617
+ " <th>MMLU</th>\n",
618
+ " <th>Knowledge cutoff date</th>\n",
619
+ " <th>License</th>\n",
620
+ " <th>Organization</th>\n",
621
+ " <th>Link</th>\n",
622
+ " </tr>\n",
623
+ " </thead>\n",
624
+ " <tbody>\n",
625
+ " <tr>\n",
626
+ " <th>0</th>\n",
627
+ " <td>1257.399407</td>\n",
628
+ " <td>4.283316</td>\n",
629
+ " <td>1261.676224</td>\n",
630
+ " <td>1254.003626</td>\n",
631
+ " <td>30562</td>\n",
632
+ " <td>1</td>\n",
633
+ " <td>gpt-4-turbo-2024-04-09</td>\n",
634
+ " <td>GPT-4-Turbo-2024-04-09</td>\n",
635
+ " <td>-</td>\n",
636
+ " <td>-</td>\n",
637
+ " <td>2023/12</td>\n",
638
+ " <td>Proprietary</td>\n",
639
+ " <td>OpenAI</td>\n",
640
+ " <td>https://platform.openai.com/docs/models/gpt-4-...</td>\n",
641
+ " </tr>\n",
642
+ " <tr>\n",
643
+ " <th>1</th>\n",
644
+ " <td>1253.025095</td>\n",
645
+ " <td>2.069534</td>\n",
646
+ " <td>1256.111392</td>\n",
647
+ " <td>1250.435207</td>\n",
648
+ " <td>69871</td>\n",
649
+ " <td>1</td>\n",
650
+ " <td>gpt-4-1106-preview</td>\n",
651
+ " <td>GPT-4-1106-preview</td>\n",
652
+ " <td>9.32</td>\n",
653
+ " <td>-</td>\n",
654
+ " <td>2023/4</td>\n",
655
+ " <td>Proprietary</td>\n",
656
+ " <td>OpenAI</td>\n",
657
+ " <td>https://openai.com/blog/new-models-and-develop...</td>\n",
658
+ " </tr>\n",
659
+ " <tr>\n",
660
+ " <th>2</th>\n",
661
+ " <td>1251.114220</td>\n",
662
+ " <td>1.862842</td>\n",
663
+ " <td>1253.629093</td>\n",
664
+ " <td>1248.362042</td>\n",
665
+ " <td>75684</td>\n",
666
+ " <td>2</td>\n",
667
+ " <td>claude-3-opus-20240229</td>\n",
668
+ " <td>Claude 3 Opus</td>\n",
669
+ " <td>-</td>\n",
670
+ " <td>0.868</td>\n",
671
+ " <td>2023/8</td>\n",
672
+ " <td>Proprietary</td>\n",
673
+ " <td>Anthropic</td>\n",
674
+ " <td>https://www.anthropic.com/news/claude-3-family</td>\n",
675
+ " </tr>\n",
676
+ " <tr>\n",
677
+ " <th>3</th>\n",
678
+ " <td>1247.662508</td>\n",
679
+ " <td>3.263747</td>\n",
680
+ " <td>1251.582645</td>\n",
681
+ " <td>1244.380454</td>\n",
682
+ " <td>33723</td>\n",
683
+ " <td>2</td>\n",
684
+ " <td>gemini-1.5-pro-api-0409-preview</td>\n",
685
+ " <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
686
+ " <td>-</td>\n",
687
+ " <td>0.819</td>\n",
688
+ " <td>2023/11</td>\n",
689
+ " <td>Proprietary</td>\n",
690
+ " <td>Google</td>\n",
691
+ " <td>https://blog.google/technology/ai/google-gemin...</td>\n",
692
+ " </tr>\n",
693
+ " <tr>\n",
694
+ " <th>4</th>\n",
695
+ " <td>1247.277052</td>\n",
696
+ " <td>1.923014</td>\n",
697
+ " <td>1249.489411</td>\n",
698
+ " <td>1244.340257</td>\n",
699
+ " <td>61924</td>\n",
700
+ " <td>3</td>\n",
701
+ " <td>gpt-4-0125-preview</td>\n",
702
+ " <td>GPT-4-0125-preview</td>\n",
703
+ " <td>-</td>\n",
704
+ " <td>-</td>\n",
705
+ " <td>2023/12</td>\n",
706
+ " <td>Proprietary</td>\n",
707
+ " <td>OpenAI</td>\n",
708
+ " <td>https://openai.com/blog/new-models-and-develop...</td>\n",
709
+ " </tr>\n",
710
+ " <tr>\n",
711
+ " <th>5</th>\n",
712
+ " <td>1208.505408</td>\n",
713
+ " <td>6.679087</td>\n",
714
+ " <td>1213.291358</td>\n",
715
+ " <td>1203.926901</td>\n",
716
+ " <td>12388</td>\n",
717
+ " <td>6</td>\n",
718
+ " <td>bard-jan-24-gemini-pro</td>\n",
719
+ " <td>Bard (Gemini Pro)</td>\n",
720
+ " <td>-</td>\n",
721
+ " <td>-</td>\n",
722
+ " <td>Online</td>\n",
723
+ " <td>Proprietary</td>\n",
724
+ " <td>Google</td>\n",
725
+ " <td>https://bard.google.com/</td>\n",
726
+ " </tr>\n",
727
+ " <tr>\n",
728
+ " <th>6</th>\n",
729
+ " <td>1207.497541</td>\n",
730
+ " <td>4.109466</td>\n",
731
+ " <td>1211.720734</td>\n",
732
+ " <td>1203.322762</td>\n",
733
+ " <td>27298</td>\n",
734
+ " <td>6</td>\n",
735
+ " <td>llama-3-70b-instruct</td>\n",
736
+ " <td>Llama-3-70b-Instruct</td>\n",
737
+ " <td>-</td>\n",
738
+ " <td>0.820</td>\n",
739
+ " <td>2023/12</td>\n",
740
+ " <td>Llama 3 Community</td>\n",
741
+ " <td>Meta</td>\n",
742
+ " <td>https://llama.meta.com/llama3/</td>\n",
743
+ " </tr>\n",
744
+ " <tr>\n",
745
+ " <th>7</th>\n",
746
+ " <td>1201.671254</td>\n",
747
+ " <td>2.525563</td>\n",
748
+ " <td>1204.862512</td>\n",
749
+ " <td>1198.658822</td>\n",
750
+ " <td>75418</td>\n",
751
+ " <td>6</td>\n",
752
+ " <td>claude-3-sonnet-20240229</td>\n",
753
+ " <td>Claude 3 Sonnet</td>\n",
754
+ " <td>-</td>\n",
755
+ " <td>0.790</td>\n",
756
+ " <td>2023/8</td>\n",
757
+ " <td>Proprietary</td>\n",
758
+ " <td>Anthropic</td>\n",
759
+ " <td>https://www.anthropic.com/news/claude-3-family</td>\n",
760
+ " </tr>\n",
761
+ " <tr>\n",
762
+ " <th>8</th>\n",
763
+ " <td>1191.684542</td>\n",
764
+ " <td>3.459717</td>\n",
765
+ " <td>1195.080256</td>\n",
766
+ " <td>1188.222382</td>\n",
767
+ " <td>41262</td>\n",
768
+ " <td>9</td>\n",
769
+ " <td>command-r-plus</td>\n",
770
+ " <td>Command R+</td>\n",
771
+ " <td>-</td>\n",
772
+ " <td>-</td>\n",
773
+ " <td>2024/3</td>\n",
774
+ " <td>CC-BY-NC-4.0</td>\n",
775
+ " <td>Cohere</td>\n",
776
+ " <td>https://txt.cohere.com/command-r-plus-microsof...</td>\n",
777
+ " </tr>\n",
778
+ " <tr>\n",
779
+ " <th>9</th>\n",
780
+ " <td>1188.987389</td>\n",
781
+ " <td>3.124792</td>\n",
782
+ " <td>1193.335535</td>\n",
783
+ " <td>1185.935928</td>\n",
784
+ " <td>48390</td>\n",
785
+ " <td>9</td>\n",
786
+ " <td>gpt-4-0314</td>\n",
787
+ " <td>GPT-4-0314</td>\n",
788
+ " <td>8.96</td>\n",
789
+ " <td>0.864</td>\n",
790
+ " <td>2021/9</td>\n",
791
+ " <td>Proprietary</td>\n",
792
+ " <td>OpenAI</td>\n",
793
+ " <td>https://openai.com/research/gpt-4</td>\n",
794
+ " </tr>\n",
795
+ " <tr>\n",
796
+ " <th>10</th>\n",
797
+ " <td>1180.606870</td>\n",
798
+ " <td>3.097542</td>\n",
799
+ " <td>1183.825403</td>\n",
800
+ " <td>1177.255203</td>\n",
801
+ " <td>66065</td>\n",
802
+ " <td>11</td>\n",
803
+ " <td>claude-3-haiku-20240307</td>\n",
804
+ " <td>Claude 3 Haiku</td>\n",
805
+ " <td>-</td>\n",
806
+ " <td>0.752</td>\n",
807
+ " <td>2023/8</td>\n",
808
+ " <td>Proprietary</td>\n",
809
+ " <td>Anthropic</td>\n",
810
+ " <td>https://www.anthropic.com/news/claude-3-family</td>\n",
811
+ " </tr>\n",
812
+ " <tr>\n",
813
+ " <th>11</th>\n",
814
+ " <td>1164.896561</td>\n",
815
+ " <td>2.585577</td>\n",
816
+ " <td>1167.595696</td>\n",
817
+ " <td>1161.727454</td>\n",
818
+ " <td>67038</td>\n",
819
+ " <td>12</td>\n",
820
+ " <td>gpt-4-0613</td>\n",
821
+ " <td>GPT-4-0613</td>\n",
822
+ " <td>9.18</td>\n",
823
+ " <td>-</td>\n",
824
+ " <td>2021/9</td>\n",
825
+ " <td>Proprietary</td>\n",
826
+ " <td>OpenAI</td>\n",
827
+ " <td>https://platform.openai.com/docs/models/gpt-4-...</td>\n",
828
+ " </tr>\n",
829
+ " <tr>\n",
830
+ " <th>12</th>\n",
831
+ " <td>1157.638992</td>\n",
832
+ " <td>2.541320</td>\n",
833
+ " <td>1160.496116</td>\n",
834
+ " <td>1154.927748</td>\n",
835
+ " <td>44120</td>\n",
836
+ " <td>13</td>\n",
837
+ " <td>mistral-large-2402</td>\n",
838
+ " <td>Mistral-Large-2402</td>\n",
839
+ " <td>-</td>\n",
840
+ " <td>0.812</td>\n",
841
+ " <td>-</td>\n",
842
+ " <td>Proprietary</td>\n",
843
+ " <td>Mistral</td>\n",
844
+ " <td>https://mistral.ai/news/mistral-large/</td>\n",
845
+ " </tr>\n",
846
+ " <tr>\n",
847
+ " <th>13</th>\n",
848
+ " <td>1153.464280</td>\n",
849
+ " <td>3.631512</td>\n",
850
+ " <td>1157.068850</td>\n",
851
+ " <td>1150.178903</td>\n",
852
+ " <td>32999</td>\n",
853
+ " <td>13</td>\n",
854
+ " <td>qwen1.5-72b-chat</td>\n",
855
+ " <td>Qwen1.5-72B-Chat</td>\n",
856
+ " <td>8.61</td>\n",
857
+ " <td>0.775</td>\n",
858
+ " <td>2024/2</td>\n",
859
+ " <td>Qianwen LICENSE</td>\n",
860
+ " <td>Alibaba</td>\n",
861
+ " <td>https://qwenlm.github.io/blog/qwen1.5/</td>\n",
862
+ " </tr>\n",
863
+ " <tr>\n",
864
+ " <th>14</th>\n",
865
+ " <td>1150.918473</td>\n",
866
+ " <td>9.062217</td>\n",
867
+ " <td>1155.969721</td>\n",
868
+ " <td>1145.229885</td>\n",
869
+ " <td>8622</td>\n",
870
+ " <td>13</td>\n",
871
+ " <td>reka-flash-21b-20240226-online</td>\n",
872
+ " <td>Reka-Flash-21B-online</td>\n",
873
+ " <td>-</td>\n",
874
+ " <td>-</td>\n",
875
+ " <td>Online</td>\n",
876
+ " <td>Proprietary</td>\n",
877
+ " <td>Reka AI</td>\n",
878
+ " <td>https://docs.reka.ai/http-api.html#generation</td>\n",
879
+ " </tr>\n",
880
+ " <tr>\n",
881
+ " <th>15</th>\n",
882
+ " <td>1150.244313</td>\n",
883
+ " <td>5.551373</td>\n",
884
+ " <td>1154.745214</td>\n",
885
+ " <td>1145.496466</td>\n",
886
+ " <td>21768</td>\n",
887
+ " <td>14</td>\n",
888
+ " <td>claude-1</td>\n",
889
+ " <td>Claude-1</td>\n",
890
+ " <td>7.90</td>\n",
891
+ " <td>0.770</td>\n",
892
+ " <td>-</td>\n",
893
+ " <td>Proprietary</td>\n",
894
+ " <td>Anthropic</td>\n",
895
+ " <td>https://www.anthropic.com/index/introducing-cl...</td>\n",
896
+ " </tr>\n",
897
+ " <tr>\n",
898
+ " <th>16</th>\n",
899
+ " <td>1149.267578</td>\n",
900
+ " <td>11.452272</td>\n",
901
+ " <td>1154.290155</td>\n",
902
+ " <td>1141.931621</td>\n",
903
+ " <td>9059</td>\n",
904
+ " <td>14</td>\n",
905
+ " <td>reka-flash-21b-20240226</td>\n",
906
+ " <td>Reka-Flash-21B</td>\n",
907
+ " <td>-</td>\n",
908
+ " <td>0.735</td>\n",
909
+ " <td>2023/11</td>\n",
910
+ " <td>Proprietary</td>\n",
911
+ " <td>Reka AI</td>\n",
912
+ " <td>https://www.reka.ai/news/reka-flash-efficient-...</td>\n",
913
+ " </tr>\n",
914
+ " <tr>\n",
915
+ " <th>17</th>\n",
916
+ " <td>1148.072155</td>\n",
917
+ " <td>3.071222</td>\n",
918
+ " <td>1151.980865</td>\n",
919
+ " <td>1144.992044</td>\n",
920
+ " <td>37413</td>\n",
921
+ " <td>14</td>\n",
922
+ " <td>command-r</td>\n",
923
+ " <td>Command R</td>\n",
924
+ " <td>-</td>\n",
925
+ " <td>-</td>\n",
926
+ " <td>2024/3</td>\n",
927
+ " <td>CC-BY-NC-4.0</td>\n",
928
+ " <td>Cohere</td>\n",
929
+ " <td>https://txt.cohere.com/command-r</td>\n",
930
+ " </tr>\n",
931
+ " <tr>\n",
932
+ " <th>18</th>\n",
933
+ " <td>1147.668325</td>\n",
934
+ " <td>3.542229</td>\n",
935
+ " <td>1150.726489</td>\n",
936
+ " <td>1143.868385</td>\n",
937
+ " <td>32738</td>\n",
938
+ " <td>14</td>\n",
939
+ " <td>mistral-medium</td>\n",
940
+ " <td>Mistral Medium</td>\n",
941
+ " <td>8.61</td>\n",
942
+ " <td>0.753</td>\n",
943
+ " <td>-</td>\n",
944
+ " <td>Proprietary</td>\n",
945
+ " <td>Mistral</td>\n",
946
+ " <td>https://mistral.ai/news/la-plateforme/</td>\n",
947
+ " </tr>\n",
948
+ " <tr>\n",
949
+ " <th>19</th>\n",
950
+ " <td>1147.473989</td>\n",
951
+ " <td>5.789710</td>\n",
952
+ " <td>1151.989352</td>\n",
953
+ " <td>1143.322918</td>\n",
954
+ " <td>17214</td>\n",
955
+ " <td>14</td>\n",
956
+ " <td>mixtral-8x22b-instruct-v0.1</td>\n",
957
+ " <td>Mixtral-8x22b-Instruct-v0.1</td>\n",
958
+ " <td>-</td>\n",
959
+ " <td>0.778</td>\n",
960
+ " <td>2024/4</td>\n",
961
+ " <td>Apache 2.0</td>\n",
962
+ " <td>Mistral</td>\n",
963
+ " <td>https://mistral.ai/news/mixtral-8x22b/</td>\n",
964
+ " </tr>\n",
965
+ " </tbody>\n",
966
+ "</table>\n",
967
+ "</div>"
968
+ ],
969
+ "text/plain": [
970
+ " rating variance rating_q975 rating_q025 num_battles \\\n",
971
+ "0 1257.399407 4.283316 1261.676224 1254.003626 30562 \n",
972
+ "1 1253.025095 2.069534 1256.111392 1250.435207 69871 \n",
973
+ "2 1251.114220 1.862842 1253.629093 1248.362042 75684 \n",
974
+ "3 1247.662508 3.263747 1251.582645 1244.380454 33723 \n",
975
+ "4 1247.277052 1.923014 1249.489411 1244.340257 61924 \n",
976
+ "5 1208.505408 6.679087 1213.291358 1203.926901 12388 \n",
977
+ "6 1207.497541 4.109466 1211.720734 1203.322762 27298 \n",
978
+ "7 1201.671254 2.525563 1204.862512 1198.658822 75418 \n",
979
+ "8 1191.684542 3.459717 1195.080256 1188.222382 41262 \n",
980
+ "9 1188.987389 3.124792 1193.335535 1185.935928 48390 \n",
981
+ "10 1180.606870 3.097542 1183.825403 1177.255203 66065 \n",
982
+ "11 1164.896561 2.585577 1167.595696 1161.727454 67038 \n",
983
+ "12 1157.638992 2.541320 1160.496116 1154.927748 44120 \n",
984
+ "13 1153.464280 3.631512 1157.068850 1150.178903 32999 \n",
985
+ "14 1150.918473 9.062217 1155.969721 1145.229885 8622 \n",
986
+ "15 1150.244313 5.551373 1154.745214 1145.496466 21768 \n",
987
+ "16 1149.267578 11.452272 1154.290155 1141.931621 9059 \n",
988
+ "17 1148.072155 3.071222 1151.980865 1144.992044 37413 \n",
989
+ "18 1147.668325 3.542229 1150.726489 1143.868385 32738 \n",
990
+ "19 1147.473989 5.789710 1151.989352 1143.322918 17214 \n",
991
+ "\n",
992
+ " final_ranking key \\\n",
993
+ "0 1 gpt-4-turbo-2024-04-09 \n",
994
+ "1 1 gpt-4-1106-preview \n",
995
+ "2 2 claude-3-opus-20240229 \n",
996
+ "3 2 gemini-1.5-pro-api-0409-preview \n",
997
+ "4 3 gpt-4-0125-preview \n",
998
+ "5 6 bard-jan-24-gemini-pro \n",
999
+ "6 6 llama-3-70b-instruct \n",
1000
+ "7 6 claude-3-sonnet-20240229 \n",
1001
+ "8 9 command-r-plus \n",
1002
+ "9 9 gpt-4-0314 \n",
1003
+ "10 11 claude-3-haiku-20240307 \n",
1004
+ "11 12 gpt-4-0613 \n",
1005
+ "12 13 mistral-large-2402 \n",
1006
+ "13 13 qwen1.5-72b-chat \n",
1007
+ "14 13 reka-flash-21b-20240226-online \n",
1008
+ "15 14 claude-1 \n",
1009
+ "16 14 reka-flash-21b-20240226 \n",
1010
+ "17 14 command-r \n",
1011
+ "18 14 mistral-medium \n",
1012
+ "19 14 mixtral-8x22b-instruct-v0.1 \n",
1013
+ "\n",
1014
+ " Model MT-bench (score) MMLU \\\n",
1015
+ "0 GPT-4-Turbo-2024-04-09 - - \n",
1016
+ "1 GPT-4-1106-preview 9.32 - \n",
1017
+ "2 Claude 3 Opus - 0.868 \n",
1018
+ "3 Gemini 1.5 Pro API-0409-Preview - 0.819 \n",
1019
+ "4 GPT-4-0125-preview - - \n",
1020
+ "5 Bard (Gemini Pro) - - \n",
1021
+ "6 Llama-3-70b-Instruct - 0.820 \n",
1022
+ "7 Claude 3 Sonnet - 0.790 \n",
1023
+ "8 Command R+ - - \n",
1024
+ "9 GPT-4-0314 8.96 0.864 \n",
1025
+ "10 Claude 3 Haiku - 0.752 \n",
1026
+ "11 GPT-4-0613 9.18 - \n",
1027
+ "12 Mistral-Large-2402 - 0.812 \n",
1028
+ "13 Qwen1.5-72B-Chat 8.61 0.775 \n",
1029
+ "14 Reka-Flash-21B-online - - \n",
1030
+ "15 Claude-1 7.90 0.770 \n",
1031
+ "16 Reka-Flash-21B - 0.735 \n",
1032
+ "17 Command R - - \n",
1033
+ "18 Mistral Medium 8.61 0.753 \n",
1034
+ "19 Mixtral-8x22b-Instruct-v0.1 - 0.778 \n",
1035
+ "\n",
1036
+ " Knowledge cutoff date License Organization \\\n",
1037
+ "0 2023/12 Proprietary OpenAI \n",
1038
+ "1 2023/4 Proprietary OpenAI \n",
1039
+ "2 2023/8 Proprietary Anthropic \n",
1040
+ "3 2023/11 Proprietary Google \n",
1041
+ "4 2023/12 Proprietary OpenAI \n",
1042
+ "5 Online Proprietary Google \n",
1043
+ "6 2023/12 Llama 3 Community Meta \n",
1044
+ "7 2023/8 Proprietary Anthropic \n",
1045
+ "8 2024/3 CC-BY-NC-4.0 Cohere \n",
1046
+ "9 2021/9 Proprietary OpenAI \n",
1047
+ "10 2023/8 Proprietary Anthropic \n",
1048
+ "11 2021/9 Proprietary OpenAI \n",
1049
+ "12 - Proprietary Mistral \n",
1050
+ "13 2024/2 Qianwen LICENSE Alibaba \n",
1051
+ "14 Online Proprietary Reka AI \n",
1052
+ "15 - Proprietary Anthropic \n",
1053
+ "16 2023/11 Proprietary Reka AI \n",
1054
+ "17 2024/3 CC-BY-NC-4.0 Cohere \n",
1055
+ "18 - Proprietary Mistral \n",
1056
+ "19 2024/4 Apache 2.0 Mistral \n",
1057
+ "\n",
1058
+ " Link \n",
1059
+ "0 https://platform.openai.com/docs/models/gpt-4-... \n",
1060
+ "1 https://openai.com/blog/new-models-and-develop... \n",
1061
+ "2 https://www.anthropic.com/news/claude-3-family \n",
1062
+ "3 https://blog.google/technology/ai/google-gemin... \n",
1063
+ "4 https://openai.com/blog/new-models-and-develop... \n",
1064
+ "5 https://bard.google.com/ \n",
1065
+ "6 https://llama.meta.com/llama3/ \n",
1066
+ "7 https://www.anthropic.com/news/claude-3-family \n",
1067
+ "8 https://txt.cohere.com/command-r-plus-microsof... \n",
1068
+ "9 https://openai.com/research/gpt-4 \n",
1069
+ "10 https://www.anthropic.com/news/claude-3-family \n",
1070
+ "11 https://platform.openai.com/docs/models/gpt-4-... \n",
1071
+ "12 https://mistral.ai/news/mistral-large/ \n",
1072
+ "13 https://qwenlm.github.io/blog/qwen1.5/ \n",
1073
+ "14 https://docs.reka.ai/http-api.html#generation \n",
1074
+ "15 https://www.anthropic.com/index/introducing-cl... \n",
1075
+ "16 https://www.reka.ai/news/reka-flash-efficient-... \n",
1076
+ "17 https://txt.cohere.com/command-r \n",
1077
+ "18 https://mistral.ai/news/la-plateforme/ \n",
1078
+ "19 https://mistral.ai/news/mixtral-8x22b/ "
1079
+ ]
1080
+ },
1081
+ "execution_count": 101,
1082
+ "metadata": {},
1083
+ "output_type": "execute_result"
1084
+ }
1085
+ ],
1086
+ "source": [
1087
+ "merged_dfs[\"Overall\"][:20]"
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "markdown",
1092
+ "metadata": {},
1093
+ "source": [
1094
+ "### Manually map release dates - MEH."
1095
+ ]
1096
+ },
1097
+ {
1098
+ "cell_type": "code",
1099
+ "execution_count": 113,
1100
+ "metadata": {},
1101
+ "outputs": [],
1102
+ "source": [
1103
+ "t = merged_dfs[\"Overall\"].loc[:, [\"key\", \"Model\"]]\n",
1104
+ "t[\"Release Date\"] = \"\""
1105
+ ]
1106
+ },
1107
+ {
1108
+ "cell_type": "code",
1109
+ "execution_count": 120,
1110
+ "metadata": {},
1111
+ "outputs": [],
1112
+ "source": [
1113
+ "t.to_json(\"release_date_mapping.json\", orient=\"records\", lines=True)"
1114
+ ]
1115
+ },
1116
+ {
1117
+ "cell_type": "code",
1118
+ "execution_count": 119,
1119
+ "metadata": {},
1120
+ "outputs": [
1121
+ {
1122
+ "data": {
1123
+ "text/plain": [
1124
+ "[{'key': 'gpt-4-turbo-2024-04-09',\n",
1125
+ " 'Model': 'GPT-4-Turbo-2024-04-09',\n",
1126
+ " 'Release Date': ''},\n",
1127
+ " {'key': 'gpt-4-1106-preview',\n",
1128
+ " 'Model': 'GPT-4-1106-preview',\n",
1129
+ " 'Release Date': ''},\n",
1130
+ " {'key': 'claude-3-opus-20240229',\n",
1131
+ " 'Model': 'Claude 3 Opus',\n",
1132
+ " 'Release Date': ''},\n",
1133
+ " {'key': 'gemini-1.5-pro-api-0409-preview',\n",
1134
+ " 'Model': 'Gemini 1.5 Pro API-0409-Preview',\n",
1135
+ " 'Release Date': ''},\n",
1136
+ " {'key': 'gpt-4-0125-preview',\n",
1137
+ " 'Model': 'GPT-4-0125-preview',\n",
1138
+ " 'Release Date': ''},\n",
1139
+ " {'key': 'bard-jan-24-gemini-pro',\n",
1140
+ " 'Model': 'Bard (Gemini Pro)',\n",
1141
+ " 'Release Date': ''},\n",
1142
+ " {'key': 'llama-3-70b-instruct',\n",
1143
+ " 'Model': 'Llama-3-70b-Instruct',\n",
1144
+ " 'Release Date': ''},\n",
1145
+ " {'key': 'claude-3-sonnet-20240229',\n",
1146
+ " 'Model': 'Claude 3 Sonnet',\n",
1147
+ " 'Release Date': ''},\n",
1148
+ " {'key': 'command-r-plus', 'Model': 'Command R+', 'Release Date': ''},\n",
1149
+ " {'key': 'gpt-4-0314', 'Model': 'GPT-4-0314', 'Release Date': ''},\n",
1150
+ " {'key': 'claude-3-haiku-20240307',\n",
1151
+ " 'Model': 'Claude 3 Haiku',\n",
1152
+ " 'Release Date': ''},\n",
1153
+ " {'key': 'gpt-4-0613', 'Model': 'GPT-4-0613', 'Release Date': ''},\n",
1154
+ " {'key': 'mistral-large-2402',\n",
1155
+ " 'Model': 'Mistral-Large-2402',\n",
1156
+ " 'Release Date': ''},\n",
1157
+ " {'key': 'qwen1.5-72b-chat', 'Model': 'Qwen1.5-72B-Chat', 'Release Date': ''},\n",
1158
+ " {'key': 'reka-flash-21b-20240226-online',\n",
1159
+ " 'Model': 'Reka-Flash-21B-online',\n",
1160
+ " 'Release Date': ''},\n",
1161
+ " {'key': 'claude-1', 'Model': 'Claude-1', 'Release Date': ''},\n",
1162
+ " {'key': 'reka-flash-21b-20240226',\n",
1163
+ " 'Model': 'Reka-Flash-21B',\n",
1164
+ " 'Release Date': ''},\n",
1165
+ " {'key': 'command-r', 'Model': 'Command R', 'Release Date': ''},\n",
1166
+ " {'key': 'mistral-medium', 'Model': 'Mistral Medium', 'Release Date': ''},\n",
1167
+ " {'key': 'mixtral-8x22b-instruct-v0.1',\n",
1168
+ " 'Model': 'Mixtral-8x22b-Instruct-v0.1',\n",
1169
+ " 'Release Date': ''},\n",
1170
+ " {'key': 'llama-3-8b-instruct',\n",
1171
+ " 'Model': 'Llama-3-8b-Instruct',\n",
1172
+ " 'Release Date': ''},\n",
1173
+ " {'key': 'gemini-pro-dev-api',\n",
1174
+ " 'Model': 'Gemini Pro (Dev API)',\n",
1175
+ " 'Release Date': ''},\n",
1176
+ " {'key': 'qwen1.5-32b-chat', 'Model': 'Qwen1.5-32B-Chat', 'Release Date': ''},\n",
1177
+ " {'key': 'claude-2.0', 'Model': 'Claude-2.0', 'Release Date': ''},\n",
1178
+ " {'key': 'mistral-next', 'Model': 'Mistral-Next', 'Release Date': ''},\n",
1179
+ " {'key': 'zephyr-orpo-141b-A35b-v0.1',\n",
1180
+ " 'Model': 'Zephyr-ORPO-141b-A35b-v0.1',\n",
1181
+ " 'Release Date': ''},\n",
1182
+ " {'key': 'gpt-3.5-turbo-0613',\n",
1183
+ " 'Model': 'GPT-3.5-Turbo-0613',\n",
1184
+ " 'Release Date': ''},\n",
1185
+ " {'key': 'claude-2.1', 'Model': 'Claude-2.1', 'Release Date': ''},\n",
1186
+ " {'key': 'qwen1.5-14b-chat', 'Model': 'Qwen1.5-14B-Chat', 'Release Date': ''},\n",
1187
+ " {'key': 'starling-lm-7b-beta',\n",
1188
+ " 'Model': 'Starling-LM-7B-beta',\n",
1189
+ " 'Release Date': ''},\n",
1190
+ " {'key': 'gemini-pro', 'Model': 'Gemini Pro', 'Release Date': ''},\n",
1191
+ " {'key': 'mixtral-8x7b-instruct-v0.1',\n",
1192
+ " 'Model': 'Mixtral-8x7b-Instruct-v0.1',\n",
1193
+ " 'Release Date': ''},\n",
1194
+ " {'key': 'claude-instant-1', 'Model': 'Claude-Instant-1', 'Release Date': ''},\n",
1195
+ " {'key': 'yi-34b-chat', 'Model': 'Yi-34B-Chat', 'Release Date': ''},\n",
1196
+ " {'key': 'gpt-3.5-turbo-0314',\n",
1197
+ " 'Model': 'GPT-3.5-Turbo-0314',\n",
1198
+ " 'Release Date': ''},\n",
1199
+ " {'key': 'wizardlm-70b', 'Model': 'WizardLM-70B-v1.0', 'Release Date': ''},\n",
1200
+ " {'key': 'gpt-3.5-turbo-0125',\n",
1201
+ " 'Model': 'GPT-3.5-Turbo-0125',\n",
1202
+ " 'Release Date': ''},\n",
1203
+ " {'key': 'tulu-2-dpo-70b', 'Model': 'Tulu-2-DPO-70B', 'Release Date': ''},\n",
1204
+ " {'key': 'dbrx-instruct-preview',\n",
1205
+ " 'Model': 'DBRX-Instruct-Preview',\n",
1206
+ " 'Release Date': ''},\n",
1207
+ " {'key': 'openchat-3.5-0106',\n",
1208
+ " 'Model': 'OpenChat-3.5-0106',\n",
1209
+ " 'Release Date': ''},\n",
1210
+ " {'key': 'vicuna-33b', 'Model': 'Vicuna-33B', 'Release Date': ''},\n",
1211
+ " {'key': 'starling-lm-7b-alpha',\n",
1212
+ " 'Model': 'Starling-LM-7B-alpha',\n",
1213
+ " 'Release Date': ''},\n",
1214
+ " {'key': 'llama-2-70b-chat', 'Model': 'Llama-2-70b-chat', 'Release Date': ''},\n",
1215
+ " {'key': 'nous-hermes-2-mixtral-8x7b-dpo',\n",
1216
+ " 'Model': 'Nous-Hermes-2-Mixtral-8x7B-DPO',\n",
1217
+ " 'Release Date': ''},\n",
1218
+ " {'key': 'gemma-1.1-7b-it', 'Model': 'Gemma-1.1-7B-it', 'Release Date': ''},\n",
1219
+ " {'key': 'llama2-70b-steerlm-chat',\n",
1220
+ " 'Model': 'NV-Llama2-70B-SteerLM-Chat',\n",
1221
+ " 'Release Date': ''},\n",
1222
+ " {'key': 'deepseek-llm-67b-chat',\n",
1223
+ " 'Model': 'DeepSeek-LLM-67B-Chat',\n",
1224
+ " 'Release Date': ''},\n",
1225
+ " {'key': 'openhermes-2.5-mistral-7b',\n",
1226
+ " 'Model': 'OpenHermes-2.5-Mistral-7b',\n",
1227
+ " 'Release Date': ''},\n",
1228
+ " {'key': 'openchat-3.5', 'Model': 'OpenChat-3.5', 'Release Date': ''},\n",
1229
+ " {'key': 'pplx-70b-online', 'Model': 'pplx-70b-online', 'Release Date': ''},\n",
1230
+ " {'key': 'mistral-7b-instruct-v0.2',\n",
1231
+ " 'Model': 'Mistral-7B-Instruct-v0.2',\n",
1232
+ " 'Release Date': ''},\n",
1233
+ " {'key': 'qwen1.5-7b-chat', 'Model': 'Qwen1.5-7B-Chat', 'Release Date': ''},\n",
1234
+ " {'key': 'gpt-3.5-turbo-1106',\n",
1235
+ " 'Model': 'GPT-3.5-Turbo-1106',\n",
1236
+ " 'Release Date': ''},\n",
1237
+ " {'key': 'dolphin-2.2.1-mistral-7b',\n",
1238
+ " 'Model': 'Dolphin-2.2.1-Mistral-7B',\n",
1239
+ " 'Release Date': ''},\n",
1240
+ " {'key': 'solar-10.7b-instruct-v1.0',\n",
1241
+ " 'Model': 'SOLAR-10.7B-Instruct-v1.0',\n",
1242
+ " 'Release Date': ''},\n",
1243
+ " {'key': 'phi-3-mini-128k-instruct',\n",
1244
+ " 'Model': 'Phi-3-Mini-128k-Instruct',\n",
1245
+ " 'Release Date': ''},\n",
1246
+ " {'key': 'wizardlm-13b', 'Model': 'WizardLM-13b-v1.2', 'Release Date': ''},\n",
1247
+ " {'key': 'llama-2-13b-chat', 'Model': 'Llama-2-13b-chat', 'Release Date': ''},\n",
1248
+ " {'key': 'zephyr-7b-beta', 'Model': 'Zephyr-7b-beta', 'Release Date': ''},\n",
1249
+ " {'key': 'codellama-70b-instruct',\n",
1250
+ " 'Model': 'CodeLlama-70B-instruct',\n",
1251
+ " 'Release Date': ''},\n",
1252
+ " {'key': 'mpt-30b-chat', 'Model': 'MPT-30B-chat', 'Release Date': ''},\n",
1253
+ " {'key': 'vicuna-13b', 'Model': 'Vicuna-13B', 'Release Date': ''},\n",
1254
+ " {'key': 'codellama-34b-instruct',\n",
1255
+ " 'Model': 'CodeLlama-34B-instruct',\n",
1256
+ " 'Release Date': ''},\n",
1257
+ " {'key': 'gemma-7b-it', 'Model': 'Gemma-7B-it', 'Release Date': ''},\n",
1258
+ " {'key': 'pplx-7b-online', 'Model': 'pplx-7b-online', 'Release Date': ''},\n",
1259
+ " {'key': 'zephyr-7b-alpha', 'Model': 'Zephyr-7b-alpha', 'Release Date': ''},\n",
1260
+ " {'key': 'llama-2-7b-chat', 'Model': 'Llama-2-7b-chat', 'Release Date': ''},\n",
1261
+ " {'key': 'qwen-14b-chat', 'Model': 'Qwen-14B-Chat', 'Release Date': ''},\n",
1262
+ " {'key': 'falcon-180b-chat', 'Model': 'falcon-180b-chat', 'Release Date': ''},\n",
1263
+ " {'key': 'guanaco-33b', 'Model': 'Guanaco-33B', 'Release Date': ''},\n",
1264
+ " {'key': 'stripedhyena-nous-7b',\n",
1265
+ " 'Model': 'StripedHyena-Nous-7B',\n",
1266
+ " 'Release Date': ''},\n",
1267
+ " {'key': 'olmo-7b-instruct', 'Model': 'OLMo-7B-instruct', 'Release Date': ''},\n",
1268
+ " {'key': 'gemma-1.1-2b-it', 'Model': 'Gemma-1.1-2B-it', 'Release Date': ''},\n",
1269
+ " {'key': 'mistral-7b-instruct',\n",
1270
+ " 'Model': 'Mistral-7B-Instruct-v0.1',\n",
1271
+ " 'Release Date': ''},\n",
1272
+ " {'key': 'palm-2', 'Model': 'PaLM-Chat-Bison-001', 'Release Date': ''},\n",
1273
+ " {'key': 'vicuna-7b', 'Model': 'Vicuna-7B', 'Release Date': ''},\n",
1274
+ " {'key': 'qwen1.5-4b-chat', 'Model': 'Qwen1.5-4B-Chat', 'Release Date': ''},\n",
1275
+ " {'key': 'gemma-2b-it', 'Model': 'Gemma-2B-it', 'Release Date': ''},\n",
1276
+ " {'key': 'koala-13b', 'Model': 'Koala-13B', 'Release Date': ''},\n",
1277
+ " {'key': 'chatglm3-6b', 'Model': 'ChatGLM3-6B', 'Release Date': ''},\n",
1278
+ " {'key': 'gpt4all-13b-snoozy',\n",
1279
+ " 'Model': 'GPT4All-13B-Snoozy',\n",
1280
+ " 'Release Date': ''},\n",
1281
+ " {'key': 'chatglm2-6b', 'Model': 'ChatGLM2-6B', 'Release Date': ''},\n",
1282
+ " {'key': 'mpt-7b-chat', 'Model': 'MPT-7B-Chat', 'Release Date': ''},\n",
1283
+ " {'key': 'RWKV-4-Raven-14B', 'Model': 'RWKV-4-Raven-14B', 'Release Date': ''},\n",
1284
+ " {'key': 'alpaca-13b', 'Model': 'Alpaca-13B', 'Release Date': ''},\n",
1285
+ " {'key': 'oasst-pythia-12b',\n",
1286
+ " 'Model': 'OpenAssistant-Pythia-12B',\n",
1287
+ " 'Release Date': ''},\n",
1288
+ " {'key': 'chatglm-6b', 'Model': 'ChatGLM-6B', 'Release Date': ''},\n",
1289
+ " {'key': 'fastchat-t5-3b', 'Model': 'FastChat-T5-3B', 'Release Date': ''},\n",
1290
+ " {'key': 'stablelm-tuned-alpha-7b',\n",
1291
+ " 'Model': 'StableLM-Tuned-Alpha-7B',\n",
1292
+ " 'Release Date': ''},\n",
1293
+ " {'key': 'dolly-v2-12b', 'Model': 'Dolly-V2-12B', 'Release Date': ''},\n",
1294
+ " {'key': 'llama-13b', 'Model': 'LLaMA-13B', 'Release Date': ''}]"
1295
+ ]
1296
+ },
1297
+ "execution_count": 119,
1298
+ "metadata": {},
1299
+ "output_type": "execute_result"
1300
+ }
1301
+ ],
1302
+ "source": [
1303
+ "t.to_dict(orient=\"records\")"
1304
+ ]
1305
+ },
1306
+ {
1307
+ "cell_type": "markdown",
1308
+ "metadata": {},
1309
+ "source": [
1310
+ "## Build plot"
1311
+ ]
1312
+ },
1313
+ {
1314
+ "cell_type": "markdown",
1315
+ "metadata": {},
1316
+ "source": []
1317
+ }
1318
+ ],
1319
+ "metadata": {
1320
+ "kernelspec": {
1321
+ "display_name": ".venv",
1322
+ "language": "python",
1323
+ "name": "python3"
1324
+ },
1325
+ "language_info": {
1326
+ "codemirror_mode": {
1327
+ "name": "ipython",
1328
+ "version": 3
1329
+ },
1330
+ "file_extension": ".py",
1331
+ "mimetype": "text/x-python",
1332
+ "name": "python",
1333
+ "nbconvert_exporter": "python",
1334
+ "pygments_lexer": "ipython3",
1335
+ "version": "3.10.11"
1336
+ }
1337
+ },
1338
+ "nbformat": 4,
1339
+ "nbformat_minor": 2
1340
+ }
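The notebook above only emits a skeleton mapping with empty `Release Date` fields; the committed `release_date_mapping.json` that follows carries the hand-filled dates, which `app.py` joins back onto the merged leaderboard frame via the shared `key` column. A minimal sketch of that join, assuming a hypothetical two-row slice shaped like the committed file:

```python
import pandas as pd

# Hypothetical two-row slice shaped like release_date_mapping.json
release_date_mapping = pd.DataFrame(
    [
        {"key": "gpt-4-0613", "Model": "GPT-4-0613", "Release Date": "2023-06-13"},
        {"key": "command-r", "Model": "Command R", "Release Date": "2024-03-11"},
    ]
)

# Hypothetical slice of the merged arena/leaderboard frame
# (the real frame also carries variance, num_battles, License, etc.)
merged = pd.DataFrame(
    [
        {"key": "gpt-4-0613", "rating": 1164.9},
        {"key": "command-r", "rating": 1148.1},
    ]
)

# The same join app.py performs: attach "Release Date" through the shared "key" column
with_dates = pd.merge(merged, release_date_mapping[["key", "Release Date"]], on="key")
print(with_dates.columns.tolist())  # ['key', 'rating', 'Release Date']
```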
release_date_mapping.json ADDED
@@ -0,0 +1,457 @@
1
+ [
2
+ {
3
+ "key": "gpt-4-turbo-2024-04-09",
4
+ "Model": "GPT-4-Turbo-2024-04-09",
5
+ "Release Date": "2024-04-09"
6
+ },
7
+ {
8
+ "key": "gpt-4-1106-preview",
9
+ "Model": "GPT-4-1106-preview",
10
+ "Release Date": "2024-11-06"
11
+ },
12
+ {
13
+ "key": "claude-3-opus-20240229",
14
+ "Model": "Claude 3 Opus",
15
+ "Release Date": "2024-02-29"
16
+ },
17
+ {
18
+ "key": "gemini-1.5-pro-api-0409-preview",
19
+ "Model": "Gemini 1.5 Pro API-0409-Preview",
20
+ "Release Date": "2024-04-09"
21
+ },
22
+ {
23
+ "key": "gpt-4-0125-preview",
24
+ "Model": "GPT-4-0125-preview",
25
+ "Release Date": "2024-01-25"
26
+ },
27
+ {
28
+ "key": "bard-jan-24-gemini-pro",
29
+ "Model": "Bard (Gemini Pro)",
30
+ "Release Date": "2024-02-01"
31
+ },
32
+ {
33
+ "key": "llama-3-70b-instruct",
34
+ "Model": "Llama-3-70b-Instruct",
35
+ "Release Date": "2024-04-18"
36
+ },
37
+ {
38
+ "key": "claude-3-sonnet-20240229",
39
+ "Model": "Claude 3 Sonnet",
40
+ "Release Date": "2024-02-29"
41
+ },
42
+ {
43
+ "key": "command-r-plus",
44
+ "Model": "Command R+",
45
+ "Release Date": "2024-04-04"
46
+ },
47
+ {
48
+ "key": "gpt-4-0314",
49
+ "Model": "GPT-4-0314",
50
+ "Release Date": "2024-03-14"
+   },
+   {
+     "key": "claude-3-haiku-20240307",
+     "Model": "Claude 3 Haiku",
+     "Release Date": "2024-03-07"
+   },
+   {
+     "key": "gpt-4-0613",
+     "Model": "GPT-4-0613",
+     "Release Date": "2023-06-13"
+   },
+   {
+     "key": "mistral-large-2402",
+     "Model": "Mistral-Large-2402",
+     "Release Date": "2024-02-24"
+   },
+   {
+     "key": "qwen1.5-72b-chat",
+     "Model": "Qwen1.5-72B-Chat",
+     "Release Date": "2024-02-04"
+   },
+   {
+     "key": "reka-flash-21b-20240226-online",
+     "Model": "Reka-Flash-21B-online",
+     "Release Date": "2024-02-26"
+   },
+   {
+     "key": "claude-1",
+     "Model": "Claude-1",
+     "Release Date": "2023-03-14"
+   },
+   {
+     "key": "reka-flash-21b-20240226",
+     "Model": "Reka-Flash-21B",
+     "Release Date": "2024-02-26"
+   },
+   {
+     "key": "command-r",
+     "Model": "Command R",
+     "Release Date": "2024-03-11"
+   },
+   {
+     "key": "mistral-medium",
+     "Model": "Mistral Medium",
+     "Release Date": "2023-12-11"
+   },
+   {
+     "key": "mixtral-8x22b-instruct-v0.1",
+     "Model": "Mixtral-8x22b-Instruct-v0.1",
+     "Release Date": "2024-04-17"
+   },
+   {
+     "key": "llama-3-8b-instruct",
+     "Model": "Llama-3-8b-Instruct",
+     "Release Date": "2024-04-18"
+   },
+   {
+     "key": "gemini-pro-dev-api",
+     "Model": "Gemini Pro (Dev API)",
+     "Release Date": "2023-12-13"
+   },
+   {
+     "key": "qwen1.5-32b-chat",
+     "Model": "Qwen1.5-32B-Chat",
+     "Release Date": "2024-02-04"
+   },
+   {
+     "key": "claude-2.0",
+     "Model": "Claude-2.0",
+     "Release Date": "2023-07-11"
+   },
+   {
+     "key": "mistral-next",
+     "Model": "Mistral-Next",
+     "Release Date": "2024-02-17"
+   },
+   {
+     "key": "zephyr-orpo-141b-A35b-v0.1",
+     "Model": "Zephyr-ORPO-141b-A35b-v0.1",
+     "Release Date": "2024-04-12"
+   },
+   {
+     "key": "gpt-3.5-turbo-0613",
+     "Model": "GPT-3.5-Turbo-0613",
+     "Release Date": "2023-06-13"
+   },
+   {
+     "key": "claude-2.1",
+     "Model": "Claude-2.1",
+     "Release Date": "2023-11-21"
+   },
+   {
+     "key": "qwen1.5-14b-chat",
+     "Model": "Qwen1.5-14B-Chat",
+     "Release Date": "2024-02-04"
+   },
+   {
+     "key": "starling-lm-7b-beta",
+     "Model": "Starling-LM-7B-beta",
+     "Release Date": "2024-03-20"
+   },
+   {
+     "key": "gemini-pro",
+     "Model": "Gemini Pro",
+     "Release Date": "2023-12-13"
+   },
+   {
+     "key": "mixtral-8x7b-instruct-v0.1",
+     "Model": "Mixtral-8x7b-Instruct-v0.1",
+     "Release Date": "2023-12-11"
+   },
+   {
+     "key": "claude-instant-1",
+     "Model": "Claude-Instant-1",
+     "Release Date": "2023-03-14"
+   },
+   {
+     "key": "yi-34b-chat",
+     "Model": "Yi-34B-Chat",
+     "Release Date": "2024-01-23"
+   },
+   {
+     "key": "gpt-3.5-turbo-0314",
+     "Model": "GPT-3.5-Turbo-0314",
+     "Release Date": "2023-03-14"
+   },
+   {
+     "key": "wizardlm-70b",
+     "Model": "WizardLM-70B-v1.0",
+     "Release Date": "2023-08-09"
+   },
+   {
+     "key": "gpt-3.5-turbo-0125",
+     "Model": "GPT-3.5-Turbo-0125",
+     "Release Date": "2024-01-25"
+   },
+   {
+     "key": "tulu-2-dpo-70b",
+     "Model": "Tulu-2-DPO-70B",
+     "Release Date": "2023-11-12"
+   },
+   {
+     "key": "dbrx-instruct-preview",
+     "Model": "DBRX-Instruct-Preview",
+     "Release Date": "2024-03-27"
+   },
+   {
+     "key": "openchat-3.5-0106",
+     "Model": "OpenChat-3.5-0106",
+     "Release Date": "2024-01-06"
+   },
+   {
+     "key": "vicuna-33b",
+     "Model": "Vicuna-33B",
+     "Release Date": "2023-06-21"
+   },
+   {
+     "key": "starling-lm-7b-alpha",
+     "Model": "Starling-LM-7B-alpha",
+     "Release Date": "2023-11-25"
+   },
+   {
+     "key": "llama-2-70b-chat",
+     "Model": "Llama-2-70b-chat",
+     "Release Date": "2023-07-18"
+   },
+   {
+     "key": "nous-hermes-2-mixtral-8x7b-dpo",
+     "Model": "Nous-Hermes-2-Mixtral-8x7B-DPO",
+     "Release Date": "2024-01-13"
+   },
+   {
+     "key": "gemma-1.1-7b-it",
+     "Model": "Gemma-1.1-7B-it",
+     "Release Date": "2024-04-09"
+   },
+   {
+     "key": "llama2-70b-steerlm-chat",
+     "Model": "NV-Llama2-70B-SteerLM-Chat",
+     "Release Date": "2023-11-24"
+   },
+   {
+     "key": "deepseek-llm-67b-chat",
+     "Model": "DeepSeek-LLM-67B-Chat",
+     "Release Date": "2023-11-29"
+   },
+   {
+     "key": "openhermes-2.5-mistral-7b",
+     "Model": "OpenHermes-2.5-Mistral-7b",
+     "Release Date": "2023-10-29"
+   },
+   {
+     "key": "openchat-3.5",
+     "Model": "OpenChat-3.5",
+     "Release Date": "2023-11-16"
+   },
+   {
+     "key": "pplx-70b-online",
+     "Model": "pplx-70b-online",
+     "Release Date": "2023-11-29"
+   },
+   {
+     "key": "mistral-7b-instruct-v0.2",
+     "Model": "Mistral-7B-Instruct-v0.2",
+     "Release Date": "2023-12-11"
+   },
+   {
+     "key": "qwen1.5-7b-chat",
+     "Model": "Qwen1.5-7B-Chat",
+     "Release Date": "2024-02-04"
+   },
+   {
+     "key": "gpt-3.5-turbo-1106",
+     "Model": "GPT-3.5-Turbo-1106",
+     "Release Date": "2023-11-06"
+   },
+   {
+     "key": "dolphin-2.2.1-mistral-7b",
+     "Model": "Dolphin-2.2.1-Mistral-7B",
+     "Release Date": "2023-10-30"
+   },
+   {
+     "key": "solar-10.7b-instruct-v1.0",
+     "Model": "SOLAR-10.7B-Instruct-v1.0",
+     "Release Date": "2023-12-13"
+   },
+   {
+     "key": "phi-3-mini-128k-instruct",
+     "Model": "Phi-3-Mini-128k-Instruct",
+     "Release Date": "2024-04-23"
+   },
+   {
+     "key": "wizardlm-13b",
+     "Model": "WizardLM-13b-v1.2",
+     "Release Date": "2023-07-25"
+   },
+   {
+     "key": "llama-2-13b-chat",
+     "Model": "Llama-2-13b-chat",
+     "Release Date": "2023-07-18"
+   },
+   {
+     "key": "zephyr-7b-beta",
+     "Model": "Zephyr-7b-beta",
+     "Release Date": "2023-10-26"
+   },
+   {
+     "key": "codellama-70b-instruct",
+     "Model": "CodeLlama-70B-instruct",
+     "Release Date": "2024-01-29"
+   },
+   {
+     "key": "mpt-30b-chat",
+     "Model": "MPT-30B-chat",
+     "Release Date": "2023-06-09"
+   },
+   {
+     "key": "vicuna-13b",
+     "Model": "Vicuna-13B",
+     "Release Date": "2023-07-23"
+   },
+   {
+     "key": "codellama-34b-instruct",
+     "Model": "CodeLlama-34B-instruct",
+     "Release Date": "2023-08-24"
+   },
+   {
+     "key": "gemma-7b-it",
+     "Model": "Gemma-7B-it",
+     "Release Date": "2024-02-21"
+   },
+   {
+     "key": "pplx-7b-online",
+     "Model": "pplx-7b-online",
+     "Release Date": "2023-11-29"
+   },
+   {
+     "key": "zephyr-7b-alpha",
+     "Model": "Zephyr-7b-alpha",
+     "Release Date": "2023-10-09"
+   },
+   {
+     "key": "llama-2-7b-chat",
+     "Model": "Llama-2-7b-chat",
+     "Release Date": "2023-07-18"
+   },
+   {
+     "key": "qwen-14b-chat",
+     "Model": "Qwen-14B-Chat",
+     "Release Date": "2023-09-24"
+   },
+   {
+     "key": "falcon-180b-chat",
+     "Model": "falcon-180b-chat",
+     "Release Date": "2023-09-05"
+   },
+   {
+     "key": "guanaco-33b",
+     "Model": "Guanaco-33B",
+     "Release Date": "2023-05-22"
+   },
+   {
+     "key": "stripedhyena-nous-7b",
+     "Model": "StripedHyena-Nous-7B",
+     "Release Date": "2023-12-07"
+   },
+   {
+     "key": "olmo-7b-instruct",
+     "Model": "OLMo-7B-instruct",
+     "Release Date": "2024-02-23"
+   },
+   {
+     "key": "gemma-1.1-2b-it",
+     "Model": "Gemma-1.1-2B-it",
+     "Release Date": "2024-04-09"
+   },
+   {
+     "key": "mistral-7b-instruct",
+     "Model": "Mistral-7B-Instruct-v0.1",
+     "Release Date": "2023-09-27"
+   },
+   {
+     "key": "palm-2",
+     "Model": "PaLM-Chat-Bison-001",
+     "Release Date": "2023-07-10"
+   },
+   {
+     "key": "vicuna-7b",
+     "Model": "Vicuna-7B",
+     "Release Date": "2023-07-29"
+   },
+   {
+     "key": "qwen1.5-4b-chat",
+     "Model": "Qwen1.5-4B-Chat",
+     "Release Date": "2024-02-04"
+   },
+   {
+     "key": "gemma-2b-it",
+     "Model": "Gemma-2B-it",
+     "Release Date": "2024-02-21"
+   },
+   {
+     "key": "koala-13b",
+     "Model": "Koala-13B",
+     "Release Date": "2023-04-03"
+   },
+   {
+     "key": "chatglm3-6b",
+     "Model": "ChatGLM3-6B",
+     "Release Date": "2023-10-25"
+   },
+   {
+     "key": "gpt4all-13b-snoozy",
+     "Model": "GPT4All-13B-Snoozy",
+     "Release Date": "2023-04-24"
+   },
+   {
+     "key": "chatglm2-6b",
+     "Model": "ChatGLM2-6B",
+     "Release Date": "2023-06-25"
+   },
+   {
+     "key": "mpt-7b-chat",
+     "Model": "MPT-7B-Chat",
+     "Release Date": "2023-05-04"
+   },
+   {
+     "key": "RWKV-4-Raven-14B",
+     "Model": "RWKV-4-Raven-14B",
+     "Release Date": "2023-05-22"
+   },
+   {
+     "key": "alpaca-13b",
+     "Model": "Alpaca-13B",
+     "Release Date": "2023-03-13"
+   },
+   {
+     "key": "oasst-pythia-12b",
+     "Model": "OpenAssistant-Pythia-12B",
+     "Release Date": "2023-04-03"
+   },
+   {
+     "key": "chatglm-6b",
+     "Model": "ChatGLM-6B",
+     "Release Date": "2023-03-13"
+   },
+   {
+     "key": "fastchat-t5-3b",
+     "Model": "FastChat-T5-3B",
+     "Release Date": "2023-04-27"
+   },
+   {
+     "key": "stablelm-tuned-alpha-7b",
+     "Model": "StableLM-Tuned-Alpha-7B",
+     "Release Date": "2023-04-19"
+   },
+   {
+     "key": "dolly-v2-12b",
+     "Model": "Dolly-V2-12B",
+     "Release Date": "2023-04-12"
+   },
+   {
+     "key": "llama-13b",
+     "Model": "LLaMA-13B",
+     "Release Date": "2023-02-27"
+   }
+ ]
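
The file above is a flat JSON array of records, each carrying a `key` (the arena model identifier used for merging), a display `Model` name, and a `Release Date` string. As a rough illustration only (not part of this commit, and assuming the file sits in the working directory), a small script could sanity-check the mapping before it is consumed:

```python
# Hypothetical sanity check for release_date_mapping.json (not part of this commit).
# It verifies that each record has exactly the expected fields, that every
# "Release Date" parses as YYYY-MM-DD, and that no "key" appears twice.
import json
from datetime import datetime

with open("release_date_mapping.json") as f:
    records = json.load(f)

seen = set()
for record in records:
    assert set(record) == {"key", "Model", "Release Date"}, record
    datetime.strptime(record["Release Date"], "%Y-%m-%d")  # raises ValueError on malformed dates
    assert record["key"] not in seen, f"duplicate key: {record['key']}"
    seen.add(record["key"])

print(f"checked {len(records)} records")
```
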
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ huggingface_hub
+ pandas
+ plotly
+ gradio
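
The dependency list is small and unpinned, so pip resolves the latest available versions at install time. As a hypothetical convenience (not part of this commit), a quick check after `pip install -r requirements.txt` could confirm that all four packages import and report their versions:

```python
# Hypothetical smoke test (not part of this commit): confirm the packages listed
# in requirements.txt import cleanly and print their installed versions.
import importlib

for name in ("huggingface_hub", "pandas", "plotly", "gradio"):
    module = importlib.import_module(name)
    print(name, getattr(module, "__version__", "version unknown"))
```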