app.py CHANGED
@@ -12,7 +12,6 @@ import pandas as pd
12
  # notebook_url = "https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing"
13
  notebook_url = "https://colab.research.google.com/drive/1KdwokPjirkTmpO_P1WByFNFiqxWQquwH#scrollTo=o_CpbkGEbhrK"
14
 
15
-
16
  basic_component_values = [None] * 6
17
  leader_component_values = [None] * 5
18
 
@@ -34,14 +33,26 @@ We've collected over **500,000** human preference votes to rank LLMs with the El
34
  def make_arena_leaderboard_md(arena_df):
35
  total_votes = sum(arena_df["num_battles"]) // 2
36
  total_models = len(arena_df)
37
-
38
  leaderboard_md = f"""
39
- Total #models: **{total_models}**. Total #votes: **{total_votes}**. Last updated: March 29, 2024.
40
 
41
- Contribute your vote πŸ—³οΈ at [chat.lmsys.org](https://chat.lmsys.org)! Find more analysis in the [notebook]({notebook_url}).
 
 
42
  """
43
  return leaderboard_md
44
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def make_full_leaderboard_md(elo_results):
47
  leaderboard_md = f"""
@@ -202,52 +213,131 @@ def get_full_table(arena_df, model_table_df):
202
  values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
203
  return values
204
 
205
-
206
- def get_arena_table(arena_df, model_table_df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  # sort by rating
208
- arena_df = arena_df.sort_values(by=["rating"], ascending=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  values = []
210
  for i in range(len(arena_df)):
211
  row = []
212
  model_key = arena_df.index[i]
213
- model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
214
- 0
215
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- # rank
218
- ranking = arena_df.iloc[i].get("final_ranking") or i+1
219
- row.append(ranking)
220
- # model display name
221
- row.append(model_name)
222
- # elo rating
223
- row.append(round(arena_df.iloc[i]["rating"]))
224
- upper_diff = round(
225
- arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
226
- )
227
- lower_diff = round(
228
- arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"]
229
- )
230
- row.append(f"+{upper_diff}/-{lower_diff}")
231
- # num battles
232
- row.append(round(arena_df.iloc[i]["num_battles"]))
233
- # Organization
234
- row.append(
235
- model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
236
- )
237
- # license
238
- row.append(
239
- model_table_df[model_table_df["key"] == model_key]["License"].values[0]
240
- )
241
 
242
- cutoff_date = model_table_df[model_table_df["key"] == model_key]["Knowledge cutoff date"].values[0]
243
- if cutoff_date == "-":
244
- row.append("Unknown")
245
- else:
246
- row.append(cutoff_date)
247
- values.append(row)
248
- return values
249
 
250
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
 
 
251
  if elo_results_file is None: # Do live update
252
  default_md = "Loading ..."
253
  p1 = p2 = p3 = p4 = None
@@ -255,14 +345,19 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
255
  with open(elo_results_file, "rb") as fin:
256
  elo_results = pickle.load(fin)
257
  if "full" in elo_results:
258
- elo_results = elo_results["full"]
259
-
260
- p1 = elo_results["win_fraction_heatmap"]
261
- p2 = elo_results["battle_count_heatmap"]
262
- p3 = elo_results["bootstrap_elo_rating"]
263
- p4 = elo_results["average_win_rate_bar"]
264
- arena_df = elo_results["leaderboard_table_df"]
265
- default_md = make_default_md(arena_df, elo_results)
 
 
 
 
 
266
 
267
  md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
268
  if leaderboard_table_file:
@@ -274,8 +369,15 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
274
  arena_table_vals = get_arena_table(arena_df, model_table_df)
275
  with gr.Tab("Arena Elo", id=0):
276
  md = make_arena_leaderboard_md(arena_df)
277
- gr.Markdown(md, elem_id="leaderboard_markdown")
278
- gr.Dataframe(
 
 
 
 
 
 
 
279
  headers=[
280
  "Rank",
281
  "πŸ€– Model",
@@ -287,7 +389,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
287
  "Knowledge Cutoff",
288
  ],
289
  datatype=[
290
- "str",
291
  "markdown",
292
  "number",
293
  "str",
@@ -299,9 +401,48 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
299
  value=arena_table_vals,
300
  elem_id="arena_leaderboard_dataframe",
301
  height=700,
302
- column_widths=[50, 200, 120, 100, 100, 150, 150, 100],
303
  wrap=True,
304
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  with gr.Tab("Full Leaderboard", id=1):
306
  md = make_full_leaderboard_md(elo_results)
307
  gr.Markdown(md, elem_id="leaderboard_markdown")
@@ -332,53 +473,121 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
332
  else:
333
  pass
334
 
335
- gr.Markdown(
336
- f"""Note: we take the 95% confidence interval into account when determining a model's ranking.
337
- A model is ranked higher only if its lower bound of model score is higher than the upper bound of the other model's score.
338
- See Figure 3 below for visualization of the confidence intervals.
339
- """,
340
- elem_id="leaderboard_markdown"
341
- )
342
-
343
- leader_component_values[:] = [default_md, p1, p2, p3, p4]
344
-
345
- if show_plot:
346
- gr.Markdown(
347
- f"""## More Statistics for Chatbot Arena\n
348
- Below are figures for more statistics. The code for generating them is also included in this [notebook]({notebook_url}).
349
- You can find more discussions in this blog [post](https://lmsys.org/blog/2023-12-07-leaderboard/).
350
- """,
351
- elem_id="leaderboard_markdown"
352
- )
353
- with gr.Row():
354
- with gr.Column():
355
- gr.Markdown(
356
- "#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles"
357
- )
358
- plot_1 = gr.Plot(p1, show_label=False)
359
- with gr.Column():
360
- gr.Markdown(
361
- "#### Figure 2: Battle Count for Each Combination of Models (without Ties)"
362
- )
363
- plot_2 = gr.Plot(p2, show_label=False)
364
- with gr.Row():
365
- with gr.Column():
366
- gr.Markdown(
367
- "#### Figure 3: Confidence Intervals on Model Strength (via Bootstrapping)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  )
369
- plot_3 = gr.Plot(p3, show_label=False)
370
- with gr.Column():
371
- gr.Markdown(
372
- "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  )
374
- plot_4 = gr.Plot(p4, show_label=False)
375
 
376
- gr.Markdown(acknowledgment_md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
  if show_plot:
379
  return [md_1, plot_1, plot_2, plot_3, plot_4]
380
  return [md_1]
381
 
 
382
  block_css = """
383
  #notice_markdown {
384
  font-size: 104%
@@ -390,6 +599,13 @@ block_css = """
390
  padding-top: 6px;
391
  padding-bottom: 6px;
392
  }
 
 
 
 
 
 
 
393
  #leaderboard_markdown {
394
  font-size: 104%
395
  }
@@ -397,9 +613,34 @@ block_css = """
397
  padding-top: 6px;
398
  padding-bottom: 6px;
399
  }
 
 
 
 
 
 
 
400
  #leaderboard_dataframe td {
401
  line-height: 0.1em;
402
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  footer {
404
  display:none !important
405
  }
@@ -429,10 +670,13 @@ We thank [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [a1
429
 
430
  def build_demo(elo_results_file, leaderboard_table_file):
431
  text_size = gr.themes.sizes.text_lg
432
-
 
 
433
  with gr.Blocks(
434
  title="Chatbot Arena Leaderboard",
435
- theme=gr.themes.Base(text_size=text_size),
 
436
  css=block_css,
437
  ) as demo:
438
  leader_components = build_leaderboard_tab(
@@ -444,6 +688,8 @@ def build_demo(elo_results_file, leaderboard_table_file):
444
  if __name__ == "__main__":
445
  parser = argparse.ArgumentParser()
446
  parser.add_argument("--share", action="store_true")
 
 
447
  args = parser.parse_args()
448
 
449
  elo_result_files = glob.glob("elo_results_*.pkl")
@@ -455,4 +701,4 @@ if __name__ == "__main__":
455
  leaderboard_table_file = leaderboard_table_files[-1]
456
 
457
  demo = build_demo(elo_result_file, leaderboard_table_file)
458
- demo.launch(share=args.share)
 
12
  # notebook_url = "https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing"
13
  notebook_url = "https://colab.research.google.com/drive/1KdwokPjirkTmpO_P1WByFNFiqxWQquwH#scrollTo=o_CpbkGEbhrK"
14
 
 
15
  basic_component_values = [None] * 6
16
  leader_component_values = [None] * 5
17
 
 
def make_arena_leaderboard_md(arena_df):
    """Return the Markdown header shown above the Arena Elo table.

    Args:
        arena_df: Leaderboard frame with one row per model and a
            ``num_battles`` column.

    Returns:
        A Markdown string with the model count, the vote count, the
        last-updated date and links to the notebook and the voting site.
    """
    # The per-model battle counts are summed and halved to get the vote total
    # (presumably because each battle is counted once for each of its two
    # models -- TODO confirm against the code that builds num_battles).
    total_votes = sum(arena_df["num_battles"]) // 2
    total_models = len(arena_df)
    # Runs of ordinary spaces collapse to one when Markdown is rendered, so
    # the visual gap has to be made of non-breaking-space entities.
    space = "&nbsp;&nbsp;&nbsp;"
    leaderboard_md = f"""
Total #models: **{total_models}**.{space} Total #votes: **{total_votes:,}**.{space} Last updated: April 11, 2024.

📣 **NEW!** View leaderboard for different categories (e.g., coding, long user query)!

Code to recreate leaderboard tables and plots in this [notebook]({notebook_url}). Cast your vote 🗳️ at [chat.lmsys.org](https://chat.lmsys.org)!
"""
    return leaderboard_md
45
 
46
def make_category_arena_leaderboard_md(arena_df, arena_subset_df, name="Overall"):
    """Return the Markdown "coverage" panel for one leaderboard category.

    Args:
        arena_df: Overall leaderboard frame (needs a ``num_battles`` column).
        arena_subset_df: Leaderboard frame for the selected category (same
            schema as ``arena_df``).
        name: Category display name, looked up in ``cat_name_to_explanation``
            for the heading. Falls back to ``name`` itself when no
            explanation is registered.

    Returns:
        A Markdown string with the category heading and the share of models
        and votes that the category covers relative to the overall table.
    """

    def percent_of(part, whole):
        # An empty overall table would otherwise raise ZeroDivisionError.
        return round(part / whole * 100) if whole else 0

    total_votes = sum(arena_df["num_battles"]) // 2
    total_models = len(arena_df)
    total_subset_votes = sum(arena_subset_df["num_battles"]) // 2
    total_subset_models = len(arena_subset_df)

    # Runs of ordinary spaces collapse to one when Markdown is rendered, so
    # the visual gap has to be made of non-breaking-space entities.
    space = "&nbsp;&nbsp;&nbsp;"
    heading = cat_name_to_explanation.get(name, name)
    models_pct = percent_of(total_subset_models, total_models)
    votes_pct = percent_of(total_subset_votes, total_votes)

    leaderboard_md = f"""### {heading}
#### [Coverage] {space} #models: **{total_subset_models} ({models_pct}%)** {space} #votes: **{total_subset_votes:,} ({votes_pct}%)**{space}
"""
    return leaderboard_md
56
 
57
  def make_full_leaderboard_md(elo_results):
58
  leaderboard_md = f"""
 
213
  values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
214
  return values
215
 
216
def create_ranking_str(ranking, ranking_difference):
    """Format a rank for display, with an arrow showing movement.

    Args:
        ranking: Rank to display; converted with ``int()``.
        ranking_difference: Signed rank change. Positive appends an up
            arrow, negative a down arrow, anything else no arrow.

    Returns:
        ``"<rank> ↑"``, ``"<rank> ↓"`` or ``"<rank>"``.
    """
    rank_text = f"{int(ranking)}"
    if ranking_difference > 0:
        return f"{rank_text} \u2191"
    if ranking_difference < 0:
        return f"{rank_text} \u2193"
    return rank_text
225
+
226
def recompute_final_ranking(arena_df):
    """Rank models by confidence interval rather than by point estimate.

    A model's rank is one plus the number of *other* models whose lower
    bound (``rating_q025``) is strictly above this model's upper bound
    (``rating_q975``). Models with overlapping intervals therefore tie.

    Args:
        arena_df: Frame with ``rating_q025`` and ``rating_q975`` columns,
            one row per model.

    Returns:
        A list of int ranks, one per row, in the row order of ``arena_df``.
    """
    lower = arena_df["rating_q025"].to_numpy()
    upper = arena_df["rating_q975"].to_numpy()
    # clearly_better[a, b] is True when model b's lower bound exceeds model
    # a's upper bound. Working positionally (instead of via .loc on labels)
    # also keeps duplicate index labels from collapsing into one entry.
    clearly_better = lower[np.newaxis, :] > upper[:, np.newaxis]
    # A model is never compared against itself.
    clearly_better &= ~np.eye(len(lower), dtype=bool)
    return (clearly_better.sum(axis=1) + 1).tolist()
237
+
238
def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
    """Build the row values displayed in the Arena Elo table.

    Args:
        arena_df: Overall leaderboard frame indexed by model key, with
            ``final_ranking``, ``rating``, ``rating_q025``, ``rating_q975``
            and ``num_battles`` columns.
        model_table_df: Model metadata with ``key``, ``Model``,
            ``Organization``, ``License`` and ``Knowledge cutoff date``
            columns.
        arena_subset_df: Optional leaderboard frame for one category (same
            schema as ``arena_df``). When given, rows come from the subset
            and carry an extra "delta" cell comparing the category rank with
            the overall rank.

    Returns:
        A list of rows. Without a subset each row is
        ``[rank, model, rating, "+u/-l", votes, organization, license,
        cutoff]``; with a subset a rank-difference cell is inserted after
        the rank, and the rank is a string that may carry an arrow.
    """
    arena_df = arena_df.sort_values(
        by=["final_ranking", "rating"], ascending=[True, False]
    )
    # Only models with more than 2000 battles are shown. Copy so the column
    # assignments below write to our own frame rather than to a slice.
    arena_df = arena_df[arena_df["num_battles"] > 2000].copy()
    arena_df["final_ranking"] = recompute_final_ranking(arena_df)
    arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)

    if arena_subset_df is not None:
        # Keep only category models that survived the overall filter.
        in_overall = arena_subset_df.index.isin(arena_df.index)
        arena_subset_df = arena_subset_df[in_overall].sort_values(
            by=["rating"], ascending=False
        )
        arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)

        # Re-rank the overall table among the same set of models so the two
        # rankings are comparable.
        arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)].copy()
        arena_df["final_ranking"] = recompute_final_ranking(arena_df)

        # Category columns keep their names; the overall rank arrives as
        # "final_ranking_global".
        arena_df = arena_subset_df.join(
            arena_df["final_ranking"], rsuffix="_global", how="inner"
        )
        arena_df["ranking_difference"] = (
            arena_df["final_ranking_global"] - arena_df["final_ranking"]
        )
        arena_df = arena_df.sort_values(
            by=["final_ranking", "rating"], ascending=[True, False]
        )
        arena_df["final_ranking"] = [
            create_ranking_str(rank, diff)
            for rank, diff in zip(
                arena_df["final_ranking"], arena_df["ranking_difference"]
            )
        ]

    # One keyed lookup table instead of re-scanning model_table_df for every
    # cell; the first row wins for a duplicated key.
    model_info = model_table_df.drop_duplicates(subset="key").set_index("key")

    values = []
    for i, model_key in enumerate(arena_df.index):
        # The model table and the arena table do not always contain the same
        # models; rows without metadata are skipped.
        if model_key not in model_info.index:
            print(f"{model_key} - not found in model table, skipping")
            continue
        info = model_info.loc[model_key]
        entry = arena_df.iloc[i]

        try:
            rating = entry["rating"]
            rating_cell = round(rating)
            upper_diff = round(entry["rating_q975"] - rating)
            lower_diff = round(rating - entry["rating_q025"])
            votes_cell = round(entry["num_battles"])
        except (TypeError, ValueError) as e:
            # e.g. a NaN rating cannot be rounded; skip the row, keep the table.
            print(f"{model_key} - {e}")
            continue

        row = [entry.get("final_ranking") or i + 1]
        if arena_subset_df is not None:
            row.append(entry.get("ranking_difference") or 0)
        row.append(info["Model"])
        row.append(rating_cell)
        row.append(f"+{upper_diff}/-{lower_diff}")
        row.append(votes_cell)
        row.append(info["Organization"])
        row.append(info["License"])
        cutoff_date = info["Knowledge cutoff date"]
        row.append("Unknown" if cutoff_date == "-" else cutoff_date)
        values.append(row)
    return values
315
 
316
# Maps a category key to the display name used as the dictionary key in the
# other category tables. Keys are matched against the keys of the loaded Elo
# results; result keys that are missing here are skipped when building the tab.
key_to_category_name = {
    "full": "Overall",
    "coding": "Coding",
    "long_user": "Longer Query",
    "english": "English",
    "chinese": "Chinese",
    "french": "French",
    "no_tie": "Exclude Ties",
    "no_short": "Exclude Short",
}

# Maps a category display name to the heading text shown in the coverage
# panel. Every value of key_to_category_name has an entry here.
cat_name_to_explanation = {
    "Overall": "Overall Questions",
    "Coding": "Coding: whether conversation contains code snippets",
    "Longer Query": "Longer Query (>= 500 tokens)",
    "English": "English Prompts",
    "Chinese": "Chinese Prompts",
    "French": "French Prompts",
    "Exclude Ties": "Exclude Ties and Bothbad",
    "Exclude Short": "User Query >= 5 tokens",
}
 
 
 
 
336
 
 
 
 
 
 
 
 
337
 
338
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
339
+ arena_dfs = {}
340
+ category_elo_results = {}
341
  if elo_results_file is None: # Do live update
342
  default_md = "Loading ..."
343
  p1 = p2 = p3 = p4 = None
 
345
  with open(elo_results_file, "rb") as fin:
346
  elo_results = pickle.load(fin)
347
  if "full" in elo_results:
348
+ print("KEYS ", elo_results.keys())
349
+ for k in elo_results.keys():
350
+ if k not in key_to_category_name:
351
+ continue
352
+ arena_dfs[key_to_category_name[k]] = elo_results[k]["leaderboard_table_df"]
353
+ category_elo_results[key_to_category_name[k]] = elo_results[k]
354
+
355
+ p1 = category_elo_results["Overall"]["win_fraction_heatmap"]
356
+ p2 = category_elo_results["Overall"]["battle_count_heatmap"]
357
+ p3 = category_elo_results["Overall"]["bootstrap_elo_rating"]
358
+ p4 = category_elo_results["Overall"]["average_win_rate_bar"]
359
+ arena_df = arena_dfs["Overall"]
360
+ default_md = make_default_md(arena_df, category_elo_results["Overall"])
361
 
362
  md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
363
  if leaderboard_table_file:
 
369
  arena_table_vals = get_arena_table(arena_df, model_table_df)
370
  with gr.Tab("Arena Elo", id=0):
371
  md = make_arena_leaderboard_md(arena_df)
372
+ leaderboard_markdown = gr.Markdown(md, elem_id="leaderboard_markdown")
373
+ with gr.Row():
374
+ with gr.Column(scale=2):
375
+ category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
376
+ default_category_details = make_category_arena_leaderboard_md(arena_df, arena_df, name="Overall")
377
+ with gr.Column(scale=4, variant="panel"):
378
+ category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
379
+
380
+ elo_display_df = gr.Dataframe(
381
  headers=[
382
  "Rank",
383
  "πŸ€– Model",
 
389
  "Knowledge Cutoff",
390
  ],
391
  datatype=[
392
+ "number",
393
  "markdown",
394
  "number",
395
  "str",
 
401
  value=arena_table_vals,
402
  elem_id="arena_leaderboard_dataframe",
403
  height=700,
404
+ column_widths=[70, 190, 110, 100, 90, 160, 150, 140],
405
  wrap=True,
406
  )
407
+
408
+ gr.Markdown(
409
+ f"""Note: we take the 95% confidence interval into account when determining a model's ranking.
410
+ A model is ranked higher only if its lower bound of model score is higher than the upper bound of the other model's score.
411
+ See Figure 3 below for visualization of the confidence intervals. More details in [notebook]({notebook_url}).
412
+ """,
413
+ elem_id="leaderboard_markdown"
414
+ )
415
+
416
+ leader_component_values[:] = [default_md, p1, p2, p3, p4]
417
+
418
+ if show_plot:
419
+ more_stats_md = gr.Markdown(
420
+ f"""## More Statistics for Chatbot Arena (Overall)""",
421
+ elem_id="leaderboard_header_markdown"
422
+ )
423
+ with gr.Row():
424
+ with gr.Column():
425
+ gr.Markdown(
426
+ "#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles", elem_id="plot-title"
427
+ )
428
+ plot_1 = gr.Plot(p1, show_label=False, elem_id="plot-container")
429
+ with gr.Column():
430
+ gr.Markdown(
431
+ "#### Figure 2: Battle Count for Each Combination of Models (without Ties)", elem_id="plot-title"
432
+ )
433
+ plot_2 = gr.Plot(p2, show_label=False)
434
+ with gr.Row():
435
+ with gr.Column():
436
+ gr.Markdown(
437
+ "#### Figure 3: Confidence Intervals on Model Strength (via Bootstrapping)", elem_id="plot-title"
438
+ )
439
+ plot_3 = gr.Plot(p3, show_label=False)
440
+ with gr.Column():
441
+ gr.Markdown(
442
+ "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)", elem_id="plot-title"
443
+ )
444
+ plot_4 = gr.Plot(p4, show_label=False)
445
+
446
  with gr.Tab("Full Leaderboard", id=1):
447
  md = make_full_leaderboard_md(elo_results)
448
  gr.Markdown(md, elem_id="leaderboard_markdown")
 
473
  else:
474
  pass
475
 
476
+ def update_leaderboard_df(arena_table_vals):
477
+ elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "Delta", "πŸ€– Model", "⭐ Arena Elo", "πŸ“Š 95% CI", "πŸ—³οΈ Votes", "Organization", "License", "Knowledge Cutoff"])
478
+
479
+ # goal: color the rows based on the rank with styler
480
+ def highlight_max(s):
481
+ # all items in S which contain up arrow should be green, down arrow should be red, otherwise black
482
+ return ["color: green; font-weight: bold" if "\u2191" in v else "color: red; font-weight: bold" if "\u2193" in v else "" for v in s]
483
+
484
+ def highlight_rank_max(s):
485
+ return ["color: green; font-weight: bold" if v > 0 else "color: red; font-weight: bold" if v < 0 else "" for v in s]
486
+
487
+ return elo_datarame.style.apply(highlight_max, subset=["Rank"]).apply(highlight_rank_max, subset=["Delta"])
488
+
489
+ def update_leaderboard_and_plots(category):
490
+ arena_subset_df = arena_dfs[category]
491
+ arena_subset_df = arena_subset_df[arena_subset_df["num_battles"] > 500]
492
+ elo_subset_results = category_elo_results[category]
493
+ arena_df = arena_dfs["Overall"]
494
+ arena_values = get_arena_table(arena_df, model_table_df, arena_subset_df = arena_subset_df if category != "Overall" else None)
495
+ if category != "Overall":
496
+ arena_values = update_leaderboard_df(arena_values)
497
+ arena_values = gr.Dataframe(
498
+ headers=[
499
+ "Rank",
500
+ "Delta",
501
+ "πŸ€– Model",
502
+ "⭐ Arena Elo",
503
+ "πŸ“Š 95% CI",
504
+ "πŸ—³οΈ Votes",
505
+ "Organization",
506
+ "License",
507
+ "Knowledge Cutoff",
508
+ ],
509
+ datatype=[
510
+ "number",
511
+ "number",
512
+ "markdown",
513
+ "number",
514
+ "str",
515
+ "number",
516
+ "str",
517
+ "str",
518
+ "str",
519
+ ],
520
+ value=arena_values,
521
+ elem_id="arena_leaderboard_dataframe",
522
+ height=700,
523
+ column_widths=[60, 70, 190, 110, 100, 90, 160, 150, 140],
524
+ wrap=True,
525
  )
526
+ else:
527
+ arena_values = gr.Dataframe(
528
+ headers=[
529
+ "Rank",
530
+ "πŸ€– Model",
531
+ "⭐ Arena Elo",
532
+ "πŸ“Š 95% CI",
533
+ "πŸ—³οΈ Votes",
534
+ "Organization",
535
+ "License",
536
+ "Knowledge Cutoff",
537
+ ],
538
+ datatype=[
539
+ "number",
540
+ "markdown",
541
+ "number",
542
+ "str",
543
+ "number",
544
+ "str",
545
+ "str",
546
+ "str",
547
+ ],
548
+ value=arena_values,
549
+ elem_id="arena_leaderboard_dataframe",
550
+ height=700,
551
+ column_widths=[70, 190, 110, 100, 90, 160, 150, 140],
552
+ wrap=True,
553
  )
 
554
 
555
+ p1 = elo_subset_results["win_fraction_heatmap"]
556
+ p2 = elo_subset_results["battle_count_heatmap"]
557
+ p3 = elo_subset_results["bootstrap_elo_rating"]
558
+ p4 = elo_subset_results["average_win_rate_bar"]
559
+ more_stats_md = f"""## More Statistics for Chatbot Arena - {category}
560
+ """
561
+ leaderboard_md = make_category_arena_leaderboard_md(arena_df, arena_subset_df, name=category)
562
+ return arena_values, p1, p2, p3, p4, more_stats_md, leaderboard_md
563
+
564
+ category_dropdown.change(update_leaderboard_and_plots, inputs=[category_dropdown], outputs=[elo_display_df, plot_1, plot_2, plot_3, plot_4, more_stats_md, category_deets])
565
+
566
+ with gr.Accordion(
567
+ "πŸ“ Citation",
568
+ open=True,
569
+ ):
570
+ citation_md = """
571
+ ### Citation
572
+ Please cite the following paper if you find our leaderboard or dataset helpful.
573
+ ```
574
+ @misc{chiang2024chatbot,
575
+ title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
576
+ author={Wei-Lin Chiang and Lianmin Zheng and Ying Sheng and Anastasios Nikolas Angelopoulos and Tianle Li and Dacheng Li and Hao Zhang and Banghua Zhu and Michael Jordan and Joseph E. Gonzalez and Ion Stoica},
577
+ year={2024},
578
+ eprint={2403.04132},
579
+ archivePrefix={arXiv},
580
+ primaryClass={cs.AI}
581
+ }
582
+ """
583
+ gr.Markdown(citation_md, elem_id="leaderboard_markdown")
584
+ gr.Markdown(acknowledgment_md)
585
 
586
  if show_plot:
587
  return [md_1, plot_1, plot_2, plot_3, plot_4]
588
  return [md_1]
589
 
590
+
591
  block_css = """
592
  #notice_markdown {
593
  font-size: 104%
 
599
  padding-top: 6px;
600
  padding-bottom: 6px;
601
  }
602
+
603
+ #category_deets {
604
+ text-align: center;
605
+ padding: 0px;
606
+ padding-left: 5px;
607
+ }
608
+
609
  #leaderboard_markdown {
610
  font-size: 104%
611
  }
 
613
  padding-top: 6px;
614
  padding-bottom: 6px;
615
  }
616
+
617
+ #leaderboard_header_markdown {
618
+ font-size: 104%;
619
+ text-align: center;
620
+ display:block;
621
+ }
622
+
623
  #leaderboard_dataframe td {
624
  line-height: 0.1em;
625
  }
626
+
627
+ #plot-title {
628
+ text-align: center;
629
+ display:block;
630
+ }
631
+
632
+ #non-interactive-button {
633
+ display: inline-block;
634
+ padding: 10px 10px;
635
+ background-color: #f7f7f7; /* Super light grey background */
636
+ text-align: center;
637
+ font-size: 26px; /* Larger text */
638
+ border-radius: 0; /* Straight edges, no border radius */
639
+ border: 0px solid #dcdcdc; /* A light grey border to match the background */
640
+ user-select: none; /* The text inside the button is not selectable */
641
+ pointer-events: none; /* The button is non-interactive */
642
+ }
643
+
644
  footer {
645
  display:none !important
646
  }
 
670
 
671
  def build_demo(elo_results_file, leaderboard_table_file):
672
  text_size = gr.themes.sizes.text_lg
673
+ theme = gr.themes.Base(text_size=text_size)
674
+ theme.set(button_secondary_background_fill_hover="*primary_300",
675
+ button_secondary_background_fill_hover_dark="*primary_700")
676
  with gr.Blocks(
677
  title="Chatbot Arena Leaderboard",
678
+ theme=theme,
679
+ # theme = gr.themes.Base.load("theme.json"), # uncomment to use new cool theme
680
  css=block_css,
681
  ) as demo:
682
  leader_components = build_leaderboard_tab(
 
688
  if __name__ == "__main__":
689
  parser = argparse.ArgumentParser()
690
  parser.add_argument("--share", action="store_true")
691
+ parser.add_argument("--host", default="0.0.0.0")
692
+ parser.add_argument("--port", type=int, default=7860)
693
  args = parser.parse_args()
694
 
695
  elo_result_files = glob.glob("elo_results_*.pkl")
 
701
  leaderboard_table_file = leaderboard_table_files[-1]
702
 
703
  demo = build_demo(elo_result_file, leaderboard_table_file)
704
+ demo.launch(share=args.share, server_name=args.host, server_port=args.port)
elo_results_20240329.pkl β†’ elo_results_20240327.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4c037f68c9ddbf27b70b1cb333ca37bf70ff9a3cddad7a93cd62bca709cd77
3
- size 115776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab4e9fa00e9d7c8244723993174af2c4f35ffc8487cc3059504b72658f06f43
3
+ size 457743
elo_results_20240403.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8cebf41da8c06eee0f37156e01be83cc43182e0f00444311b4ad97a83154be
3
+ size 690286
elo_results_20240409.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6774f780b63f569666e9a85b12eddceef3af75e1d1799ff7c6e0529102950c3
3
+ size 119947
elo_results_20240410.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5757ab9692c6121451f2c787700507fe6b866837329ab0a47a9003a274338f
3
+ size 120963
elo_results_20240411.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fada8d86ddb6dae319c5bda602d921859cc4280fdd53388eff446d80c3ab8192
3
+ size 1183214
leaderboard_table_20240329.csv β†’ leaderboard_table_20240404.csv RENAMED
@@ -88,4 +88,7 @@ codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,
88
  olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct
89
  claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
90
  starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta
91
- command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere AI,https://txt.cohere.com/command-r
 
 
 
 
88
  olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct
89
  claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
90
  starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta
91
+ dbrx-instruct,DBRX-instruct,-,-,2024/3,Apache-2.0,Databricks,-
92
+ command-r,Command R,-,-,2024/3,Apache-2.0,Cohere,-
93
+ qwen1.5-14b-chat,Qwen1.5-14B-Chat,-,-,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
94
+ qwen1.5-32b-chat,Qwen1.5-32B-Chat,-,-,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
leaderboard_table_20240409.csv ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link
2
+ wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0
3
+ vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k
4
+ wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1
5
+ tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b
6
+ guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged
7
+ openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor
8
+ wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0
9
+ vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k
10
+ baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b
11
+ xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst
12
+ nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b
13
+ mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct
14
+ falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct
15
+ h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b
16
+ gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
17
+ gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4
18
+ claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
19
+ gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
20
+ claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2
21
+ claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1
22
+ gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
23
+ mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/
24
+ claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
25
+ gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
26
+ tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b
27
+ yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat
28
+ gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/
29
+ gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview
30
+ bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/
31
+ wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0
32
+ vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3
33
+ starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha
34
+ pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
35
+ openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5
36
+ openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B
37
+ gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
38
+ llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
39
+ solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0
40
+ dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b
41
+ wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2
42
+ zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
43
+ mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat
44
+ vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5
45
+ qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat
46
+ zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
47
+ codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf
48
+ falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat
49
+ guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged
50
+ llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
51
+ mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
52
+ pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
53
+ llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
54
+ vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5
55
+ palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models
56
+ koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/
57
+ chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b
58
+ gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy
59
+ mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat
60
+ chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b
61
+ RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven
62
+ alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html
63
+ oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
64
+ chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b
65
+ fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0
66
+ stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b
67
+ dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b
68
+ llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971
69
+ mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/
70
+ llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat
71
+ stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B
72
+ deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat
73
+ gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
74
+ qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
75
+ qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
76
+ qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
77
+ openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106
78
+ nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
79
+ gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo
80
+ mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat
81
+ mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/
82
+ gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it
83
+ gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it
84
+ mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
85
+ claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
86
+ claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
87
+ codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf
88
+ olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct
89
+ claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
90
+ starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta
91
+ command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r
92
+ qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
93
+ qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/
94
+ command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/
95
+ gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it
leaderboard_table_20240410.csv ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link
2
+ wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0
3
+ vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k
4
+ wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1
5
+ tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b
6
+ guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged
7
+ openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor
8
+ wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0
9
+ vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k
10
+ baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b
11
+ xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst
12
+ nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b
13
+ mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct
14
+ falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct
15
+ h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b
16
+ gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
17
+ gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4
18
+ claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
19
+ gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
20
+ claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2
21
+ claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1
22
+ gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
23
+ mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/
24
+ claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
25
+ gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
26
+ tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b
27
+ yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat
28
+ gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/
29
+ gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview
30
+ bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/
31
+ wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0
32
+ vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3
33
+ starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha
34
+ pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
35
+ openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5
36
+ openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B
37
+ gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
38
+ llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
39
+ solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0
40
+ dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b
41
+ wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2
42
+ zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
43
+ mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat
44
+ vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5
45
+ qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat
46
+ zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
47
+ codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf
48
+ falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat
49
+ guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged
50
+ llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
51
+ mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
52
+ pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
53
+ llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
54
+ vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5
55
+ palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models
56
+ koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/
57
+ chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b
58
+ gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy
59
+ mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat
60
+ chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b
61
+ RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven
62
+ alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html
63
+ oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
64
+ chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b
65
+ fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0
66
+ stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b
67
+ dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b
68
+ llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971
69
+ mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/
70
+ llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat
71
+ stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B
72
+ deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat
73
+ gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
74
+ qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
75
+ qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
76
+ qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
77
+ openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106
78
+ nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
79
+ gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo
80
+ mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat
81
+ mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/
82
+ gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it
83
+ gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it
84
+ mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
85
+ claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
86
+ claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
87
+ codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf
88
+ olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct
89
+ claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
90
+ starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta
91
+ command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r
92
+ qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
93
+ qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/
94
+ command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/
95
+ gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it
96
+ dbrx-instruct,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm
leaderboard_table_20240411.csv ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ key,Model,MT-bench (score),MMLU,Knowledge cutoff date,License,Organization,Link
2
+ wizardlm-30b,WizardLM-30B,7.01,0.587,2023/6,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-30B-V1.0
3
+ vicuna-13b-16k,Vicuna-13B-16k,6.92,0.545,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5-16k
4
+ wizardlm-13b-v1.1,WizardLM-13B-v1.1,6.76,0.500,2023/7,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.1
5
+ tulu-30b,Tulu-30B,6.43,0.581,2023/6,Non-commercial,AllenAI/UW,https://huggingface.co/allenai/tulu-30b
6
+ guanaco-65b,Guanaco-65B,6.41,0.621,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-65b-merged
7
+ openassistant-llama-30b,OpenAssistant-LLaMA-30B,6.41,0.560,2023/4,Non-commercial,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-6-llama-30b-xor
8
+ wizardlm-13b-v1.0,WizardLM-13B-v1.0,6.35,0.523,2023/5,Non-commercial,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.0
9
+ vicuna-7b-16k,Vicuna-7B-16k,6.22,0.485,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5-16k
10
+ baize-v2-13b,Baize-v2-13B,5.75,0.489,2023/4,Non-commercial,UCSD,https://huggingface.co/project-baize/baize-v2-13b
11
+ xgen-7b-8k-inst,XGen-7B-8K-Inst,5.55,0.421,2023/7,Non-commercial,Salesforce,https://huggingface.co/Salesforce/xgen-7b-8k-inst
12
+ nous-hermes-13b,Nous-Hermes-13B,5.51,0.493,2023/6,Non-commercial,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-13b
13
+ mpt-30b-instruct,MPT-30B-Instruct,5.22,0.478,2023/6,CC-BY-SA 3.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-instruct
14
+ falcon-40b-instruct,Falcon-40B-Instruct,5.17,0.547,2023/5,Apache 2.0,TII,https://huggingface.co/tiiuae/falcon-40b-instruct
15
+ h2o-oasst-openllama-13b,H2O-Oasst-OpenLLaMA-13B,4.63,0.428,2023/6,Apache 2.0,h2oai,https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b
16
+ gpt-4-1106-preview,GPT-4-1106-preview,9.32,-,2023/4,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
17
+ gpt-4-0314,GPT-4-0314,8.96,0.864,2021/9,Proprietary,OpenAI,https://openai.com/research/gpt-4
18
+ claude-1,Claude-1,7.90,0.770,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
19
+ gpt-4-0613,GPT-4-0613,9.18,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
20
+ claude-2.0,Claude-2.0,8.06,0.785,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2
21
+ claude-2.1,Claude-2.1,8.18,-,-,Proprietary,Anthropic,https://www.anthropic.com/index/claude-2-1
22
+ gpt-3.5-turbo-0613,GPT-3.5-Turbo-0613,8.39,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
23
+ mixtral-8x7b-instruct-v0.1,Mixtral-8x7b-Instruct-v0.1,8.30,0.706,2023/12,Apache 2.0,Mistral,https://mistral.ai/news/mixtral-of-experts/
24
+ claude-instant-1,Claude-Instant-1,7.85,0.734,-,Proprietary,Anthropic,https://www.anthropic.com/index/introducing-claude
25
+ gpt-3.5-turbo-0314,GPT-3.5-Turbo-0314,7.94,0.700,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
26
+ tulu-2-dpo-70b,Tulu-2-DPO-70B,7.89,-,2023/11,AI2 ImpACT Low-risk,AllenAI/UW,https://huggingface.co/allenai/tulu-2-dpo-70b
27
+ yi-34b-chat,Yi-34B-Chat,-,0.735,2023/6,Yi License,01 AI,https://huggingface.co/01-ai/Yi-34B-Chat
28
+ gemini-pro,Gemini Pro,-,0.718,2023/4,Proprietary,Google,https://blog.google/technology/ai/gemini-api-developers-cloud/
29
+ gemini-pro-dev-api,Gemini Pro (Dev API),-,0.718,2023/4,Proprietary,Google,https://ai.google.dev/docs/gemini_api_overview
30
+ bard-jan-24-gemini-pro,Bard (Gemini Pro),-,-,Online,Proprietary,Google,https://bard.google.com/
31
+ wizardlm-70b,WizardLM-70B-v1.0,7.71,0.637,2023/8,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-70B-V1.0
32
+ vicuna-33b,Vicuna-33B,7.12,0.592,2023/8,Non-commercial,LMSYS,https://huggingface.co/lmsys/vicuna-33b-v1.3
33
+ starling-lm-7b-alpha,Starling-LM-7B-alpha,8.09,0.639,2023/11,CC-BY-NC-4.0,UC Berkeley,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha
34
+ pplx-70b-online,pplx-70b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
35
+ openchat-3.5,OpenChat-3.5,7.81,0.643,2023/11,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat_3.5
36
+ openhermes-2.5-mistral-7b,OpenHermes-2.5-Mistral-7b,-,-,2023/11,Apache-2.0,NousResearch,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B
37
+ gpt-3.5-turbo-1106,GPT-3.5-Turbo-1106,8.32,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5
38
+ llama-2-70b-chat,Llama-2-70b-chat,6.86,0.630,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
39
+ solar-10.7b-instruct-v1.0,SOLAR-10.7B-Instruct-v1.0,7.58,0.662,2023/11,CC-BY-NC-4.0,Upstage AI,https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0
40
+ dolphin-2.2.1-mistral-7b,Dolphin-2.2.1-Mistral-7B,-,-,2023/10,Apache-2.0,Cognitive Computations,https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b
41
+ wizardlm-13b,WizardLM-13b-v1.2,7.20,0.527,2023/7,Llama 2 Community,Microsoft,https://huggingface.co/WizardLM/WizardLM-13B-V1.2
42
+ zephyr-7b-beta,Zephyr-7b-beta,7.34,0.614,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
43
+ mpt-30b-chat,MPT-30B-chat,6.39,0.504,2023/6,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-30b-chat
44
+ vicuna-13b,Vicuna-13B,6.57,0.558,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-13b-v1.5
45
+ qwen-14b-chat,Qwen-14B-Chat,6.96,0.665,2023/8,Qianwen LICENSE,Alibaba,https://huggingface.co/Qwen/Qwen-14B-Chat
46
+ zephyr-7b-alpha,Zephyr-7b-alpha,6.88,-,2023/10,MIT,HuggingFace,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha
47
+ codellama-34b-instruct,CodeLlama-34B-instruct,-,0.537,2023/7,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf
48
+ falcon-180b-chat,falcon-180b-chat,-,0.680,2023/9,Falcon-180B TII License,TII,https://huggingface.co/tiiuae/falcon-180B-chat
49
+ guanaco-33b,Guanaco-33B,6.53,0.576,2023/5,Non-commercial,UW,https://huggingface.co/timdettmers/guanaco-33b-merged
50
+ llama-2-13b-chat,Llama-2-13b-chat,6.65,0.536,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
51
+ mistral-7b-instruct,Mistral-7B-Instruct-v0.1,6.84,0.554,2023/9,Apache 2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
52
+ pplx-7b-online,pplx-7b-online,-,-,Online,Proprietary,Perplexity AI,https://blog.perplexity.ai/blog/introducing-pplx-online-llms
53
+ llama-2-7b-chat,Llama-2-7b-chat,6.27,0.458,2023/7,Llama 2 Community,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
54
+ vicuna-7b,Vicuna-7B,6.17,0.498,2023/7,Llama 2 Community,LMSYS,https://huggingface.co/lmsys/vicuna-7b-v1.5
55
+ palm-2,PaLM-Chat-Bison-001,6.40,-,2021/6,Proprietary,Google,https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models#foundation_models
56
+ koala-13b,Koala-13B,5.35,0.447,2023/4,Non-commercial,UC Berkeley,https://bair.berkeley.edu/blog/2023/04/03/koala/
57
+ chatglm3-6b,ChatGLM3-6B,-,-,2023/10,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm3-6b
58
+ gpt4all-13b-snoozy,GPT4All-13B-Snoozy,5.41,0.430,2023/3,Non-commercial,Nomic AI,https://huggingface.co/nomic-ai/gpt4all-13b-snoozy
59
+ mpt-7b-chat,MPT-7B-Chat,5.42,0.320,2023/5,CC-BY-NC-SA-4.0,MosaicML,https://huggingface.co/mosaicml/mpt-7b-chat
60
+ chatglm2-6b,ChatGLM2-6B,4.96,0.455,2023/6,Apache-2.0,Tsinghua,https://huggingface.co/THUDM/chatglm2-6b
61
+ RWKV-4-Raven-14B,RWKV-4-Raven-14B,3.98,0.256,2023/4,Apache 2.0,RWKV,https://huggingface.co/BlinkDL/rwkv-4-raven
62
+ alpaca-13b,Alpaca-13B,4.53,0.481,2023/3,Non-commercial,Stanford,https://crfm.stanford.edu/2023/03/13/alpaca.html
63
+ oasst-pythia-12b,OpenAssistant-Pythia-12B,4.32,0.270,2023/4,Apache 2.0,OpenAssistant,https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
64
+ chatglm-6b,ChatGLM-6B,4.50,0.361,2023/3,Non-commercial,Tsinghua,https://huggingface.co/THUDM/chatglm-6b
65
+ fastchat-t5-3b,FastChat-T5-3B,3.04,0.477,2023/4,Apache 2.0,LMSYS,https://huggingface.co/lmsys/fastchat-t5-3b-v1.0
66
+ stablelm-tuned-alpha-7b,StableLM-Tuned-Alpha-7B,2.75,0.244,2023/4,CC-BY-NC-SA-4.0,Stability AI,https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b
67
+ dolly-v2-12b,Dolly-V2-12B,3.28,0.257,2023/4,MIT,Databricks,https://huggingface.co/databricks/dolly-v2-12b
68
+ llama-13b,LLaMA-13B,2.61,0.470,2023/2,Non-commercial,Meta,https://arxiv.org/abs/2302.13971
69
+ mistral-medium,Mistral Medium,8.61,0.753,-,Proprietary,Mistral,https://mistral.ai/news/la-plateforme/
70
+ llama2-70b-steerlm-chat,NV-Llama2-70B-SteerLM-Chat,7.54,0.685,2023/11,Llama 2 Community,Nvidia,https://huggingface.co/nvidia/Llama2-70B-SteerLM-Chat
71
+ stripedhyena-nous-7b,StripedHyena-Nous-7B,-,-,2023/12,Apache 2.0,Together AI,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B
72
+ deepseek-llm-67b-chat,DeepSeek-LLM-67B-Chat,-,0.713,2023/11,DeepSeek License,DeepSeek AI,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat
73
+ gpt-4-0125-preview,GPT-4-0125-preview,-,-,2023/12,Proprietary,OpenAI,https://openai.com/blog/new-models-and-developer-products-announced-at-devday
74
+ qwen1.5-72b-chat,Qwen1.5-72B-Chat,8.61,0.775,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
75
+ qwen1.5-7b-chat,Qwen1.5-7B-Chat,7.6,0.610,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
76
+ qwen1.5-4b-chat,Qwen1.5-4B-Chat,-,0.561,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
77
+ openchat-3.5-0106,OpenChat-3.5-0106,7.8,0.658,2024/1,Apache-2.0,OpenChat,https://huggingface.co/openchat/openchat-3.5-0106
78
+ nous-hermes-2-mixtral-8x7b-dpo,Nous-Hermes-2-Mixtral-8x7B-DPO,-,-,2024/1,Apache-2.0,NousResearch,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
79
+ gpt-3.5-turbo-0125,GPT-3.5-Turbo-0125,-,-,2021/9,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-3-5-turbo
80
+ mistral-next,Mistral-Next,-,-,-,Proprietary,Mistral,https://chat.mistral.ai/chat
81
+ mistral-large-2402,Mistral-Large-2402,-,0.812,-,Proprietary,Mistral,https://mistral.ai/news/mistral-large/
82
+ gemma-7b-it,Gemma-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-7b-it
83
+ gemma-2b-it,Gemma-2B-it,-,0.423,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-2b-it
84
+ mistral-7b-instruct-v0.2,Mistral-7B-Instruct-v0.2,7.6,-,2023/12,Apache-2.0,Mistral,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
85
+ claude-3-sonnet-20240229,Claude 3 Sonnet,-,0.790,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
86
+ claude-3-opus-20240229,Claude 3 Opus,-,0.868,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
87
+ codellama-70b-instruct,CodeLlama-70B-instruct,-,-,2024/1,Llama 2 Community,Meta,https://huggingface.co/codellama/CodeLlama-70b-hf
88
+ olmo-7b-instruct,OLMo-7B-instruct,-,-,2024/2,Apache-2.0,Allen AI,https://huggingface.co/allenai/OLMo-7B-Instruct
89
+ claude-3-haiku-20240307,Claude 3 Haiku,-,0.752,2023/8,Proprietary,Anthropic,https://www.anthropic.com/news/claude-3-family
90
+ starling-lm-7b-beta,Starling-LM-7B-beta,8.12,-,2024/3,Apache-2.0,Nexusflow,https://huggingface.co/Nexusflow/Starling-LM-7B-beta
91
+ command-r,Command R,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r
92
+ qwen1.5-14b-chat,Qwen1.5-14B-Chat,7.91,0.676,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5/
93
+ qwen1.5-32b-chat,Qwen1.5-32B-Chat,8.30,0.734,2024/2,Qianwen LICENSE,Alibaba,https://qwenlm.github.io/blog/qwen1.5-32b/
94
+ command-r-plus,Command R+,-,-,2024/3,CC-BY-NC-4.0,Cohere,https://txt.cohere.com/command-r-plus-microsoft-azure/
95
+ gemma-1.1-7b-it,Gemma-1.1-7B-it,-,0.643,2024/2,Gemma license,Google,https://huggingface.co/google/gemma-1.1-7b-it
96
+ dbrx-instruct-preview,DBRX-Instruct-Preview,-,0.737,2023/12,DBRX LICENSE,Databricks,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm
97
+ gpt-4-turbo-2024-04-09,GPT-4-Turbo-2024-04-09,-,-,2023/12,Proprietary,OpenAI,https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4
theme.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"theme": {"_font": [{"__gradio_font__": true, "name": "Rubik", "class": "google"}, {"__gradio_font__": true, "name": "ui-sans-serif", "class": "font"}, {"__gradio_font__": true, "name": "system-ui", "class": "font"}, {"__gradio_font__": true, "name": "sans-serif", "class": "font"}], "_font_mono": [{"__gradio_font__": true, "name": "Inconsolata", "class": "google"}, {"__gradio_font__": true, "name": "ui-monospace", "class": "font"}, {"__gradio_font__": true, "name": "Consolas", "class": "font"}, {"__gradio_font__": true, "name": "monospace", "class": "font"}], "_stylesheets": ["https://fonts.googleapis.com/css2?family=Rubik:wght@400;500&display=swap", "https://fonts.googleapis.com/css2?family=Inconsolata:wght@400;500&display=swap"], "text_size": "20px", "background_fill_primary": "white", "background_fill_primary_dark": "*neutral_950", "background_fill_secondary": "*neutral_50", "background_fill_secondary_dark": "*neutral_900", "block_background_fill": "*background_fill_primary", "block_background_fill_dark": "*neutral_800", "block_border_color": "*border_color_primary", "block_border_color_dark": "*border_color_primary", "block_border_width": "1px", "block_border_width_dark": "1px", "block_info_text_color": "*body_text_color_subdued", "block_info_text_color_dark": "*body_text_color_subdued", "block_info_text_size": "*text_sm", "block_info_text_weight": "400", "block_label_background_fill": "*background_fill_primary", "block_label_background_fill_dark": "*background_fill_secondary", "block_label_border_color": "*border_color_primary", "block_label_border_color_dark": "*border_color_primary", "block_label_border_width": "1px", "block_label_border_width_dark": "1px", "block_label_margin": "0", "block_label_padding": "*spacing_sm *spacing_lg", "block_label_radius": "calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0", "block_label_right_radius": "0 calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px)", "block_label_shadow": "*block_shadow", "block_label_text_color": 
"*neutral_500", "block_label_text_color_dark": "*neutral_200", "block_label_text_size": "*text_sm", "block_label_text_weight": "400", "block_padding": "*spacing_xl calc(*spacing_xl + 2px)", "block_radius": "*radius_lg", "block_shadow": "none", "block_shadow_dark": "none", "block_title_background_fill": "none", "block_title_background_fill_dark": "none", "block_title_border_color": "none", "block_title_border_color_dark": "none", "block_title_border_width": "0px", "block_title_border_width_dark": "0px", "block_title_padding": "0", "block_title_radius": "none", "block_title_text_color": "*neutral_500", "block_title_text_color_dark": "*neutral_200", "block_title_text_size": "*text_md", "block_title_text_weight": "400", "body_background_fill": "*background_fill_primary", "body_background_fill_dark": "*background_fill_primary", "body_text_color": "*neutral_700", "body_text_color_dark": "*neutral_200", "body_text_color_subdued": "*neutral_400", "body_text_color_subdued_dark": "*neutral_500", "body_text_size": "*text_md", "body_text_weight": "400", "border_color_accent": "*primary_300", "border_color_accent_dark": "*neutral_600", "border_color_primary": "*neutral_200", "border_color_primary_dark": "*neutral_700", "button_border_width": "*input_border_width", "button_border_width_dark": "*input_border_width", "button_cancel_background_fill": "*button_secondary_background_fill", "button_cancel_background_fill_dark": "*button_secondary_background_fill", "button_cancel_background_fill_hover": "*button_cancel_background_fill", "button_cancel_background_fill_hover_dark": "*button_cancel_background_fill", "button_cancel_border_color": "*button_secondary_border_color", "button_cancel_border_color_dark": "*button_secondary_border_color", "button_cancel_border_color_hover": "*button_cancel_border_color", "button_cancel_border_color_hover_dark": "*button_cancel_border_color", "button_cancel_text_color": "*button_secondary_text_color", "button_cancel_text_color_dark": 
"*button_secondary_text_color", "button_cancel_text_color_hover": "*button_cancel_text_color", "button_cancel_text_color_hover_dark": "*button_cancel_text_color", "button_large_padding": "*spacing_lg calc(2 * *spacing_lg)", "button_large_radius": "*radius_lg", "button_large_text_size": "*text_lg", "button_large_text_weight": "500", "button_primary_background_fill": "*primary_200", "button_primary_background_fill_dark": "*primary_700", "button_primary_background_fill_hover": "*button_primary_background_fill", "button_primary_background_fill_hover_dark": "*button_primary_background_fill", "button_primary_border_color": "*primary_200", "button_primary_border_color_dark": "*primary_600", "button_primary_border_color_hover": "*button_primary_border_color", "button_primary_border_color_hover_dark": "*button_primary_border_color", "button_primary_text_color": "*primary_600", "button_primary_text_color_dark": "white", "button_primary_text_color_hover": "*button_primary_text_color", "button_primary_text_color_hover_dark": "*button_primary_text_color", "button_secondary_background_fill": "*neutral_200", "button_secondary_background_fill_dark": "*neutral_600", "button_secondary_background_fill_hover": "*neutral_300", "button_secondary_background_fill_hover_dark": "*neutral_500", "button_secondary_border_color": "*neutral_200", "button_secondary_border_color_dark": "*neutral_600", "button_secondary_border_color_hover": "*button_secondary_border_color", "button_secondary_border_color_hover_dark": "*button_secondary_border_color", "button_secondary_text_color": "*neutral_700", "button_secondary_text_color_dark": "white", "button_secondary_text_color_hover": "*button_secondary_text_color", "button_secondary_text_color_hover_dark": "*button_secondary_text_color", "button_shadow": "none", "button_shadow_active": "none", "button_shadow_hover": "none", "button_small_padding": "*spacing_sm calc(2 * *spacing_sm)", "button_small_radius": "*radius_lg", "button_small_text_size": 
"*text_md", "button_small_text_weight": "400", "button_transition": "background-color 0.2s ease", "checkbox_background_color": "*background_fill_primary", "checkbox_background_color_dark": "*neutral_800", "checkbox_background_color_focus": "*checkbox_background_color", "checkbox_background_color_focus_dark": "*checkbox_background_color", "checkbox_background_color_hover": "*checkbox_background_color", "checkbox_background_color_hover_dark": "*checkbox_background_color", "checkbox_background_color_selected": "*secondary_600", "checkbox_background_color_selected_dark": "*secondary_600", "checkbox_border_color": "*neutral_300", "checkbox_border_color_dark": "*neutral_700", "checkbox_border_color_focus": "*secondary_500", "checkbox_border_color_focus_dark": "*secondary_500", "checkbox_border_color_hover": "*neutral_300", "checkbox_border_color_hover_dark": "*neutral_600", "checkbox_border_color_selected": "*secondary_600", "checkbox_border_color_selected_dark": "*secondary_600", "checkbox_border_radius": "*radius_sm", "checkbox_border_width": "*input_border_width", "checkbox_border_width_dark": "*input_border_width", "checkbox_check": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e\")", "checkbox_label_background_fill": "*button_secondary_background_fill", "checkbox_label_background_fill_dark": "*button_secondary_background_fill", "checkbox_label_background_fill_hover": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_hover_dark": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_selected": "*checkbox_label_background_fill", "checkbox_label_background_fill_selected_dark": "*checkbox_label_background_fill", "checkbox_label_border_color": "*border_color_primary", "checkbox_label_border_color_dark": "*border_color_primary", 
"checkbox_label_border_color_hover": "*checkbox_label_border_color", "checkbox_label_border_color_hover_dark": "*checkbox_label_border_color", "checkbox_label_border_width": "*input_border_width", "checkbox_label_border_width_dark": "*input_border_width", "checkbox_label_gap": "*spacing_lg", "checkbox_label_padding": "*spacing_md calc(2 * *spacing_md)", "checkbox_label_shadow": "none", "checkbox_label_text_color": "*body_text_color", "checkbox_label_text_color_dark": "*body_text_color", "checkbox_label_text_color_selected": "*checkbox_label_text_color", "checkbox_label_text_color_selected_dark": "*checkbox_label_text_color", "checkbox_label_text_size": "*text_md", "checkbox_label_text_weight": "400", "checkbox_shadow": "*input_shadow", "color_accent": "*primary_500", "color_accent_soft": "*primary_50", "color_accent_soft_dark": "*neutral_700", "container_radius": "*radius_lg", "embed_radius": "*radius_md", "error_background_fill": "#fee2e2", "error_background_fill_dark": "*background_fill_primary", "error_border_color": "#fecaca", "error_border_color_dark": "*border_color_primary", "error_border_width": "1px", "error_border_width_dark": "1px", "error_text_color": "#ef4444", "error_text_color_dark": "#ef4444", "font": "'Rubik', 'ui-sans-serif', 'system-ui', sans-serif", "font_mono": "'Inconsolata', 'ui-monospace', 'Consolas', monospace", "form_gap_width": "0px", "input_background_fill": "*neutral_100", "input_background_fill_dark": "*neutral_700", "input_background_fill_focus": "*secondary_500", "input_background_fill_focus_dark": "*secondary_600", "input_background_fill_hover": "*input_background_fill", "input_background_fill_hover_dark": "*input_background_fill", "input_border_color": "*border_color_primary", "input_border_color_dark": "*border_color_primary", "input_border_color_focus": "*secondary_300", "input_border_color_focus_dark": "*neutral_700", "input_border_color_hover": "*input_border_color", "input_border_color_hover_dark": "*input_border_color", 
"input_border_width": "0px", "input_border_width_dark": "0px", "input_padding": "*spacing_xl", "input_placeholder_color": "*neutral_400", "input_placeholder_color_dark": "*neutral_500", "input_radius": "*radius_lg", "input_shadow": "none", "input_shadow_dark": "none", "input_shadow_focus": "*input_shadow", "input_shadow_focus_dark": "*input_shadow", "input_text_size": "*text_md", "input_text_weight": "400", "layout_gap": "*spacing_xxl", "link_text_color": "*secondary_600", "link_text_color_active": "*secondary_600", "link_text_color_active_dark": "*secondary_500", "link_text_color_dark": "*secondary_500", "link_text_color_hover": "*secondary_700", "link_text_color_hover_dark": "*secondary_400", "link_text_color_visited": "*secondary_500", "link_text_color_visited_dark": "*secondary_600", "loader_color": "*color_accent", "loader_color_dark": "*color_accent", "name": "base", "neutral_100": "#f5f5f4", "neutral_200": "#e7e5e4", "neutral_300": "#d6d3d1", "neutral_400": "#a8a29e", "neutral_50": "#fafaf9", "neutral_500": "#78716c", "neutral_600": "#57534e", "neutral_700": "#44403c", "neutral_800": "#292524", "neutral_900": "#1c1917", "neutral_950": "#0f0e0d", "panel_background_fill": "*background_fill_secondary", "panel_background_fill_dark": "*background_fill_secondary", "panel_border_color": "*border_color_primary", "panel_border_color_dark": "*border_color_primary", "panel_border_width": "0", "panel_border_width_dark": "0", "primary_100": "#e0f2fe", "primary_200": "#bae6fd", "primary_300": "#7dd3fc", "primary_400": "#38bdf8", "primary_50": "#f0f9ff", "primary_500": "#0ea5e9", "primary_600": "#0284c7", "primary_700": "#0369a1", "primary_800": "#075985", "primary_900": "#0c4a6e", "primary_950": "#0b4165", "prose_header_text_weight": "500", "prose_text_size": "*text_md", "prose_text_weight": "400", "radio_circle": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e\")", 
"radius_lg": "3px", "radius_md": "3px", "radius_sm": "3px", "radius_xl": "3px", "radius_xs": "3px", "radius_xxl": "3px", "radius_xxs": "3px", "secondary_100": "#e0f2fe", "secondary_200": "#bae6fd", "secondary_300": "#7dd3fc", "secondary_400": "#38bdf8", "secondary_50": "#f0f9ff", "secondary_500": "#0ea5e9", "secondary_600": "#0284c7", "secondary_700": "#0369a1", "secondary_800": "#075985", "secondary_900": "#0c4a6e", "secondary_950": "#0b4165", "section_header_text_size": "*text_md", "section_header_text_weight": "400", "shadow_drop": "rgba(0,0,0,0.05) 0px 1px 2px 0px", "shadow_drop_lg": "0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)", "shadow_inset": "rgba(0,0,0,0.05) 0px 2px 4px 0px inset", "shadow_spread": "3px", "shadow_spread_dark": "1px", "slider_color": "*primary_600", "slider_color_dark": "*primary_600", "spacing_lg": "8px", "spacing_md": "6px", "spacing_sm": "4px", "spacing_xl": "10px", "spacing_xs": "2px", "spacing_xxl": "16px", "spacing_xxs": "1px", "stat_background_fill": "*primary_300", "stat_background_fill_dark": "*primary_500", "table_border_color": "*neutral_300", "table_border_color_dark": "*neutral_700", "table_even_background_fill": "white", "table_even_background_fill_dark": "*neutral_950", "table_odd_background_fill": "*neutral_50", "table_odd_background_fill_dark": "*neutral_900", "table_radius": "*radius_lg", "table_row_focus": "*color_accent_soft", "table_row_focus_dark": "*color_accent_soft", "text_lg": "20px", "text_md": "16px", "text_sm": "14px", "text_xl": "24px", "text_xs": "12px", "text_xxl": "28px", "text_xxs": "10px"}, "version": "0.0.1"}