weichiang committed on
Commit 35f8ff4 • 1 Parent(s): 0bcfc15
Files changed (1)
  1. app.py +4 -5
app.py CHANGED
@@ -25,9 +25,7 @@ def make_default_md(arena_df, elo_results):
  | [Vote](https://chat.lmsys.org) | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |

  LMSYS [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) is a crowdsourced open platform for LLM evals.
- We've collected over **500,000** human preference votes to rank LLMs with the Elo ranking system. Contribute your vote 🗳️ at [chat.lmsys.org](https://chat.lmsys.org)!
-
- Code to recreate leaderboard tables and plots in this [notebook]({notebook_url}) and more discussions in this blog [post](https://lmsys.org/blog/2023-12-07-leaderboard/).
+ We've collected over **500,000** human preference votes to rank LLMs with the Elo ranking system.
  """
  return leaderboard_md

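The text in this hunk says votes are aggregated with the Elo rating system. As a minimal sketch, here is how a single pairwise preference vote could update two ratings under a standard Elo update; the `K` factor and starting rating are illustrative assumptions, not values from this repo, and the leaderboard's actual computation lives in the linked notebook:

```python
# Minimal Elo update for one pairwise preference vote (illustrative sketch).
# K and INITIAL_RATING are assumptions, not constants from app.py.
K = 32
INITIAL_RATING = 1000

def expected_score(rating_a: float, rating_b: float) -> float:
    """Probability that model A beats model B under the Elo model."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating_a: float, rating_b: float, a_wins: bool) -> tuple[float, float]:
    """Return updated (rating_a, rating_b) after one vote."""
    e_a = expected_score(rating_a, rating_b)
    s_a = 1.0 if a_wins else 0.0
    delta = K * (s_a - e_a)
    return rating_a + delta, rating_b - delta

# Example: one vote preferring model A over model B, both at the initial rating.
print(update_elo(INITIAL_RATING, INITIAL_RATING, a_wins=True))  # (1016.0, 984.0)
```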
@@ -37,9 +35,10 @@ def make_arena_leaderboard_md(arena_df):
  total_models = len(arena_df)
  space = "   "
  leaderboard_md = f"""
- Total #models: **{total_models}**.{space} Total #votes: **{"{:,}".format(total_votes)}**.{space} Last updated: April 9, 2024.
+ Total #models: **{total_models}**.{space} Total #votes: **{"{:,}".format(total_votes)}**.{space} Last updated: April 11, 2024.

  📣 **NEW!** View leaderboard for different categories (e.g., coding, long user query)!
+ Code to recreate leaderboard tables and plots in this [notebook]({notebook_url}). Cast your vote 🗳️ at [chat.lmsys.org](https://chat.lmsys.org)!
  """
  return leaderboard_md

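An aside on the f-string in this hunk: the nested `"{:,}".format(total_votes)` call renders the vote count with thousands separators; an f-string format spec produces the same output more directly. A small sketch:

```python
total_votes = 500000
# Style used in the diff: str.format nested inside an f-string.
print(f'Total #votes: **{"{:,}".format(total_votes)}**')  # Total #votes: **500,000**
# Equivalent f-string format spec.
print(f"Total #votes: **{total_votes:,}**")               # Total #votes: **500,000**
```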
@@ -405,7 +404,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
  gr.Markdown(
  f"""Note: we take the 95% confidence interval into account when determining a model's ranking.
  A model is ranked higher only if its lower bound of model score is higher than the upper bound of the other model's score.
- See Figure 3 below for visualization of the confidence intervals. Code to recreate these tables and plots in this [notebook]({notebook_url}) and more discussions in this blog [post](https://lmsys.org/blog/2023-12-07-leaderboard/).
+ See Figure 3 below for visualization of the confidence intervals. More details in [notebook]({notebook_url}).
  """,
  elem_id="leaderboard_markdown"
  )
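The note in this hunk states the ranking rule: a model outranks another only if its lower confidence bound exceeds the other's upper bound. A minimal sketch of that rule, where the `ModelScore` fields and sample numbers are assumptions for illustration rather than this app's actual data structures:

```python
from dataclasses import dataclass

@dataclass
class ModelScore:
    name: str
    lower: float  # lower bound of the 95% confidence interval
    upper: float  # upper bound of the 95% confidence interval

def ranked_higher(a: ModelScore, b: ModelScore) -> bool:
    """A outranks B only if A's lower bound exceeds B's upper bound."""
    return a.lower > b.upper

def assign_ranks(models: list[ModelScore]) -> dict[str, int]:
    """Rank = 1 + number of models that statistically beat this one."""
    return {
        m.name: 1 + sum(ranked_higher(other, m) for other in models if other is not m)
        for m in models
    }

models = [
    ModelScore("model-x", lower=1250, upper=1270),
    ModelScore("model-y", lower=1240, upper=1265),  # CI overlaps model-x: shares rank 1
    ModelScore("model-z", lower=1180, upper=1200),  # beaten by both: rank 3
]
print(assign_ranks(models))  # {'model-x': 1, 'model-y': 1, 'model-z': 3}
```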