sheonhan committed
Commit 2a73469 • 1 Parent(s): c131125

Add citation button

Files changed (2)
  1. app.py +39 -8
  2. content.py +25 -11
app.py CHANGED
@@ -17,6 +17,8 @@ LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 
 api = HfApi()
+
+
 def restart_space():
     api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
 
@@ -32,10 +34,11 @@ def get_all_requested_models(requested_models_dir):
 
     return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
 
+
 repo = None
 requested_models = None
 if H4_TOKEN:
-    print("pulling repo")
+    print("Pulling evaluation requests and results.")
     # try:
     # shutil.rmtree("./evals/")
     # except:
@@ -111,9 +114,10 @@ def has_no_nan_values(df, columns):
 def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
 
+
 def get_leaderboard():
     if repo:
-        print("pulling changes")
+        print("Pulling evaluation results for the leaderboard.")
         repo.git_pull()
 
     all_data = get_eval_results_dicts(IS_PUBLIC)
@@ -166,8 +170,9 @@ def get_leaderboard():
 
 def get_eval_table():
     if repo:
-        print("pulling changes for eval")
+        print("Pulling changes for the evaluation queue.")
         repo.git_pull()
+
     entries = [
         entry
         for entry in os.listdir("evals/eval_requests")
@@ -221,7 +226,7 @@ def is_model_on_hub(model_name, revision) -> bool:
         return True
 
     except Exception as e:
-        print("Could not get the model config from the hub")
+        print("Could not get the model config from the hub.")
         print(e)
         return False
 
@@ -293,24 +298,50 @@ def refresh():
     finished_eval_queue, running_eval_queue, pending_eval_queue = get_eval_table()
     return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
 
+
 custom_css = """
 #changelog-text {
+    font-size: 16px !important;
+}
+
+#changelog-text h2 {
     font-size: 18px !important;
 }
 
 .markdown-text {
     font-size: 16px !important;
 }
+
+#citation-button span {
+    font-size: 16px !important;
+}
+
+#citation-button textarea {
+    font-size: 16px !important;
+}
+
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
 """
 
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
-    with gr.Row():
-        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
-    with gr.Accordion("CHANGELOG", open=False):
-        changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
+    with gr.Row():
+        with gr.Column():
+            with gr.Accordion("📙 Citation", open=False):
+                citation_button = gr.Textbox(
+                    value=CITATION_BUTTON_TEXT,
+                    label=CITATION_BUTTON_LABEL,
+                    elem_id="citation-button",
+                ).style(show_copy_button=True)
+        with gr.Column():
+            with gr.Accordion("✨ CHANGELOG", open=False):
+                changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
 
     leaderboard_table = gr.components.Dataframe(
         value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5
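To see the Gradio pattern this commit introduces in isolation, here is a minimal, self-contained sketch, assuming the Gradio 3.x API the commit itself uses (Textbox.style(show_copy_button=True)); the placeholder citation strings and standalone-script framing are illustrative, not part of the commit:

# Minimal sketch of the citation-button pattern added in this commit.
# Assumes Gradio 3.x, where .style(show_copy_button=True) is available.
import gradio as gr

# Placeholder strings; the real app imports CITATION_BUTTON_LABEL and
# CITATION_BUTTON_TEXT from content.py.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
  title = {Open LLM Leaderboard},
  year = {2023},
}"""

demo = gr.Blocks()
with demo:
    # Collapsed by default so the citation does not crowd the page.
    with gr.Accordion("📙 Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",  # hook for the CSS rules added above
        ).style(show_copy_button=True)

demo.launch()

Placing the Citation and CHANGELOG accordions in sibling gr.Column() blocks inside a single gr.Row(), as the commit does, renders the two collapsed panels side by side; that is what lets the commit drop the old standalone CHANGELOG row.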
content.py CHANGED
@@ -1,29 +1,33 @@
 CHANGELOG_TEXT = f"""
+## [2023-05-30]
+- Add a citation button
+- Simplify Gradio layout
+
 ## [2023-05-29]
-- Auto-restart every hour
+- Auto-restart every hour for the latest results
 - Sync with the internal version (minor style changes)
 
 ## [2023-05-24]
-- Added a baseline that has 25.0 for all values.
-- Added CHANGELOG
+- Add a baseline that has 25.0 for all values
+- Add CHANGELOG
 
 ## [2023-05-23]
-- Fixed a CSS issue that made the leaderboard hard to read in dark mode.
+- Fix a CSS issue that made the leaderboard hard to read in dark mode
 
 ## [2023-05-22]
-- Display a success/error message after submitting evaluation requests.
-- Reject duplicate submission.
-- Do not display results that have incomplete results.
-- Display different queues for jobs that are RUNNING, PENDING, FINISHED status.
+- Display a success/error message after submitting evaluation requests
+- Reject duplicate submission
+- Do not display results that have incomplete results
+- Display different queues for jobs that are RUNNING, PENDING, FINISHED status
 
 ## [2023-05-15]
-- Fixed a typo: from "TruthQA" to "TruthfulQA"
+- Fix a typo: from "TruthQA" to "TruthfulQA"
 
 ## [2023-05-10]
-- Fixed a bug that prevented auto-refresh.
+- Fix a bug that prevented auto-refresh
 
 ## [2023-05-10]
-- Released the leaderboard to public.
+- Release the leaderboard to public
 """
 
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>"""
@@ -47,3 +51,13 @@ We chose these benchmarks as they test a variety of reasoning and general knowledge
 EVALUATION_QUEUE_TEXT = f"""
 # Evaluation Queue for the 🤗 Open LLM Leaderboard, these models will be automatically evaluated on the 🤗 cluster
 """
+
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
+  author = {Edward Beeching, Sheon Han, Nathan Lambert, Nazneen Rajani, Omar Sanseviero, Lewis Tunstall, Thomas Wolf},
+  title = {Open LLM Leaderboard},
+  year = {2023},
+  publisher = {Hugging Face},
+  howpublished = "{\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"}
+}"""
+
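One caveat about the committed BibTeX: the howpublished value mixes quote and brace delimiters ("{\url{...}"}), which appears unbalanced to standard BibTeX parsers, and BibTeX conventionally separates authors with "and" rather than commas. A brace-balanced sketch of the same constant (these fixes are editorial suggestions, not part of the commit):

# Hypothetical cleanup of CITATION_BUTTON_TEXT; not part of the commit itself.
CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
  author = {Edward Beeching and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
  title = {Open LLM Leaderboard},
  year = {2023},
  publisher = {Hugging Face},
  howpublished = {\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}},
}"""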