AppleSwing committed on
Commit
2a18e0a
1 Parent(s): a549d9d

Add app debug mode and dynamic refresh tables

Files changed (3)
  1. app.py +289 -229
  2. src/envs.py +2 -2
  3. src/submission/submit.py +5 -1
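
Debug mode is driven by the new --debug flag parsed in app.py: when it is set, QUEUE_REPO and RESULTS_REPO are redirected to DEBUG_QUEUE_REPO and DEBUG_RESULTS_REPO, and a hidden checkbox forwards the flag to add_new_eval so test submissions land in the debug queue. A minimal sketch of the expected invocation, assuming the Space entry point app.py is run directly:

    python app.py --debug   # queue and results point at the sparse-generative-ai/debug_* repos

The dynamic refresh comes from the new Refresh button wired to refresh_leaderboard() plus a periodic_init() daemon thread that re-runs init_space() every 60 seconds.
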
app.py CHANGED
@@ -3,10 +3,11 @@
import os
import datetime
import socket

import gradio as gr
import pandas as pd
-
from apscheduler.schedulers.background import BackgroundScheduler

from huggingface_hub import snapshot_download

@@ -38,11 +39,24 @@ from src.display.utils import (
Precision,
)

- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
from src.utils import get_dataset_summary_table


def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
try:

@@ -76,11 +90,6 @@ def init_space():
)
return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df

-
- dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
- leaderboard_df = original_df.copy()
-
-
# Searching and filtering
def update_table(
hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str

@@ -143,123 +152,158 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio

return filtered_df


- # triggered only once at startup => read query parameter if it exists
- def load_query(request: gr.Request):
- query = request.query_params.get("query") or ""
- return query


- demo = gr.Blocks(css=custom_css)
- with demo:
- gr.HTML(TITLE)
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

- with gr.Tabs(elem_classes="tab-buttons") as tabs:
- with gr.TabItem("MOE-LLM-GPU-Poor-Leaderboard Benchmark", elem_id="llm-benchmark-tab-table", id=0):
- with gr.Row():
- with gr.Column():
- with gr.Row():
- search_bar = gr.Textbox(
- placeholder=" 🔍 Model search (separate multiple queries with `;`)",
- show_label=False,
- elem_id="search-bar",
- )
- with gr.Row():
- shown_columns = gr.CheckboxGroup(
- choices=[
- c.name
- for c in fields(AutoEvalColumn)
- if not c.hidden and not c.never_hidden and not c.dummy
- ],
- value=[
- c.name
- for c in fields(AutoEvalColumn)
- if c.displayed_by_default and not c.hidden and not c.never_hidden
- ],
- label="Select columns to show",
- elem_id="column-select",
- interactive=True,
- )

- with gr.Column(min_width=320):
- filter_columns_size = gr.CheckboxGroup(
- label="Inference frameworks",
- choices=[t.to_str() for t in InferenceFramework],
- value=[t.to_str() for t in InferenceFramework],
- interactive=True,
- elem_id="filter-columns-size",
- )

- filter_columns_type = gr.CheckboxGroup(
- label="Model types",
- choices=[t.to_str() for t in ModelType],
- value=[t.to_str() for t in ModelType],
- interactive=True,
- elem_id="filter-columns-type",
- )

- filter_columns_precision = gr.CheckboxGroup(
- label="Precision",
- choices=[i.value.name for i in Precision],
- value=[i.value.name for i in Precision],
- interactive=True,
- elem_id="filter-columns-precision",
- )

- # filter_columns_size = gr.CheckboxGroup(
- # label="Model sizes (in billions of parameters)",
- # choices=list(NUMERIC_INTERVALS.keys()),
- # value=list(NUMERIC_INTERVALS.keys()),
- # interactive=True,
- # elem_id="filter-columns-size",
- # )
-
- # breakpoint()
-
- leaderboard_table = gr.components.Dataframe(
- value=(
- leaderboard_df[
- [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
- + shown_columns.value
- + [AutoEvalColumn.dummy.name]
- ]
- if leaderboard_df.empty is False
- else leaderboard_df
- ),
- headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
- datatype=TYPES,
- elem_id="leaderboard-table",
- interactive=False,
- visible=True,
- ) # column_widths=["2%", "20%"]
-
- # Dummy leaderboard for handling the case when the user uses backspace key
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
- value=original_df[COLS] if original_df.empty is False else original_df,
- headers=COLS,
- datatype=TYPES,
- visible=False,
- )
-
- search_bar.submit(
- update_table,
- [
- hidden_leaderboard_table_for_search,
- shown_columns,
- filter_columns_type,
- filter_columns_precision,
- filter_columns_size,
- search_bar,
- ],
- leaderboard_table,
- )
-
- # Check query parameter once at startup and update search bar
- demo.load(load_query, inputs=[], outputs=[search_bar])
-
- for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
- selector.change(
update_table,
[
hidden_leaderboard_table_for_search,

@@ -270,124 +314,139 @@ with demo:
search_bar,
],
leaderboard_table,
- queue=True,
)
-
- with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
- dataset_table = gr.components.Dataframe(
- value=dataset_df,
- headers=list(dataset_df.columns),
- datatype=["str", "markdown", "str", "str", "str"],
- elem_id="dataset-table",
- interactive=False,
- visible=True,
- column_widths=["15%", "20%"],
- )
-
- gr.Markdown(LLM_BENCHMARKS_DETAILS, elem_classes="markdown-text")
- gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
-
- with gr.TabItem("Submit a model ", elem_id="llm-benchmark-tab-table", id=3):
- with gr.Column():
- with gr.Row():
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
- with gr.Column():
- with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
- with gr.Row():
- finished_eval_table = gr.components.Dataframe(
- value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
- )
-
- with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
- with gr.Row():
- running_eval_table = gr.components.Dataframe(
- value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
- )
-
- with gr.Accordion(f"⏳ Scheduled Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
- with gr.Row():
- pending_eval_table = gr.components.Dataframe(
- value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
- )
-
- with gr.Row():
- gr.Markdown("# Submit your model here", elem_classes="markdown-text")
-
- with gr.Row():
- inference_framework = gr.Dropdown(
- choices=[t.to_str() for t in InferenceFramework],
- label="Inference framework",
- multiselect=False,
- value=None,
- interactive=True,
)
-
- with gr.Row():
with gr.Column():
- model_name_textbox = gr.Textbox(label="Model name")
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
- private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
- model_type = gr.Dropdown(
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
- label="Model type",
multiselect=False,
value=None,
interactive=True,
)
-
- with gr.Column():
- precision = gr.Dropdown(
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
- label="Precision",
- multiselect=False,
- value="float32",
- interactive=True,
- )
-
- weight_type = gr.Dropdown(
- choices=[i.value.name for i in WeightType],
- label="Weights type",
- multiselect=False,
- value="Original",
- interactive=True,
- )
-
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
- submit_button = gr.Button("Submit Eval")
- submission_result = gr.Markdown()
- submit_button.click(
- add_new_eval,
- [
- model_name_textbox,
- base_model_name_textbox,
- revision_name_textbox,
- precision,
- private,
- weight_type,
- model_type,
- inference_framework,
- ],
- submission_result,
- )
-
- with gr.Row():
- with gr.Accordion("Citing this leaderboard", open=False):
- citation_button = gr.Textbox(
- value=CITATION_BUTTON_TEXT,
- label=CITATION_BUTTON_LABEL,
- lines=20,
- elem_id="citation-button",
- show_copy_button=True,
- )
-
scheduler = BackgroundScheduler()

- scheduler.add_job(restart_space, "interval", seconds=6 * 60 * 60)
-

def launch_backend():
import subprocess

@@ -396,8 +455,9 @@ def launch_backend():
if DEVICE not in {"cpu"}:
_ = subprocess.run(["python", "backend-cli.py"])

-
# scheduler.add_job(launch_backend, "interval", seconds=120)
-
- scheduler.start()
- demo.queue(default_concurrency_limit=40).launch()
 
 
import os
import datetime
import socket
+ from threading import Thread

import gradio as gr
import pandas as pd
+ import time
from apscheduler.schedulers.background import BackgroundScheduler

from huggingface_hub import snapshot_download

Precision,
)

+ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+ QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
from src.utils import get_dataset_summary_table

+ def get_args():
+ import argparse
+
+ parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+ parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+ return parser.parse_args()
+
+ args = get_args()
+ if args.debug:
+ print("Running in debug mode")
+ QUEUE_REPO = DEBUG_QUEUE_REPO
+ RESULTS_REPO = DEBUG_RESULTS_REPO

def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
try:

)
return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df

# Searching and filtering
def update_table(
hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str

return filtered_df

+ shown_columns = None
+ dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+ leaderboard_df = original_df.copy()

+ def update_leaderboard_table():
+ global leaderboard_df, shown_columns
+ print("Updating leaderboard table")
+ return leaderboard_df[
+ [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+ + shown_columns.value
+ + [AutoEvalColumn.dummy.name]
+ ] if not leaderboard_df.empty else leaderboard_df
+

+ def update_hidden_leaderboard_table():
+ global original_df
+ return original_df[COLS] if original_df.empty is False else original_df

+ def update_dataset_table():
+ global dataset_df
+ return dataset_df

+ def update_finish_table():
+ global finished_eval_queue_df
+ return finished_eval_queue_df

+ def update_running_table():
+ global running_eval_queue_df
+ return running_eval_queue_df

+ def update_pending_table():
+ global pending_eval_queue_df
+ return pending_eval_queue_df

+ def update_finish_num():
+ global finished_eval_queue_df
+ return len(finished_eval_queue_df)
+
+ def update_running_num():
+ global running_eval_queue_df
+ return len(running_eval_queue_df)
+
+ def update_pending_num():
+ global pending_eval_queue_df
+ return len(pending_eval_queue_df)

+ # triggered only once at startup => read query parameter if it exists
+ def load_query(request: gr.Request):
+ query = request.query_params.get("query") or ""
+ return query
+
+ def refresh_leaderboard():
+ return gr.update(value=update_leaderboard_table()), gr.update(value=update_hidden_leaderboard_table()), \
+ gr.update(value=update_dataset_table()), gr.update(value=update_finish_table()), \
+ gr.update(value=update_running_table()), gr.update(value=update_pending_table()), \
+ gr.update(value=update_finish_num()), gr.update(value=update_running_num()), gr.update(value=update_pending_num())
+
+ def periodic_init():
+ global dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df, leaderboard_df
+ while True:
+ time.sleep(60)
+ dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+ leaderboard_df = original_df.copy()
+
+ def block_launch():
+ global dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df, leaderboard_df, shown_columns
+ demo = gr.Blocks(css=custom_css)
+ with demo:
+ gr.HTML(TITLE)
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+ with gr.Tabs(elem_classes="tab-buttons") as tabs:
+ with gr.TabItem("MOE-LLM-GPU-Poor-Leaderboard Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ search_bar = gr.Textbox(
+ placeholder=" 🔍 Model search (separate multiple queries with `;`)",
+ show_label=False,
+ elem_id="search-bar",
+ )
+ with gr.Row():
+ shown_columns = gr.CheckboxGroup(
+ choices=[
+ c.name
+ for c in fields(AutoEvalColumn)
+ if not c.hidden and not c.never_hidden and not c.dummy
+ ],
+ value=[
+ c.name
+ for c in fields(AutoEvalColumn)
+ if c.displayed_by_default and not c.hidden and not c.never_hidden
+ ],
+ label="Select columns to show",
+ elem_id="column-select",
+ interactive=True,
+ )
+ with gr.Column(min_width=320):
+ filter_columns_size = gr.CheckboxGroup(
+ label="Inference frameworks",
+ choices=[t.to_str() for t in InferenceFramework],
+ value=[t.to_str() for t in InferenceFramework],
+ interactive=True,
+ elem_id="filter-columns-size",
+ )
+ filter_columns_type = gr.CheckboxGroup(
+ label="Model types",
+ choices=[t.to_str() for t in ModelType],
+ value=[t.to_str() for t in ModelType],
+ interactive=True,
+ elem_id="filter-columns-type",
+ )
+ filter_columns_precision = gr.CheckboxGroup(
+ label="Precision",
+ choices=[i.value.name for i in Precision],
+ value=[i.value.name for i in Precision],
+ interactive=True,
+ elem_id="filter-columns-precision",
+ )
+ # filter_columns_size = gr.CheckboxGroup(
+ # label="Model sizes (in billions of parameters)",
+ # choices=list(NUMERIC_INTERVALS.keys()),
+ # value=list(NUMERIC_INTERVALS.keys()),
+ # interactive=True,
+ # elem_id="filter-columns-size",
+ # )
+ # breakpoint()
+ refresh_button = gr.Button("Refresh", visible=True)
+ leaderboard_table = gr.components.Dataframe(
+ value=(
+ leaderboard_df[
+ [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+ + shown_columns.value
+ + [AutoEvalColumn.dummy.name]
+ ]
+ if leaderboard_df.empty is False
+ else leaderboard_df
+ ),
+ headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+ datatype=TYPES,
+ elem_id="leaderboard-table",
+ interactive=False,
+ visible=True,
+ ) # column_widths=["2%", "20%"]
+ # Dummy leaderboard for handling the case when the user uses backspace key
+ hidden_leaderboard_table_for_search = gr.components.Dataframe(
+ value=original_df[COLS] if original_df.empty is False else original_df,
+ headers=COLS,
+ datatype=TYPES,
+ visible=False,
+ )
+ # refresh_button.click(fn=update_leaderboard_tables, outputs=[leaderboard_table, hidden_leaderboard_table_for_search])
+ search_bar.submit(
update_table,
[
hidden_leaderboard_table_for_search,

search_bar,
],
leaderboard_table,
)
+ # Check query parameter once at startup and update search bar
+ demo.load(load_query, inputs=[], outputs=[search_bar])
+ for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
+ selector.change(
+ update_table,
+ [
+ hidden_leaderboard_table_for_search,
+ shown_columns,
+ filter_columns_type,
+ filter_columns_precision,
+ filter_columns_size,
+ search_bar,
+ ],
+ leaderboard_table,
+ queue=True,
+ )
+ with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+ dataset_table = gr.components.Dataframe(
+ value=dataset_df,
+ headers=list(dataset_df.columns),
+ datatype=["str", "markdown", "str", "str", "str"],
+ elem_id="dataset-table",
+ interactive=False,
+ visible=True,
+ column_widths=["15%", "20%"],
)
+ gr.Markdown(LLM_BENCHMARKS_DETAILS, elem_classes="markdown-text")
+ gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+ # refresh_button.click(fn=update_dataset_table, outputs=[dataset_table])
+ with gr.TabItem("Submit a model ", elem_id="llm-benchmark-tab-table", id=3):
with gr.Column():
+ with gr.Row():
+ gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+ with gr.Column():
+ with gr.Accordion(f"✅ Finished Evaluations", open=False):
+ with gr.Column():
+ num_fin = gr.Number(len(finished_eval_queue_df), label="Number of finished evaluations", visible=True, interactive=False)
+ with gr.Row():
+ finished_eval_table = gr.components.Dataframe(
+ value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+ )
+ with gr.Accordion(f"🔄 Running Evaluation Queue", open=False):
+ with gr.Column():
+ num_run = gr.Number(len(running_eval_queue_df), label="Number of running evaluations", visible=True, interactive=False)
+ with gr.Row():
+ running_eval_table = gr.components.Dataframe(
+ value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+ )
+ with gr.Accordion(f"⏳ Scheduled Evaluation Queue", open=False):
+ with gr.Column():
+ num_sche = gr.Number(len(pending_eval_queue_df), label="Number of scheduled evaluations", visible=True, interactive=False)
+ with gr.Row():
+ pending_eval_table = gr.components.Dataframe(
+ value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5
+ )
+ # refresh_button.click(fn=update_submit_tables,
+ # outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+ with gr.Row():
+ gr.Markdown("# Submit your model here", elem_classes="markdown-text")
+ with gr.Row():
+ inference_framework = gr.Dropdown(
+ choices=[t.to_str() for t in InferenceFramework],
+ label="Inference framework",
multiselect=False,
value=None,
interactive=True,
)
+ with gr.Row():
+ with gr.Column():
+ model_name_textbox = gr.Textbox(label="Model name")
+ revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+ private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+ model_type = gr.Dropdown(
+ choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+ label="Model type",
+ multiselect=False,
+ value=None,
+ interactive=True,
+ )
+ with gr.Column():
+ precision = gr.Dropdown(
+ choices=[i.value.name for i in Precision if i != Precision.Unknown],
+ label="Precision",
+ multiselect=False,
+ value="float32",
+ interactive=True,
+ )
+ weight_type = gr.Dropdown(
+ choices=[i.value.name for i in WeightType],
+ label="Weights type",
+ multiselect=False,
+ value="Original",
+ interactive=True,
+ )
+ base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+ submit_button = gr.Button("Submit Eval")
+ submission_result = gr.Markdown()
+ debug = gr.Checkbox(args.debug, label="Debug", visible=False)
+ submit_button.click(
+ add_new_eval,
+ [
+ model_name_textbox,
+ base_model_name_textbox,
+ revision_name_textbox,
+ precision,
+ private,
+ weight_type,
+ model_type,
+ inference_framework,
+ debug
+ ],
+ submission_result,
+ )
+ refresh_button.click(refresh_leaderboard,
+ outputs=[leaderboard_table, hidden_leaderboard_table_for_search, dataset_table,
+ finished_eval_table, running_eval_table, pending_eval_table, num_fin, num_run, num_sche])
+
+ with gr.Row():
+ with gr.Accordion("Citing this leaderboard", open=False):
+ citation_button = gr.Textbox(
+ value=CITATION_BUTTON_TEXT,
+ label=CITATION_BUTTON_LABEL,
+ lines=20,
+ elem_id="citation-button",
+ show_copy_button=True,
+ )
+ demo.queue(default_concurrency_limit=40).launch()
+
scheduler = BackgroundScheduler()

+ scheduler.add_job(restart_space, "interval", hours=6)

def launch_backend():
import subprocess

if DEVICE not in {"cpu"}:
_ = subprocess.run(["python", "backend-cli.py"])

+ Thread(target=periodic_init, daemon=True).start()
# scheduler.add_job(launch_backend, "interval", seconds=120)
+ if __name__ == "__main__":
+ scheduler.start()
+ block_launch()
+
src/envs.py CHANGED
@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
RESULTS_REPO = "sparse-generative-ai/results"

- PRIVATE_QUEUE_REPO = "sparse-generative-ai/private-requests"
- PRIVATE_RESULTS_REPO = "sparse-generative-ai/private-results"
+ DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+ DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"

IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))

src/submission/submit.py CHANGED
@@ -3,7 +3,7 @@ import os
from datetime import datetime, timezone

from src.display.formatting import styled_error, styled_message, styled_warning
- from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+ from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
from src.submission.check_validity import (
already_submitted_models,

@@ -26,12 +26,16 @@ def add_new_eval(
weight_type: str,
model_type: str,
inference_framework: str,
+ debug: bool = False
):
global REQUESTED_MODELS
global USERS_TO_SUBMISSION_DATES
if not REQUESTED_MODELS:
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)

+ if debug:
+ QUEUE_REPO = DEBUG_QUEUE_REPO
+
user_name = ""
model_path = model
if "/" in model: