Files changed (4)
  1. README.md +0 -2
  2. app.py +86 -86
  3. src/display/about.py +1 -1
  4. src/envs.py +1 -1
README.md CHANGED
@@ -8,8 +8,6 @@ sdk_version: 4.4.0
 app_file: app.py
 pinned: true
 license: apache-2.0
-tags:
-- leaderboard
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -239,92 +239,92 @@ with demo:
         with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=3):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        # with gr.TabItem("πŸš€ Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
-        #     with gr.Column():
-        #         with gr.Row():
-        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-        #         with gr.Column():
-        #             with gr.Accordion(
-        #                 f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})",
-        #                 open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     finished_eval_table = gr.components.Dataframe(
-        #                         value=finished_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-        #             with gr.Accordion(
-        #                 f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})",
-        #                 open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     running_eval_table = gr.components.Dataframe(
-        #                         value=running_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-
-        #             with gr.Accordion(
-        #                 f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-        #                 open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     pending_eval_table = gr.components.Dataframe(
-        #                         value=pending_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-        #     with gr.Row():
-        #         gr.Markdown("# βœ‰οΈβœ¨ Submit your model here!", elem_classes="markdown-text")
-
-        #     with gr.Row():
-        #         with gr.Column():
-        #             model_name_textbox = gr.Textbox(label="Model name")
-        #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-        #             model_type = gr.Dropdown(
-        #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-        #                 label="Model type",
-        #                 multiselect=False,
-        #                 value=None,
-        #                 interactive=True,
-        #             )
-
-        #         with gr.Column():
-        #             precision = gr.Dropdown(
-        #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
-        #                 label="Precision",
-        #                 multiselect=False,
-        #                 value="float16",
-        #                 interactive=True,
-        #             )
-        #             weight_type = gr.Dropdown(
-        #                 choices=[i.value.name for i in WeightType],
-        #                 label="Weights type",
-        #                 multiselect=False,
-        #                 value="Original",
-        #                 interactive=True,
-        #             )
-        #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-        #     submit_button = gr.Button("Submit Eval")
-        #     submission_result = gr.Markdown()
-        #     submit_button.click(
-        #         add_new_eval,
-        #         [
-        #             model_name_textbox,
-        #             base_model_name_textbox,
-        #             revision_name_textbox,
-        #             precision,
-        #             weight_type,
-        #             model_type,
-        #         ],
-        #         submission_result,
-        #     )
+        with gr.TabItem("πŸš€ Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                with gr.Column():
+                    with gr.Accordion(
+                        f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                gr.Markdown("# βœ‰οΈβœ¨ Submit your model here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
 
     with gr.Row():
         with gr.Accordion("πŸ“™ Comment", open=False):
src/display/about.py CHANGED
@@ -29,7 +29,7 @@ TITLE = """<h1 align="center" id="space-title">NPHardEval leaderboard</h1>"""
 INTRODUCTION_TEXT = """
 <div align="center">
   <img
-    src="https://raw.githubusercontent.com/casmlab/NPHardEval/main/figure/NPHardEval_text_right.png"
+    src="https://raw.githubusercontent.com/casmlab/NPHardEval/main/NPHardEval_text_right.jpg"
     style="width: 80%;"
     alt="Selected problems and the Euler diagram of computational complexity classes"
   >
src/envs.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import HfApi
 # clone / pull the lmeh eval data
 TOKEN = os.environ.get("TOKEN", None)
 
-OWNER = "NPHardEval"
+OWNER = "hyfrankl"
 REPO_ID = f"{OWNER}/NPHardEval-leaderboard"
 QUEUE_REPO = f"{OWNER}/NPHardEval-requests"
 RESULTS_REPO = f"{OWNER}/NPHardEval-results"
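The OWNER-derived repo IDs above are what the Space clones or pulls at startup. As a rough sketch (repo types and local directory names below are assumptions for illustration, not read from this repository's code), they are typically consumed like this:

# Illustrative only: a common way leaderboard Spaces pull the request/result repos.
# QUEUE_REPO, RESULTS_REPO, and TOKEN are the constants defined in src/envs.py above;
# local_dir values are hypothetical.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=QUEUE_REPO,       # f"{OWNER}/NPHardEval-requests"
    repo_type="dataset",      # assumption: the queue repo is a dataset repo
    local_dir="eval-queue",   # hypothetical local checkout path
    token=TOKEN,
)
snapshot_download(
    repo_id=RESULTS_REPO,     # f"{OWNER}/NPHardEval-results"
    repo_type="dataset",
    local_dir="eval-results",
    token=TOKEN,
)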