AppleSwing committed
Commit b2a2a5b
2 Parent(s): f5ff85d 08b56fc

Merge branch 'pr/15' into pr/18

app.py CHANGED
@@ -2,10 +2,11 @@
 import os
 import datetime
 import socket
+from threading import Thread
 
 import gradio as gr
 import pandas as pd
-
+import time
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from huggingface_hub import snapshot_download
@@ -37,11 +38,24 @@ from src.display.utils import (
     Precision,
 )
 
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, \
+    QUEUE_REPO, REPO_ID, RESULTS_REPO, DEBUG_QUEUE_REPO, DEBUG_RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.utils import get_dataset_summary_table
 
+def get_args():
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Run the LLM Leaderboard")
+    parser.add_argument("--debug", action="store_true", help="Run in debug mode")
+    return parser.parse_args()
+
+args = get_args()
+if args.debug:
+    print("Running in debug mode")
+    QUEUE_REPO = DEBUG_QUEUE_REPO
+    RESULTS_REPO = DEBUG_RESULTS_REPO
 
 def ui_snapshot_download(repo_id, local_dir, repo_type, tqdm_class, etag_timeout):
     try:
@@ -75,11 +89,6 @@ def init_space():
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
-
-dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
-leaderboard_df = original_df.copy()
-
-
 # Searching and filtering
 def update_table(
     hidden_df: pd.DataFrame, columns: list, type_query: list, precision_query: list, size_query: list, query: str
@@ -142,6 +151,51 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
 
     return filtered_df
 
+shown_columns = None
+dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+leaderboard_df = original_df.copy()
+
+def update_leaderboard_table():
+    global leaderboard_df, shown_columns
+    print("Updating leaderboard table")
+    return leaderboard_df[
+        [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+        + shown_columns.value
+        + [AutoEvalColumn.dummy.name]
+    ] if not leaderboard_df.empty else leaderboard_df
+
+
+def update_hidden_leaderboard_table():
+    global original_df
+    return original_df[COLS] if original_df.empty is False else original_df
+
+def update_dataset_table():
+    global dataset_df
+    return dataset_df
+
+def update_finish_table():
+    global finished_eval_queue_df
+    return finished_eval_queue_df
+
+def update_running_table():
+    global running_eval_queue_df
+    return running_eval_queue_df
+
+def update_pending_table():
+    global pending_eval_queue_df
+    return pending_eval_queue_df
+
+def update_finish_num():
+    global finished_eval_queue_df
+    return len(finished_eval_queue_df)
+
+def update_running_num():
+    global running_eval_queue_df
+    return len(running_eval_queue_df)
+
+def update_pending_num():
+    global pending_eval_queue_df
+    return len(pending_eval_queue_df)
 
 # triggered only once at startup => read query parameter if it exists
 def load_query(request: gr.Request):
@@ -385,8 +439,7 @@ with demo:
 
 scheduler = BackgroundScheduler()
 
-scheduler.add_job(restart_space, "interval", seconds=6 * 60 * 60)
-
+scheduler.add_job(restart_space, "interval", hours=6)
 
 def launch_backend():
     import subprocess
@@ -395,8 +448,9 @@ def launch_backend():
     if DEVICE not in {"cpu"}:
         _ = subprocess.run(["python", "backend-cli.py"])
 
-
+Thread(target=periodic_init, daemon=True).start()
 # scheduler.add_job(launch_backend, "interval", seconds=120)
-
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == "__main__":
+    scheduler.start()
+    block_launch()
+
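Note on the refresh wiring above: the new update_* helpers only re-read module-level dataframes, and the daemon thread started near the bottom of the file (Thread(target=periodic_init, daemon=True).start()) is what is expected to keep those globals fresh. periodic_init and block_launch are not part of this diff, so the snippet below is only a minimal sketch of what such a refresher could look like, assuming it simply re-runs init_space() on a fixed interval and rebinds the globals the callbacks read.

    import time

    def periodic_init(interval_seconds: int = 3600):  # the interval is an assumption
        # Sketch only: rebuild the dataframes that update_leaderboard_table() and
        # the other update_* callbacks return, then sleep until the next cycle.
        global dataset_df, original_df, leaderboard_df
        global finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
        while True:
            time.sleep(interval_seconds)
            dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
            leaderboard_df = original_df.copy()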
src/backend/envs.py CHANGED
@@ -63,4 +63,4 @@ EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_REQUESTS_PATH_BACKEND_SYNC = os.path.join(CACHE_PATH, "eval-queue-bk-sync")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
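DEVICE is just a torch device string; "cuda:0" pins work to the first GPU explicitly instead of the generic "cuda" default device. A minimal sketch of how such a string is consumed (the tensor and module below are placeholders, only to show the string in use):

    import torch

    DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

    x = torch.zeros(4, device=DEVICE)          # tensors can be created directly on the device
    layer = torch.nn.Linear(4, 2).to(DEVICE)   # or modules moved onto it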
src/display/utils.py CHANGED
@@ -140,6 +140,7 @@ class EvalQueueColumn: # Queue column
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
     weight_type = ColumnContent("weight_type", "str", "Original")
+    model_framework = ColumnContent("inference_framework", "str", True)
     status = ColumnContent("status", "str", True)
 
 
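Judging from the neighbouring entries, the ColumnContent arguments are (name, type, displayed_by_default); only the .name attribute is relied on elsewhere in this commit. A small usage sketch, assuming that layout:

    from src.display.utils import EvalQueueColumn

    # "inference_framework" is the key that populate.py looks up in each request JSON
    print(EvalQueueColumn.model_framework.name)  # -> "inference_framework"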
 
src/envs.py CHANGED
@@ -12,8 +12,8 @@ QUEUE_REPO = "sparse-generative-ai/requests"
 QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
 RESULTS_REPO = "sparse-generative-ai/results"
 
-PRIVATE_QUEUE_REPO = "sparse-generative-ai/private-requests"
-PRIVATE_RESULTS_REPO = "sparse-generative-ai/private-results"
+DEBUG_QUEUE_REPO = "sparse-generative-ai/debug_requests"
+DEBUG_RESULTS_REPO = "sparse-generative-ai/debug_results"
 
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
 
src/populate.py CHANGED
@@ -95,6 +95,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
             data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+            data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
 
             all_evals.append(data)
         elif ".md" not in entry:
@@ -107,6 +108,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, p
 
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                data[EvalQueueColumn.model_framework.name] = data.get("inference_framework", "-")
                 all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
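Each queue entry is a request JSON file; the two added lines copy its inference_framework field into the new queue column, defaulting to "-" so requests submitted before the field existed still render. A short illustration with a hypothetical request dict:

    # hypothetical request JSON as stored in the requests dataset
    data = {"model": "org/model", "revision": "main", "status": "PENDING"}

    # same defaulting behaviour as the added lines above
    inference_framework = data.get("inference_framework", "-")
    print(inference_framework)  # "-" for legacy requests without the field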
src/submission/submit.py CHANGED
@@ -3,7 +3,7 @@ import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, DEBUG_QUEUE_REPO
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
     already_submitted_models,
@@ -26,12 +26,16 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
     inference_framework: str,
+    debug: bool = False
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
+    if debug:
+        QUEUE_REPO = DEBUG_QUEUE_REPO
+
     user_name = ""
     model_path = model
     if "/" in model:
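The new debug keyword only changes which requests dataset the submission is written to. Isolated from the rest of add_new_eval, the selection amounts to the sketch below (pick_queue_repo is a hypothetical helper, not part of the codebase):

    from src.envs import QUEUE_REPO, DEBUG_QUEUE_REPO

    def pick_queue_repo(debug: bool = False) -> str:
        # debug submissions go to sparse-generative-ai/debug_requests,
        # regular ones to sparse-generative-ai/requests
        return DEBUG_QUEUE_REPO if debug else QUEUE_REPO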