Spaces:
Runtime error
Runtime error
updates
Browse files
- app.py +37 -10
- src/assets/css_html_js.py +2 -2
- src/display_models/get_model_metadata.py +5 -8
- src/display_models/read_results.py +1 -1
- src/load_from_hub.py +3 -2
- src/rate_limiting.py +1 -4
app.py
CHANGED
@@ -51,9 +51,8 @@ api = HfApi(token=H4_TOKEN)
|
|
51 |
|
52 |
|
53 |
def restart_space():
|
54 |
-
api.restart_space(
|
55 |
-
|
56 |
-
)
|
57 |
|
58 |
# Rate limit variables
|
59 |
RATE_LIMIT_PERIOD = 7
|
@@ -98,7 +97,7 @@ else:
|
|
98 |
eval_queue_private, eval_results_private = None, None
|
99 |
|
100 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
101 |
-
models = original_df["model_name_for_query"].tolist()
|
102 |
|
103 |
to_be_dumped = f"models = {repr(models)}\n"
|
104 |
|
@@ -130,7 +129,9 @@ def add_new_eval(
|
|
130 |
error_msg = f"Organisation or user `{model.split('/')[0]}`"
|
131 |
error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
|
132 |
error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
|
133 |
-
error_msg +=
|
|
|
|
|
134 |
return styled_error(error_msg)
|
135 |
|
136 |
if model_type is None or model_type == "":
|
@@ -213,9 +214,29 @@ def change_tab(query_param: str):
|
|
213 |
|
214 |
|
215 |
# Searching and filtering
|
216 |
-
def update_table(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
220 |
if query != "":
|
221 |
filtered_df = search_table(filtered_df, query)
|
@@ -223,9 +244,11 @@ def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, colu
|
|
223 |
|
224 |
return df
|
225 |
|
|
|
226 |
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
227 |
return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
|
228 |
|
|
|
229 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
230 |
always_here_cols = [
|
231 |
AutoEvalColumn.model_type_symbol.name,
|
@@ -237,8 +260,9 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
|
237 |
]
|
238 |
return filtered_df
|
239 |
|
|
|
240 |
NUMERIC_INTERVALS = {
|
241 |
-
"Unknown": pd.Interval(-1, 0, closed="right"),
|
242 |
"< 1.5B": pd.Interval(0, 1.5, closed="right"),
|
243 |
"~3B": pd.Interval(1.5, 5, closed="right"),
|
244 |
"~7B": pd.Interval(6, 11, closed="right"),
|
@@ -247,9 +271,9 @@ NUMERIC_INTERVALS = {
|
|
247 |
"60B+": pd.Interval(55, 10000, closed="right"),
|
248 |
}
|
249 |
|
|
|
250 |
def filter_models(
|
251 |
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
252 |
-
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
253 |
) -> pd.DataFrame:
|
254 |
# Show all models
|
255 |
if show_deleted:
|
@@ -545,7 +569,10 @@ with demo:
|
|
545 |
max_rows=5,
|
546 |
)
|
547 |
with gr.Row():
|
548 |
-
gr.Markdown(
|
|
|
|
|
|
|
549 |
|
550 |
with gr.Row():
|
551 |
with gr.Accordion("📙 Citation", open=False):
|
|
|
51 |
|
52 |
|
53 |
def restart_space():
|
54 |
+
api.restart_space(repo_id="gsaivinay/open_llm_leaderboard", token=H4_TOKEN)
|
55 |
+
|
|
|
56 |
|
57 |
# Rate limit variables
|
58 |
RATE_LIMIT_PERIOD = 7
|
|
|
97 |
eval_queue_private, eval_results_private = None, None
|
98 |
|
99 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
100 |
+
models = original_df["model_name_for_query"].tolist() # needed for model backlinks to the leaderboard
|
101 |
|
102 |
to_be_dumped = f"models = {repr(models)}\n"
|
103 |
|
|
|
129 |
error_msg = f"Organisation or user `{model.split('/')[0]}`"
|
130 |
error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
|
131 |
error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
|
132 |
+
error_msg += (
|
133 |
+
"Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
|
134 |
+
)
|
135 |
return styled_error(error_msg)
|
136 |
|
137 |
if model_type is None or model_type == "":
|
|
|
214 |
|
215 |
|
216 |
# Searching and filtering
|
217 |
+
def update_table(
|
218 |
+
hidden_df: pd.DataFrame,
|
219 |
+
current_columns_df: pd.DataFrame,
|
220 |
+
columns: list,
|
221 |
+
type_query: list,
|
222 |
+
precision_query: str,
|
223 |
+
size_query: list,
|
224 |
+
show_deleted: bool,
|
225 |
+
query: str,
|
226 |
+
):
|
227 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
228 |
+
|
229 |
+
|
230 |
+
def update_table(
|
231 |
+
hidden_df: pd.DataFrame,
|
232 |
+
current_columns_df: pd.DataFrame,
|
233 |
+
columns: list,
|
234 |
+
type_query: list,
|
235 |
+
precision_query: str,
|
236 |
+
size_query: list,
|
237 |
+
show_deleted: bool,
|
238 |
+
query: str,
|
239 |
+
):
|
240 |
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
241 |
if query != "":
|
242 |
filtered_df = search_table(filtered_df, query)
|
|
|
244 |
|
245 |
return df
|
246 |
|
247 |
+
|
248 |
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
249 |
return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
|
250 |
|
251 |
+
|
252 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
253 |
always_here_cols = [
|
254 |
AutoEvalColumn.model_type_symbol.name,
|
|
|
260 |
]
|
261 |
return filtered_df
|
262 |
|
263 |
+
|
264 |
NUMERIC_INTERVALS = {
|
265 |
+
"Unknown": pd.Interval(-1, 0, closed="right"),
|
266 |
"< 1.5B": pd.Interval(0, 1.5, closed="right"),
|
267 |
"~3B": pd.Interval(1.5, 5, closed="right"),
|
268 |
"~7B": pd.Interval(6, 11, closed="right"),
|
|
|
271 |
"60B+": pd.Interval(55, 10000, closed="right"),
|
272 |
}
|
273 |
|
274 |
+
|
275 |
def filter_models(
|
276 |
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
|
|
277 |
) -> pd.DataFrame:
|
278 |
# Show all models
|
279 |
if show_deleted:
|
|
|
569 |
max_rows=5,
|
570 |
)
|
571 |
with gr.Row():
|
572 |
+
gr.Markdown(
|
573 |
+
"# ✉️✨ Submit your model [here!](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)",
|
574 |
+
elem_classes="markdown-text",
|
575 |
+
)
|
576 |
|
577 |
with gr.Row():
|
578 |
with gr.Accordion("📙 Citation", open=False):
|
src/assets/css_html_js.py
CHANGED
@@ -33,7 +33,7 @@ custom_css = """
|
|
33 |
background: none;
|
34 |
border: none;
|
35 |
}
|
36 |
-
|
37 |
#search-bar {
|
38 |
padding: 0px;
|
39 |
}
|
@@ -83,7 +83,7 @@ table th:first-child {
|
|
83 |
#filter_type label > .wrap{
|
84 |
width: 103px;
|
85 |
}
|
86 |
-
#filter_type label > .wrap .wrap-inner{
|
87 |
padding: 2px;
|
88 |
}
|
89 |
#filter_type label > .wrap .wrap-inner input{
|
|
|
33 |
background: none;
|
34 |
border: none;
|
35 |
}
|
36 |
+
|
37 |
#search-bar {
|
38 |
padding: 0px;
|
39 |
}
|
|
|
83 |
#filter_type label > .wrap{
|
84 |
width: 103px;
|
85 |
}
|
86 |
+
#filter_type label > .wrap .wrap-inner{
|
87 |
padding: 2px;
|
88 |
}
|
89 |
#filter_type label > .wrap .wrap-inner input{
|
src/display_models/get_model_metadata.py
CHANGED
@@ -1,17 +1,15 @@
|
|
1 |
import glob
|
2 |
import json
|
3 |
import os
|
4 |
-
import re
|
5 |
import pickle
|
|
|
6 |
from typing import List
|
7 |
|
8 |
import huggingface_hub
|
|
|
9 |
from huggingface_hub import HfApi
|
10 |
from tqdm import tqdm
|
11 |
-
from transformers import
|
12 |
-
from accelerate import init_empty_weights
|
13 |
-
from transformers import AutoModel, AutoConfig
|
14 |
-
from accelerate import init_empty_weights
|
15 |
|
16 |
from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
|
17 |
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
|
@@ -25,7 +23,6 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
|
|
25 |
try:
|
26 |
with open("model_info_cache.pkl", "rb") as f:
|
27 |
model_info_cache = pickle.load(f)
|
28 |
-
except (EOFError, FileNotFoundError):
|
29 |
except (EOFError, FileNotFoundError):
|
30 |
model_info_cache = {}
|
31 |
try:
|
@@ -67,7 +64,7 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
|
|
67 |
if model_name not in model_size_cache:
|
68 |
model_size_cache[model_name] = get_model_size(model_name, model_info)
|
69 |
model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
|
70 |
-
|
71 |
# save cache to disk in pickle format
|
72 |
with open("model_info_cache.pkl", "wb") as f:
|
73 |
pickle.dump(model_info_cache, f)
|
@@ -101,7 +98,7 @@ def get_model_size(model_name, model_info):
|
|
101 |
with init_empty_weights():
|
102 |
model = AutoModel.from_config(config, trust_remote_code=False)
|
103 |
return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
|
104 |
-
except (EnvironmentError, ValueError, KeyError):
|
105 |
try:
|
106 |
size_match = re.search(size_pattern, model_name.lower())
|
107 |
size = size_match.group(0)
|
|
|
1 |
import glob
|
2 |
import json
|
3 |
import os
|
|
|
4 |
import pickle
|
5 |
+
import re
|
6 |
from typing import List
|
7 |
|
8 |
import huggingface_hub
|
9 |
+
from accelerate import init_empty_weights
|
10 |
from huggingface_hub import HfApi
|
11 |
from tqdm import tqdm
|
12 |
+
from transformers import AutoConfig, AutoModel
|
|
|
|
|
|
|
13 |
|
14 |
from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
|
15 |
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
|
|
|
23 |
try:
|
24 |
with open("model_info_cache.pkl", "rb") as f:
|
25 |
model_info_cache = pickle.load(f)
|
|
|
26 |
except (EOFError, FileNotFoundError):
|
27 |
model_info_cache = {}
|
28 |
try:
|
|
|
64 |
if model_name not in model_size_cache:
|
65 |
model_size_cache[model_name] = get_model_size(model_name, model_info)
|
66 |
model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
|
67 |
+
|
68 |
# save cache to disk in pickle format
|
69 |
with open("model_info_cache.pkl", "wb") as f:
|
70 |
pickle.dump(model_info_cache, f)
|
|
|
98 |
with init_empty_weights():
|
99 |
model = AutoModel.from_config(config, trust_remote_code=False)
|
100 |
return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
|
101 |
+
except (EnvironmentError, ValueError, KeyError): # model config not found, likely private
|
102 |
try:
|
103 |
size_match = re.search(size_pattern, model_name.lower())
|
104 |
size = size_match.group(0)
|
src/display_models/read_results.py
CHANGED
@@ -107,7 +107,7 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
|
|
107 |
revision=model_sha,
|
108 |
results={benchmark: mean_acc},
|
109 |
precision=precision, # todo model_type=, weight_type=
|
110 |
-
date=config.get("submission_date")
|
111 |
)
|
112 |
)
|
113 |
|
|
|
107 |
revision=model_sha,
|
108 |
results={benchmark: mean_acc},
|
109 |
precision=precision, # todo model_type=, weight_type=
|
110 |
+
date=config.get("submission_date"),
|
111 |
)
|
112 |
)
|
113 |
|
src/load_from_hub.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import json
|
2 |
import os
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
from huggingface_hub import Repository
|
6 |
from transformers import AutoConfig
|
7 |
-
from collections import defaultdict
|
8 |
|
9 |
from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
|
10 |
from src.display_models.get_model_metadata import apply_metadata
|
@@ -23,7 +23,8 @@ def get_all_requested_models(requested_models_dir: str) -> set[str]:
|
|
23 |
current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
|
24 |
if current_depth == depth:
|
25 |
for file in files:
|
26 |
-
if not file.endswith(".json"):
|
|
|
27 |
with open(os.path.join(root, file), "r") as f:
|
28 |
info = json.load(f)
|
29 |
file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
|
|
|
1 |
import json
|
2 |
import os
|
3 |
+
from collections import defaultdict
|
4 |
|
5 |
import pandas as pd
|
6 |
from huggingface_hub import Repository
|
7 |
from transformers import AutoConfig
|
|
|
8 |
|
9 |
from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
|
10 |
from src.display_models.get_model_metadata import apply_metadata
|
|
|
23 |
current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
|
24 |
if current_depth == depth:
|
25 |
for file in files:
|
26 |
+
if not file.endswith(".json"):
|
27 |
+
continue
|
28 |
with open(os.path.join(root, file), "r") as f:
|
29 |
info = json.load(f)
|
30 |
file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
|
src/rate_limiting.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
|
2 |
-
from datetime import datetime, timezone, timedelta
|
3 |
|
4 |
|
5 |
def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):
|
@@ -12,5 +11,3 @@ def user_submission_permission(submission_name, users_to_submission_dates, rate_
|
|
12 |
submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
|
13 |
|
14 |
return len(submissions_after_timelimit)
|
15 |
-
|
16 |
-
|
|
|
1 |
+
from datetime import datetime, timedelta, timezone
|
|
|
2 |
|
3 |
|
4 |
def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):
|
|
|
11 |
submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
|
12 |
|
13 |
return len(submissions_after_timelimit)
|
|
|
|