Omartificial-Intelligence-Space committed
Commit 14258b4 • 1 Parent(s): 233c78c
update app.py
app.py CHANGED
@@ -18,9 +18,9 @@ from src.display.utils import (
     COLS,
     EVAL_COLS,
     EVAL_TYPES,
+    COLUMNS, # Added this line
     AutoEvalColumn,
     ModelType,
-    fields,
     WeightType,
     Precision
 )
@@ -28,11 +28,10 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
-
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
-### Space
+### Space initialization
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
@@ -62,30 +61,30 @@ def init_leaderboard(dataframe):
         # Instead of raising an error, display an empty leaderboard with a message
         print("Leaderboard DataFrame is empty. No models have been evaluated yet.")
         # Create an empty DataFrame with the necessary columns
-        dataframe = pd.DataFrame(columns=[c.name for c in fields(AutoEvalColumn)])
+        dataframe = pd.DataFrame(columns=[c.name for c in COLUMNS])
     # Optionally, you can add a message to the interface to inform users
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
+        datatype=[c.type for c in COLUMNS],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            default_selection=[c.name for c in COLUMNS if c.displayed_by_default],
+            cant_deselect=[c.name for c in COLUMNS if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        search_columns=[c.name for c in COLUMNS if c.name in ["model", "license"]],
+        hide_columns=[c.name for c in COLUMNS if c.hidden],
         filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
+            ColumnFilter("precision", type="checkboxgroup", label="Precision"),
             ColumnFilter(
-                AutoEvalColumn.params.name,
+                "params",
                 type="slider",
                 min=0.01,
                 max=150,
                 label="Select the number of parameters (B)",
             ),
             ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+                "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
             ),
         ],
         bool_checkboxgroup_label="Hide models",
@@ -100,7 +99,11 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
+            if LEADERBOARD_DF.empty:
+                gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
+            else:
+                leaderboard = init_leaderboard(LEADERBOARD_DF)
+                leaderboard.render() # Ensure the leaderboard is rendered
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -110,8 +113,7 @@ with demo:
             with gr.Row():
                 gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
-            # Since the evaluation queues are empty,
-            # Alternatively, display a message
+            # Since the evaluation queues are empty, display a message
             with gr.Column():
                 gr.Markdown("Evaluations are performed immediately upon submission. There are no pending or running evaluations.")
 
@@ -163,7 +165,7 @@ with demo:
                     precision,
                     weight_type,
                     model_type,
-                    num_examples_input #
+                    num_examples_input # Included this line
                 ],
                 submission_result,
             )
@@ -181,4 +183,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
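Note: the new code imports COLUMNS from src.display.utils and relies on each entry exposing name, type, displayed_by_default, never_hidden, and hidden attributes, but that module is not part of this commit. The sketch below is only an assumption about a minimal shape that would satisfy the calls in init_leaderboard; the real column names, types, and defaults in src/display/utils.py may differ.

```python
# Hypothetical sketch of the COLUMNS structure assumed by init_leaderboard.
# Only the attributes referenced in the diff are modeled here.
from dataclasses import dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str                           # column key used by the Leaderboard component
    type: str                           # Gradio datatype, e.g. "markdown", "number", "str", "bool"
    displayed_by_default: bool = True   # preselected in SelectColumns
    never_hidden: bool = False          # cannot be deselected
    hidden: bool = False                # always excluded from display


# Assumed column list; the actual leaderboard defines its own columns and metrics.
COLUMNS = [
    ColumnContent("model", "markdown", never_hidden=True),
    ColumnContent("average", "number"),
    ColumnContent("model_type", "str"),
    ColumnContent("precision", "str", displayed_by_default=False),
    ColumnContent("params", "number", displayed_by_default=False),
    ColumnContent("license", "str", displayed_by_default=False),
    ColumnContent("still_on_hub", "bool", hidden=True),
]
```

With a list shaped like this, the comprehensions in the diff, for example [c.name for c in COLUMNS if c.displayed_by_default], resolve without the fields(AutoEvalColumn) helper that this commit removes.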
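The closing context lines of the diff keep the template's self-restart pattern: an APScheduler background job calls restart_space every 1800 seconds while the Gradio queue handles requests. The standalone sketch below reproduces that pattern with a placeholder callback standing in for API.restart_space(repo_id=REPO_ID), which in the real app comes from the huggingface_hub client configured in src.envs.

```python
# Minimal sketch of the periodic self-restart used at the end of app.py.
# The restart callback is a placeholder; the Space itself calls
# API.restart_space(repo_id=REPO_ID) through huggingface_hub.
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler


def restart_space():
    # Placeholder: log instead of restarting a real Space.
    print("Restart requested (the real app calls HfApi().restart_space(repo_id=...)).")


with gr.Blocks() as demo:
    gr.Markdown("Leaderboard placeholder")

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)  # every 30 minutes
scheduler.start()

# queue() caps concurrent event handlers before the app is launched
demo.queue(default_concurrency_limit=40).launch()
```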