Yyy0530 committed
Commit 43bf3ba
1 parent: e583379
app.py CHANGED
@@ -1,204 +1,170 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-
-
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-
-
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
-
-
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )

     with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
 import gradio as gr
 import pandas as pd
+from functools import reduce
+from collections import defaultdict
+import os
+from yaml import safe_load
+
+CONFIG = safe_load(open("config.yaml"))
+
+data = defaultdict(dict)
+
+# Read the data
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        # Read each workbook from the path derived from the config file
+        data[settings][type] = pd.read_excel(os.path.join("data", CONFIG["settings_mapping"][settings] + f"-{type}.xlsx"))
+
+# Add an average-score column
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        data[settings][type]["Average"] = data[settings][type].iloc[:, 1:].mean(axis=1)
+# Add a Rank column
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        data[settings][type]["Rank"] = data[settings][type]["Average"].rank(ascending=False, method='min').astype(int)
+# Move the Rank column to the front
+for settings in CONFIG['settings']:
+    for type in CONFIG['types']:
+        cols = data[settings][type].columns.tolist()
+        cols = cols[-1:] + cols[:-1]
+        data[settings][type] = data[settings][type][cols]
+
+css = """
+table > thead {
+    white-space: normal;
+}
+
+table {
+    --cell-width-1: 250px;
+}
+
+table > tbody > tr > td:nth-child(2) > div {
+    overflow-x: auto;
+}
+
+.filter-checkbox-group {
+    max-width: max-content;
+}
+
+/* Make sure the second column (Model) is fully expanded */
+table > tbody > tr > td:nth-child(2) {
+    white-space: nowrap;
+    width: auto;
+}
+
+/* Show the other columns compactly */
+table > tbody > tr > td:not(:nth-child(2)) {
+    white-space: normal;
+    width: auto;
+}
+"""
+
+"""
+Each inner tab can have the following keys:
+- language: The language of the leaderboard
+- language_long: [optional] The long form of the language
+- description: The description of the leaderboard
+- credits: [optional] The credits for the leaderboard
+- desc: [optional] The description of the leaderboard
+- data: The data for the leaderboard
+"""
+# Define model types and sizes (placeholders)
+MODEL_TYPES = [
+    "Open",
+    "Proprietary",
+    "Sentence Transformers",
+    "Cross-Encoders",
+    "Bi-Encoders",
+    "Uses Instructions",
+    "No Instructions",
+]
+
+NUMERIC_INTERVALS = {
+    "<100M": pd.Interval(0, 100, closed="right"),
+    "100M to 250M": pd.Interval(100, 250, closed="right"),
+    "250M to 500M": pd.Interval(250, 500, closed="right"),
+    "500M to 1B": pd.Interval(500, 1000, closed="right"),
+    ">1B": pd.Interval(1000, 1_000_000, closed="right"),
+}
+
+# Define the filtering logic (not wired up yet: the click handler below is still
+# commented out, and `df` is a placeholder for the table being displayed)
+def filter_data(search_query, model_types, model_sizes):
+    output_df = df.copy()
+
+    # Apply the search query
+    if search_query:
+        names = output_df.index.str.lower()
+        masks = []
+        for query in search_query.split(";"):
+            masks.append(names.str.contains(query.lower()))
+        output_df = output_df[reduce(lambda a, b: a | b, masks)]
+
+    # Apply the model type filtering
+    if set(model_types) != set(MODEL_TYPES):
+        # Placeholder logic for model type filtering
+        pass
+
+    # Apply the model size filtering
+    if model_sizes:
+        # Placeholder logic for model size filtering
+        pass
+
+    return output_df
+
+# Create the Gradio interface
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# Model Leaderboard")
+
     with gr.Row():
+        search_box = gr.Textbox(
+            label="Search Models (separate by ';')",
+            placeholder=" 🔍 Search for a model and press enter..."
+        )
+        model_type_checkbox_group = gr.CheckboxGroup(
+            label="Model types",
+            choices=MODEL_TYPES,
+            value=MODEL_TYPES,
+            interactive=True,
+            elem_classes=["filter-checkbox-group"],
+            scale=3,
+        )
+        model_size_checkbox_group = gr.CheckboxGroup(
+            label="Model sizes (in number of parameters)",
+            choices=list(NUMERIC_INTERVALS.keys()),
+            value=list(NUMERIC_INTERVALS.keys()),
+            interactive=True,
+            elem_classes=["filter-checkbox-group"],
+            scale=2,
+        )
+        submit_button = gr.Button("Filter Data")
+
+    # One tab per setting, with an inner tab per result type
+    with gr.Tabs() as result_table:
+        for settings in CONFIG['settings']:
+            with gr.Tab(label=settings):
+                for type in CONFIG['types']:
+                    with gr.Tab(label=type):
+                        gr.DataFrame(data[settings][type], type="pandas")
+
+    # submit_button.click(fn=filter_data, inputs=[search_box, model_type_checkbox_group, model_size_checkbox_group], outputs=result_table)
+
+demo.launch()
config.yaml ADDED
@@ -0,0 +1,20 @@
+settings:
+  - "w/ meta w/ inst"
+  - "w/ meta w/o inst"
+  - "w/o meta w/ inst"
+  - "w/o meta w/o inst"
+types:
+  - "Code"
+  - "API"
+  - "Customized"
+  - "Avg"
+metrics:
+  - Comp@10
+  - Recall@10
+  - Prec@10
+  - NDCG@10
+settings_mapping:
+  "w/ meta w/ inst": "w-w"
+  "w/ meta w/o inst": "w-wo"
+  "w/o meta w/ inst": "wo-w"
+  "w/o meta w/o inst": "wo-wo"
data/w-w-API.xlsx ADDED
Binary file (26.9 kB)
data/w-w-Avg.xlsx ADDED
Binary file (11.7 kB)
data/w-w-Code.xlsx ADDED
Binary file (28.4 kB)
data/w-w-Customized.xlsx ADDED
Binary file (11.3 kB)
data/w-wo-API.xlsx ADDED
Binary file (10.7 kB)
data/w-wo-Avg.xlsx ADDED
Binary file (28.4 kB)
data/w-wo-Code.xlsx ADDED
Binary file (28.6 kB)
data/w-wo-Customized.xlsx ADDED
Binary file (10.6 kB)
data/wo-w-API.xlsx ADDED
Binary file (10.6 kB)
data/wo-w-Avg.xlsx ADDED
Binary file (11.7 kB)
data/wo-w-Code.xlsx ADDED
Binary file (28.4 kB)
data/wo-w-Customized.xlsx ADDED
Binary file (10.6 kB)
data/wo-wo-API.xlsx ADDED
Binary file (10.6 kB)
data/wo-wo-Avg.xlsx ADDED
Binary file (28.4 kB)
data/wo-wo-Code.xlsx ADDED
Binary file (28.3 kB)
data/wo-wo-Customized.xlsx ADDED
Binary file (10.6 kB)