HelloGitHub committed on
Commit
c41e86b
·
1 Parent(s): 73f1b9b

add submit

Browse files
Files changed (2) hide show
  1. app.py +101 -2
  2. src/submission/submit.py +142 -0
app.py CHANGED
@@ -35,7 +35,13 @@ from src.display.utils import (
35
  )
36
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
37
  from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_leaderboard_df_quota
38
- from src.submission.submit import add_new_eval
 
 
 
 
 
 
39
  from src.scripts.update_all_request_files import update_dynamic_files
40
  from src.tools.collections import update_collections
41
  from src.tools.datastatics import get_statics
@@ -429,7 +435,100 @@ with demo:
429
  queue=True,
430
  )
431
  gr.Markdown(EVALUATION_METRIC_TEXT, elem_classes="markdown-text")
432
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
434
  gr.HTML(TABLE_TEXT)
435
  gr.Markdown(LLM_BENCHMARKS_TEXT2, elem_classes="markdown-text")
 
35
  )
36
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
37
  from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_leaderboard_df_quota
38
+ from src.submission.submit import (
39
+ add_new_eval,
40
+ submit_model_info,
41
+ submit_api_info,
42
+ submit_inference_info,
43
+ submit_eval_complete
44
+ )
45
  from src.scripts.update_all_request_files import update_dynamic_files
46
  from src.tools.collections import update_collections
47
  from src.tools.datastatics import get_statics
 
435
  queue=True,
436
  )
437
  gr.Markdown(EVALUATION_METRIC_TEXT, elem_classes="markdown-text")
438
+ with gr.TabItem("📤 Submit Model", elem_id="submit-model-tab", id=2):
439
+ gr.Markdown("## 提交模型进行评估", elem_classes="markdown-text")
440
+
441
+ with gr.Row():
442
+ with gr.Column(scale=1):
443
+ gr.Markdown("### 1. 提交模型信息")
444
+ model_name = gr.Textbox(
445
+ label="Model Name",
446
+ placeholder="请输入模型名称,格式:用户名/模型名",
447
+ info="例如:microsoft/DialoGPT-medium"
448
+ )
449
+ revision_commit = gr.Textbox(
450
+ label="Revision commit",
451
+ placeholder="请输入版本信息,默认为 main",
452
+ value="main"
453
+ )
454
+
455
+ # 模型信息验证按钮
456
+ model_info_btn = gr.Button("验证模型信息", variant="secondary")
457
+ model_info_output = gr.Textbox(label="验证结果", interactive=False)
458
+
459
+ with gr.Column(scale=1):
460
+ gr.Markdown("### 2. 提交API信息 (API only)")
461
+ model_api_url = gr.Textbox(
462
+ label="Model online api url",
463
+ placeholder="请输入API URL,例如:https://api.openai.com/v1/chat/completions"
464
+ )
465
+ model_api_key = gr.Textbox(
466
+ label="Model online api key",
467
+ placeholder="请输入API密钥",
468
+ type="password"
469
+ )
470
+ online_api_model_name = gr.Textbox(
471
+ label="Online api model name",
472
+ placeholder="请输入API模型名称,例如:gpt-3.5-turbo"
473
+ )
474
+
475
+ # API信息验证按钮
476
+ api_info_btn = gr.Button("验证API信息", variant="secondary")
477
+ api_info_output = gr.Textbox(label="验证结果", interactive=False)
478
+
479
+ with gr.Row():
480
+ with gr.Column(scale=1):
481
+ gr.Markdown("### 3. 提交推理信息 (inference only)")
482
+ runsh_file = gr.File(
483
+ label="upload run.sh file",
484
+ file_types=[".sh"],
485
+ file_count="single"
486
+ )
487
+ adapter_file = gr.File(
488
+ label="upload model_adapter.py file",
489
+ file_types=[".py"],
490
+ file_count="single"
491
+ )
492
+
493
+ # 推理信息验证按钮
494
+ inference_info_btn = gr.Button("验证推理文件", variant="secondary")
495
+ inference_info_output = gr.Textbox(label="验证结果", interactive=False)
496
+
497
+ with gr.Row():
498
+ with gr.Column():
499
+ gr.Markdown("### 4. 提交评估")
500
+ submit_btn = gr.Button("Submit Eval", variant="primary", size="lg")
501
+ submit_output = gr.Textbox(label="提交结果", interactive=False)
502
+
503
+ # 绑定事件
504
+ model_info_btn.click(
505
+ fn=lambda name, rev: submit_model_info(name, rev),
506
+ inputs=[model_name, revision_commit],
507
+ outputs=model_info_output
508
+ )
509
+
510
+ api_info_btn.click(
511
+ fn=lambda url, key, name: submit_api_info(url, key, name),
512
+ inputs=[model_api_url, model_api_key, online_api_model_name],
513
+ outputs=api_info_output
514
+ )
515
+
516
+ inference_info_btn.click(
517
+ fn=lambda runsh, adapter: submit_inference_info(runsh, adapter),
518
+ inputs=[runsh_file, adapter_file],
519
+ outputs=inference_info_output
520
+ )
521
+
522
+ submit_btn.click(
523
+ fn=lambda name, rev, url, key, api_name, runsh, adapter: submit_eval_complete(
524
+ name, rev, url, key, api_name, runsh, adapter
525
+ ),
526
+ inputs=[model_name, revision_commit, model_api_url, model_api_key,
527
+ online_api_model_name, runsh_file, adapter_file],
528
+ outputs=submit_output
529
+ )
530
+
531
+ with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
532
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
533
  gr.HTML(TABLE_TEXT)
534
  gr.Markdown(LLM_BENCHMARKS_TEXT2, elem_classes="markdown-text")
src/submission/submit.py CHANGED
@@ -16,6 +16,148 @@ from src.submission.check_validity import (
16
  REQUESTED_MODELS = None
17
  USERS_TO_SUBMISSION_DATES = None
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def add_new_eval(
20
  model: str,
21
  model_api_url: str,
 
16
  REQUESTED_MODELS = None
17
  USERS_TO_SUBMISSION_DATES = None
18
 
19
def submit_model_info(
    model_name: str,
    revision_commit: str
):
    """
    Validate the model name and revision entered in the model-info form.

    Corresponds to the "Submit your modelinfos here!" section of the page.

    Args:
        model_name: Repository id, expected in ``owner/model`` form.
            Surrounding whitespace is ignored.
        revision_commit: Revision to look up. A missing or blank value
            falls back to ``"main"``.

    Returns:
        The value produced by ``styled_error`` when the name is missing or
        malformed or the lookup fails, otherwise the value produced by
        ``styled_message``.
    """
    # Normalize first so that a value pasted with stray spaces is validated
    # and looked up exactly as the user meant it.
    model_name = (model_name or "").strip()
    if not model_name:
        return styled_error("请输入模型名称")

    revision_commit = (revision_commit or "").strip() or "main"

    # Require exactly one "/" with a non-empty part on each side; a bare
    # "/" or "owner/" would otherwise slip through to the remote lookup.
    owner, slash, repo = model_name.partition("/")
    if not (slash and owner and repo and "/" not in repo):
        return styled_error("模型名称格式不正确,请使用 '用户名/模型名' 格式")

    # Check that the model exists on the Hugging Face Hub. Only the fact
    # that the call succeeds matters; its return value is not used.
    try:
        API.model_info(repo_id=model_name, revision=revision_commit)
    except Exception:
        # UI boundary: any lookup failure is reported to the user as
        # "model not found" instead of crashing the callback.
        return styled_error(f"无法找到模型 {model_name},请检查模型名称和版本是否正确")

    return styled_message(f"模型信息验证成功!模型: {model_name}, 版本: {revision_commit}")
42
+
43
def submit_api_info(
    model_api_url: str,
    model_api_key: str,
    online_api_model_name: str
):
    """
    Validate the online-API details entered in the API form.

    Corresponds to the "Submit your API infos here! (API only)" section of
    the page. Only local checks are performed; no request is sent to the URL.

    Args:
        model_api_url: Endpoint URL; must start with ``http://`` or
            ``https://``.
        model_api_key: API key; only checked for being non-blank.
        online_api_model_name: Model name used by the API; only checked for
            being non-blank.

    Returns:
        The value produced by ``styled_error`` for the first failing check
        (URL, then key, then model name, then URL scheme), otherwise the
        value produced by ``styled_message``.
    """
    # Strip before validating: the blank checks already ignore whitespace,
    # so the scheme check must see the same trimmed value. Otherwise a URL
    # pasted with a leading space is rejected as malformed.
    model_api_url = (model_api_url or "").strip()
    model_api_key = (model_api_key or "").strip()
    online_api_model_name = (online_api_model_name or "").strip()

    if not model_api_url:
        return styled_error("请输入模型在线API URL")

    if not model_api_key:
        return styled_error("请输入模型在线API密钥")

    if not online_api_model_name:
        return styled_error("请输入在线API模型名称")

    # Validate the URL scheme.
    if not model_api_url.startswith(('http://', 'https://')):
        return styled_error("API URL格式不正确,请以 http:// 或 https:// 开头")

    # The key is deliberately left out of the confirmation message.
    return styled_message(f"API信息验证成功!URL: {model_api_url}, 模型名: {online_api_model_name}")
65
+
66
def submit_inference_info(
    runsh_file,
    adapter_file
):
    """
    Validate the uploaded inference files (``run.sh`` and ``model_adapter.py``).

    Corresponds to the "Submit your inference infos here! (inference only)"
    section of the page.

    Args:
        runsh_file: Uploaded ``run.sh``; either a filesystem path or an
            object exposing the path as ``.name``.
        adapter_file: Uploaded ``model_adapter.py``; same accepted forms.

    Returns:
        The value produced by ``styled_error`` when a file is missing, is
        larger than 5 MB, or cannot be read; the value produced by
        ``styled_warning`` when the content looks implausible; otherwise
        the value produced by ``styled_message``.
    """
    if not runsh_file:
        return styled_error("请上传 run.sh 文件")

    if not adapter_file:
        return styled_error("请上传 model_adapter.py 文件")

    def _path_of(upload):
        # The upload may arrive as a plain path or as a file wrapper whose
        # ``.name`` holds the path; accept both.
        if isinstance(upload, (str, os.PathLike)):
            return upload
        return upload.name

    max_size = 5 * 1024 * 1024  # 5 MB

    # Every filesystem access sits inside the try so that a vanished or
    # unreadable temp file becomes an error message, not an uncaught
    # exception in the UI callback.
    try:
        runsh_path = _path_of(runsh_file)
        adapter_path = _path_of(adapter_file)

        if os.path.getsize(runsh_path) > max_size:
            return styled_error("run.sh 文件大小不能超过 5MB")

        if os.path.getsize(adapter_path) > max_size:
            return styled_error("model_adapter.py 文件大小不能超过 5MB")

        # The content is only scanned for ASCII keywords, so decode as UTF-8
        # and replace undecodable bytes instead of depending on the locale.
        with open(runsh_path, "r", encoding="utf-8", errors="replace") as f:
            runsh_content = f.read()

        with open(adapter_path, "r", encoding="utf-8", errors="replace") as f:
            adapter_content = f.read()
    except Exception as e:
        return styled_error(f"文件读取失败: {str(e)}")

    # Heuristic: run.sh is expected to invoke Python somewhere.
    if "python" not in runsh_content.lower():
        return styled_warning("run.sh 文件可能不包含正确的Python执行脚本")

    # Heuristic: the adapter should define at least one class or function.
    if "class" not in adapter_content and "def" not in adapter_content:
        return styled_warning("model_adapter.py 文件可能不包含正确的类或函数定义")

    return styled_message("推理文件验证成功!run.sh 和 model_adapter.py 文件已准备就绪")
107
+
108
def submit_eval_complete(
    model_name: str,
    revision_commit: str,
    model_api_url: str,
    model_api_key: str,
    online_api_model_name: str,
    runsh_file,
    adapter_file
):
    """
    Run the full evaluation submission, combining all three form sections.

    Validates the model name, the API details (only when all three are
    given) and the size of any uploaded file, then delegates to
    ``add_new_eval``.

    Args:
        model_name: Model identifier; required. Surrounding whitespace is
            ignored.
        revision_commit: Revision; blank or missing falls back to ``"main"``.
        model_api_url: Optional API endpoint URL.
        model_api_key: Optional API key.
        online_api_model_name: Optional model name used by the API.
        runsh_file: Optional uploaded ``run.sh`` (a path, or an object
            exposing the path as ``.name``).
        adapter_file: Optional uploaded ``model_adapter.py`` (same forms).

    Returns:
        The value produced by ``styled_error`` when validation or the
        submission fails, otherwise whatever ``add_new_eval`` returns.
    """
    # Validate the model information.
    model_name = (model_name or "").strip()
    if not model_name:
        return styled_error("请输入模型名称")

    revision_commit = (revision_commit or "").strip() or "main"

    # Normalize the optional API fields; missing values become "".
    model_api_url = (model_api_url or "").strip()
    model_api_key = (model_api_key or "").strip()
    online_api_model_name = (online_api_model_name or "").strip()

    # Validate the API information (only when all three parts are provided).
    if model_api_url and model_api_key and online_api_model_name:
        if not model_api_url.startswith(('http://', 'https://')):
            return styled_error("API URL格式不正确,请以 http:// 或 https:// 开头")

    def _path_of(upload):
        # The upload may arrive as a plain path or as a file wrapper whose
        # ``.name`` holds the path; accept both.
        if isinstance(upload, (str, os.PathLike)):
            return upload
        return upload.name

    max_size = 5 * 1024 * 1024  # 5 MB
    size_checks = (
        (runsh_file, "run.sh 文件大小不能超过 5MB"),
        (adapter_file, "model_adapter.py 文件大小不能超过 5MB"),
    )

    try:
        # Check each uploaded file on its own, so the limit also applies
        # when only one of the two is provided. The checks sit inside the
        # try so a filesystem error is reported as a failed submission
        # rather than escaping the callback.
        for upload, too_large_message in size_checks:
            if upload and os.path.getsize(_path_of(upload)) > max_size:
                return styled_error(too_large_message)

        # Delegate to the existing add_new_eval function.
        return add_new_eval(
            model=model_name,
            model_api_url=model_api_url,
            model_api_key=model_api_key,
            model_api_name=online_api_model_name,
            base_model="",  # may be set as needed
            revision=revision_commit,
            precision="float16",  # default precision
            # NOTE(review): this is the string "false", which is truthy if
            # add_new_eval treats `private` as a bool — confirm.
            private="false",
            weight_type="Original",  # default weight type
            model_type="",  # may be set as needed
            runsh=runsh_file,
            adapter=adapter_file
        )
    except Exception as e:
        return styled_error(f"提交失败: {str(e)}")
160
+
161
  def add_new_eval(
162
  model: str,
163
  model_api_url: str,