Taejin committed
Commit fd2a55f · 1 Parent(s): d0951fd

Updating Content

Signed-off-by: Taejin Park <tango4j@gmail.com>

Files changed (3)
  1. app_new.py +0 -301
  2. app_old.py +0 -281
  3. content.py +19 -66
app_new.py DELETED
@@ -1,301 +0,0 @@
- import os
- import json
- import csv
- import datetime
- from email.utils import parseaddr
-
- import gradio as gr
- import pandas as pd
- import numpy as np
-
- from datasets import load_dataset
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import HfApi
-
- from scorer import instruction_scorer
- from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
-
- TOKEN = os.environ.get("TOKEN", None)
- # OWNER="ucla-contextual"
- OWNER="Taejin"
- # TEST_DATASET = f"{OWNER}/contextual_test"
- # VAL_DATASET = f"{OWNER}/contextual_val"
- # SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
- # CONTACT_DATASET = f"{OWNER}/contact_info"
- # RESULTS_DATASET = f"{OWNER}/results"
- # LEADERBOARD_PATH = f"{OWNER}/leaderboard"
-
- RESULTS_DATASET = f"{OWNER}/spk_tag_results"
- LEADERBOARD_PATH = f"{OWNER}/leaderboard"
- SUBMISSION_DATASET = f"{OWNER}/submission_leaderboard"
- api = HfApi()
-
- YEAR_VERSION = "2024"
-
- def read_json_file(filepath):
-     with open(filepath) as infile:
-         data_dict = json.load(infile)
-     return data_dict
-
- def save_json_file(filepath, data_dict):
-     with open(filepath, "w") as outfile:
-         json.dump(data_dict, outfile)
-
- os.makedirs("scored", exist_ok=True)
-
- # test_data_files = {"test": "contextual_test.csv"}
- # test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # val_data_files = {"val": "contextual_val.csv"}
- # val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
- # results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # contacts_data_files = {"contacts": "contacts.csv"}
- # contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # BASE_PATH="entry_data"
-
-
-
- # results_data_files = {"dev": f"{BASE_PATH}/dev_set_data.csv", "val": "contextual_val_results.csv"}
- results_data_files = {"dev": "dev_set_data.csv"}
- results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # contacts_data_files = {"contacts": "contacts.csv"}
- # contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- def get_dataframe_from_results(results, split):
-     df = results[split].to_pandas()
-     # df.drop(columns=['URL'], inplace=True)
-     df = df.sort_values(by=["cpWER"], ascending=False)
-     return df
-
-
-
- # test_dataset_dataframe = test_dataset["test"].to_pandas()
- # val_dataset_dataframe = val_dataset["val"].to_pandas()
-
- # contacts_dataframe = contact_infos["contacts"].to_pandas()
-
- # val_results_dataframe = get_dataframe_from_results(results=results, split="val")
- # test_results_dataframe = get_dataframe_from_results(results=results, split="test")
-
- def restart_space():
-     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
-
- # TYPES = ["markdown", "markdown", "markdown", "number", "number", "number","number", "number", "number", "number", "number", "number"]
- TYPES = ["markdown", "markdown", "markdown", "markdown", "number", "number"]
-
- # file_path = "dev_set_data.csv"
- # dev_dataframe= pd.read_csv(file_path)
- dev_dataset_dataframe= get_dataframe_from_results(results=results, split="dev")
-
- def add_new_eval(
-     system_name: str,
-     method: str,
-     path_to_file: str,
-     organisation: str,
-     mail: str,
- ):
-     print("printing all inputs:", system_name, method, path_to_file, organisation, mail)
-
-     if len(system_name)==0:
-         print("system_name none")
-         raise gr.Error("Please provide a system_name name. Field empty!")
-
-     if len(method)==0:
-         print("method none")
-         raise gr.Error("Please provide a method. Field empty!")
-
-     if len(organisation)==0:
-         print("org none")
-         raise gr.Error("Please provide organisation information. Field empty!")
-
-     # Very basic email parsing
-     _, parsed_mail = parseaddr(mail)
-     if not "@" in parsed_mail:
-         print("email here")
-         raise gr.Error("Please provide a valid email address.")
-
-
-     # Check if the combination system_name/org already exists and prints a warning message if yes
-     # if system_name.lower() in set([m.lower() for m in results["dev"]["System_name"]]) and organisation.lower() in set([o.lower() for o in results["dev"]["Organisation"]]):
-     #     print("system_name org combo here")
-     #     raise gr.Error("This system_name has been already submitted.")
-
-     if path_to_file is None:
-         print("file missing here")
-         raise gr.Error("Please attach a file.")
-
-     tmp_file_output = read_json_file(path_to_file.name)
-
-     if len(tmp_file_output.keys())!=1:
-         print("file format wrong here")
-         raise gr.Error("Submission file format incorrect. Please refer to the format description!")
-
-     tmp_output_key = list(tmp_file_output.keys())[0]
-     if len(tmp_file_output[tmp_output_key].keys())!=100:
-         print("file not 100 here")
-         raise gr.Error("File must contain exactly 100 predictions.")
-
-     # Save submitted file
-     time_atm = datetime.datetime.today()
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_to_file.name,
-         path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_raw_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # Compute score
-     file_path = path_to_file.name
-     # scores = instruction_scorer(val_dataset_dataframe, file_path , system_name)
-     ref_file_path="seglst_files/err_dev.ref.seglst.json"
-     scores = instruction_scorer(file_path_input= path_to_file.name, ref_file_path=ref_file_path, system_name=system_name)
-
-     path_or_fileobj=f"scored/{organisation}_{system_name}.json"
-     save_json_file(path_or_fileobj, scores)
-
-     # Save scored file
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_or_fileobj,
-         path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_scored_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # Actual submission
-     eval_entry = {
-         "System_name": system_name,
-         "Method":method,
-         "Organisation": organisation,
-         "cpWER":scores["cpWER"],
-         "WER":scores["WER"],
-     }
-
-
-     dev_set_data_csv = "dev_set_data.csv"
-
-     val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-     val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
-     val_results_dataframe.to_csv(dev_set_data_csv, index=False)
-
-     api.upload_file(
-         repo_id=RESULTS_DATASET,
-         path_or_fileobj=dev_set_data_csv,
-         path_in_repo=dev_set_data_csv,
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # contact_info = {
-     #     "System_name": system_name,
-     #     "Organisation": organisation,
-     #     "Mail": mail,
-     # }
-
-     # contacts_dataframe = contact_infos["contacts"].to_pandas()
-     # contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
-     # contacts_dataframe.to_csv('contacts.csv', index=False)
-
-     # api.upload_file(
-     #     repo_id=CONTACT_DATASET,
-     #     path_or_fileobj="contacts.csv",
-     #     path_in_repo=f"contacts.csv",
-     #     repo_type="dataset",
-     #     token=TOKEN
-     # )
-
-     return format_log(f"System_name {system_name} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
-
-
- # def refresh():
- #     results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
- #     results = load_dataset(RESULTS_DATASET, data_files=
- #         results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
- #     val_results_dataframe = get_dataframe_from_results(results=results, split="val")
- #     test_results_dataframe = get_dataframe_from_results(results=results, split="test")
- #     return val_results_dataframe, test_results_dataframe
-
- def refresh():
-     results_data_files = {"dev": "dev_set_data.csv"}
-     results = load_dataset(RESULTS_DATASET, data_files=
-         results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-     dev_results_dataframe = get_dataframe_from_results(results=results, split="dev")
-     # test_results_dataframe = get_dataframe_from_results(results=results, split="test")
-     return dev_results_dataframe
-
- def upload_file(files):
-     file_paths = [file.name for file in files]
-     return file_paths
-
-
-
-
- demo = gr.Blocks()
- with demo:
-     gr.HTML(TITLE)
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("🧐 Introduction", open=False):
-             gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("🎯 Submission Guidelines", open=False):
-             gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.TextArea(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 elem_id="citation-button",
-             )
-     with gr.Tab("Results: Dev"):
-         leaderboard_table_dev = gr.components.Dataframe(
-             value=dev_dataset_dataframe, datatype=TYPES, interactive=False,
-             column_widths=["20%"]
-         )
-
-     refresh_button = gr.Button("Refresh")
-     refresh_button.click(
-         refresh,
-         inputs=[],
-         outputs=[
-             leaderboard_table_dev,
-         ],
-     )
-     with gr.Accordion("Submit a new system_name for evaluation"):
-         with gr.Row():
-             with gr.Column():
-                 system_name_textbox = gr.Textbox(label="System name", type='text')
-                 method_textbox = gr.Textbox(label="Method (LLM with prompt, beam-search, etc)", type='text')
-             with gr.Column():
-                 organisation = gr.Textbox(label="Organisation or Team Name", type='text')
-                 mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
-                 file_output = gr.File()
-
-
-         submit_button = gr.Button("Submit Eval")
-         submission_result = gr.Markdown()
-         submit_button.click(
-             add_new_eval,
-             [
-                 system_name_textbox,
-                 method_textbox,
-                 file_output,
-                 organisation,
-                 mail
-             ],
-             submission_result,
-         )
-
- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=3600)
- scheduler.start()
- demo.launch(debug=True)
app_old.py DELETED
@@ -1,281 +0,0 @@
- import os
- import json
- import csv
- import datetime
- from email.utils import parseaddr
-
- import gradio as gr
- import pandas as pd
- import numpy as np
-
- from datasets import load_dataset
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import HfApi
-
- from scorer import instruction_scorer
- from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
-
- TOKEN = os.environ.get("TOKEN", None)
- OWNER="ucla-contextual"
- TEST_DATASET = f"{OWNER}/contextual_test"
- VAL_DATASET = f"{OWNER}/contextual_val"
- SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
- CONTACT_DATASET = f"{OWNER}/contact_info"
- RESULTS_DATASET = f"{OWNER}/results"
- LEADERBOARD_PATH = f"{OWNER}/leaderboard"
- api = HfApi()
-
- YEAR_VERSION = "2024"
-
- def read_json_file(filepath):
-     with open(filepath) as infile:
-         data_dict = json.load(infile)
-     return data_dict
-
- def save_json_file(filepath, data_dict):
-     with open(filepath, "w") as outfile:
-         json.dump(data_dict, outfile)
-
- os.makedirs("scored", exist_ok=True)
-
- # test_data_files = {"test": "contextual_test.csv"}
- # test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # val_data_files = {"val": "contextual_val.csv"}
- # val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files , token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
- # results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- # contacts_data_files = {"contacts": "contacts.csv"}
- # contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- def get_dataframe_from_results(results, split):
-     df = results[split].to_pandas()
-     df.drop(columns=['URL'], inplace=True)
-     df = df.sort_values(by=["All"], ascending=False)
-     return df
-
- # test_dataset_dataframe = test_dataset["test"].to_pandas()
- # val_dataset_dataframe = val_dataset["val"].to_pandas()
-
- # contacts_dataframe = contact_infos["contacts"].to_pandas()
-
- # val_results_dataframe = get_dataframe_from_results(results=results, split="val")
- # test_results_dataframe = get_dataframe_from_results(results=results, split="test")
-
- def restart_space():
-     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
-
- TYPES = ["markdown", "markdown", "markdown", "number", "number", "number","number", "number", "number", "number", "number", "number"]
-
- def add_new_eval(
-     model: str,
-     method: str,
-     url: str,
-     path_to_file: str,
-     organisation: str,
-     mail: str,
- ):
-     print("printing all inputs:", model, method, url, path_to_file, organisation, mail)
-
-     if len(model)==0:
-         print("model none")
-         raise gr.Error("Please provide a model name. Field empty!")
-
-     if len(method)==0:
-         print("method none")
-         raise gr.Error("Please provide a method. Field empty!")
-
-     if len(organisation)==0:
-         print("org none")
-         raise gr.Error("Please provide organisation information. Field empty!")
-
-     # Very basic email parsing
-     _, parsed_mail = parseaddr(mail)
-     if not "@" in parsed_mail:
-         print("email here")
-         raise gr.Error("Please provide a valid email address.")
-
-
-     # Check if the combination model/org already exists and prints a warning message if yes
-     if model.lower() in set([m.lower() for m in results["val"]["Model"]]) and organisation.lower() in set([o.lower() for o in results["val"]["Organisation"]]):
-         print("model org combo here")
-         raise gr.Error("This model has been already submitted.")
-
-     if path_to_file is None:
-         print("file missing here")
-         raise gr.Error("Please attach a file.")
-
-     tmp_file_output = read_json_file(path_to_file.name)
-
-     if len(tmp_file_output.keys())!=1:
-         print("file format wrong here")
-         raise gr.Error("Submission file format incorrect. Please refer to the format description!")
-
-     tmp_output_key = list(tmp_file_output.keys())[0]
-     if len(tmp_file_output[tmp_output_key].keys())!=100:
-         print("file not 100 here")
-         raise gr.Error("File must contain exactly 100 predictions.")
-
-     # Save submitted file
-     time_atm = datetime.datetime.today()
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_to_file.name,
-         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # Compute score
-     file_path = path_to_file.name
-     scores = instruction_scorer(val_dataset_dataframe, file_path , model)
-
-     path_or_fileobj=f"scored/{organisation}_{model}.json"
-     save_json_file(path_or_fileobj, scores)
-
-     # Save scored file
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_or_fileobj,
-         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # Actual submission
-     eval_entry = {
-         "Model": model,
-         "Method":method,
-         "Organisation": organisation,
-         "URL": url,
-         "All":scores["average"],
-         "Time":scores["time"],
-         "Shopping":scores["shopping"],
-         "Navigation":scores["navigation-transportation"],
-         "Abstract":scores["abstract"],
-         "Application Usage":scores["app"],
-         "Web Usage":scores["web"],
-         "Infographic":scores["infographics"],
-         "Miscellaneous Natural Scenes": scores["misc"]
-     }
-
-     val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-     val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
-     val_results_dataframe.to_csv('contextual_val_results.csv', index=False)
-
-     api.upload_file(
-         repo_id=RESULTS_DATASET,
-         path_or_fileobj="contextual_val_results.csv",
-         path_in_repo=f"contextual_val_results.csv",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     contact_info = {
-         "Model": model,
-         "URL": url,
-         "Organisation": organisation,
-         "Mail": mail,
-     }
-
-     contacts_dataframe = contact_infos["contacts"].to_pandas()
-     contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
-     contacts_dataframe.to_csv('contacts.csv', index=False)
-
-     api.upload_file(
-         repo_id=CONTACT_DATASET,
-         path_or_fileobj="contacts.csv",
-         path_in_repo=f"contacts.csv",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
-
-
- def refresh():
-     results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
-     results = load_dataset(RESULTS_DATASET, data_files=
-         results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-     val_results_dataframe = get_dataframe_from_results(results=results, split="val")
-     test_results_dataframe = get_dataframe_from_results(results=results, split="test")
-     return val_results_dataframe, test_results_dataframe
-
- def upload_file(files):
-     file_paths = [file.name for file in files]
-     return file_paths
-
-
- demo = gr.Blocks()
- with demo:
-     gr.HTML(TITLE)
-     # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("🧐 Introduction", open=False):
-             gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("🎯 Submission Guidelines", open=False):
-             gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.TextArea(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 elem_id="citation-button",
-             )
-     with gr.Tab("Results: Test"):
-         leaderboard_table_test = gr.components.Dataframe(
-             value=test_results_dataframe, datatype=TYPES, interactive=False,
-             column_widths=["20%"]
-         )
-     with gr.Tab("Results: Val"):
-         leaderboard_table_val = gr.components.Dataframe(
-             value=val_results_dataframe, datatype=TYPES, interactive=False,
-             column_widths=["20%"]
-         )
-
-     refresh_button = gr.Button("Refresh")
-     refresh_button.click(
-         refresh,
-         inputs=[],
-         outputs=[
-             leaderboard_table_val,
-             leaderboard_table_test,
-         ],
-     )
-     with gr.Accordion("Submit a new model for evaluation"):
-         with gr.Row():
-             with gr.Column():
-                 model_name_textbox = gr.Textbox(label="Model name", type='text')
-                 method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text')
-                 url_textbox = gr.Textbox(label="URL to model information", type='text')
-             with gr.Column():
-                 organisation = gr.Textbox(label="Organisation", type='text')
-                 mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
-                 file_output = gr.File()
-
-
-         submit_button = gr.Button("Submit Eval")
-         submission_result = gr.Markdown()
-         submit_button.click(
-             add_new_eval,
-             [
-                 model_name_textbox,
-                 method_textbox,
-                 url_textbox,
-                 file_output,
-                 organisation,
-                 mail
-             ],
-             submission_result,
-         )
-
- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=3600)
- scheduler.start()
- demo.launch(debug=True)
content.py CHANGED
@@ -1,76 +1,29 @@
- TITLE = """<h1 align="center" id="space-title">ConTextual Leaderboard</h1>"""

  INTRODUCTION_TEXT = """
- Models are becoming quite good at understanding text on its own, but what about text in images, which gives important contextual information? For example, navigating a map, or understanding a meme? The ability to reason about the interactions between the text and visual context in images can power many real-world applications, such as AI assistants, or tools to assist the visually impaired. We refer to these tasks as context-sensitive text-rich visual reasoning tasks.
-
- At the moment, most evaluations of instruction-tuned large multimodal models (LMMs) focus on testing how well models can respond to human instructions posed as questions or imperative tasks over images… but not how well they understand context-sensitive text-rich scenes! That’s why we created ConTextual, a Context-sensitive Text-rich visuaL reasoning dataset for evaluating LMMs. We also released a leaderboard, so that the community can see for themselves which models are the best at this task. (See our [paper](https://arxiv.org/abs/2401.13311) for more details.)
-
- ## Data
- ConTextual comprises **506 examples covering 8 real-world visual scenarios** - *Time Reading, Shopping, Navigation, Abstract Scenes, Mobile Application, Webpages, Infographics and Miscellaneous Natural Scenes*. Each sample consists of:
- - A text-rich image
- - A human-written instruction (question or imperative task)
- - A human-written reference response
-
-
- ### Data Access
- ConTextual data can be found on HuggingFace and GitHub.
- - HuggingFace
-   - [Test](https://huggingface.co/datasets/ucla-contextual/contextual_test)
-   - [Val](https://huggingface.co/datasets/ucla-contextual/contextual_val)
- - Github
-   - [Test](https://github.com/rohan598/ConTextual/blob/main/data/contextual_test.csv)
-   - [Val](https://github.com/rohan598/ConTextual/blob/main/data/contextual_val.csv)
-
- ### Data Format
- ```
- {
-     "image_url": [string] url to the hosted image,
-     "instruction" [string] instruction text,
-     "response": [string] response text (only provided for samples in the val subset),
-     "category": visual scenario this example belongs to like 'time' and 'shopping' out of 8 possible scenarios in ConTextual
- }
- ```

  """

  SUBMISSION_TEXT = """
  ## Submissions
- Results can be submitted for only validation here. Scores are expressed as the percentage of correct answers for a given split.
-
- Submission made by our team are labelled "ConTextual authors".
-
- ### Validation Results
- To submit your validation results to the leaderboard, you can run our auto-evaluation code (Evaluation Pipeline with GPT4), following the instructions [here](https://github.com/rohan598/ConTextual?tab=readme-ov-file#-evaluation-pipeline-gpt-4).
-
- We expect submissions to be json format as shown below:
- ```
- {"model_name": {"img_url": "1 or 0 as integer"}
- Replace model name with your model name (string)
- Replace img_url with img_url of the instance (string)
- Value for an img url is either 0 or 1 (int)
- There should be 100 predictions, corresponding to the 100 urls of the val set.
- ```
-
- **Please do not utilize the public dev set as part of training data for your models.**
-
- ### Test Results
- Once you are happy with your val results, you can send your model predictions to [rohan](mailto:rwadhawan7@g.ucla.edu) and [hritik](mailto:hbansal@g.ucla.edu).
-
- Please include in your email
- 1) A name for your model.
- 2) Organization (affiliation).
- 3) (Optionally) GitHub repo or paper link.
-
- We expect submissions to be json format similar to val set as shown below:
- ```
- {"model_name": {"img_url": "predicted response"}
- Replace model name with your model name (string)
- Replace img_url with img_url of the instance (string)
- Value for an img url is the predicted response for that instance (string)
- There should be 506 predictions, corresponding to the 506 urls of the test set.
- ```
-
- **Please revisit the test leaderboard within 1 to 2 days after sharing your prediction file to view your model scores and ranking on the leaderboard.**

  """
 
+ TITLE = """<h1 align="center" id="space-title">SLT GenSEC Challenge Track-2 Leaderboard</h1>"""

  INTRODUCTION_TEXT = """
+ This is the official leaderboard for the [SLT GenSEC Challenge Track-2].
+
+ Baseline Repository:
+ https://github.com/tango4j/llm_speaker_tagging
+
+ Dataset:
+ https://huggingface.co/datasets/GenSEC-LLM/SLT-Task2-Post-ASR-Speaker-Tagging
+
+ Leaderboard:
+ https://huggingface.co/spaces/Taejin/speaker_tagging_leaderboard
+

  """

  SUBMISSION_TEXT = """
  ## Submissions
+
+ You must submit either the `err_dev.hyp.seglst.json` file for the dev set or the corresponding hypothesis `.seglst.json` file for the evaluation set.
+
+ For the file formats, please refer to the [SLT GenSEC Challenge Track-2].
+
+ Baseline Repository:
+ https://github.com/tango4j/llm_speaker_tagging

  """
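
For orientation, the `*.seglst.json` submission files referenced in SUBMISSION_TEXT follow the SegLST convention: a single JSON list in which each entry describes one speaker-attributed segment. The sketch below shows how such a hypothesis file could be loaded and sanity-checked before uploading; the field names (`session_id`, `speaker`, `words`) are assumptions based on the common SegLST layout, and the baseline repository remains the authoritative reference for the exact schema.

```python
import json


def check_seglst(path: str) -> None:
    """Load a SegLST-style hypothesis file and run minimal sanity checks."""
    with open(path) as f:
        segments = json.load(f)  # SegLST files hold a JSON list of segment dicts
    assert isinstance(segments, list), "expected a JSON list of segments"
    # Field names below are assumptions based on the common SegLST layout;
    # the baseline repository defines the authoritative schema.
    for seg in segments:
        for key in ("session_id", "speaker", "words"):
            assert key in seg, f"segment missing '{key}': {seg}"
    print(f"{path}: {len(segments)} segments look well-formed")


if __name__ == "__main__":
    check_seglst("err_dev.hyp.seglst.json")
```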