Spaces:
Sleeping
Sleeping
Spark Chou
commited on
Commit
·
88c9820
1
Parent(s):
acceb2a
add app.py for latest English version
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from huggingface_hub import HfApi, hf_hub_download
|
|
| 13 |
from multiprocessing import TimeoutError
|
| 14 |
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 15 |
|
|
|
|
| 16 |
dataset = load_dataset("intersteller2887/Turing-test-dataset-en", split="train")
|
| 17 |
dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent calling 'torchcodec' from newer version of 'datasets'
|
| 18 |
|
|
@@ -20,7 +21,6 @@ dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent calling 't
|
|
| 20 |
target_audio_dir = "/home/user/app/audio"
|
| 21 |
os.makedirs(target_audio_dir, exist_ok=True)
|
| 22 |
COUNT_JSON_PATH = "/home/user/app/count.json"
|
| 23 |
-
|
| 24 |
COUNT_JSON_REPO_PATH = "submissions/count.json" # Output directory (Huggingface dataset directory)
|
| 25 |
|
| 26 |
# Copy recordings to the working directory
|
|
@@ -42,90 +42,84 @@ sample1_audio_path = local_audio_paths[0]
|
|
| 42 |
print(sample1_audio_path)
|
| 43 |
|
| 44 |
# ==============================================================================
|
| 45 |
-
#
|
| 46 |
# ==============================================================================
|
| 47 |
|
| 48 |
DIMENSIONS_DATA = [
|
| 49 |
{
|
| 50 |
"title": "Semantic and Pragmatic Features",
|
| 51 |
-
"audio":
|
| 52 |
"sub_dims": [
|
| 53 |
-
"Memory Consistency: Human memory in short contexts
|
| 54 |
-
"Logical Coherence: Human
|
| 55 |
-
"Pronunciation Accuracy:
|
| 56 |
-
"Multilingual Mixing:
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"Metaphor and Pragmatic Intent:
|
| 60 |
],
|
| 61 |
-
"reference_scores": [5, 5,
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"title": "Non-Physiological Paralinguistic Features",
|
| 65 |
-
"audio":
|
| 66 |
"sub_dims": [
|
| 67 |
-
"Rhythm: Human
|
| 68 |
-
"Intonation:
|
| 69 |
-
"
|
| 70 |
-
"Auxiliary Vocalizations:
|
| 71 |
],
|
| 72 |
-
"reference_scores": [
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"title": "Physiological Paralinguistic Features",
|
| 76 |
-
"audio":
|
| 77 |
"sub_dims": [
|
| 78 |
-
"Micro-physiological Noise: Human
|
| 79 |
-
"Pronunciation
|
| 80 |
-
"Accent:
|
| 81 |
],
|
| 82 |
-
"reference_scores": [
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"title": "Mechanical Persona",
|
| 86 |
-
"audio":
|
| 87 |
"sub_dims": [
|
| 88 |
-
"
|
| 89 |
-
"
|
| 90 |
],
|
| 91 |
"reference_scores": [5, 5]
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"title": "Emotional Expression",
|
| 95 |
-
"audio":
|
| 96 |
"sub_dims": [
|
| 97 |
-
"Semantic Level:
|
| 98 |
-
"Acoustic Level: Human
|
| 99 |
],
|
| 100 |
-
"reference_scores": [
|
| 101 |
}
|
| 102 |
]
|
| 103 |
|
| 104 |
|
| 105 |
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
|
|
|
| 106 |
MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
|
|
|
|
| 107 |
|
| 108 |
-
"""
|
| 109 |
-
# Issue: this is initialized on the starting of the space, might somehow not covered
|
| 110 |
-
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
| 111 |
-
selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
|
| 112 |
|
| 113 |
-
QUESTION_SET = [
|
| 114 |
-
{"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
|
| 115 |
-
for path in selected_audio_paths
|
| 116 |
-
]"""
|
| 117 |
|
| 118 |
# ==============================================================================
|
| 119 |
-
#
|
| 120 |
# ==============================================================================
|
| 121 |
|
| 122 |
-
#
|
| 123 |
-
def load_or_initialize_count_json(audio_paths):
|
| 124 |
try:
|
| 125 |
# Only try downloading if file doesn't exist yet
|
| 126 |
if not os.path.exists(COUNT_JSON_PATH):
|
| 127 |
downloaded_path = hf_hub_download(
|
| 128 |
-
repo_id="intersteller2887/Turing-test-dataset
|
| 129 |
repo_type="dataset",
|
| 130 |
filename=COUNT_JSON_REPO_PATH,
|
| 131 |
token=os.getenv("HF_TOKEN")
|
|
@@ -134,7 +128,7 @@ def load_or_initialize_count_json(audio_paths):
|
|
| 134 |
with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
|
| 135 |
dst.write(src.read())
|
| 136 |
except Exception as e:
|
| 137 |
-
print(f"Could not download
|
| 138 |
|
| 139 |
# Add filelock to /workspace/count.json
|
| 140 |
lock_path = COUNT_JSON_PATH + ".lock"
|
|
@@ -168,75 +162,139 @@ def load_or_initialize_count_json(audio_paths):
|
|
| 168 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 169 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 170 |
|
| 171 |
-
return count_data
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
random.seed(int(time.time()))
|
| 186 |
-
|
| 187 |
-
selected = random.sample(eligible_paths_copy, k)
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
|
| 196 |
-
return
|
| 197 |
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
|
| 200 |
|
| 201 |
if len(eligible_paths) < k:
|
| 202 |
raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
|
| 203 |
|
|
|
|
| 204 |
selected = random.sample(eligible_paths, k)
|
| 205 |
|
|
|
|
| 206 |
for path in selected:
|
| 207 |
filename = os.path.basename(path)
|
| 208 |
count_data[filename] = count_data.get(filename, 0) + 1
|
| 209 |
|
|
|
|
| 210 |
lock_path = COUNT_JSON_PATH + ".lock"
|
| 211 |
with FileLock(lock_path, timeout=10):
|
| 212 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 213 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 214 |
|
| 215 |
-
return selected, count_data
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
#
|
| 220 |
-
|
| 221 |
|
| 222 |
-
#
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
| 234 |
|
| 235 |
# Save question_set in each user_data_state, preventing global sharing
|
| 236 |
def start_challenge(user_data_state):
|
| 237 |
|
| 238 |
-
|
| 239 |
-
selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths,
|
|
|
|
|
|
|
| 240 |
|
| 241 |
question_set = [
|
| 242 |
{"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
|
|
@@ -244,13 +302,18 @@ def start_challenge(user_data_state):
|
|
| 244 |
]
|
| 245 |
|
| 246 |
user_data_state["question_set"] = question_set
|
| 247 |
-
user_data_state["updated_count_data"] = updated_count_data
|
| 248 |
-
return gr.update(visible=False), gr.update(visible=True), user_data_state
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def toggle_education_other(choice):
|
| 251 |
is_other = (choice == "其他(请注明)")
|
| 252 |
return gr.update(visible=is_other, interactive=is_other, value="")
|
| 253 |
|
|
|
|
| 254 |
def check_info_complete(username, age, gender, education, education_other, ai_experience):
|
| 255 |
if username.strip() and age and gender and education and ai_experience:
|
| 256 |
if education == "其他(请注明)" and not education_other.strip():
|
|
@@ -258,6 +321,7 @@ def check_info_complete(username, age, gender, education, education_other, ai_ex
|
|
| 258 |
return gr.update(interactive=True)
|
| 259 |
return gr.update(interactive=False)
|
| 260 |
|
|
|
|
| 261 |
def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
|
| 262 |
final_edu = education_other if education == "其他(请注明)" else education
|
| 263 |
user_data.update({
|
|
@@ -282,7 +346,7 @@ def update_sample_view(dimension_title):
|
|
| 282 |
# audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
|
| 283 |
interactive_view_up = gr.update(visible=True)
|
| 284 |
reference_view_up = gr.update(visible=False)
|
| 285 |
-
reference_btn_up = gr.update(value="
|
| 286 |
sample_slider_ups = []
|
| 287 |
ref_slider_ups = []
|
| 288 |
scores = dim_data.get("reference_scores", [])
|
|
@@ -302,23 +366,53 @@ def update_sample_view(dimension_title):
|
|
| 302 |
return empty_updates + slider_empty_updates
|
| 303 |
|
| 304 |
def update_test_dimension_view(d_idx, selections):
|
| 305 |
-
dimension = DIMENSIONS_DATA[d_idx]
|
| 306 |
-
progress_d = f"Dimension {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dimension['title']}**"
|
| 307 |
-
|
| 308 |
-
existing_scores = selections.get(dimension['title'], {})
|
| 309 |
-
|
| 310 |
slider_updates = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
for i in range(MAX_SUB_DIMS):
|
| 312 |
-
if i < len(
|
| 313 |
-
|
| 314 |
-
value
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
else:
|
| 317 |
-
slider_updates.append(gr.update(visible=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
prev_btn_update = gr.update(interactive=(d_idx > 0))
|
| 320 |
next_btn_update = gr.update(
|
| 321 |
-
value="
|
| 322 |
interactive=True
|
| 323 |
)
|
| 324 |
|
|
@@ -326,10 +420,8 @@ def update_test_dimension_view(d_idx, selections):
|
|
| 326 |
|
| 327 |
def init_test_question(user_data, q_idx):
|
| 328 |
d_idx = 0
|
| 329 |
-
# question = QUESTION_SET[q_idx]
|
| 330 |
-
# progress_q = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题"
|
| 331 |
question = user_data["question_set"][q_idx]
|
| 332 |
-
progress_q = f"
|
| 333 |
|
| 334 |
initial_updates = update_test_dimension_view(d_idx, {})
|
| 335 |
dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
|
|
@@ -390,11 +482,33 @@ def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
|
|
| 390 |
next_btn_update,
|
| 391 |
) + tuple(slider_updates)
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
# ==============================================================================
|
| 394 |
-
#
|
| 395 |
# ==============================================================================
|
| 396 |
|
| 397 |
-
#
|
| 398 |
def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
|
| 399 |
def decorator(func):
|
| 400 |
@wraps(func)
|
|
@@ -423,12 +537,12 @@ def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
|
|
| 423 |
return wrapper
|
| 424 |
return decorator
|
| 425 |
|
| 426 |
-
def save_with_retry(all_results, user_data
|
| 427 |
# 尝试上传到Hugging Face Hub
|
| 428 |
try:
|
| 429 |
# 使用线程安全的保存方式
|
| 430 |
with ThreadPoolExecutor(max_workers=1) as executor:
|
| 431 |
-
future = executor.submit(save_all_results_to_file, all_results, user_data
|
| 432 |
try:
|
| 433 |
future.result(timeout=30) # 设置30秒超时
|
| 434 |
return True
|
|
@@ -525,6 +639,7 @@ def update_count_with_retry(count_data, question_set, max_retries=3):
|
|
| 525 |
gr.update(), gr.update(),
|
| 526 |
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)"""
|
| 527 |
|
|
|
|
| 528 |
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
|
| 529 |
try:
|
| 530 |
# 准备数据
|
|
@@ -550,7 +665,7 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
| 550 |
return init_q_updates + (all_results, gr.update(value=""))
|
| 551 |
else:
|
| 552 |
# 准备完整结果数据
|
| 553 |
-
result_str = "###
|
| 554 |
for res in all_results:
|
| 555 |
result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
|
| 556 |
for dim_title, dim_data in res['selections'].items():
|
|
@@ -561,7 +676,8 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
| 561 |
|
| 562 |
# 尝试上传(带重试)
|
| 563 |
try:
|
| 564 |
-
success = save_with_retry(all_results, user_data, user_data.get("updated_count_data"))
|
|
|
|
| 565 |
except Exception as e:
|
| 566 |
print(f"上传过程中发生错误: {e}")
|
| 567 |
success = False
|
|
@@ -574,7 +690,7 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
| 574 |
|
| 575 |
# 准备数据包
|
| 576 |
user_info_clean = {
|
| 577 |
-
k: v for k, v in user_data.items() if k not in ["question_set"
|
| 578 |
}
|
| 579 |
final_data_package = {
|
| 580 |
"user_info": user_info_clean,
|
|
@@ -591,10 +707,10 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
| 591 |
|
| 592 |
# 更新count.json(剔除未完成的题目)
|
| 593 |
try:
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
)
|
| 598 |
except Exception as e:
|
| 599 |
print(f"更新count.json失败: {e}")
|
| 600 |
count_update_success = False
|
|
@@ -671,14 +787,14 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
| 671 |
except Exception as e:
|
| 672 |
print(f"上传出错: {e}")"""
|
| 673 |
|
| 674 |
-
def save_all_results_to_file(all_results, user_data
|
| 675 |
repo_id = "intersteller2887/Turing-test-dataset-en"
|
| 676 |
username = user_data.get("username", "user")
|
| 677 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
| 678 |
submission_filename = f"submissions_{username}_{timestamp}.json"
|
| 679 |
|
| 680 |
user_info_clean = {
|
| 681 |
-
k: v for k, v in user_data.items() if k not in ["question_set"
|
| 682 |
}
|
| 683 |
|
| 684 |
final_data_package = {
|
|
@@ -703,41 +819,21 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
| 703 |
commit_message=f"Add new submission from {username}"
|
| 704 |
)
|
| 705 |
|
| 706 |
-
|
| 707 |
with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
|
| 708 |
-
with open(COUNT_JSON_PATH, "
|
| 709 |
-
|
| 710 |
-
|
| 711 |
api.upload_file(
|
| 712 |
-
path_or_fileobj=
|
| 713 |
path_in_repo=COUNT_JSON_REPO_PATH,
|
| 714 |
repo_id=repo_id,
|
| 715 |
repo_type="dataset",
|
| 716 |
token=hf_token,
|
| 717 |
commit_message=f"Update count.json after submission by {username}"
|
| 718 |
)
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
if current == "参考":
|
| 722 |
-
return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
|
| 723 |
-
else:
|
| 724 |
-
return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")
|
| 725 |
-
|
| 726 |
-
def back_to_welcome():
|
| 727 |
-
return (
|
| 728 |
-
gr.update(visible=True), # welcome_page
|
| 729 |
-
gr.update(visible=False), # info_page
|
| 730 |
-
gr.update(visible=False), # sample_page
|
| 731 |
-
gr.update(visible=False), # pretest_page
|
| 732 |
-
gr.update(visible=False), # test_page
|
| 733 |
-
gr.update(visible=False), # final_judgment_page
|
| 734 |
-
gr.update(visible=False), # result_page
|
| 735 |
-
{}, # user_data_state
|
| 736 |
-
0, # current_question_index
|
| 737 |
-
0, # current_test_dimension_index
|
| 738 |
-
{}, # current_question_selections
|
| 739 |
-
[] # test_results
|
| 740 |
-
)
|
| 741 |
|
| 742 |
# ==============================================================================
|
| 743 |
# Gradio 界面定义 (Gradio UI Definition)
|
|
@@ -763,82 +859,78 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
|
|
| 763 |
}
|
| 764 |
|
| 765 |
with welcome_page:
|
| 766 |
-
gr.Markdown("# AI
|
| 767 |
-
start_btn = gr.Button("
|
| 768 |
|
| 769 |
with info_page:
|
| 770 |
-
gr.Markdown("##
|
| 771 |
-
username_input = gr.Textbox(label="
|
| 772 |
-
age_input = gr.Radio(["
|
| 773 |
-
gender_input = gr.Radio(["
|
| 774 |
-
education_input = gr.Radio(["
|
| 775 |
-
education_other_input = gr.Textbox(label="
|
| 776 |
-
ai_experience_input = gr.Radio([
|
| 777 |
-
|
| 778 |
-
"Occasionally exposed (e.g., watching others use)",
|
| 779 |
-
"Used a few times, understand basic functions",
|
| 780 |
-
"Use frequently, have some experience",
|
| 781 |
-
"Very familiar, have in-depth experience with multiple AI tools"
|
| 782 |
-
], label="Familiarity with AI Tools")
|
| 783 |
-
submit_info_btn = gr.Button("Submit and Start Learning Sample", variant="primary", interactive=False)
|
| 784 |
-
|
| 785 |
|
| 786 |
with sample_page:
|
| 787 |
-
|
| 788 |
-
|
|
|
|
| 789 |
with gr.Row():
|
| 790 |
with gr.Column(scale=1):
|
| 791 |
-
sample_audio = gr.Audio(label="
|
| 792 |
with gr.Column(scale=2):
|
| 793 |
with gr.Column(visible=True) as interactive_view:
|
| 794 |
-
gr.Markdown("####
|
| 795 |
sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
|
| 796 |
with gr.Column(visible=False) as reference_view:
|
| 797 |
-
gr.Markdown("###
|
| 798 |
reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
|
| 799 |
with gr.Row():
|
| 800 |
-
reference_btn = gr.Button("
|
| 801 |
-
go_to_pretest_btn = gr.Button("
|
| 802 |
|
| 803 |
with pretest_page:
|
| 804 |
-
gr.Markdown("##
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
|
|
|
|
|
|
|
|
|
| 817 |
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
with test_page:
|
| 822 |
-
gr.Markdown("##
|
| 823 |
question_progress_text = gr.Markdown()
|
| 824 |
test_dimension_title = gr.Markdown()
|
| 825 |
-
test_audio = gr.Audio(label="
|
| 826 |
-
gr.Markdown("--- \n ###
|
| 827 |
-
|
|
|
|
|
|
|
| 828 |
with gr.Row():
|
| 829 |
-
prev_dim_btn = gr.Button("
|
| 830 |
-
next_dim_btn = gr.Button("
|
| 831 |
|
| 832 |
with final_judgment_page:
|
| 833 |
-
gr.Markdown("##
|
| 834 |
-
gr.Markdown("
|
| 835 |
-
final_human_robot_radio = gr.Radio(["👤
|
| 836 |
-
submit_final_answer_btn = gr.Button("
|
| 837 |
|
| 838 |
with result_page:
|
| 839 |
-
gr.Markdown("##
|
| 840 |
result_text = gr.Markdown()
|
| 841 |
-
back_to_welcome_btn = gr.Button("
|
| 842 |
|
| 843 |
# ==============================================================================
|
| 844 |
# 事件绑定 (Event Binding) & IO 列表定义
|
|
|
|
| 13 |
from multiprocessing import TimeoutError
|
| 14 |
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 15 |
|
| 16 |
+
# Load dataset from HuggingFace
|
| 17 |
dataset = load_dataset("intersteller2887/Turing-test-dataset-en", split="train")
|
| 18 |
dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent calling 'torchcodec' from newer version of 'datasets'
|
| 19 |
|
|
|
|
| 21 |
target_audio_dir = "/home/user/app/audio"
|
| 22 |
os.makedirs(target_audio_dir, exist_ok=True)
|
| 23 |
COUNT_JSON_PATH = "/home/user/app/count.json"
|
|
|
|
| 24 |
COUNT_JSON_REPO_PATH = "submissions/count.json" # Output directory (Huggingface dataset directory)
|
| 25 |
|
| 26 |
# Copy recordings to the working directory
|
|
|
|
| 42 |
print(sample1_audio_path)
|
| 43 |
|
| 44 |
# ==============================================================================
|
| 45 |
+
# Data Definition
|
| 46 |
# ==============================================================================
|
| 47 |
|
| 48 |
DIMENSIONS_DATA = [
|
| 49 |
{
|
| 50 |
"title": "Semantic and Pragmatic Features",
|
| 51 |
+
"audio": sample1_audio_path,
|
| 52 |
"sub_dims": [
|
| 53 |
+
"Memory Consistency: Human-like: Consistent memory in short contexts, and asks for clarification when memory deviations occur; Machine-like: Inconsistent memory across contexts and unable to detect or correct errors (e.g., forgetting key information and insisting on incorrect answers)",
|
| 54 |
+
"Logical Coherence: Human-like: Natural and smooth logic; Machine-like: Abrupt logical transitions or self-contradictions (e.g., suddenly changing topics without transition)",
|
| 55 |
+
"Pronunciation Accuracy: Human-like: Correct and natural pronunciation of words, with proper usage of polyphonic characters based on context; Machine-like: Unnatural pronunciation errors, mispronunciation of common polyphonic characters",
|
| 56 |
+
"Multilingual Mixing: Human-like: Multilingual mixing is often context-dependent (e.g., proper nouns, idiomatic expressions), with awkward or unnatural language switching; Machine-like: Rigid multilingual mixing without logical language switching",
|
| 57 |
+
"Imprecision in Language: Human-like: Uses vague expressions like 'more or less', 'probably', and may self-correct (e.g., 'no, no'); Machine-like: Rarely uses vague expressions, responses are precise and affirmative",
|
| 58 |
+
"Use of Fillers: Human-like: Frequently uses fillers (e.g., 'um', 'like') while thinking; Machine-like: Rare use of fillers or unnatural usage",
|
| 59 |
+
"Metaphor and Pragmatic Intent: Human-like: Uses metaphor, irony, and euphemism to convey layered meanings; Machine-like: Literal and direct, lacking semantic diversity, only capable of surface-level interpretation"
|
| 60 |
],
|
| 61 |
+
"reference_scores": [5, 5, 5, 0, 5, 5, 0]
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"title": "Non-Physiological Paralinguistic Features",
|
| 65 |
+
"audio": sample1_audio_path,
|
| 66 |
"sub_dims": [
|
| 67 |
+
"Rhythm: Human-like: Speaking rate varies with semantic flow, occasional pauses or hesitations; Machine-like: Almost no pauses or mechanical pauses",
|
| 68 |
+
"Intonation: Human-like: Natural pitch rise or fall when expressing questions, surprise, or emphasis; Machine-like: Monotonous or overly regular pitch changes, inappropriate to the context",
|
| 69 |
+
"Stress: Human-like: Consciously emphasizes key words to highlight focus; Machine-like: No emphasis on words or abnormal emphasis placement",
|
| 70 |
+
"Auxiliary Vocalizations: Human-like: Produces context-appropriate non-verbal sounds, such as laughter or sighs; Machine-like: Contextually incorrect or mechanical auxiliary sounds, or completely absent"
|
| 71 |
],
|
| 72 |
+
"reference_scores": [5, 5, 5, 5]
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"title": "Physiological Paralinguistic Features",
|
| 76 |
+
"audio": sample1_audio_path,
|
| 77 |
"sub_dims": [
|
| 78 |
+
"Micro-physiological Noise: Human-like: Presence of breathing sounds, saliva sounds, bubble noise, etc., naturally occurring during speech; Machine-like: Speech is overly clean or emits unnatural noises (e.g., electrical static)",
|
| 79 |
+
"Instability in Pronunciation: Human-like: Some irregularities in pronunciation (e.g., liaison, tremolo, slurred speech, nasal sounds); Machine-like: Pronunciation is overly clear and regular",
|
| 80 |
+
"Accent: Human-like: Natural regional accent or vocal traits; Machine-like: Stiff or unnatural accent"
|
| 81 |
],
|
| 82 |
+
"reference_scores": [5, 4, 4]
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"title": "Mechanical Persona",
|
| 86 |
+
"audio": sample1_audio_path,
|
| 87 |
"sub_dims": [
|
| 88 |
+
"Sycophancy: Human-like: Judges whether to agree with requests or opinions based on context, doesn't always agree or echo; Machine-like: Frequently agrees, thanks, apologizes, excessively aligns with the other’s opinion, lacking genuine interaction",
|
| 89 |
+
"Written-style Expression: Human-like: Conversational, flexible, and varied expression; Machine-like: Responses are well-structured and formal, overly formal wording, frequent listing, and vague word choice"
|
| 90 |
],
|
| 91 |
"reference_scores": [5, 5]
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"title": "Emotional Expression",
|
| 95 |
+
"audio": sample1_audio_path,
|
| 96 |
"sub_dims": [
|
| 97 |
+
"Semantic Level: Human-like: Displays human-like emotional responses to contexts such as sadness or joy; Machine-like: Fails to respond emotionally to the other’s feelings, or uses vague and context-inappropriate emotional language",
|
| 98 |
+
"Acoustic Level: Human-like: Pitch, volume, and rhythm dynamically change with emotion; Machine-like: Emotional tone is patterned or context-inappropriate"
|
| 99 |
],
|
| 100 |
+
"reference_scores": [5, 5]
|
| 101 |
}
|
| 102 |
]
|
| 103 |
|
| 104 |
|
| 105 |
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
| 106 |
+
SPECIAL_KEYWORDS = ["Multilingual Mixing", "Metaphor and Pragmatic Intent", "Auxiliary Vocalizations", "Accent"]
|
| 107 |
MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
|
| 108 |
+
THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
# ==============================================================================
|
| 113 |
+
# Backend Function Definitions
|
| 114 |
# ==============================================================================
|
| 115 |
|
| 116 |
+
# This version did not place file reading into filelock, concurrent read could happen
|
| 117 |
+
"""def load_or_initialize_count_json(audio_paths):
|
| 118 |
try:
|
| 119 |
# Only try downloading if file doesn't exist yet
|
| 120 |
if not os.path.exists(COUNT_JSON_PATH):
|
| 121 |
downloaded_path = hf_hub_download(
|
| 122 |
+
repo_id="intersteller2887/Turing-test-dataset",
|
| 123 |
repo_type="dataset",
|
| 124 |
filename=COUNT_JSON_REPO_PATH,
|
| 125 |
token=os.getenv("HF_TOKEN")
|
|
|
|
| 128 |
with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
|
| 129 |
dst.write(src.read())
|
| 130 |
except Exception as e:
|
| 131 |
+
print(f"Could not download count.json from HuggingFace dataset: {e}")
|
| 132 |
|
| 133 |
# Add filelock to /workspace/count.json
|
| 134 |
lock_path = COUNT_JSON_PATH + ".lock"
|
|
|
|
| 162 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 163 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 164 |
|
| 165 |
+
return count_data"""
|
| 166 |
|
| 167 |
+
# Function that load or initialize count.json
|
| 168 |
+
# Function is called when user start a challenge, and this will load or initialize count.json to working directory
|
| 169 |
+
# Initialize happens when count.json does not exist in the working directory as well as HuggingFace dataset
|
| 170 |
+
# Load happens when count.json exists in HuggingFace dataset, and it's not loaded to the working directory yet
|
| 171 |
+
# After load/initialize, all newly added audio files will be added to count.json with initial value of 0
|
| 172 |
+
# Load/Initialize will generate count.json in the working directory for all users under this space
|
| 173 |
|
| 174 |
+
# This version also places file reading into filelock, and modified
|
| 175 |
+
def load_or_initialize_count_json(audio_paths):
|
| 176 |
+
# Add filelock to /workspace/count.json
|
| 177 |
+
lock_path = COUNT_JSON_PATH + ".lock"
|
| 178 |
+
with FileLock(lock_path, timeout=10):
|
| 179 |
+
# If count.json does not exist in the working directory, try to download it from HuggingFace dataset
|
| 180 |
+
if not os.path.exists(COUNT_JSON_PATH):
|
| 181 |
+
try:
|
| 182 |
+
# Save latest count.json to working directory
|
| 183 |
+
downloaded_path = hf_hub_download(
|
| 184 |
+
repo_id="intersteller2887/Turing-test-dataset-en",
|
| 185 |
+
repo_type="dataset",
|
| 186 |
+
filename=COUNT_JSON_REPO_PATH,
|
| 187 |
+
token=os.getenv("HF_TOKEN")
|
| 188 |
+
)
|
| 189 |
+
with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
|
| 190 |
+
dst.write(src.read())
|
| 191 |
+
except Exception:
|
| 192 |
+
pass
|
| 193 |
+
|
| 194 |
+
# If count.json exists in the working directory: load into count_data for potential update
|
| 195 |
+
if os.path.exists(COUNT_JSON_PATH):
|
| 196 |
+
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 197 |
+
count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
|
| 198 |
+
# Else initialize count_data with orderedDict
|
| 199 |
+
# This happens when there is no count.json (both working directory and HuggingFace dataset)
|
| 200 |
+
else:
|
| 201 |
+
count_data = collections.OrderedDict()
|
| 202 |
|
| 203 |
+
updated = False
|
| 204 |
+
sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
+
# Guarantee that the sample recording won't be take into the pool
|
| 207 |
+
# Update newly updated recordings into count.json
|
| 208 |
+
for path in audio_paths:
|
| 209 |
+
filename = os.path.basename(path)
|
| 210 |
+
if filename not in count_data:
|
| 211 |
+
if filename in sample_audio_files:
|
| 212 |
+
count_data[filename] = 999
|
| 213 |
+
else:
|
| 214 |
+
count_data[filename] = 0
|
| 215 |
+
updated = True
|
| 216 |
|
| 217 |
+
# Write updated count_data to /home/user/app/count.json
|
| 218 |
+
if updated or not os.path.exists(COUNT_JSON_PATH):
|
| 219 |
+
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 220 |
+
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 221 |
|
| 222 |
+
return
|
| 223 |
|
| 224 |
+
# Shorten the time of playing previous audio when reached next question
|
| 225 |
+
def append_cache_buster(audio_path):
|
| 226 |
+
return f"{audio_path}?t={int(time.time() * 1000)}"
|
| 227 |
+
|
| 228 |
+
# Function that samples questions from avaliable question set
|
| 229 |
+
|
| 230 |
+
# This version utilizes a given count_data to sample audio paths
|
| 231 |
+
"""def sample_audio_paths(audio_paths, count_data, k=5, max_count=1): # k for questions per test; max_count for question limit in total
|
| 232 |
eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
|
| 233 |
|
| 234 |
if len(eligible_paths) < k:
|
| 235 |
raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
|
| 236 |
|
| 237 |
+
# Shuffule to avoid fixed selections resulted from directory structure
|
| 238 |
selected = random.sample(eligible_paths, k)
|
| 239 |
|
| 240 |
+
# Once sampled a test, update these questions immediately
|
| 241 |
for path in selected:
|
| 242 |
filename = os.path.basename(path)
|
| 243 |
count_data[filename] = count_data.get(filename, 0) + 1
|
| 244 |
|
| 245 |
+
# Add filelock to /workspace/count.json
|
| 246 |
lock_path = COUNT_JSON_PATH + ".lock"
|
| 247 |
with FileLock(lock_path, timeout=10):
|
| 248 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 249 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 250 |
|
| 251 |
+
return selected, count_data"""
|
| 252 |
|
| 253 |
+
# This version places file reading into filelock to guarantee correct update of count.json
|
| 254 |
+
def sample_audio_paths(audio_paths, k=5, max_count=1):
|
| 255 |
+
# Add filelock to /workspace/count.json
|
| 256 |
+
lock_path = COUNT_JSON_PATH + ".lock"
|
| 257 |
|
| 258 |
+
# Load newest count.json
|
| 259 |
+
with FileLock(lock_path, timeout=10):
|
| 260 |
+
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 261 |
+
count_data = json.load(f)
|
| 262 |
+
|
| 263 |
+
eligible_paths = [
|
| 264 |
+
p for p in audio_paths
|
| 265 |
+
if count_data.get(os.path.basename(p), 0) < max_count
|
| 266 |
+
]
|
| 267 |
+
|
| 268 |
+
if len(eligible_paths) < k:
|
| 269 |
+
raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
|
| 270 |
+
|
| 271 |
+
selected = random.sample(eligible_paths, k)
|
| 272 |
+
|
| 273 |
+
# Update count_data
|
| 274 |
+
for path in selected:
|
| 275 |
+
filename = os.path.basename(path)
|
| 276 |
+
count_data[filename] = count_data.get(filename, 0) + 1
|
| 277 |
+
|
| 278 |
+
# Update count.json
|
| 279 |
+
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 280 |
+
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 281 |
+
|
| 282 |
+
# return selected, count_data
|
| 283 |
+
# Keep count_data atomic
|
| 284 |
|
| 285 |
+
return selected
|
| 286 |
+
|
| 287 |
+
# ==============================================================================
|
| 288 |
+
# Frontend Function Definitions
|
| 289 |
+
# ==============================================================================
|
| 290 |
|
| 291 |
# Save question_set in each user_data_state, preventing global sharing
def start_challenge(user_data_state):
    """Start a session: sample 5 clips and stash the per-user question set.

    Returns updates hiding the welcome page, showing the info page, and the
    mutated ``user_data_state``.
    """
    load_or_initialize_count_json(all_data_audio_paths)
    # count.json bookkeeping happens atomically inside sample_audio_paths.
    selected_audio_paths = sample_audio_paths(all_data_audio_paths, k=5)

    user_data_state["question_set"] = [
        {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
        for path in selected_audio_paths
    ]

    return gr.update(visible=False), gr.update(visible=True), user_data_state
|
| 310 |
+
|
| 311 |
+
# Toggles the free-text education field depending on the selected radio choice
def toggle_education_other(choice):
    """Reveal and enable the "其他(请注明)" textbox only when that option is picked."""
    show_other = choice == "其他(请注明)"
    return gr.update(visible=show_other, interactive=show_other, value="")
|
| 315 |
|
| 316 |
+
# Enables the submit button only once the user-info form is fully filled in
def check_info_complete(username, age, gender, education, education_other, ai_experience):
    """Return a gr.update enabling submission iff every required field is set.

    When the education choice is "其他(请注明)", the free-text field must also
    be non-empty.
    """
    required_filled = bool(username.strip()) and all([age, gender, education, ai_experience])
    missing_other_text = education == "其他(请注明)" and not education_other.strip()
    if required_filled and not missing_other_text:
        return gr.update(interactive=True)
    return gr.update(interactive=False)
|
| 323 |
|
| 324 |
+
# This function updates user_data and initializes the sample page (called when user submits their info)
|
| 325 |
def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
|
| 326 |
final_edu = education_other if education == "其他(请注明)" else education
|
| 327 |
user_data.update({
|
|
|
|
| 346 |
# audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
|
| 347 |
interactive_view_up = gr.update(visible=True)
|
| 348 |
reference_view_up = gr.update(visible=False)
|
| 349 |
+
reference_btn_up = gr.update(value="参考")
|
| 350 |
sample_slider_ups = []
|
| 351 |
ref_slider_ups = []
|
| 352 |
scores = dim_data.get("reference_scores", [])
|
|
|
|
| 366 |
return empty_updates + slider_empty_updates
|
| 367 |
|
| 368 |
def update_test_dimension_view(d_idx, selections):
|
| 369 |
+
# dimension = DIMENSIONS_DATA[d_idx]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
slider_updates = []
|
| 371 |
+
dim_data = DIMENSIONS_DATA[d_idx]
|
| 372 |
+
sub_dims = dim_data["sub_dims"]
|
| 373 |
+
dim_title = dim_data["title"]
|
| 374 |
+
existing_scores = selections.get(dim_data['title'], {})
|
| 375 |
+
progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
|
| 376 |
+
|
| 377 |
for i in range(MAX_SUB_DIMS):
|
| 378 |
+
if i < len(sub_dims):
|
| 379 |
+
desc = sub_dims[i]
|
| 380 |
+
print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
|
| 381 |
+
name = desc.split(":")[0].strip()
|
| 382 |
+
default_value = 0 if name in SPECIAL_KEYWORDS else 1
|
| 383 |
+
value = existing_scores.get(desc, default_value)
|
| 384 |
+
|
| 385 |
+
slider_updates.append(gr.update(
|
| 386 |
+
visible=True,
|
| 387 |
+
label=desc,
|
| 388 |
+
minimum=default_value,
|
| 389 |
+
maximum=5,
|
| 390 |
+
step=1,
|
| 391 |
+
value=value,
|
| 392 |
+
interactive=True,
|
| 393 |
+
))
|
| 394 |
+
# slider_updates.append(gr.update(
|
| 395 |
+
# visible=True,
|
| 396 |
+
# label=desc,
|
| 397 |
+
# minimum=0 if name in SPECIAL_KEYWORDS else 1,
|
| 398 |
+
# maximum=5,
|
| 399 |
+
# value = existing_scores.get(desc, 0),
|
| 400 |
+
# interactive=True,
|
| 401 |
+
# ))
|
| 402 |
else:
|
| 403 |
+
slider_updates.append(gr.update(visible=False))
|
| 404 |
+
print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
|
| 405 |
+
# for i in range(MAX_SUB_DIMS):
|
| 406 |
+
# if i < len(dimension['sub_dims']):
|
| 407 |
+
# sub_dim_label = dimension['sub_dims'][i]
|
| 408 |
+
# value = existing_scores.get(sub_dim_label, 0)
|
| 409 |
+
# slider_updates.append(gr.update(visible=True, label=sub_dim_label, value=value))
|
| 410 |
+
# else:
|
| 411 |
+
# slider_updates.append(gr.update(visible=False, value=0))
|
| 412 |
|
| 413 |
prev_btn_update = gr.update(interactive=(d_idx > 0))
|
| 414 |
next_btn_update = gr.update(
|
| 415 |
+
value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度",
|
| 416 |
interactive=True
|
| 417 |
)
|
| 418 |
|
|
|
|
| 420 |
|
| 421 |
def init_test_question(user_data, q_idx):
|
| 422 |
d_idx = 0
|
|
|
|
|
|
|
| 423 |
question = user_data["question_set"][q_idx]
|
| 424 |
+
progress_q = f"第 {q_idx + 1} / {len(user_data['question_set'])} 题"
|
| 425 |
|
| 426 |
initial_updates = update_test_dimension_view(d_idx, {})
|
| 427 |
dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
|
|
|
|
| 482 |
next_btn_update,
|
| 483 |
) + tuple(slider_updates)
|
| 484 |
|
| 485 |
+
def toggle_reference_view(current):
    """Flip between the interactive scoring view and the reference-answer view.

    ``current`` is the button's label: "参考" means we are on the scoring view
    and should switch to the reference view (button becomes "返回"); anything
    else switches back. Returns updates for
    (interactive_view, reference_view, reference_btn).
    """
    go_to_reference = current == "参考"
    return (
        gr.update(visible=not go_to_reference),
        gr.update(visible=go_to_reference),
        gr.update(value="返回" if go_to_reference else "参考"),
    )
|
| 490 |
+
|
| 491 |
+
def back_to_welcome():
    """Reset the app to the welcome screen and clear all per-session state.

    Returns visibility updates for the seven pages (only the welcome page
    visible) followed by cleared state values:
    user_data_state, current_question_index, current_test_dimension_index,
    current_question_selections, test_results.
    """
    # welcome, info, sample, pretest, test, final_judgment, result
    page_visibility = (True, False, False, False, False, False, False)
    page_updates = tuple(gr.update(visible=v) for v in page_visibility)
    cleared_state = ({}, 0, 0, {}, [])
    return page_updates + cleared_state
|
| 506 |
+
|
| 507 |
# ==============================================================================
|
| 508 |
+
# Retry Function Definitions
|
| 509 |
# ==============================================================================
|
| 510 |
|
| 511 |
+
# Decorator function that allows to use ThreadPoolExecutor to retry a function with timeout
|
| 512 |
def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
|
| 513 |
def decorator(func):
|
| 514 |
@wraps(func)
|
|
|
|
| 537 |
return wrapper
|
| 538 |
return decorator
|
| 539 |
|
| 540 |
+
def save_with_retry(all_results, user_data):
|
| 541 |
# 尝试上传到Hugging Face Hub
|
| 542 |
try:
|
| 543 |
# 使用线程安全的保存方式
|
| 544 |
with ThreadPoolExecutor(max_workers=1) as executor:
|
| 545 |
+
future = executor.submit(save_all_results_to_file, all_results, user_data)
|
| 546 |
try:
|
| 547 |
future.result(timeout=30) # 设置30秒超时
|
| 548 |
return True
|
|
|
|
| 639 |
gr.update(), gr.update(),
|
| 640 |
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)"""
|
| 641 |
|
| 642 |
+
# user_data now no further contain "updated_count_data", which should be read/write with filelock and be directly accessed from working directory
|
| 643 |
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
|
| 644 |
try:
|
| 645 |
# 准备数据
|
|
|
|
| 665 |
return init_q_updates + (all_results, gr.update(value=""))
|
| 666 |
else:
|
| 667 |
# 准备完整结果数据
|
| 668 |
+
result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
|
| 669 |
for res in all_results:
|
| 670 |
result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
|
| 671 |
for dim_title, dim_data in res['selections'].items():
|
|
|
|
| 676 |
|
| 677 |
# 尝试上传(带重试)
|
| 678 |
try:
|
| 679 |
+
# success = save_with_retry(all_results, user_data, user_data.get("updated_count_data"))
|
| 680 |
+
success = save_with_retry(all_results, user_data)
|
| 681 |
except Exception as e:
|
| 682 |
print(f"上传过程中发生错误: {e}")
|
| 683 |
success = False
|
|
|
|
| 690 |
|
| 691 |
# 准备数据包
|
| 692 |
user_info_clean = {
|
| 693 |
+
k: v for k, v in user_data.items() if k not in ["question_set"]
|
| 694 |
}
|
| 695 |
final_data_package = {
|
| 696 |
"user_info": user_info_clean,
|
|
|
|
| 707 |
|
| 708 |
# 更新count.json(剔除未完成的题目)
|
| 709 |
try:
|
| 710 |
+
with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
|
| 711 |
+
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 712 |
+
count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
|
| 713 |
+
count_update_success = update_count_with_retry(count_data, user_data["question_set"])
|
| 714 |
except Exception as e:
|
| 715 |
print(f"更新count.json失败: {e}")
|
| 716 |
count_update_success = False
|
|
|
|
| 787 |
except Exception as e:
|
| 788 |
print(f"上传出错: {e}")"""
|
| 789 |
|
| 790 |
+
def save_all_results_to_file(all_results, user_data):
|
| 791 |
repo_id = "intersteller2887/Turing-test-dataset-en"
|
| 792 |
username = user_data.get("username", "user")
|
| 793 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
| 794 |
submission_filename = f"submissions_{username}_{timestamp}.json"
|
| 795 |
|
| 796 |
user_info_clean = {
|
| 797 |
+
k: v for k, v in user_data.items() if k not in ["question_set"]
|
| 798 |
}
|
| 799 |
|
| 800 |
final_data_package = {
|
|
|
|
| 819 |
commit_message=f"Add new submission from {username}"
|
| 820 |
)
|
| 821 |
|
| 822 |
+
try:
|
| 823 |
with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
|
| 824 |
+
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 825 |
+
count_data_str = f.read()
|
| 826 |
+
|
| 827 |
api.upload_file(
|
| 828 |
+
path_or_fileobj=bytes(count_data_str, "utf-8"),
|
| 829 |
path_in_repo=COUNT_JSON_REPO_PATH,
|
| 830 |
repo_id=repo_id,
|
| 831 |
repo_type="dataset",
|
| 832 |
token=hf_token,
|
| 833 |
commit_message=f"Update count.json after submission by {username}"
|
| 834 |
)
|
| 835 |
+
except Exception as e:
|
| 836 |
+
print(f"上传 count.json 失败: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 837 |
|
| 838 |
# ==============================================================================
|
| 839 |
# Gradio 界面定义 (Gradio UI Definition)
|
|
|
|
| 859 |
}
|
| 860 |
|
| 861 |
with welcome_page:
    # Landing screen: title/intro text plus the single entry button.
    gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
    start_btn = gr.Button("开始挑战", variant="primary")
|
| 864 |
|
| 865 |
with info_page:
    # Demographics form. The submit button starts disabled and is enabled by
    # check_info_complete once every required field is filled.
    gr.Markdown("## 请提供一些基本信息")
    username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
    age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
    gender_input = gr.Radio(["男", "女", "其他"], label="性别")
    # FIX: the "other" option must read "其他(请注明)" — toggle_education_other,
    # check_info_complete and show_sample_page_and_init all compare against that
    # exact string, so with the plain "其他" option the free-text flow never triggered.
    education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
    # Hidden free-text field, revealed by toggle_education_other when the
    # "其他(请注明)" education option is selected.
    education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
    ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
    submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
|
| 875 |
with sample_page:
    # Training screen: one shared sample audio, a dimension selector, and two
    # mutually exclusive columns — the interactive scoring sliders and the
    # read-only reference answers (toggled by toggle_reference_view).
    gr.Markdown("## 样例分析\n请选择一个维度进行学习和打分练习。所有维度共用同一个样例音频。")
    sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
    with gr.Row():
        with gr.Column(scale=1):
            sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
        with gr.Column(scale=2):
            with gr.Column(visible=True) as interactive_view:
                gr.Markdown("#### 请为以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")
                # A fixed pool of sliders; per-dimension code shows only the first len(sub_dims).
                sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
            with gr.Column(visible=False) as reference_view:
                gr.Markdown("### 参考答案解析")
                reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
    with gr.Row():
        reference_btn = gr.Button("参考")
        go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")
|
| 892 |
|
| 893 |
with pretest_page:
    # Instruction screen shown once before the real test begins.
    # NOTE(review): the strings below are user-facing and reproduced verbatim —
    # "做出做出" looks like a duplicated word, and "您���判断" contains mojibake;
    # confirm the intended wording before editing these runtime strings.
    gr.Markdown("## 测试说明\n"
                "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
                "- 在每个维度下,请为出现的每个特征 **从0到5打分**。\n"
                "- **评分解释如下:**\n"
                " - **0 分:特征未体现** (有些特征一定会体现,所以按1到5打分);\n"
                " - **1 分:极度符合机器特征**;\n"
                " - **2 分:较为符合机器特征**;\n"
                " - **3 分:无明显人类或机器倾向**;\n"
                " - **4 分:较为符合人类特征**;\n"
                " - **5 分:极度符合人类特征**。\n"
                "- 完成所有维度后,请根据整体印象对回应方的身份做出做出“人类”或“机器人”的 **最终判断**。\n"
                "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。\n"
                "## 特别注意\n"
                "- 我们希望您���判断每个维度上**回应者**的表现是**偏向人还是机器**,分数的大小反映回应者的语音类人的程度,而**不是**这个维度体现的程度多少\n(如读音正确也不代表是人类,读音错误也不代表是机器,您应当判断的是“听到的发音更偏向机器还是人类”)\n"
                "- 即使您一开始就已经很肯定回应方的身份,同样应当**独立地**对每个维度上回应方的表现进行细致的评判。比如您很肯定回应方是机器,也需要独立地对每个维度判断,而非简单地将每个维度归为偏机器。")
    go_to_test_btn = gr.Button("开始测试", variant="primary")
|
| 910 |
|
|
|
|
|
|
|
|
|
|
| 911 |
with test_page:
    # Main testing screen: progress markers, the audio under test, the slider
    # pool (populated per dimension by update_test_dimension_view), and the
    # previous/next dimension navigation buttons.
    gr.Markdown("## 正式测试")
    question_progress_text = gr.Markdown()
    test_dimension_title = gr.Markdown()
    test_audio = gr.Audio(label="测试音频")
    gr.Markdown("--- \n ### 请为对话中的回应者(非发起者)针对以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")

    # Fixed pool of sliders; only the first len(sub_dims) are made visible.
    test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True, show_label = True) for i in range(MAX_SUB_DIMS)]

    with gr.Row():
        prev_dim_btn = gr.Button("上一维度")
        next_dim_btn = gr.Button("下一维度", variant="primary")
|
| 923 |
|
| 924 |
with final_judgment_page:
    # Per-question verdict screen; the submit button is enabled only after a
    # human/robot choice has been made.
    gr.Markdown("## 最终判断")
    gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
    final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)")
    submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)
|
| 929 |
|
| 930 |
with result_page:
    # Final summary screen with a button that resets back to the welcome page.
    gr.Markdown("## 测试完成")
    result_text = gr.Markdown()
    back_to_welcome_btn = gr.Button("返回主界面", variant="primary")
|
| 934 |
|
| 935 |
# ==============================================================================
|
| 936 |
# 事件绑定 (Event Binding) & IO 列表定义
|