Spark Chou committed on
Commit 88c9820 · 1 Parent(s): acceb2a

add app.py for latest English version

Files changed (1)
  1. app.py +282 -190
app.py CHANGED
@@ -13,6 +13,7 @@ from huggingface_hub import HfApi, hf_hub_download
13
  from multiprocessing import TimeoutError
14
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
15
 
 
16
  dataset = load_dataset("intersteller2887/Turing-test-dataset-en", split="train")
17
  dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent newer versions of 'datasets' from calling 'torchcodec'
18
 
@@ -20,7 +21,6 @@ dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent calling 't
20
  target_audio_dir = "/home/user/app/audio"
21
  os.makedirs(target_audio_dir, exist_ok=True)
22
  COUNT_JSON_PATH = "/home/user/app/count.json"
23
-
24
  COUNT_JSON_REPO_PATH = "submissions/count.json" # Output directory (Huggingface dataset directory)
25
 
26
  # Copy recordings to the working directory
@@ -42,90 +42,84 @@ sample1_audio_path = local_audio_paths[0]
42
  print(sample1_audio_path)
43
 
44
  # ==============================================================================
45
- # 数据定义 (Data Definition)
46
  # ==============================================================================
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
  "title": "Semantic and Pragmatic Features",
51
- "audio": "sample1_audio_path",
52
  "sub_dims": [
53
- "Memory Consistency: Human memory in short contexts is usually consistent and self-correcting (e.g., by asking questions); machines may show inconsistent context memory and fail to notice or correct errors (e.g., forgetting key information and persisting in wrong answers).",
54
- "Logical Coherence: Human logic is naturally coherent and allows reasonable leaps; machine logic is abrupt or self-contradictory (e.g., sudden topic shifts without transitions).",
55
- "Pronunciation Accuracy: Humans generally pronounce words correctly and naturally, distinguishing polyphonic characters based on context; machines often mispronounce or lack contextual judgment for polyphonic words.",
56
- "Multilingual Mixing: Humans mix multiple languages fluently and contextually; machines mix languages rigidly, lacking logical language switching.",
57
- "Linguistic Vagueness: Human speech tends to include vague expressions (e.g., more or less,” “I guess”) and self-corrections; machine responses are typically precise and assertive.",
58
- "Filler Word Usage: Human filler words (e.g., 'uh', 'like') appear randomly and show signs of thinking; machine fillers are either repetitive and patterned or completely absent.",
59
- "Metaphor and Pragmatic Intent: Humans use metaphors, irony, and euphemisms to express layered meanings; machines interpret literally or use rhetorical devices awkwardly, lacking semantic richness."
60
  ],
61
- "reference_scores": [5, 5, 3, 3, 5, 5, 3]
62
  },
63
  {
64
  "title": "Non-Physiological Paralinguistic Features",
65
- "audio": "sample1_audio_path",
66
  "sub_dims": [
67
- "Rhythm: Human speech rate varies with meaning, occasionally hesitating or pausing; machine rhythm is uniform, with little or mechanical pauses.",
68
- "Intonation: Humans naturally raise or lower pitch to express questions, surprise, or emphasis; machine intonation is monotonous or overly patterned, mismatching the context.",
69
- "Emphasis: Humans consciously stress key words to highlight important information; machines have uniform word emphasis or stress incorrect parts.",
70
- "Auxiliary Vocalizations: Humans produce context-appropriate non-verbal sounds (e.g., laughter, sighs); machine non-verbal sounds are contextually incorrect, mechanical, or absent."
71
  ],
72
- "reference_scores": [4, 5, 4, 3]
73
  },
74
  {
75
  "title": "Physiological Paralinguistic Features",
76
- "audio": "sample1_audio_path",
77
  "sub_dims": [
78
- "Micro-physiological Noise: Human speech includes unconscious physiological sounds like breathing, saliva, or bubbling, naturally woven into rhythm; machine speech is overly clean or adds unnatural noises.",
79
- "Pronunciation Instability: Human pronunciation includes irregularities (e.g., linking, tremors, slurring, nasal sounds); machine pronunciation is overly standard and uniform, lacking personality.",
80
- "Accent: Humans naturally exhibit regional accents or speech traits; machine accents sound forced or unnatural."
81
  ],
82
- "reference_scores": [3, 3, 4]
83
  },
84
  {
85
  "title": "Mechanical Persona",
86
- "audio": "sample1_audio_path",
87
  "sub_dims": [
88
- "Flattery: Humans assess context to agree or disagree, sometimes offering differing opinions; machines excessively agree, thank, or apologize, over-validating the other party and lacking authentic interaction.",
89
- "Formalized Expression: Human speech is flexible; machine responses are formally structured, overly written, and use vague wording."
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
  "title": "Emotional Expression",
95
- "audio": "sample1_audio_path",
96
  "sub_dims": [
97
- "Semantic Level: Humans show appropriate emotional responses to contexts like sadness or joy; machines are emotionally flat, or use emotional words vaguely and out of context.",
98
- "Acoustic Level: Human pitch, volume, and rhythm change dynamically with emotion; machine emotional tone is formulaic or mismatched with the context."
99
  ],
100
- "reference_scores": [3, 3]
101
  }
102
  ]
103
 
104
 
105
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
 
106
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
 
107
 
108
- """
109
- # Issue: this is initialized at Space startup, so it might somehow not be covered
110
- count_data = load_or_initialize_count_json(all_data_audio_paths)
111
- selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
112
 
113
- QUESTION_SET = [
114
- {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
115
- for path in selected_audio_paths
116
- ]"""
117
 
118
  # ==============================================================================
119
- # 功能函数定义 (Function Definitions)
120
  # ==============================================================================
121
 
122
- # Function that load or initialize count.json
123
- def load_or_initialize_count_json(audio_paths):
124
  try:
125
  # Only try downloading if file doesn't exist yet
126
  if not os.path.exists(COUNT_JSON_PATH):
127
  downloaded_path = hf_hub_download(
128
- repo_id="intersteller2887/Turing-test-dataset-en",
129
  repo_type="dataset",
130
  filename=COUNT_JSON_REPO_PATH,
131
  token=os.getenv("HF_TOKEN")
@@ -134,7 +128,7 @@ def load_or_initialize_count_json(audio_paths):
134
  with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
135
  dst.write(src.read())
136
  except Exception as e:
137
- print(f"Could not download or save count.json from HuggingFace dataset: {e}")
138
 
139
  # Add filelock to /workspace/count.json
140
  lock_path = COUNT_JSON_PATH + ".lock"
@@ -168,75 +162,139 @@ def load_or_initialize_count_json(audio_paths):
168
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
169
  json.dump(count_data, f, indent=4, ensure_ascii=False)
170
 
171
- return count_data
172
 
173
- # Shorten the time of playing previous audio when reached next question
174
- def append_cache_buster(audio_path):
175
- return f"{audio_path}?t={int(time.time() * 1000)}"
 
 
 
176
 
177
- """def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
178
- eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
179
-
180
- if len(eligible_paths) < k:
181
- raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
 
182
 
183
- eligible_paths_copy = eligible_paths.copy()
184
-
185
- random.seed(int(time.time()))
186
-
187
- selected = random.sample(eligible_paths_copy, k)
188
 
189
- for path in selected:
190
- filename = os.path.basename(path)
191
- count_data[filename] = count_data.get(filename, 0) + 1
 
 
192
 
193
- with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
194
- json.dump(count_data, f, indent=4, ensure_ascii=False)
 
 
195
 
196
- return selected, count_data"""
197
 
198
- def sample_audio_paths(audio_paths, count_data, k=5, max_count=1): # k for questions per test; max_count for question limit in total
 
199
  eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
200
 
201
  if len(eligible_paths) < k:
202
  raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
203
 
 
204
  selected = random.sample(eligible_paths, k)
205
 
 
206
  for path in selected:
207
  filename = os.path.basename(path)
208
  count_data[filename] = count_data.get(filename, 0) + 1
209
 
 
210
  lock_path = COUNT_JSON_PATH + ".lock"
211
  with FileLock(lock_path, timeout=10):
212
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
213
  json.dump(count_data, f, indent=4, ensure_ascii=False)
214
 
215
- return selected, count_data
216
 
217
- """def start_challenge(user_data_state):
218
-
219
- # global QUESTION_SET, updated_count_data
220
- # Issue: global variables in a Hugging Face Space are shared by all threads
221
 
222
- # 每次点击“开始挑战”时重新抽题
223
- count_data = load_or_initialize_count_json(all_data_audio_paths)
224
- selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
225
-
226
- QUESTION_SET = [
227
- {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
228
- for path in selected_audio_paths
229
- ]
 
230
 
231
- # 重置 user_data 中的状态(也可以留空)
232
- user_data_state.clear()
233
- return gr.update(visible=False), gr.update(visible=True), user_data_state"""
 
 
234
 
235
  # Save question_set in each user_data_state, preventing global sharing
236
  def start_challenge(user_data_state):
237
 
238
- count_data = load_or_initialize_count_json(all_data_audio_paths)
239
- selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
 
 
240
 
241
  question_set = [
242
  {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
@@ -244,13 +302,18 @@ def start_challenge(user_data_state):
244
  ]
245
 
246
  user_data_state["question_set"] = question_set
247
- user_data_state["updated_count_data"] = updated_count_data
248
- return gr.update(visible=False), gr.update(visible=True), user_data_state
249
 
250
  def toggle_education_other(choice):
251
  is_other = (choice == "其他(请注明)")
252
  return gr.update(visible=is_other, interactive=is_other, value="")
253
 
 
254
  def check_info_complete(username, age, gender, education, education_other, ai_experience):
255
  if username.strip() and age and gender and education and ai_experience:
256
  if education == "其他(请注明)" and not education_other.strip():
@@ -258,6 +321,7 @@ def check_info_complete(username, age, gender, education, education_other, ai_ex
258
  return gr.update(interactive=True)
259
  return gr.update(interactive=False)
260
 
 
261
  def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
262
  final_edu = education_other if education == "其他(请注明)" else education
263
  user_data.update({
@@ -282,7 +346,7 @@ def update_sample_view(dimension_title):
282
  # audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
283
  interactive_view_up = gr.update(visible=True)
284
  reference_view_up = gr.update(visible=False)
285
- reference_btn_up = gr.update(value="Reference Answer")
286
  sample_slider_ups = []
287
  ref_slider_ups = []
288
  scores = dim_data.get("reference_scores", [])
@@ -302,23 +366,53 @@ def update_sample_view(dimension_title):
302
  return empty_updates + slider_empty_updates
303
 
304
  def update_test_dimension_view(d_idx, selections):
305
- dimension = DIMENSIONS_DATA[d_idx]
306
- progress_d = f"Dimension {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dimension['title']}**"
307
-
308
- existing_scores = selections.get(dimension['title'], {})
309
-
310
  slider_updates = []
 
311
  for i in range(MAX_SUB_DIMS):
312
- if i < len(dimension['sub_dims']):
313
- sub_dim_label = dimension['sub_dims'][i]
314
- value = existing_scores.get(sub_dim_label, 0)
315
- slider_updates.append(gr.update(visible=True, label=sub_dim_label, value=value))
 
316
  else:
317
- slider_updates.append(gr.update(visible=False, value=0))
 
318
 
319
  prev_btn_update = gr.update(interactive=(d_idx > 0))
320
  next_btn_update = gr.update(
321
- value="Proceed to Final Judgement" if d_idx == len(DIMENSIONS_DATA) - 1 else "Next Dimension",
322
  interactive=True
323
  )
324
 
@@ -326,10 +420,8 @@ def update_test_dimension_view(d_idx, selections):
326
 
327
  def init_test_question(user_data, q_idx):
328
  d_idx = 0
329
- # question = QUESTION_SET[q_idx]
330
- # progress_q = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题"
331
  question = user_data["question_set"][q_idx]
332
- progress_q = f"Question {q_idx + 1} / {len(user_data['question_set'])}"
333
 
334
  initial_updates = update_test_dimension_view(d_idx, {})
335
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
@@ -390,11 +482,33 @@ def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
390
  next_btn_update,
391
  ) + tuple(slider_updates)
392
 
 
393
  # ==============================================================================
394
- # 重连函数定义 (Retry Function Definitions)
395
  # ==============================================================================
396
 
397
- # Function for handling connection error
398
  def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
399
  def decorator(func):
400
  @wraps(func)
@@ -423,12 +537,12 @@ def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
423
  return wrapper
424
  return decorator
425
 
426
- def save_with_retry(all_results, user_data, count_data):
427
  # 尝试上传到Hugging Face Hub
428
  try:
429
  # 使用线程安全的保存方式
430
  with ThreadPoolExecutor(max_workers=1) as executor:
431
- future = executor.submit(save_all_results_to_file, all_results, user_data, count_data)
432
  try:
433
  future.result(timeout=30) # 设置30秒超时
434
  return True
@@ -525,6 +639,7 @@ def update_count_with_retry(count_data, question_set, max_retries=3):
525
  gr.update(), gr.update(),
526
  ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)"""
527
 
 
528
  def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
529
  try:
530
  # 准备数据
@@ -550,7 +665,7 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
550
  return init_q_updates + (all_results, gr.update(value=""))
551
  else:
552
  # 准备完整结果数据
553
- result_str = "### Test Finished!\n\nOverview of the submission:\n"
554
  for res in all_results:
555
  result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
556
  for dim_title, dim_data in res['selections'].items():
@@ -561,7 +676,8 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
561
 
562
  # 尝试上传(带重试)
563
  try:
564
- success = save_with_retry(all_results, user_data, user_data.get("updated_count_data"))
 
565
  except Exception as e:
566
  print(f"上传过程中发生错误: {e}")
567
  success = False
@@ -574,7 +690,7 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
574
 
575
  # 准备数据包
576
  user_info_clean = {
577
- k: v for k, v in user_data.items() if k not in ["question_set", "updated_count_data"]
578
  }
579
  final_data_package = {
580
  "user_info": user_info_clean,
@@ -591,10 +707,10 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
591
 
592
  # 更新count.json(剔除未完成的题目)
593
  try:
594
- count_update_success = update_count_with_retry(
595
- user_data.get("updated_count_data", {}),
596
- user_data["question_set"]
597
- )
598
  except Exception as e:
599
  print(f"更新count.json失败: {e}")
600
  count_update_success = False
@@ -671,14 +787,14 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
671
  except Exception as e:
672
  print(f"上传出错: {e}")"""
673
 
674
- def save_all_results_to_file(all_results, user_data, count_data=None):
675
  repo_id = "intersteller2887/Turing-test-dataset-en"
676
  username = user_data.get("username", "user")
677
  timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
678
  submission_filename = f"submissions_{username}_{timestamp}.json"
679
 
680
  user_info_clean = {
681
- k: v for k, v in user_data.items() if k not in ["question_set", "updated_count_data"]
682
  }
683
 
684
  final_data_package = {
@@ -703,41 +819,21 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
703
  commit_message=f"Add new submission from {username}"
704
  )
705
 
706
- if count_data:
707
  with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
708
- with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
709
- json.dump(count_data, f, indent=4, ensure_ascii=False)
710
-
711
  api.upload_file(
712
- path_or_fileobj=COUNT_JSON_PATH,
713
  path_in_repo=COUNT_JSON_REPO_PATH,
714
  repo_id=repo_id,
715
  repo_type="dataset",
716
  token=hf_token,
717
  commit_message=f"Update count.json after submission by {username}"
718
  )
719
-
720
- def toggle_reference_view(current):
721
- if current == "参考":
722
- return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
723
- else:
724
- return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")
725
-
726
- def back_to_welcome():
727
- return (
728
- gr.update(visible=True), # welcome_page
729
- gr.update(visible=False), # info_page
730
- gr.update(visible=False), # sample_page
731
- gr.update(visible=False), # pretest_page
732
- gr.update(visible=False), # test_page
733
- gr.update(visible=False), # final_judgment_page
734
- gr.update(visible=False), # result_page
735
- {}, # user_data_state
736
- 0, # current_question_index
737
- 0, # current_test_dimension_index
738
- {}, # current_question_selections
739
- [] # test_results
740
- )
741
 
742
  # ==============================================================================
743
  # Gradio 界面定义 (Gradio UI Definition)
@@ -763,82 +859,78 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
763
  }
764
 
765
  with welcome_page:
766
- gr.Markdown("# AI Detective\nListen to the following conversations. Please determine which respondent is an AI.")
767
- start_btn = gr.Button("Start", variant="primary")
768
 
769
  with info_page:
770
- gr.Markdown("## Basic Information")
771
- username_input = gr.Textbox(label="Username", placeholder="Please enter your nickname")
772
- age_input = gr.Radio(["Under 18", "18-25", "26-35", "36-50", "Over 50"], label="Age")
773
- gender_input = gr.Radio(["Male", "Female", "Other"], label="Gender")
774
- education_input = gr.Radio(["High school or below", "Bachelor", "Master", "PhD", "Other (please specify)"], label="Education Level")
775
- education_other_input = gr.Textbox(label="Please enter your education", visible=False, interactive=False)
776
- ai_experience_input = gr.Radio([
777
- "Never used",
778
- "Occasionally exposed (e.g., watching others use)",
779
- "Used a few times, understand basic functions",
780
- "Use frequently, have some experience",
781
- "Very familiar, have in-depth experience with multiple AI tools"
782
- ], label="Familiarity with AI Tools")
783
- submit_info_btn = gr.Button("Submit and Start Learning Sample", variant="primary", interactive=False)
784
-
785
 
786
  with sample_page:
787
- gr.Markdown("## Sample Analysis\nPlease select a dimension to study and practice scoring. All dimensions share the same sample audio.")
788
- sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="Select Learning Dimension", value=DIMENSION_TITLES[0])
 
789
  with gr.Row():
790
  with gr.Column(scale=1):
791
- sample_audio = gr.Audio(label="Sample Audio", value=DIMENSIONS_DATA[0]["audio"])
792
  with gr.Column(scale=2):
793
  with gr.Column(visible=True) as interactive_view:
794
- gr.Markdown("#### Please rate the following features (0-5 points. 0 - Feature not present; 1 - Machine; 3 - Neutral; 5 - Human)")
795
  sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
796
  with gr.Column(visible=False) as reference_view:
797
- gr.Markdown("### Reference Answer Explanation (1-5 points. 1 = Machine, 5 = Human)")
798
  reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
799
  with gr.Row():
800
- reference_btn = gr.Button("Reference")
801
- go_to_pretest_btn = gr.Button("Got it, start the test", variant="primary")
802
 
803
  with pretest_page:
804
- gr.Markdown("## Test Instructions\n"
805
- "- For each question, you need to evaluate **all 5 dimensions**.\n"
806
- "- Within each dimension, please rate each appearing feature **from 1 to 5**.\n"
807
- "- **Scoring Guide:**\n"
808
- " - **0 points: Feature not present**;\n"
809
- " - **1 point: Strongly machine-like**;\n"
810
- " - **2 points: Somewhat machine-like**;\n"
811
- " - **3 points: Neutral**;\n"
812
- " - **4 points: Somewhat human-like**;\n"
813
- " - **5 points: Strongly human-like**.\n"
814
- "- After completing all dimensions, make a **final judgment** on whether the respondent is “Human” or “AI” based on your overall impression.\n"
815
- "- You can use the “Previous Dimension” and “Next Dimension” buttons to freely switch and modify scores across the 5 dimensions.")
816
- go_to_test_btn = gr.Button("Start Test", variant="primary")
 
 
 
817
 
818
-
819
-
820
-
821
  with test_page:
822
- gr.Markdown("## Formal Test")
823
  question_progress_text = gr.Markdown()
824
  test_dimension_title = gr.Markdown()
825
- test_audio = gr.Audio(label="Test Audio")
826
- gr.Markdown("--- \n ### Please rate the respondent (not the initiator) in the conversation based on the following features (0-5 points. 0 - Feature not present; 1 - Machine; 3 - Neutral; 5 - Human)")
827
- test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
 
 
828
  with gr.Row():
829
- prev_dim_btn = gr.Button("Previous Dimension")
830
- next_dim_btn = gr.Button("Next Dimension", variant="primary")
831
 
832
  with final_judgment_page:
833
- gr.Markdown("## Final Judgment")
834
- gr.Markdown("You have completed scoring for all dimensions. Please make a final judgment based on your overall impression.")
835
- final_human_robot_radio = gr.Radio(["👤 Human", "🤖 AI"], label="Please determine the respondent type (required)")
836
- submit_final_answer_btn = gr.Button("Submit Answer for This Question", variant="primary", interactive=False)
837
 
838
  with result_page:
839
- gr.Markdown("## Test Completed")
840
  result_text = gr.Markdown()
841
- back_to_welcome_btn = gr.Button("Back to Main Page", variant="primary")
842
 
843
  # ==============================================================================
844
  # 事件绑定 (Event Binding) & IO 列表定义
 
13
  from multiprocessing import TimeoutError
14
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
15
 
16
+ # Load dataset from HuggingFace
17
  dataset = load_dataset("intersteller2887/Turing-test-dataset-en", split="train")
18
  dataset = dataset.cast_column("audio", Audio(decode=False)) # Prevent newer versions of 'datasets' from calling 'torchcodec'
19
 
 
21
  target_audio_dir = "/home/user/app/audio"
22
  os.makedirs(target_audio_dir, exist_ok=True)
23
  COUNT_JSON_PATH = "/home/user/app/count.json"
 
24
  COUNT_JSON_REPO_PATH = "submissions/count.json" # Output directory (Huggingface dataset directory)
25
 
26
  # Copy recordings to the working directory
 
42
  print(sample1_audio_path)
43
 
44
  # ==============================================================================
45
+ # Data Definition
46
  # ==============================================================================
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
  "title": "Semantic and Pragmatic Features",
51
+ "audio": sample1_audio_path,
52
  "sub_dims": [
53
+ "Memory Consistency: Human-like: Consistent memory in short contexts, and asks for clarification when memory deviations occur; Machine-like: Inconsistent memory across contexts and unable to detect or correct errors (e.g., forgetting key information and insisting on incorrect answers)",
54
+ "Logical Coherence: Human-like: Natural and smooth logic; Machine-like: Abrupt logical transitions or self-contradictions (e.g., suddenly changing topics without transition)",
55
+ "Pronunciation Accuracy: Human-like: Correct and natural pronunciation of words, with proper usage of polyphonic characters based on context; Machine-like: Unnatural pronunciation errors, mispronunciation of common polyphonic characters",
56
+ "Multilingual Mixing: Human-like: Multilingual mixing is often context-dependent (e.g., proper nouns, idiomatic expressions), with awkward or unnatural language switching; Machine-like: Rigid multilingual mixing without logical language switching",
57
+ "Imprecision in Language: Human-like: Uses vague expressions like 'more or less', 'probably', and may self-correct (e.g., 'no, no'); Machine-like: Rarely uses vague expressions, responses are precise and affirmative",
58
+ "Use of Fillers: Human-like: Frequently uses fillers (e.g., 'um', 'like') while thinking; Machine-like: Rare use of fillers or unnatural usage",
59
+ "Metaphor and Pragmatic Intent: Human-like: Uses metaphor, irony, and euphemism to convey layered meanings; Machine-like: Literal and direct, lacking semantic diversity, only capable of surface-level interpretation"
60
  ],
61
+ "reference_scores": [5, 5, 5, 0, 5, 5, 0]
62
  },
63
  {
64
  "title": "Non-Physiological Paralinguistic Features",
65
+ "audio": sample1_audio_path,
66
  "sub_dims": [
67
+ "Rhythm: Human-like: Speaking rate varies with semantic flow, occasional pauses or hesitations; Machine-like: Almost no pauses or mechanical pauses",
68
+ "Intonation: Human-like: Natural pitch rise or fall when expressing questions, surprise, or emphasis; Machine-like: Monotonous or overly regular pitch changes, inappropriate to the context",
69
+ "Stress: Human-like: Consciously emphasizes key words to highlight focus; Machine-like: No emphasis on words or abnormal emphasis placement",
70
+ "Auxiliary Vocalizations: Human-like: Produces context-appropriate non-verbal sounds, such as laughter or sighs; Machine-like: Contextually incorrect or mechanical auxiliary sounds, or completely absent"
71
  ],
72
+ "reference_scores": [5, 5, 5, 5]
73
  },
74
  {
75
  "title": "Physiological Paralinguistic Features",
76
+ "audio": sample1_audio_path,
77
  "sub_dims": [
78
+ "Micro-physiological Noise: Human-like: Presence of breathing sounds, saliva sounds, bubble noise, etc., naturally occurring during speech; Machine-like: Speech is overly clean or emits unnatural noises (e.g., electrical static)",
79
+ "Instability in Pronunciation: Human-like: Some irregularities in pronunciation (e.g., liaison, tremolo, slurred speech, nasal sounds); Machine-like: Pronunciation is overly clear and regular",
80
+ "Accent: Human-like: Natural regional accent or vocal traits; Machine-like: Stiff or unnatural accent"
81
  ],
82
+ "reference_scores": [5, 4, 4]
83
  },
84
  {
85
  "title": "Mechanical Persona",
86
+ "audio": sample1_audio_path,
87
  "sub_dims": [
88
+ "Sycophancy: Human-like: Judges whether to agree with requests or opinions based on context, doesn't always agree or echo; Machine-like: Frequently agrees, thanks, apologizes, excessively aligns with the other’s opinion, lacking genuine interaction",
89
+ "Written-style Expression: Human-like: Conversational, flexible, and varied expression; Machine-like: Responses are well-structured and formal, overly formal wording, frequent listing, and vague word choice"
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
  "title": "Emotional Expression",
95
+ "audio": sample1_audio_path,
96
  "sub_dims": [
97
+ "Semantic Level: Human-like: Displays human-like emotional responses to contexts such as sadness or joy; Machine-like: Fails to respond emotionally to the other’s feelings, or uses vague and context-inappropriate emotional language",
98
+ "Acoustic Level: Human-like: Pitch, volume, and rhythm dynamically change with emotion; Machine-like: Emotional tone is patterned or context-inappropriate"
99
  ],
100
+ "reference_scores": [5, 5]
101
  }
102
  ]
103
 
104
 
105
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
106
+ SPECIAL_KEYWORDS = ["Multilingual Mixing", "Metaphor and Pragmatic Intent", "Auxiliary Vocalizations", "Accent"]
107
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
108
+ THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
109
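# SPECIAL_KEYWORDS appears to mark sub-dimensions that can legitimately be
# absent from a clip (no language mixing, no figurative speech, no auxiliary
# sounds, no accent): in update_test_dimension_view below, their sliders start
# at and allow a minimum of 0 ("feature not present"), all others start at 1.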
 
110
 
111
 
112
  # ==============================================================================
113
+ # Backend Function Definitions
114
  # ==============================================================================
115
 
116
+ # This version did not place the file read inside the file lock, so concurrent reads could happen
117
+ """def load_or_initialize_count_json(audio_paths):
118
  try:
119
  # Only try downloading if file doesn't exist yet
120
  if not os.path.exists(COUNT_JSON_PATH):
121
  downloaded_path = hf_hub_download(
122
+ repo_id="intersteller2887/Turing-test-dataset",
123
  repo_type="dataset",
124
  filename=COUNT_JSON_REPO_PATH,
125
  token=os.getenv("HF_TOKEN")
 
128
  with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
129
  dst.write(src.read())
130
  except Exception as e:
131
+ print(f"Could not download count.json from HuggingFace dataset: {e}")
132
 
133
  # Add filelock to /workspace/count.json
134
  lock_path = COUNT_JSON_PATH + ".lock"
 
162
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
163
  json.dump(count_data, f, indent=4, ensure_ascii=False)
164
 
165
+ return count_data"""
166
 
167
+ # Function that loads or initializes count.json
168
+ # Called when a user starts a challenge; it loads or initializes count.json in the working directory
169
+ # Initialization happens when count.json exists in neither the working directory nor the HuggingFace dataset
170
+ # Loading happens when count.json exists in the HuggingFace dataset but has not yet been copied to the working directory
171
+ # After loading/initialization, all newly added audio files are added to count.json with an initial count of 0
172
+ # Loading/initialization generates count.json in the working directory, shared by all users of this Space
173
 
174
+ # This version also places the file read inside the file lock, and is modified accordingly
175
+ def load_or_initialize_count_json(audio_paths):
176
+ # Add filelock to /workspace/count.json
177
+ lock_path = COUNT_JSON_PATH + ".lock"
178
+ with FileLock(lock_path, timeout=10):
179
+ # If count.json does not exist in the working directory, try to download it from HuggingFace dataset
180
+ if not os.path.exists(COUNT_JSON_PATH):
181
+ try:
182
+ # Save latest count.json to working directory
183
+ downloaded_path = hf_hub_download(
184
+ repo_id="intersteller2887/Turing-test-dataset-en",
185
+ repo_type="dataset",
186
+ filename=COUNT_JSON_REPO_PATH,
187
+ token=os.getenv("HF_TOKEN")
188
+ )
189
+ with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
190
+ dst.write(src.read())
191
+ except Exception:
192
+ pass
193
+
194
+ # If count.json exists in the working directory: load into count_data for potential update
195
+ if os.path.exists(COUNT_JSON_PATH):
196
+ with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
197
+ count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
198
+ # Otherwise initialize count_data as an OrderedDict
199
+ # This happens when count.json exists in neither the working directory nor the HuggingFace dataset
200
+ else:
201
+ count_data = collections.OrderedDict()
202
 
203
+ updated = False
204
+ sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
 
 
 
205
 
206
+ # Guarantee that the sample recordings won't be taken into the pool
207
+ # Add newly added recordings to count.json
208
+ for path in audio_paths:
209
+ filename = os.path.basename(path)
210
+ if filename not in count_data:
211
+ if filename in sample_audio_files:
212
+ count_data[filename] = 999
213
+ else:
214
+ count_data[filename] = 0
215
+ updated = True
216
 
217
+ # Write updated count_data to /home/user/app/count.json
218
+ if updated or not os.path.exists(COUNT_JSON_PATH):
219
+ with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
220
+ json.dump(count_data, f, indent=4, ensure_ascii=False)
221
 
222
+ return
223
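# A hypothetical count.json after initialization (filenames illustrative):
# sample clips are pinned at 999 so they can never be sampled, new clips
# start at 0, and each serving increments the count.
#
# {
#     "sample1.wav": 999,
#     "clip_0001.wav": 0,
#     "clip_0002.wav": 1
# }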
 
224
+ # Avoid briefly replaying the previous (cached) audio when reaching the next question
225
+ def append_cache_buster(audio_path):
226
+ return f"{audio_path}?t={int(time.time() * 1000)}"
227
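# Hypothetical usage with a Gradio audio component (mirrors the commented-out
# call in update_sample_view), so the browser re-fetches the file instead of
# briefly replaying the cached previous clip:
#
# audio_up = gr.update(value=append_cache_buster(question["audio"]))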
+
228
+ # Function that samples questions from the available question set
229
+
230
+ # This version utilizes a given count_data to sample audio paths
231
+ """def sample_audio_paths(audio_paths, count_data, k=5, max_count=1): # k for questions per test; max_count for question limit in total
232
  eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
233
 
234
  if len(eligible_paths) < k:
235
  raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
236
 
237
+ # Shuffle to avoid fixed selections resulting from the directory structure
238
  selected = random.sample(eligible_paths, k)
239
 
240
+ # Once a test is sampled, update the counts for these questions immediately
241
  for path in selected:
242
  filename = os.path.basename(path)
243
  count_data[filename] = count_data.get(filename, 0) + 1
244
 
245
+ # Add filelock to /workspace/count.json
246
  lock_path = COUNT_JSON_PATH + ".lock"
247
  with FileLock(lock_path, timeout=10):
248
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
249
  json.dump(count_data, f, indent=4, ensure_ascii=False)
250
 
251
+ return selected, count_data"""
252
 
253
+ # This version places the file read inside the file lock to guarantee correct updates of count.json
254
+ def sample_audio_paths(audio_paths, k=5, max_count=1):
255
+ # Add filelock to /workspace/count.json
256
+ lock_path = COUNT_JSON_PATH + ".lock"
257
 
258
+ # Load newest count.json
259
+ with FileLock(lock_path, timeout=10):
260
+ with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
261
+ count_data = json.load(f)
262
+
263
+ eligible_paths = [
264
+ p for p in audio_paths
265
+ if count_data.get(os.path.basename(p), 0) < max_count
266
+ ]
267
+
268
+ if len(eligible_paths) < k:
269
+ raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
270
+
271
+ selected = random.sample(eligible_paths, k)
272
+
273
+ # Update count_data
274
+ for path in selected:
275
+ filename = os.path.basename(path)
276
+ count_data[filename] = count_data.get(filename, 0) + 1
277
+
278
+ # Update count.json
279
+ with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
280
+ json.dump(count_data, f, indent=4, ensure_ascii=False)
281
+
282
+ # return selected, count_data
283
+ # Keep count_data atomic
284
 
285
+ return selected
286
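# The whole read-modify-write above sits inside one FileLock: if the read
# happened outside the lock, two concurrent sessions could both observe a
# count of 0 for the same clip and over-serve it. Minimal sketch of the
# pattern, with `name` standing for any sampled filename:
#
# with FileLock(COUNT_JSON_PATH + ".lock", timeout=10):
#     with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
#         counts = json.load(f)
#     counts[name] = counts.get(name, 0) + 1
#     with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
#         json.dump(counts, f, indent=4, ensure_ascii=False)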
+
287
+ # ==============================================================================
288
+ # Frontend Function Definitions
289
+ # ==============================================================================
290
 
291
  # Save question_set in each user_data_state, preventing global sharing
292
  def start_challenge(user_data_state):
293
 
294
+ load_or_initialize_count_json(all_data_audio_paths)
295
+ # selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, k=5)
296
+ # Keep count_data atomic
297
+ selected_audio_paths = sample_audio_paths(all_data_audio_paths, k=5)
298
 
299
  question_set = [
300
  {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
 
302
  ]
303
 
304
  user_data_state["question_set"] = question_set
 
 
305
 
306
+ # count_data is not needed in the user data
307
+ # user_data_state["updated_count_data"] = updated_count_data
308
+
309
+ return gr.update(visible=False), gr.update(visible=True), user_data_state
310
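# Since each Gradio session gets its own copy of a gr.State value, keeping
# question_set inside user_data_state (rather than in a module-level
# QUESTION_SET global, as an earlier version did) prevents concurrent users
# from overwriting each other's sampled questions.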
+
311
+ # This function toggles the visibility of the "其他(请注明)" input field based on the selected education choice
312
  def toggle_education_other(choice):
313
  is_other = (choice == "其他(请注明)")
314
  return gr.update(visible=is_other, interactive=is_other, value="")
315
 
316
+ # This function checks if the user information is complete
317
  def check_info_complete(username, age, gender, education, education_other, ai_experience):
318
  if username.strip() and age and gender and education and ai_experience:
319
  if education == "其他(请注明)" and not education_other.strip():
 
321
  return gr.update(interactive=True)
322
  return gr.update(interactive=False)
323
 
324
+ # This function updates user_data and initializes the sample page (called when the user submits their info)
325
  def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
326
  final_edu = education_other if education == "其他(请注明)" else education
327
  user_data.update({
 
346
  # audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
347
  interactive_view_up = gr.update(visible=True)
348
  reference_view_up = gr.update(visible=False)
349
+ reference_btn_up = gr.update(value="参考")
350
  sample_slider_ups = []
351
  ref_slider_ups = []
352
  scores = dim_data.get("reference_scores", [])
 
366
  return empty_updates + slider_empty_updates
367
 
368
  def update_test_dimension_view(d_idx, selections):
369
+ # dimension = DIMENSIONS_DATA[d_idx]
 
 
 
 
370
  slider_updates = []
371
+ dim_data = DIMENSIONS_DATA[d_idx]
372
+ sub_dims = dim_data["sub_dims"]
373
+ dim_title = dim_data["title"]
374
+ existing_scores = selections.get(dim_data['title'], {})
375
+ progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
376
+
377
  for i in range(MAX_SUB_DIMS):
378
+ if i < len(sub_dims):
379
+ desc = sub_dims[i]
380
+ print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
381
+ name = desc.split(":")[0].strip()
382
+ default_value = 0 if name in SPECIAL_KEYWORDS else 1
383
+ value = existing_scores.get(desc, default_value)
384
+
385
+ slider_updates.append(gr.update(
386
+ visible=True,
387
+ label=desc,
388
+ minimum=default_value,
389
+ maximum=5,
390
+ step=1,
391
+ value=value,
392
+ interactive=True,
393
+ ))
394
+ # slider_updates.append(gr.update(
395
+ # visible=True,
396
+ # label=desc,
397
+ # minimum=0 if name in SPECIAL_KEYWORDS else 1,
398
+ # maximum=5,
399
+ # value = existing_scores.get(desc, 0),
400
+ # interactive=True,
401
+ # ))
402
  else:
403
+ slider_updates.append(gr.update(visible=False))
404
+ print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
405
+ # for i in range(MAX_SUB_DIMS):
406
+ # if i < len(dimension['sub_dims']):
407
+ # sub_dim_label = dimension['sub_dims'][i]
408
+ # value = existing_scores.get(sub_dim_label, 0)
409
+ # slider_updates.append(gr.update(visible=True, label=sub_dim_label, value=value))
410
+ # else:
411
+ # slider_updates.append(gr.update(visible=False, value=0))
412
 
413
  prev_btn_update = gr.update(interactive=(d_idx > 0))
414
  next_btn_update = gr.update(
415
+ value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度",
416
  interactive=True
417
  )
418
 
 
420
 
421
  def init_test_question(user_data, q_idx):
422
  d_idx = 0
 
 
423
  question = user_data["question_set"][q_idx]
424
+ progress_q = f" {q_idx + 1} / {len(user_data['question_set'])}"
425
 
426
  initial_updates = update_test_dimension_view(d_idx, {})
427
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
 
482
  next_btn_update,
483
  ) + tuple(slider_updates)
484
 
485
+ def toggle_reference_view(current):
486
+ if current == "参考":
487
+ return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
488
+ else:
489
+ return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")
490
+
491
+ def back_to_welcome():
492
+ return (
493
+ gr.update(visible=True), # welcome_page
494
+ gr.update(visible=False), # info_page
495
+ gr.update(visible=False), # sample_page
496
+ gr.update(visible=False), # pretest_page
497
+ gr.update(visible=False), # test_page
498
+ gr.update(visible=False), # final_judgment_page
499
+ gr.update(visible=False), # result_page
500
+ {}, # user_data_state
501
+ 0, # current_question_index
502
+ 0, # current_test_dimension_index
503
+ {}, # current_question_selections
504
+ [] # test_results
505
+ )
506
+
507
  # ==============================================================================
508
+ # Retry Function Definitions
509
  # ==============================================================================
510
 
511
+ # Decorator that uses a ThreadPoolExecutor to retry a function with a timeout
512
  def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
513
  def decorator(func):
514
  @wraps(func)
 
537
  return wrapper
538
  return decorator
539
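# A hypothetical application of the decorator (the decorated function is
# illustrative): each attempt runs in a worker thread with a 10 s limit,
# retried up to 3 times with a backoff pause between attempts.
#
# @retry_with_timeout(max_retries=3, timeout=10, backoff=1)
# def upload_submission(payload):
#     ...  # any Hub call that may hang or fail transiently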
 
540
+ def save_with_retry(all_results, user_data):
541
  # 尝试上传到Hugging Face Hub
542
  try:
543
  # 使用线程安全的保存方式
544
  with ThreadPoolExecutor(max_workers=1) as executor:
545
+ future = executor.submit(save_all_results_to_file, all_results, user_data)
546
  try:
547
  future.result(timeout=30) # 设置30秒超时
548
  return True
 
639
  gr.update(), gr.update(),
640
  ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)"""
641
 
642
+ # user_data no longer contains "updated_count_data"; count data is read/written under a file lock, directly from the working directory
643
  def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
644
  try:
645
  # 准备数据
 
665
  return init_q_updates + (all_results, gr.update(value=""))
666
  else:
667
  # 准备完整结果数据
668
+ result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
669
  for res in all_results:
670
  result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
671
  for dim_title, dim_data in res['selections'].items():
 
676
 
677
  # 尝试上传(带重试)
678
  try:
679
+ # success = save_with_retry(all_results, user_data, user_data.get("updated_count_data"))
680
+ success = save_with_retry(all_results, user_data)
681
  except Exception as e:
682
  print(f"上传过程中发生错误: {e}")
683
  success = False
 
690
 
691
  # 准备数据包
692
  user_info_clean = {
693
+ k: v for k, v in user_data.items() if k not in ["question_set"]
694
  }
695
  final_data_package = {
696
  "user_info": user_info_clean,
 
707
 
708
  # 更新count.json(剔除未完成的题目)
709
  try:
710
+ with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
711
+ with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
712
+ count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
713
+ count_update_success = update_count_with_retry(count_data, user_data["question_set"])
714
  except Exception as e:
715
  print(f"更新count.json失败: {e}")
716
  count_update_success = False
 
787
  except Exception as e:
788
  print(f"上传出错: {e}")"""
789
 
790
+ def save_all_results_to_file(all_results, user_data):
791
  repo_id = "intersteller2887/Turing-test-dataset-en"
792
  username = user_data.get("username", "user")
793
  timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
794
  submission_filename = f"submissions_{username}_{timestamp}.json"
795
 
796
  user_info_clean = {
797
+ k: v for k, v in user_data.items() if k not in ["question_set"]
798
  }
799
 
800
  final_data_package = {
 
819
  commit_message=f"Add new submission from {username}"
820
  )
821
 
822
+ try:
823
  with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
824
+ with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
825
+ count_data_str = f.read()
826
+
827
  api.upload_file(
828
+ path_or_fileobj=bytes(count_data_str, "utf-8"),
829
  path_in_repo=COUNT_JSON_REPO_PATH,
830
  repo_id=repo_id,
831
  repo_type="dataset",
832
  token=hf_token,
833
  commit_message=f"Update count.json after submission by {username}"
834
  )
835
+ except Exception as e:
836
+ print(f"上传 count.json 失败: {e}")
 
837
 
838
  # ==============================================================================
839
  # Gradio 界面定义 (Gradio UI Definition)
 
859
  }
860
 
861
  with welcome_page:
862
+ gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI")
863
+ start_btn = gr.Button("开始挑战", variant="primary")
864
 
865
  with info_page:
866
+ gr.Markdown("## 请提供一些基本信息")
867
+ username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
868
+ age_input = gr.Radio(["18岁以下", "18-25", "26-35", "36-50", "50岁以上"], label="年龄")
869
+ gender_input = gr.Radio(["", "", "其他"], label="性别")
870
+ education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他"], label="学历")
871
+ education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
872
+ ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
873
+ submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
 
874
 
875
  with sample_page:
876
+
877
+ gr.Markdown("## 样例分析\n请选择一个维度进行学习和打分练习。所有维度共用同一个样例音频。")
878
+ sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
879
  with gr.Row():
880
  with gr.Column(scale=1):
881
+ sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
882
  with gr.Column(scale=2):
883
  with gr.Column(visible=True) as interactive_view:
884
+ gr.Markdown("#### 请为以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")
885
  sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
886
  with gr.Column(visible=False) as reference_view:
887
+ gr.Markdown("### 参考答案解析")
888
  reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
889
  with gr.Row():
890
+ reference_btn = gr.Button("参考")
891
+ go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")
892
 
893
  with pretest_page:
894
+ gr.Markdown("## 测试说明\n"
895
+ "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
896
+ "- 在每个维度下,请为出现的每个特征 **从0到5打分**。\n"
897
+ "- **评分解释如下:**\n"
898
+ " - **0 分:特征未体现** (有些特征一定会体现,所以按1到5打分);\n"
899
+ " - **1 分:极度符合机器特征**;\n"
900
+ " - **2 分:较为符合机器特征**;\n"
901
+ " - **3 分:无明显人类或机器倾向**;\n"
902
+ " - **4 分:较为符合人类特征**;\n"
903
+ " - **5 分:极度符合人类特征**。\n"
904
+ "- 完成所有维度后,请根据整体印象对回应方的身份做出做出“人类”或“机器人”的 **最终判断**。\n"
905
+ "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。\n"
906
+ "## 特别注意\n"
907
+ "- 我们希望您���判断每个维度上**回应者**的表现是**偏向人还是机器**,分数的大小反映回应者的语音类人的程度,而**不是**这个维度体现的程度多少\n(如读音正确也不代表是人类,读音错误也不代表是机器,您应当判断的是“听到的发音更偏向机器还是人类”)\n"
908
+ "- 即使您一开始就已经很肯定回应方的身份,同样应当**独立地**对每个维度上回应方的表现进行细致的评判。比如您很肯定回应方是机器,也需要独立地对每个维度判断,而非简单地将每个维度归为偏机器。")
909
+ go_to_test_btn = gr.Button("开始测试", variant="primary")
910
 
 
 
 
911
  with test_page:
912
+ gr.Markdown("## 正式测试")
913
  question_progress_text = gr.Markdown()
914
  test_dimension_title = gr.Markdown()
915
+ test_audio = gr.Audio(label="测试音频")
916
+ gr.Markdown("--- \n ### 请为对话中的回应者(非发起者)针对以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")
917
+
918
+ test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True, show_label=True) for i in range(MAX_SUB_DIMS)]
919
+
920
  with gr.Row():
921
+ prev_dim_btn = gr.Button("上一维度")
922
+ next_dim_btn = gr.Button("下一维度", variant="primary")
923
 
924
  with final_judgment_page:
925
+ gr.Markdown("## 最终判断")
926
+ gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
927
+ final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)")
928
+ submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)
929
 
930
  with result_page:
931
+ gr.Markdown("## 测试完成")
932
  result_text = gr.Markdown()
933
+ back_to_welcome_btn = gr.Button("返回主界面", variant="primary")
934
 
935
  # ==============================================================================
936
  # 事件绑定 (Event Binding) & IO 列表定义