intersteller2887 committed on
Commit
3b52a72
·
verified ·
1 Parent(s): f2f0345

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -8
app.py CHANGED
@@ -2,18 +2,26 @@ import gradio as gr
2
  import os
3
  import json
4
  import pandas as pd
 
5
  from datasets import load_dataset
6
  from huggingface_hub import HfApi
7
 
8
  dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
9
 
 
 
 
 
 
 
 
10
  # ==============================================================================
11
  # 数据定义 (Data Definition)
12
  # ==============================================================================
13
  DIMENSIONS_DATA = [
14
  {
15
  "title": "语义和语用特征",
16
- "audio": "data/sample1.wav",
17
  "desc": "这是“语义和语用特征”维度的文本描述示例。",
18
  "sub_dims": [
19
  "记忆一致性:回应者是否能够正确并正确并延续并记忆并延续对话信息?是否存在对上下文的误解或不自洽?", "逻辑连贯性:回应者在语义与对话结构上保持前后一致、合乎逻辑?是否存在前后矛盾的情况?",
@@ -33,7 +41,7 @@ DIMENSIONS_DATA = [
33
  },
34
  {
35
  "title": "非生理性副语言特征",
36
- "audio": "data/sample1.wav",
37
  "desc": "这是“非生理性副语言特征”维度的文本描述示例。",
38
  "sub_dims": [
39
  "节奏:回应者是否存在自然的停顿?语速是否存在自然、流畅的变化?", "语调:在表达疑问、惊讶、强调时,回应者的音调是否会自然上扬或下降?是否表现出符合语境的变化?",
@@ -48,7 +56,7 @@ DIMENSIONS_DATA = [
48
  },
49
  {
50
  "title": "生理性副语言特征",
51
- "audio": "data/sample1.wav",
52
  "desc": "这是“生理性副语言特征”维度的文本描述示例。",
53
  "sub_dims": [
54
  "微生理杂音:回应中是否出现如呼吸声、口水音、气泡音等无意识发声?这些发声是否自然地穿插在恰当的语流节奏当中?",
@@ -62,7 +70,7 @@ DIMENSIONS_DATA = [
62
  },
63
  {
64
  "title": "机械人格",
65
- "audio": "data/sample1.wav",
66
  "desc": "这是“机械人格”维度的文本描述示例。",
67
  "sub_dims": [
68
  "谄媚现象:回应者是否频繁地赞同用户、重复用户的说法、不断表示感谢或道歉?是否存在“无论用户说什么都肯定或支持”的语气模式?",
@@ -75,7 +83,7 @@ DIMENSIONS_DATA = [
75
  },
76
  {
77
  "title": "情感表达",
78
- "audio": "data/sample1.wav",
79
  "desc": "这是“情感表达”维度的文本描述示例。",
80
  "sub_dims": [
81
  "语义层面:回应者的语言内容是否体现出符合上下文的情绪反应?是否表达了人类对某些情境应有的情感态度?",
@@ -87,12 +95,23 @@ DIMENSIONS_DATA = [
87
  """
88
  }
89
  ]
 
90
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
 
 
 
 
91
  QUESTION_SET = [
 
 
 
 
 
92
  {"audio": "data/Ses02F_impro01.wav", "desc": "这是第一个测试文件的描述",},
93
  {"audio": "data/Ses02F_impro02.wav", "desc": "这是第二个测试文件的描述",},
94
  {"audio": "data/Ses02F_impro03.wav", "desc": "这是第三个测试文件的描述",},
95
- ]
 
96
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
97
 
98
  # ==============================================================================
@@ -462,8 +481,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
462
  # 程序入口 (Entry Point)
463
  # ==============================================================================
464
  if __name__ == "__main__":
465
- if not os.path.exists("audio"):
466
- os.makedirs("audio")
467
  # A quick check to see if we're in a deployed Space, to avoid local errors.
468
  if "SPACE_ID" in os.environ:
469
  print("Running in a Hugging Face Space, checking for audio files...")
 
2
  import os
3
  import json
4
  import pandas as pd
5
+ import random
6
  from datasets import load_dataset
7
  from huggingface_hub import HfApi
8
 
9
  dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
10
 
11
+ all_data_audio_paths = [
12
+ item['audio']['path'] for item in dataset
13
+ if item['audio']['path'].endswith(".wav") and "/data/" in item['audio']['path'].replace("\\", "/")
14
+ ]
15
+
16
+ sample1_audio_path = next((p for p in all_data_audio_paths if p.endswith("sample1.wav")), None)
17
+
18
  # ==============================================================================
19
  # 数据定义 (Data Definition)
20
  # ==============================================================================
21
  DIMENSIONS_DATA = [
22
  {
23
  "title": "语义和语用特征",
24
+ "audio": sample1_audio_path,
25
  "desc": "这是“语义和语用特征”维度的文本描述示例。",
26
  "sub_dims": [
27
  "记忆一致性:回应者是否能够正确并正确并延续并记忆并延续对话信息?是否存在对上下文的误解或不自洽?", "逻辑连贯性:回应者在语义与对话结构上保持前后一致、合乎逻辑?是否存在前后矛盾的情况?",
 
41
  },
42
  {
43
  "title": "非生理性副语言特征",
44
+ "audio": sample1_audio_path,
45
  "desc": "这是“非生理性副语言特征”维度的文本描述示例。",
46
  "sub_dims": [
47
  "节奏:回应者是否存在自然的停顿?语速是否存在自然、流畅的变化?", "语调:在表达疑问、惊讶、强调时,回应者的音调是否会自然上扬或下降?是否表现出符合语境的变化?",
 
56
  },
57
  {
58
  "title": "生理性副语言特征",
59
+ "audio": sample1_audio_path,
60
  "desc": "这是“生理性副语言特征”维度的文本描述示例。",
61
  "sub_dims": [
62
  "微生理杂音:回应中是否出现如呼吸声、口水音、气泡音等无意识发声?这些发声是否自然地穿插在恰当的语流节奏当中?",
 
70
  },
71
  {
72
  "title": "机械人格",
73
+ "audio": sample1_audio_path,
74
  "desc": "这是“机械人格”维度的文本描述示例。",
75
  "sub_dims": [
76
  "谄媚现象:回应者是否频繁地赞同用户、重复用户的说法、不断表示感谢或道歉?是否存在“无论用户说什么都肯定或支持”的语气模式?",
 
83
  },
84
  {
85
  "title": "情感表达",
86
+ "audio": sample1_audio_path,
87
  "desc": "这是“情感表达”维度的文本描述示例。",
88
  "sub_dims": [
89
  "语义层面:回应者的语言内容是否体现出符合上下文的情绪反应?是否表达了人类对某些情境应有的情感态度?",
 
95
  """
96
  }
97
  ]
98
+
99
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
100
+
101
+ random.seed()
102
+ selected_audio_paths = random.sample(all_data_audio_paths, 5)
103
+
104
  QUESTION_SET = [
105
+ {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
106
+ for path in selected_audio_paths
107
+ ]
108
+
109
+ """QUESTION_SET = [
110
  {"audio": "data/Ses02F_impro01.wav", "desc": "这是第一个测试文件的描述",},
111
  {"audio": "data/Ses02F_impro02.wav", "desc": "这是第二个测试文件的描述",},
112
  {"audio": "data/Ses02F_impro03.wav", "desc": "这是第三个测试文件的描述",},
113
+ ]"""
114
+
115
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
116
 
117
  # ==============================================================================
 
481
  # 程序入口 (Entry Point)
482
  # ==============================================================================
483
  if __name__ == "__main__":
484
+ """if not os.path.exists("audio"):
485
+ os.makedirs("audio")"""
486
  # A quick check to see if we're in a deployed Space, to avoid local errors.
487
  if "SPACE_ID" in os.environ:
488
  print("Running in a Hugging Face Space, checking for audio files...")