jianuo committed on
Commit
9d3730d
1 Parent(s): 0d4f556

添加长语音合成,优化报错提示

Browse files
TTSs/{volcengine_test → volcengine}/__init__.py RENAMED
File without changes
TTSs/{volcengine_test → volcengine}/voice_list.xlsx RENAMED
File without changes
TTSs/{volcengine_test → volcengine}/volcengine.py RENAMED
@@ -140,7 +140,7 @@ class Volcengine_TTS(Base_TTS):
140
  resp = requests.post(api_url, data=json.dumps(request_json), headers=header)
141
 
142
  if "data" not in resp.json():
143
- return str(resp.json()), None, None
144
 
145
  data = resp.json()["data"]
146
  mp3_file = base64.b64decode(data)
 
140
  resp = requests.post(api_url, data=json.dumps(request_json), headers=header)
141
 
142
  if "data" not in resp.json():
143
+ raise Exception(resp.json())
144
 
145
  data = resp.json()["data"]
146
  mp3_file = base64.b64decode(data)
TTSs/volcengine_long/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .volcengine_long import Volcengine_long_TTS
TTSs/volcengine_long/voice_list.xlsx ADDED
Binary file (14.6 kB). View file
 
TTSs/volcengine_long/volcengine_long.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import os
4
+ import time
5
+ import uuid
6
+ from typing import Optional
7
+
8
+ import gradio as gr
9
+ import pandas as pd
10
+ import requests
11
+ from pydub import AudioSegment
12
+
13
+ from TTSs.base_tts import Base_TTS
14
+
15
+
16
class avaliable_voice_type:
    """Plain record describing one selectable voice.

    Instances are created empty and populated attribute by attribute.
    The class-level values below act as defaults for attributes that were
    never assigned on the instance.
    """

    语言: Optional[str] = ""  # language
    场景: Optional[str] = ""  # scenario
    音色名称: str  # display name of the voice (no default)
    voice_type: str  # voice identifier sent to the API (no default)
    时间戳支持: bool = False  # timestamp support flag
    支持情感与风格类型: Optional[str] = ""  # supported emotion / style types
    支持语言类型: Optional[str] = ""  # supported language types

    def __repr__(self):
        """Return the voice name followed by every non-empty descriptor.

        Parts are joined with "——". Attributes are read with ``getattr`` so
        that class-level defaults (which never appear in ``self.__dict__``)
        are honoured instead of raising ``KeyError`` on a partially
        populated instance.
        """
        parts = [str(getattr(self, "音色名称", ""))]
        for field in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field, "")
            if value:
                parts.append(str(value))
        return "——".join(parts)
40
+
41
+
42
class Volcengine_long_TTS(Base_TTS):
    """TTS backend for the Volcengine long-text service.

    Synthesis is asynchronous: a task is submitted over HTTP, its status is
    polled until it finishes, and the resulting MP3 is downloaded and
    returned as a ``pydub.AudioSegment``.
    """

    # UI labels of the two service variants; they also select the endpoint.
    _VERSION_NORMAL = "普通版(不支持情感预测)"
    _VERSION_EMOTION = "情感预测版"

    _HTTP_TIMEOUT = 30  # seconds allowed for each individual HTTP request
    _POLL_INTERVAL = 2  # seconds to wait between task status queries
    _TASK_TIMEOUT = 300  # seconds to wait for the task before giving up

    def get_name(self):
        """Return the display name of this backend."""
        return '火山引擎精品长语音'

    def __init__(self):
        # Voices for the normal variant are loaded from the bundled
        # spreadsheet; the emotion variant uses the fixed table below.
        self.useful_voice = self.get_data_map()
        self.emo_voice = {
            "擎苍": "BV701_streaming",
            "阳光青年": "BV123_streaming",
            "反卷青年": "BV120_streaming",
            "通用赘婿": "BV119_streaming",
            "古风少御": "BV115_streaming",
            "霸气青叔": "BV107_streaming",
            "质朴青年": "BV100_streaming",
            "温柔淑女": "BV104_streaming",
            "开朗青年": "BV004_streaming",
            "甜宠少御": "BV113_streaming",
            "儒雅青年": "BV102_streaming",
        }

    def get_data_map(self, filename="voice_list.xlsx"):
        """Load the voice spreadsheet that sits next to this module.

        Args:
            filename: Spreadsheet file name, resolved relative to this file.

        Returns:
            dict mapping ``str(voice)`` (the dropdown label) to its
            ``avaliable_voice_type`` record.
        """
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
        # Empty cells become '' so the label builder can skip them.
        df = pd.read_excel(path).fillna('')

        useful_voice = {}
        for _, row in df.iterrows():
            data = avaliable_voice_type()
            data.语言 = row['语言']
            data.场景 = row['场景']
            data.音色名称 = row['音色名称']
            data.voice_type = row['voice_type']
            data.时间戳支持 = row['时间戳']
            data.支持情感与风格类型 = row['支持情感/风格类型']
            data.支持语言类型 = row['支持语言类型']
            useful_voice[str(data)] = data

        return useful_voice

    def update_dropdown(self, version):
        """Return a voice dropdown matching the selected service variant.

        Raises:
            ValueError: if ``version`` is not one of the two known labels.
        """
        if version == self._VERSION_NORMAL:
            voices = list(self.useful_voice)
        elif version == self._VERSION_EMOTION:
            voices = list(self.emo_voice)
        else:
            raise ValueError(f"Unknown version: {version!r}")

        # Guard against an empty voice table instead of failing on voices[0].
        return gr.Dropdown(choices=voices, value=voices[0] if voices else None)

    def _get_config_page(self):
        """Build the (initially hidden) settings group for this backend.

        Returns:
            tuple ``(group, inputs)`` where ``inputs`` lists the components
            whose values are passed to ``_generate`` after ``text``, in the
            same order as its parameters.
        """
        with gr.Group(visible=False) as config_volcengine:
            voices = list(self.useful_voice)

            with gr.Row():
                volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
                                              placeholder="请输入volcengine的appid",
                                              type="password",
                                              interactive=True,
                                              value=os.environ.get('VOLCENGINE_APPID', ''))
                volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
                                                     placeholder="请输入volengine的access_token",
                                                     type="password",
                                                     interactive=True,
                                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))

            version = gr.Dropdown(choices=[self._VERSION_NORMAL, self._VERSION_EMOTION],
                                  value=self._VERSION_NORMAL,
                                  label="使用版本", interactive=True)

            voice_type = gr.Dropdown(choices=voices, value=voices[0] if voices else None,
                                     label="音色选择", interactive=True)

            with gr.Row():
                speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
                                        interactive=True)
                volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
                                         interactive=True)
                pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
                                        interactive=True)

            with gr.Row():
                # Shown in the UI but not yet wired into the request.
                emotion = gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
                language = gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)

            # Switching the variant swaps the list of selectable voices.
            version.change(self.update_dropdown, inputs=[version], outputs=[voice_type])

        inputs = [
            volcengine_appid, version, volcengine_access_token, voice_type, speed_ratio,
            volume_ratio, pitch_ratio
        ]

        return config_volcengine, inputs

    def _generate(self, text, appid, version, access_token, voice, speed_ratio,
                  volume_ratio, pitch_ratio):
        """Synthesize ``text`` and return the audio.

        Args:
            text: Text to synthesize.
            appid: Volcengine application id.
            version: One of the two variant labels offered in the UI.
            access_token: Token placed in the Authorization header.
            voice: Dropdown label of the selected voice.
            speed_ratio, volume_ratio, pitch_ratio: Sent as the request's
                ``speed``, ``volume`` and ``pitch`` fields.

        Returns:
            ``pydub.AudioSegment`` decoded from the downloaded MP3.

        Raises:
            ValueError: if ``version`` is not a known variant.
            Exception: carrying the server response when the submit or
                query response does not report the expected status.
            TimeoutError: if the task has not finished within the task
                timeout.
        """
        host = "openspeech.bytedance.com"

        if version == self._VERSION_NORMAL:
            endpoint = "tts_async"
            resource_id = "volc.tts_async.default"
            voice_type = self.useful_voice[voice].voice_type
        elif version == self._VERSION_EMOTION:
            endpoint = "tts_async_with_emotion"
            resource_id = "volc.tts_async.emotion"
            voice_type = self.emo_voice[voice]
        else:
            raise ValueError(f"Unknown version: {version!r}")

        submit_api_url = f"https://{host}/api/v1/{endpoint}/submit"
        query_api_url = f"https://{host}/api/v1/{endpoint}/query"
        header = {"Authorization": f"Bearer;{access_token}", "Resource-Id": resource_id}

        request_json = {
            "appid": appid,
            "format": "mp3",
            "reqid": str(uuid.uuid4()),
            "voice_type": voice_type,
            "speed": speed_ratio,
            "volume": volume_ratio,
            "pitch": pitch_ratio,
            "text": text,
        }

        resp = requests.post(submit_api_url, data=json.dumps(request_json), headers=header,
                             timeout=self._HTTP_TIMEOUT).json()
        # .get() so a payload without 'task_status' surfaces the server's
        # own message rather than a bare KeyError.
        if resp.get('task_status') != 0:
            raise Exception(resp)

        query_json = {
            "appid": appid,
            "task_id": resp['task_id'],
        }

        # Poll the task status at a fixed interval until it finishes, fails,
        # or the overall task timeout elapses.
        deadline = time.monotonic() + self._TASK_TIMEOUT
        while True:
            time.sleep(self._POLL_INTERVAL)
            query_resp = requests.get(query_api_url, params=query_json, headers=header,
                                      timeout=self._HTTP_TIMEOUT).json()
            # A missing status is treated the same as status 2 (failure).
            status = query_resp.get('task_status', 2)
            if status == 2:
                raise Exception(query_resp)
            if status == 1:
                break

            if time.monotonic() > deadline:
                raise TimeoutError("等待任务超时")

        audio_resp = requests.get(query_resp['audio_url'], timeout=self._HTTP_TIMEOUT)
        # Fail on an HTTP error instead of feeding an error page to the decoder.
        audio_resp.raise_for_status()

        return AudioSegment.from_mp3(io.BytesIO(audio_resp.content))
tts_config.py CHANGED
@@ -3,7 +3,8 @@ from TTSs.genshin_api_tts import genshin_api_TTS
3
  from TTSs.genshin_local.genshin_local_tts import genshin_local_TTS
4
  from TTSs.openai_tts import OpenAI_TTS
5
  from TTSs.reecho_ai import REECHO_TTS
6
- from TTSs.volcengine_test import Volcengine_TTS
 
7
 
8
 
9
- load_TTS = [eleven_TTS(), genshin_api_TTS(), genshin_local_TTS(), OpenAI_TTS(), REECHO_TTS(), Volcengine_TTS()]
 
3
  from TTSs.genshin_local.genshin_local_tts import genshin_local_TTS
4
  from TTSs.openai_tts import OpenAI_TTS
5
  from TTSs.reecho_ai import REECHO_TTS
6
+ from TTSs.volcengine import Volcengine_TTS
7
+ from TTSs.volcengine_long import Volcengine_long_TTS
8
 
9
 
10
+ load_TTS = [eleven_TTS(), genshin_api_TTS(), genshin_local_TTS(), OpenAI_TTS(), REECHO_TTS(), Volcengine_TTS(), Volcengine_long_TTS()]