Spaces:
Runtime error
Runtime error
jianuo
committed on
Commit
•
9d3730d
1
Parent(s):
0d4f556
添加长语音合成,优化报错提示
Browse files- TTSs/{volcengine_test → volcengine}/__init__.py +0 -0
- TTSs/{volcengine_test → volcengine}/voice_list.xlsx +0 -0
- TTSs/{volcengine_test → volcengine}/volcengine.py +1 -1
- TTSs/volcengine_long/__init__.py +1 -0
- TTSs/volcengine_long/voice_list.xlsx +0 -0
- TTSs/volcengine_long/volcengine_long.py +189 -0
- tts_config.py +3 -2
TTSs/{volcengine_test → volcengine}/__init__.py
RENAMED
File without changes
|
TTSs/{volcengine_test → volcengine}/voice_list.xlsx
RENAMED
File without changes
|
TTSs/{volcengine_test → volcengine}/volcengine.py
RENAMED
@@ -140,7 +140,7 @@ class Volcengine_TTS(Base_TTS):
|
|
140 |
resp = requests.post(api_url, data=json.dumps(request_json), headers=header)
|
141 |
|
142 |
if "data" not in resp.json():
|
143 |
-
|
144 |
|
145 |
data = resp.json()["data"]
|
146 |
mp3_file = base64.b64decode(data)
|
|
|
140 |
resp = requests.post(api_url, data=json.dumps(request_json), headers=header)
|
141 |
|
142 |
if "data" not in resp.json():
|
143 |
+
raise Exception(resp.json())
|
144 |
|
145 |
data = resp.json()["data"]
|
146 |
mp3_file = base64.b64decode(data)
|
TTSs/volcengine_long/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .volcengine_long import Volcengine_long_TTS
|
TTSs/volcengine_long/voice_list.xlsx
ADDED
Binary file (14.6 kB). View file
|
|
TTSs/volcengine_long/volcengine_long.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
import uuid
|
6 |
+
from typing import Optional
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import pandas as pd
|
10 |
+
import requests
|
11 |
+
from pydub import AudioSegment
|
12 |
+
|
13 |
+
from TTSs.base_tts import Base_TTS
|
14 |
+
|
15 |
+
|
16 |
+
class avaliable_voice_type:
    """One voice entry, holding the fields of a single voice-list row.

    The textual form (``repr``/``str``) is a human-readable label: the voice
    name followed by every non-empty descriptive field, joined with "——".
    """

    # Descriptive fields default to "" so that they are skipped in the label.
    语言: Optional[str] = ""
    场景: Optional[str] = ""
    音色名称: str
    voice_type: str
    时间戳支持: bool = False
    支持情感与风格类型: Optional[str] = ""
    支持语言类型: Optional[str] = ""

    def __repr__(self):
        """Return the label "<name>[——<language>][——<scene>][——<style>][——<lang types>]".

        Fields are read with ``getattr`` rather than from ``self.__dict__``:
        ``__dict__`` only contains attributes assigned on the instance, so a
        field left at its class-level default used to raise ``KeyError``.
        An unset voice name renders as an empty string.
        """
        parts = [f"{getattr(self, '音色名称', '')}"]
        for field_name in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field_name, "")
            if value:
                parts.append(f"{value}")
        return "——".join(parts)
|
40 |
+
|
41 |
+
|
42 |
+
class Volcengine_long_TTS(Base_TTS):
    """Volcengine long-text TTS backend using the asynchronous submit/query API.

    A synthesis task is submitted, polled until it reports completion, and the
    resulting MP3 is downloaded and returned as a pydub ``AudioSegment``.
    Two API flavours are supported, selected by the "version" dropdown: the
    normal one (voices come from ``voice_list.xlsx``) and the emotion
    prediction one (voices come from the built-in ``emo_voice`` table).
    """

    # UI labels of the two versions; they also select the API flavour.
    _VERSION_NORMAL = "普通版(不支持情感预测)"
    _VERSION_EMOTION = "情感预测版"

    _HOST = "openspeech.bytedance.com"
    _POLL_INTERVAL_SECONDS = 2      # pause before every status query
    _POLL_TIMEOUT_SECONDS = 300     # give up waiting for the task after this long
    _REQUEST_TIMEOUT_SECONDS = 30   # per-HTTP-request timeout so a hung call cannot block forever

    def get_name(self):
        """Return the display name of this TTS backend."""
        return '火山引擎精品长语音'

    def __init__(self):
        # Voices for the normal version, keyed by their display label.
        self.useful_voice = self.get_data_map()
        # Voices for the emotion prediction version: display name -> voice_type id.
        self.emo_voice = {
            "擎苍": "BV701_streaming",
            "阳光青年": "BV123_streaming",
            "反卷青年": "BV120_streaming",
            "通用赘婿": "BV119_streaming",
            "古风少御": "BV115_streaming",
            "霸气青叔": "BV107_streaming",
            "质朴青年": "BV100_streaming",
            "温柔淑女": "BV104_streaming",
            "开朗青年": "BV004_streaming",
            "甜宠少御": "BV113_streaming",
            "儒雅青年": "BV102_streaming"
        }

    def get_data_map(self, filename="voice_list.xlsx"):
        """Load the voice spreadsheet located next to this module.

        Args:
            filename: Name of the Excel file inside this module's directory.

        Returns:
            dict mapping each voice's display label (``str(voice)``) to its
            ``avaliable_voice_type`` record. Empty cells are read as "".
        """
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
        df = pd.read_excel(path).fillna('')

        useful_voice = {}
        for _, row in df.iterrows():
            data = avaliable_voice_type()
            data.语言 = row['语言']
            data.场景 = row['场景']
            data.音色名称 = row['音色名称']
            data.voice_type = row['voice_type']
            data.时间戳支持 = row['时间戳']
            data.支持情感与风格类型 = row['支持情感/风格类型']
            data.支持语言类型 = row['支持语言类型']
            useful_voice[str(data)] = data

        return useful_voice

    def update_dropdown(self, version):
        """Return a voice dropdown whose choices match the selected version.

        Raises:
            ValueError: If ``version`` is not one of the two known labels
                (previously this surfaced as an ``UnboundLocalError``).
        """
        if version == self._VERSION_NORMAL:
            voices = list(self.useful_voice.keys())
        elif version == self._VERSION_EMOTION:
            voices = list(self.emo_voice.keys())
        else:
            raise ValueError(f"未知的版本: {version}")

        # Guard against an empty voice list instead of failing on voices[0].
        return gr.Dropdown(choices=voices, value=voices[0] if voices else None)

    def _get_config_page(self):
        """Build the (initially hidden) configuration panel.

        Returns:
            (group, inputs): the gradio group and the list of input components,
            ordered to match the parameters of ``_generate`` after ``text``.
        """
        with gr.Group(visible=False) as config_volcengine:
            voices = list(self.useful_voice.keys())

            with gr.Row():
                volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
                                              placeholder="请输入volcengine的appid",
                                              type="password",
                                              interactive=True,
                                              value=os.environ.get('VOLCENGINE_APPID', ''))
                volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
                                                     placeholder="请输入volengine的access_token",
                                                     type="password",
                                                     interactive=True,
                                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))

            version = gr.Dropdown(choices=[self._VERSION_NORMAL, self._VERSION_EMOTION],
                                  value=self._VERSION_NORMAL,
                                  label="使用版本", interactive=True)

            voice_type = gr.Dropdown(choices=voices, value=voices[0] if voices else None,
                                     label="音色选择", interactive=True)

            with gr.Row():
                speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
                                        interactive=True)
                volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
                                         interactive=True)
                pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
                                        interactive=True)

            with gr.Row():
                # Shown in the UI but deliberately not wired into `inputs`:
                # their labels mark them as not yet supported.
                gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
                gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)

            # Swap the voice list whenever the version changes.
            version.change(self.update_dropdown, inputs=[version], outputs=[voice_type])

        inputs = [
            volcengine_appid, version, volcengine_access_token, voice_type, speed_ratio,
            volume_ratio, pitch_ratio
        ]

        return config_volcengine, inputs

    def _generate(self, text, appid, version, access_token, voice, speed_ratio,
                  volume_ratio, pitch_ratio):
        """Synthesize ``text`` and return the audio as a pydub ``AudioSegment``.

        Args:
            text: Text to synthesize.
            appid: Volcengine application id.
            version: One of the two version labels offered by the config page.
            access_token: Volcengine access token (sent as ``Bearer;<token>``).
            voice: Display label of the voice, as listed in the voice dropdown.
            speed_ratio, volume_ratio, pitch_ratio: Values forwarded as the
                ``speed``, ``volume`` and ``pitch`` request fields.

        Raises:
            ValueError: If ``version`` is not a known label.
            Exception: If the submit or query response reports a failure; the
                decoded response body is attached for diagnosis.
            TimeoutError: If the task has not finished within the polling window.
            requests.RequestException: On network errors, request timeouts or a
                failed audio download.
        """
        if version == self._VERSION_NORMAL:
            api_name = "tts_async"
            resource_id = "volc.tts_async.default"
            voice_type = self.useful_voice[voice].voice_type
        elif version == self._VERSION_EMOTION:
            api_name = "tts_async_with_emotion"
            resource_id = "volc.tts_async.emotion"
            voice_type = self.emo_voice[voice]
        else:
            raise ValueError(f"未知的版本: {version}")

        submit_api_url = f"https://{self._HOST}/api/v1/{api_name}/submit"
        query_api_url = f"https://{self._HOST}/api/v1/{api_name}/query"
        header = {"Authorization": f"Bearer;{access_token}", "Resource-Id": resource_id}

        request_json = {
            "appid": appid,
            "format": "mp3",
            "reqid": str(uuid.uuid4()),
            "voice_type": voice_type,
            "speed": speed_ratio,
            "volume": volume_ratio,
            "pitch": pitch_ratio,
            "text": text
        }

        resp = requests.post(submit_api_url, data=json.dumps(request_json), headers=header,
                             timeout=self._REQUEST_TIMEOUT_SECONDS)
        resp = resp.json()
        # Use .get(): an error payload without 'task_status' should surface the
        # whole response instead of a bare KeyError.
        if resp.get('task_status') != 0:
            raise Exception(resp)

        task_id = resp['task_id']
        query_json = {
            "appid": appid,
            "task_id": task_id
        }

        # Poll the task every _POLL_INTERVAL_SECONDS, for at most _POLL_TIMEOUT_SECONDS.
        deadline = time.monotonic() + self._POLL_TIMEOUT_SECONDS
        while True:
            time.sleep(self._POLL_INTERVAL_SECONDS)
            query_resp = requests.get(query_api_url, params=query_json, headers=header,
                                      timeout=self._REQUEST_TIMEOUT_SECONDS)
            query_resp = query_resp.json()

            # A missing status is treated like status 2 (failure); 1 means finished.
            task_status = query_resp.get('task_status', 2)
            if task_status == 2:
                raise Exception(query_resp)
            if task_status == 1:
                break

            if time.monotonic() > deadline:
                raise TimeoutError("等待任务超时")

        audio_url = query_resp['audio_url']
        audio_resp = requests.get(audio_url, timeout=self._REQUEST_TIMEOUT_SECONDS)
        # Fail on an HTTP error here rather than handing an error page to the MP3 decoder.
        audio_resp.raise_for_status()

        return AudioSegment.from_mp3(io.BytesIO(audio_resp.content))
|
tts_config.py
CHANGED
@@ -3,7 +3,8 @@ from TTSs.genshin_api_tts import genshin_api_TTS
|
|
3 |
from TTSs.genshin_local.genshin_local_tts import genshin_local_TTS
|
4 |
from TTSs.openai_tts import OpenAI_TTS
|
5 |
from TTSs.reecho_ai import REECHO_TTS
|
6 |
-
from TTSs.
|
|
|
7 |
|
8 |
|
9 |
-
load_TTS = [eleven_TTS(), genshin_api_TTS(), genshin_local_TTS(), OpenAI_TTS(), REECHO_TTS(), Volcengine_TTS()]
|
|
|
3 |
from TTSs.genshin_local.genshin_local_tts import genshin_local_TTS
|
4 |
from TTSs.openai_tts import OpenAI_TTS
|
5 |
from TTSs.reecho_ai import REECHO_TTS
|
6 |
+
from TTSs.volcengine import Volcengine_TTS
|
7 |
+
from TTSs.volcengine_long import Volcengine_long_TTS
|
8 |
|
9 |
|
10 |
+
load_TTS = [eleven_TTS(), genshin_api_TTS(), genshin_local_TTS(), OpenAI_TTS(), REECHO_TTS(), Volcengine_TTS(), Volcengine_long_TTS()]
|