Spaces:
Runtime error
Runtime error
jianuo
commited on
Commit
•
6cce716
1
Parent(s):
446c342
添加新引擎
Browse files- README.md +9 -1
- TTSs/volcengine/__init__.py +1 -0
- TTSs/volcengine/voice_list.xlsx +0 -0
- TTSs/volcengine/volcengine.py +111 -0
- main.py +48 -4
- requirements.txt +2 -0
- requirements_light.txt +10 -0
README.md
CHANGED
@@ -10,7 +10,7 @@ pinned: false
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
-
# TTS
|
14 |
|
15 |
这是一个网页版的TTS引擎,支持以下4个TTS引擎:
|
16 |
|
@@ -18,6 +18,7 @@ license: mit
|
|
18 |
- openai
|
19 |
- genshin(原神)
|
20 |
- REECHO 睿声
|
|
|
21 |
|
22 |
其中,原神TTS引擎原作者为[红血球AE3803](https://space.bilibili.com/6589795)
|
23 |
|
@@ -64,6 +65,13 @@ license: mit
|
|
64 |
|
65 |
API申请地址:https://dash.reecho.ai/apiKey
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
## 安装方法
|
68 |
|
69 |
安装python(推荐3.10)
|
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
+
# TTS 5合1 引擎
|
14 |
|
15 |
这是一个网页版的TTS引擎,支持以下5个TTS引擎:
|
16 |
|
|
|
18 |
- openai
|
19 |
- genshin(原神)
|
20 |
- REECHO 睿声
|
21 |
+
- volcengine(火山引擎)
|
22 |
|
23 |
其中,原神TTS引擎原作者为[红血球AE3803](https://space.bilibili.com/6589795)
|
24 |
|
|
|
65 |
|
66 |
API申请地址:https://dash.reecho.ai/apiKey
|
67 |
|
68 |
+
### volcengine(火山引擎)
|
69 |
+
控制台&申请地址:https://console.volcengine.com/speech/service/8?AppID=9866746965
|
70 |
+
|
71 |
+
`VOLCENGINE_APPID`:用于volcengine
|
72 |
+
|
73 |
+
`VOLCENGINE_ACCESS_TOKEN`:用于volcengine
|
74 |
+
|
75 |
## 安装方法
|
76 |
|
77 |
安装python(推荐3.10)
|
TTSs/volcengine/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .volcengine import tts, useful_voice
|
TTSs/volcengine/voice_list.xlsx
ADDED
Binary file (14.6 kB). View file
|
|
TTSs/volcengine/volcengine.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import io
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import uuid
|
6 |
+
from typing import Optional
|
7 |
+
|
8 |
+
import pandas as pd
|
9 |
+
import requests
|
10 |
+
from pydub import AudioSegment
|
11 |
+
|
12 |
+
from ..tts_utils import mix_background_music
|
13 |
+
|
14 |
+
|
15 |
+
class avaliable_voice_type:
    """One entry of the volcengine voice catalogue.

    Instances are created empty and populated attribute by attribute (see
    ``get_data_map``). The textual form produced by ``__repr__`` is used as
    the dictionary key / display label for the voice.
    """

    语言: Optional[str] = ""  # language
    场景: Optional[str] = ""  # scenario
    音色名称: str  # display name of the voice (annotation only, no default)
    voice_type: str  # identifier sent to the API as ``audio.voice_type``
    时间戳支持: bool = False  # timestamp support
    支持情感与风格类型: Optional[str] = ""  # supported emotion / style types
    支持语言类型: Optional[str] = ""  # supported language types

    def __repr__(self):
        """Return ``音色: <name>`` followed by every non-empty detail field.

        Detail fields are appended in a fixed order and separated by ``——``.
        """
        # getattr() is used instead of self.__dict__ so that fields which were
        # never assigned on the instance fall back to the class-level default
        # (or to "") rather than raising KeyError.
        parts = [f"音色: {getattr(self, '音色名称', '')}"]
        for field in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field, "")
            if value:
                parts.append(f"{value}")
        return "——".join(parts)
|
39 |
+
|
40 |
+
|
41 |
+
def get_data_map(filename="voice_list.xlsx"):
    """Build the voice catalogue from a spreadsheet stored beside this module.

    Args:
        filename: Name of the Excel file, resolved relative to the directory
            containing this source file.

    Returns:
        dict mapping ``str(voice)`` to the populated ``avaliable_voice_type``
        instance for every row of the sheet. Rows that render to the same
        string overwrite earlier ones.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    sheet = pd.read_excel(os.path.join(module_dir, filename))
    # Empty cells become "" so the truthiness checks in __repr__ skip them.
    sheet.fillna('', inplace=True)

    # (attribute on the voice object, column header in the spreadsheet)
    attr_to_column = (
        ("语言", '语言'),
        ("场景", '场景'),
        ("音色名称", '音色名称'),
        ("voice_type", 'voice_type'),
        ("时间戳支持", '时间戳'),
        ("支持情感与风格类型", '支持情感/风格类型'),
        ("支持语言类型", '支持语言类型'),
    )

    catalogue = {}
    for _, record in sheet.iterrows():
        voice = avaliable_voice_type()
        for attr, column in attr_to_column:
            setattr(voice, attr, record[column])
        catalogue[str(voice)] = voice

    return catalogue
|
59 |
+
|
60 |
+
|
61 |
+
def tts(text, appid, access_token, voice, speed_ratio,
        volume_ratio, pitch_ratio, 背景音乐, speaker_up, back_up):
    """Synthesize ``text`` through the volcengine HTTP TTS endpoint.

    Args:
        text: Plain text to synthesize.
        appid: volcengine application id, sent as ``app.appid``.
        access_token: Token sent in the ``Authorization`` header.
        voice: Key into the module-level ``useful_voice`` mapping.
        speed_ratio: Forwarded as ``audio.speed_ratio``.
        volume_ratio: Forwarded as ``audio.volume_ratio``.
        pitch_ratio: Forwarded as ``audio.pitch_ratio``.
        背景音乐: Background music, passed through to ``mix_background_music``.
        speaker_up: Passed through to ``mix_background_music``.
        back_up: Passed through to ``mix_background_music``.

    Returns:
        A 3-tuple. On failure ``(error_text, None, None)``; on success
        ``(None, *mix_background_music(...))``.
    """
    host = "openspeech.bytedance.com"
    api_url = f"https://{host}/api/v1/tts"

    # Resolve the voice up front so an unknown / cleared selection is reported
    # through the normal error tuple instead of escaping as a KeyError.
    voice_info = useful_voice.get(voice)
    if voice_info is None:
        return f"unknown voice: {voice!r}", None, None

    # NOTE(review): the "Bearer;" form (semicolon, no space) is kept exactly as
    # written — confirm against the volcengine authentication docs.
    header = {"Authorization": f"Bearer;{access_token}"}

    request_json = {
        "app": {
            "appid": appid,
            # Literal placeholder string, not the access_token argument; the
            # real token travels in the Authorization header.
            # NOTE(review): confirm the API accepts any non-empty value here.
            "token": "access_token",
            "cluster": "volcano_tts"
        },
        "user": {
            "uid": "388808087185088"
        },
        "audio": {
            "voice_type": voice_info.voice_type,
            "encoding": "mp3",
            "speed_ratio": speed_ratio,
            "volume_ratio": volume_ratio,
            "pitch_ratio": pitch_ratio,
        },
        "request": {
            "reqid": str(uuid.uuid4()),
            "text": text,
            "text_type": "plain",
            "operation": "query",
            "with_frontend": 1,
            "frontend_type": "unitTson"
        }
    }
    try:
        # (connect, read) timeout so a stalled connection cannot hang the UI
        # forever; a timeout surfaces as an error string via the except below.
        resp = requests.post(api_url, data=json.dumps(request_json), headers=header,
                             timeout=(10, 120))

        # Parse the body once and reuse it.
        payload = resp.json()
        if "data" not in payload:
            return str(payload), None, None

        # "data" carries the base64-encoded mp3 audio.
        mp3_file = base64.b64decode(payload["data"])
        original_audio = AudioSegment.from_mp3(io.BytesIO(mp3_file))

        return None, *mix_background_music(original_audio, 背景音乐, speaker_up,
                                           back_up)
    except Exception as e:
        # UI boundary: every failure is reported as text in the first slot.
        return str(e), None, None
|
109 |
+
|
110 |
+
|
111 |
+
# Voice catalogue, built once at import time from the default spreadsheet;
# tts() looks the selected voice up in this mapping.
useful_voice = get_data_map()
|
main.py
CHANGED
@@ -6,6 +6,7 @@ from TTSs import elevenlabs_tts
|
|
6 |
from TTSs import genshin_api_tts
|
7 |
from TTSs import openai_tts
|
8 |
from TTSs import reecho_ai
|
|
|
9 |
|
10 |
try:
|
11 |
import load_env
|
@@ -26,10 +27,12 @@ with gr.Blocks() as TTS_merge:
|
|
26 |
config_genshin_api: gr.Group(visible=False),
|
27 |
config_genshin_local: gr.Group(visible=False),
|
28 |
config_reecho_ai: gr.Group(visible=False),
|
|
|
29 |
btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
|
30 |
btn_openai: gr.Button("一键合成", variant="primary", visible=False),
|
31 |
btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
|
32 |
btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
|
|
|
33 |
btn_genshin_local: gr.Button("一键合成", variant="primary", visible=False),
|
34 |
}
|
35 |
else:
|
@@ -38,9 +41,11 @@ with gr.Blocks() as TTS_merge:
|
|
38 |
config_openai: gr.Group(visible=False),
|
39 |
config_genshin_api: gr.Group(visible=False),
|
40 |
config_reecho_ai: gr.Group(visible=False),
|
|
|
41 |
btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
|
42 |
btn_openai: gr.Button("一键合成", variant="primary", visible=False),
|
43 |
btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
|
|
|
44 |
btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
|
45 |
}
|
46 |
|
@@ -60,6 +65,10 @@ with gr.Blocks() as TTS_merge:
|
|
60 |
elif select_bar == 'reecho-ai':
|
61 |
available_configs[config_reecho_ai] = gr.Group(visible=True)
|
62 |
available_configs[btn_recho_ai] = gr.Button("一键合成", variant="primary", visible=True)
|
|
|
|
|
|
|
|
|
63 |
else:
|
64 |
raise Exception('select error')
|
65 |
|
@@ -69,9 +78,9 @@ with gr.Blocks() as TTS_merge:
|
|
69 |
with gr.Row():
|
70 |
with gr.Column():
|
71 |
if using_local:
|
72 |
-
select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'genshin-local']
|
73 |
else:
|
74 |
-
select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai']
|
75 |
|
76 |
with gr.Group():
|
77 |
select_bar = gr.Dropdown(choices=select, label="选择测试接口", value=select[0], interactive=True)
|
@@ -189,6 +198,35 @@ with gr.Blocks() as TTS_merge:
|
|
189 |
inputs=[reecho_api_key],
|
190 |
outputs=[role])
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
if using_local:
|
193 |
with gr.Group(visible=False) as config_genshin_local:
|
194 |
from TTSs.genshin_bg import speakers as speakers_genshin_local
|
@@ -238,6 +276,7 @@ with gr.Blocks() as TTS_merge:
|
|
238 |
btn_openai = gr.Button("一键合成", variant="primary", visible=False)
|
239 |
btn_genshin_api = gr.Button("一键合成", variant="primary", visible=False)
|
240 |
btn_recho_ai = gr.Button("一键合成", variant="primary", visible=False)
|
|
|
241 |
|
242 |
if using_local:
|
243 |
btn_genshin_local = gr.Button("一键合成", variant="primary", visible=False)
|
@@ -251,12 +290,12 @@ with gr.Blocks() as TTS_merge:
|
|
251 |
select_bar.change(change_config_page, inputs=[select_bar],
|
252 |
outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai,
|
253 |
config_genshin_local, btn_eleven,
|
254 |
-
btn_openai, btn_genshin_api, btn_recho_ai, btn_genshin_local])
|
255 |
else:
|
256 |
select_bar.change(change_config_page, inputs=[select_bar],
|
257 |
outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai, btn_eleven,
|
258 |
btn_openai,
|
259 |
-
btn_recho_ai, btn_genshin_api])
|
260 |
|
261 |
btn_eleven.click(elevenlabs_tts.merge_audio,
|
262 |
inputs=[elevenlabs_api_key, text, audio, speaker_eleven, stability,
|
@@ -279,6 +318,11 @@ with gr.Blocks() as TTS_merge:
|
|
279 |
audio, speaker_up, back_up],
|
280 |
outputs=[text_output, ori_audio_output, mix_audio_output])
|
281 |
|
|
|
|
|
|
|
|
|
|
|
282 |
if using_local:
|
283 |
btn_genshin_local.click(genshin_local_tts.func_genshin,
|
284 |
inputs=[
|
|
|
6 |
from TTSs import genshin_api_tts
|
7 |
from TTSs import openai_tts
|
8 |
from TTSs import reecho_ai
|
9 |
+
from TTSs import volcengine
|
10 |
|
11 |
try:
|
12 |
import load_env
|
|
|
27 |
config_genshin_api: gr.Group(visible=False),
|
28 |
config_genshin_local: gr.Group(visible=False),
|
29 |
config_reecho_ai: gr.Group(visible=False),
|
30 |
+
config_volcengine: gr.Group(visible=False),
|
31 |
btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
|
32 |
btn_openai: gr.Button("一键合成", variant="primary", visible=False),
|
33 |
btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
|
34 |
btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
|
35 |
+
btn_volcengine: gr.Button("一键合成", variant="primary", visible=False),
|
36 |
btn_genshin_local: gr.Button("一键合成", variant="primary", visible=False),
|
37 |
}
|
38 |
else:
|
|
|
41 |
config_openai: gr.Group(visible=False),
|
42 |
config_genshin_api: gr.Group(visible=False),
|
43 |
config_reecho_ai: gr.Group(visible=False),
|
44 |
+
config_volcengine: gr.Group(visible=False),
|
45 |
btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
|
46 |
btn_openai: gr.Button("一键合成", variant="primary", visible=False),
|
47 |
btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
|
48 |
+
btn_volcengine: gr.Button("一键合成", variant="primary", visible=False),
|
49 |
btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
|
50 |
}
|
51 |
|
|
|
65 |
elif select_bar == 'reecho-ai':
|
66 |
available_configs[config_reecho_ai] = gr.Group(visible=True)
|
67 |
available_configs[btn_recho_ai] = gr.Button("一键合成", variant="primary", visible=True)
|
68 |
+
elif select_bar == 'volcengine':
|
69 |
+
available_configs[config_volcengine] = gr.Group(visible=True)
|
70 |
+
available_configs[btn_volcengine] = gr.Button("一键合成", variant="primary", visible=True)
|
71 |
+
|
72 |
else:
|
73 |
raise Exception('select error')
|
74 |
|
|
|
78 |
with gr.Row():
|
79 |
with gr.Column():
|
80 |
if using_local:
|
81 |
+
select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'volcengine', 'genshin-local']
|
82 |
else:
|
83 |
+
select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'volcengine']
|
84 |
|
85 |
with gr.Group():
|
86 |
select_bar = gr.Dropdown(choices=select, label="选择测试接口", value=select[0], interactive=True)
|
|
|
198 |
inputs=[reecho_api_key],
|
199 |
outputs=[role])
|
200 |
|
201 |
+
with gr.Group(visible=False) as config_volcengine:
|
202 |
+
voices = list(volcengine.useful_voice.keys())
|
203 |
+
|
204 |
+
with gr.Row():
|
205 |
+
volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
|
206 |
+
placeholder="请输入volcengine的appid",
|
207 |
+
type="password",
|
208 |
+
interactive=True,
|
209 |
+
value=os.environ.get('VOLCENGINE_APPID', ''))
|
210 |
+
# Access-token input; pre-filled from the VOLCENGINE_ACCESS_TOKEN environment variable.
volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
                                     placeholder="请输入volcengine的access_token",
                                     type="password",
                                     interactive=True,
                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))
|
215 |
+
|
216 |
+
voice_type = gr.Dropdown(choices=voices, value=voices[0], label="音色选择", interactive=True)
|
217 |
+
|
218 |
+
with gr.Row():
|
219 |
+
speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
|
220 |
+
interactive=True)
|
221 |
+
volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
|
222 |
+
interactive=True)
|
223 |
+
pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
|
224 |
+
interactive=True)
|
225 |
+
|
226 |
+
with gr.Row():
|
227 |
+
emotion = gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
|
228 |
+
language = gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)
|
229 |
+
|
230 |
if using_local:
|
231 |
with gr.Group(visible=False) as config_genshin_local:
|
232 |
from TTSs.genshin_bg import speakers as speakers_genshin_local
|
|
|
276 |
btn_openai = gr.Button("一键合成", variant="primary", visible=False)
|
277 |
btn_genshin_api = gr.Button("一键合成", variant="primary", visible=False)
|
278 |
btn_recho_ai = gr.Button("一键合成", variant="primary", visible=False)
|
279 |
+
# Hidden at start-up like the other engine buttons; change_config_page
# makes it visible when 'volcengine' is selected.
btn_volcengine = gr.Button("一键合成", variant="primary", visible=False)
|
280 |
|
281 |
if using_local:
|
282 |
btn_genshin_local = gr.Button("一键合成", variant="primary", visible=False)
|
|
|
290 |
select_bar.change(change_config_page, inputs=[select_bar],
|
291 |
outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai,
|
292 |
config_genshin_local, btn_eleven,
|
293 |
+
btn_openai, btn_genshin_api, btn_recho_ai, btn_genshin_local, btn_volcengine, config_volcengine])
|
294 |
else:
|
295 |
select_bar.change(change_config_page, inputs=[select_bar],
|
296 |
outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai, btn_eleven,
|
297 |
btn_openai,
|
298 |
+
btn_recho_ai, btn_genshin_api, btn_volcengine, config_volcengine])
|
299 |
|
300 |
btn_eleven.click(elevenlabs_tts.merge_audio,
|
301 |
inputs=[elevenlabs_api_key, text, audio, speaker_eleven, stability,
|
|
|
318 |
audio, speaker_up, back_up],
|
319 |
outputs=[text_output, ori_audio_output, mix_audio_output])
|
320 |
|
321 |
+
btn_volcengine.click(volcengine.tts,
|
322 |
+
inputs=[text, volcengine_appid, volcengine_access_token, voice_type, speed_ratio,
|
323 |
+
volume_ratio, pitch_ratio, audio, speaker_up, back_up],
|
324 |
+
outputs=[text_output, ori_audio_output, mix_audio_output])
|
325 |
+
|
326 |
if using_local:
|
327 |
btn_genshin_local.click(genshin_local_tts.func_genshin,
|
328 |
inputs=[
|
requirements.txt
CHANGED
@@ -32,3 +32,5 @@ g2p_en
|
|
32 |
sentencepiece
|
33 |
pykakasi
|
34 |
langid
|
|
|
|
|
|
32 |
sentencepiece
|
33 |
pykakasi
|
34 |
langid
|
35 |
+
pandas
|
36 |
+
openpyxl
|
requirements_light.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
librosa==0.9.2
|
2 |
+
matplotlib
|
3 |
+
elevenlabs
|
4 |
+
openai
|
5 |
+
numpy
|
6 |
+
numba
|
7 |
+
scipy
|
8 |
+
gradio
|
9 |
+
pandas
|
10 |
+
openpyxl
|