jianuo committed on
Commit
6cce716
1 Parent(s): 446c342

添加新引擎

Browse files
README.md CHANGED
@@ -10,7 +10,7 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- # TTS 4合1 引擎
14
 
15
  这是一个网页版的TTS引擎,支持以下4个TTS引擎:
16
 
@@ -18,6 +18,7 @@ license: mit
18
  - openai
19
  - genshin(原神)
20
  - REECHO 睿声
 
21
 
22
  其中,原神TTS引擎原作者为[红血球AE3803](https://space.bilibili.com/6589795)
23
 
@@ -64,6 +65,13 @@ license: mit
64
 
65
  API申请地址:https://dash.reecho.ai/apiKey
66
 
 
 
 
 
 
 
 
67
  ## 安装方法
68
 
69
  安装python(推荐3.10)
 
10
  license: mit
11
  ---
12
 
13
+ # TTS 5合1 引擎
14
 
15
  这是一个网页版的TTS引擎,支持以下5个TTS引擎:
16
 
 
18
  - openai
19
  - genshin(原神)
20
  - REECHO 睿声
21
+ - volcengine(火山引擎)
22
 
23
  其中,原神TTS引擎原作者为[红血球AE3803](https://space.bilibili.com/6589795)
24
 
 
65
 
66
  API申请地址:https://dash.reecho.ai/apiKey
67
 
68
+ ### volcengine(火山引擎)
69
+ 控制台&申请地址:https://console.volcengine.com/speech/service/8?AppID=9866746965
70
+
71
+ `VOLCENGINE_APPID`:用于volcengine
72
+
73
+ `VOLCENGINE_ACCESS_TOKEN`:用于volcengine
74
+
75
  ## 安装方法
76
 
77
  安装python(推荐3.10)
TTSs/volcengine/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .volcengine import tts, useful_voice
TTSs/volcengine/voice_list.xlsx ADDED
Binary file (14.6 kB). View file
 
TTSs/volcengine/volcengine.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import json
4
+ import os
5
+ import uuid
6
+ from typing import Optional
7
+
8
+ import pandas as pd
9
+ import requests
10
+ from pydub import AudioSegment
11
+
12
+ from ..tts_utils import mix_background_music
13
+
14
+
15
class avaliable_voice_type:
    """One selectable voice, described by the fields listed below.

    ``str()``/``repr()`` of an instance yields a human-readable label built
    from the voice name plus every non-empty descriptive field.
    """

    # Language of the voice (optional).
    语言: Optional[str] = ""
    # Usage scenario of the voice (optional).
    场景: Optional[str] = ""
    # Display name of the voice.
    音色名称: str
    # Identifier sent to the API as ``voice_type``.
    voice_type: str
    # Timestamp-support flag; not part of the label.
    时间戳支持: bool = False
    # Supported emotion / style types (optional).
    支持情感与风格类型: Optional[str] = ""
    # Supported language types (optional).
    支持语言类型: Optional[str] = ""

    # Optional fields appended to the label, in display order.
    _LABEL_FIELDS = ("语言", "场景", "支持情感与风格类型", "支持语言类型")

    def __repr__(self):
        """Return ``音色: <name>`` followed by ``——<value>`` per non-empty field.

        Fields are resolved with ``getattr`` rather than ``self.__dict__`` so
        that values left at their class-level default (or never assigned) are
        treated as empty instead of raising ``KeyError``.
        """
        parts = [f"音色: {getattr(self, '音色名称', '')}"]
        for field_name in self._LABEL_FIELDS:
            value = getattr(self, field_name, "")
            if value:
                parts.append(str(value))
        return "——".join(parts)
39
+
40
+
41
def get_data_map(filename="voice_list.xlsx"):
    """Load the voice spreadsheet next to this module into a lookup dict.

    Args:
        filename: Name of the Excel file, resolved relative to the directory
            containing this module.

    Returns:
        A dict mapping ``str(voice)`` (the display label) to the populated
        ``avaliable_voice_type`` instance for each spreadsheet row. Rows that
        produce the same label overwrite earlier ones.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    sheet = pd.read_excel(os.path.join(module_dir, filename))
    # Empty cells become '' so optional fields read as falsy.
    sheet.fillna('', inplace=True)

    # (spreadsheet column, attribute on avaliable_voice_type), in read order.
    column_to_attr = (
        ('语言', '语言'),
        ('场景', '场景'),
        ('音色名称', '音色名称'),
        ('voice_type', 'voice_type'),
        ('时间戳', '时间戳支持'),
        ('支持情感/风格类型', '支持情感与风格类型'),
        ('支持语言类型', '支持语言类型'),
    )

    voices = {}
    for _, record in sheet.iterrows():
        entry = avaliable_voice_type()
        for column, attr in column_to_attr:
            setattr(entry, attr, record[column])
        voices[str(entry)] = entry

    return voices
59
+
60
+
61
def tts(text, appid, access_token, voice, speed_ratio,
        volume_ratio, pitch_ratio, 背景音乐, speaker_up, back_up, timeout=60):
    """Synthesize *text* through the Volcengine HTTP TTS endpoint.

    Args:
        text: Plain text to synthesize.
        appid: Volcengine application id, sent in the request body.
        access_token: Access token, sent in the ``Authorization`` header.
        voice: Key into the module-level ``useful_voice`` mapping.
        speed_ratio: Value forwarded as ``audio.speed_ratio``.
        volume_ratio: Value forwarded as ``audio.volume_ratio``.
        pitch_ratio: Value forwarded as ``audio.pitch_ratio``.
        背景音乐: Background music, passed through to ``mix_background_music``.
        speaker_up: Passed through to ``mix_background_music``.
        back_up: Passed through to ``mix_background_music``.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        A tuple whose first element is an error message or ``None``.
        On failure: ``(error_message, None, None)``.
        On success: ``(None, *mix_background_music(...))`` -- presumably two
        audio values, matching the three outputs wired up by the caller.
    """
    host = "openspeech.bytedance.com"
    api_url = f"https://{host}/api/v1/tts"

    header = {"Authorization": f"Bearer;{access_token}"}

    # Report an unknown voice through the normal error channel instead of
    # letting a KeyError escape to the caller.
    voice_entry = useful_voice.get(voice)
    if voice_entry is None:
        return f"unknown voice: {voice!r}", None, None

    request_json = {
        "app": {
            "appid": appid,
            # NOTE(review): this is the literal string "access_token", not the
            # argument; the real token travels in the Authorization header.
            # Confirm against the API docs that this field is not validated.
            "token": "access_token",
            "cluster": "volcano_tts"
        },
        "user": {
            "uid": "388808087185088"
        },
        "audio": {
            "voice_type": voice_entry.voice_type,
            "encoding": "mp3",
            "speed_ratio": speed_ratio,
            "volume_ratio": volume_ratio,
            "pitch_ratio": pitch_ratio,
        },
        "request": {
            "reqid": str(uuid.uuid4()),
            "text": text,
            "text_type": "plain",
            "operation": "query",
            "with_frontend": 1,
            "frontend_type": "unitTson"
        }
    }
    try:
        resp = requests.post(api_url, data=json.dumps(request_json),
                             headers=header, timeout=timeout)

        # Parse the body once; without a "data" field the whole payload is
        # returned as the error text.
        payload = resp.json()
        if "data" not in payload:
            return str(payload), None, None

        mp3_file = base64.b64decode(payload["data"])
        original_audio = AudioSegment.from_mp3(io.BytesIO(mp3_file))

        return None, *mix_background_music(original_audio, 背景音乐, speaker_up,
                                           back_up)
    except Exception as e:
        # UI boundary: surface any failure (network, decoding, mixing) as text.
        return str(e), None, None
109
+
110
+
111
+ useful_voice = get_data_map()
main.py CHANGED
@@ -6,6 +6,7 @@ from TTSs import elevenlabs_tts
6
  from TTSs import genshin_api_tts
7
  from TTSs import openai_tts
8
  from TTSs import reecho_ai
 
9
 
10
  try:
11
  import load_env
@@ -26,10 +27,12 @@ with gr.Blocks() as TTS_merge:
26
  config_genshin_api: gr.Group(visible=False),
27
  config_genshin_local: gr.Group(visible=False),
28
  config_reecho_ai: gr.Group(visible=False),
 
29
  btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
30
  btn_openai: gr.Button("一键合成", variant="primary", visible=False),
31
  btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
32
  btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
 
33
  btn_genshin_local: gr.Button("一键合成", variant="primary", visible=False),
34
  }
35
  else:
@@ -38,9 +41,11 @@ with gr.Blocks() as TTS_merge:
38
  config_openai: gr.Group(visible=False),
39
  config_genshin_api: gr.Group(visible=False),
40
  config_reecho_ai: gr.Group(visible=False),
 
41
  btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
42
  btn_openai: gr.Button("一键合成", variant="primary", visible=False),
43
  btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
 
44
  btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
45
  }
46
 
@@ -60,6 +65,10 @@ with gr.Blocks() as TTS_merge:
60
  elif select_bar == 'reecho-ai':
61
  available_configs[config_reecho_ai] = gr.Group(visible=True)
62
  available_configs[btn_recho_ai] = gr.Button("一键合成", variant="primary", visible=True)
 
 
 
 
63
  else:
64
  raise Exception('select error')
65
 
@@ -69,9 +78,9 @@ with gr.Blocks() as TTS_merge:
69
  with gr.Row():
70
  with gr.Column():
71
  if using_local:
72
- select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'genshin-local']
73
  else:
74
- select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai']
75
 
76
  with gr.Group():
77
  select_bar = gr.Dropdown(choices=select, label="选择测试接口", value=select[0], interactive=True)
@@ -189,6 +198,35 @@ with gr.Blocks() as TTS_merge:
189
  inputs=[reecho_api_key],
190
  outputs=[role])
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  if using_local:
193
  with gr.Group(visible=False) as config_genshin_local:
194
  from TTSs.genshin_bg import speakers as speakers_genshin_local
@@ -238,6 +276,7 @@ with gr.Blocks() as TTS_merge:
238
  btn_openai = gr.Button("一键合成", variant="primary", visible=False)
239
  btn_genshin_api = gr.Button("一键合成", variant="primary", visible=False)
240
  btn_recho_ai = gr.Button("一键合成", variant="primary", visible=False)
 
241
 
242
  if using_local:
243
  btn_genshin_local = gr.Button("一键合成", variant="primary", visible=False)
@@ -251,12 +290,12 @@ with gr.Blocks() as TTS_merge:
251
  select_bar.change(change_config_page, inputs=[select_bar],
252
  outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai,
253
  config_genshin_local, btn_eleven,
254
- btn_openai, btn_genshin_api, btn_recho_ai, btn_genshin_local])
255
  else:
256
  select_bar.change(change_config_page, inputs=[select_bar],
257
  outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai, btn_eleven,
258
  btn_openai,
259
- btn_recho_ai, btn_genshin_api])
260
 
261
  btn_eleven.click(elevenlabs_tts.merge_audio,
262
  inputs=[elevenlabs_api_key, text, audio, speaker_eleven, stability,
@@ -279,6 +318,11 @@ with gr.Blocks() as TTS_merge:
279
  audio, speaker_up, back_up],
280
  outputs=[text_output, ori_audio_output, mix_audio_output])
281
 
 
 
 
 
 
282
  if using_local:
283
  btn_genshin_local.click(genshin_local_tts.func_genshin,
284
  inputs=[
 
6
  from TTSs import genshin_api_tts
7
  from TTSs import openai_tts
8
  from TTSs import reecho_ai
9
+ from TTSs import volcengine
10
 
11
  try:
12
  import load_env
 
27
  config_genshin_api: gr.Group(visible=False),
28
  config_genshin_local: gr.Group(visible=False),
29
  config_reecho_ai: gr.Group(visible=False),
30
+ config_volcengine: gr.Group(visible=False),
31
  btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
32
  btn_openai: gr.Button("一键合成", variant="primary", visible=False),
33
  btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
34
  btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
35
+ btn_volcengine: gr.Button("一键合成", variant="primary", visible=False),
36
  btn_genshin_local: gr.Button("一键合成", variant="primary", visible=False),
37
  }
38
  else:
 
41
  config_openai: gr.Group(visible=False),
42
  config_genshin_api: gr.Group(visible=False),
43
  config_reecho_ai: gr.Group(visible=False),
44
+ config_volcengine: gr.Group(visible=False),
45
  btn_eleven: gr.Button("一键合成", variant="primary", visible=False),
46
  btn_openai: gr.Button("一键合成", variant="primary", visible=False),
47
  btn_recho_ai: gr.Button("一键合成", variant="primary", visible=False),
48
+ btn_volcengine: gr.Button("一键合成", variant="primary", visible=False),
49
  btn_genshin_api: gr.Button("一键合成", variant="primary", visible=False),
50
  }
51
 
 
65
  elif select_bar == 'reecho-ai':
66
  available_configs[config_reecho_ai] = gr.Group(visible=True)
67
  available_configs[btn_recho_ai] = gr.Button("一键合成", variant="primary", visible=True)
68
+ elif select_bar == 'volcengine':
69
+ available_configs[config_volcengine] = gr.Group(visible=True)
70
+ available_configs[btn_volcengine] = gr.Button("一键合成", variant="primary", visible=True)
71
+
72
  else:
73
  raise Exception('select error')
74
 
 
78
  with gr.Row():
79
  with gr.Column():
80
  if using_local:
81
+ select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'volcengine', 'genshin-local']
82
  else:
83
+ select = ['elevenlabs', 'openai', 'genshin-api', 'reecho-ai', 'volcengine']
84
 
85
  with gr.Group():
86
  select_bar = gr.Dropdown(choices=select, label="选择测试接口", value=select[0], interactive=True)
 
198
  inputs=[reecho_api_key],
199
  outputs=[role])
200
 
201
+ with gr.Group(visible=False) as config_volcengine:
202
+ voices = list(volcengine.useful_voice.keys())
203
+
204
+ with gr.Row():
205
+ volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
206
+ placeholder="请输入volcengine的appid",
207
+ type="password",
208
+ interactive=True,
209
+ value=os.environ.get('VOLCENGINE_APPID', ''))
210
+ volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
211
+ placeholder="请输入volengine的access_token",
212
+ type="password",
213
+ interactive=True,
214
+ value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))
215
+
216
+ voice_type = gr.Dropdown(choices=voices, value=voices[0], label="音色选择", interactive=True)
217
+
218
+ with gr.Row():
219
+ speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
220
+ interactive=True)
221
+ volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
222
+ interactive=True)
223
+ pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
224
+ interactive=True)
225
+
226
+ with gr.Row():
227
+ emotion = gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
228
+ language = gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)
229
+
230
  if using_local:
231
  with gr.Group(visible=False) as config_genshin_local:
232
  from TTSs.genshin_bg import speakers as speakers_genshin_local
 
276
  btn_openai = gr.Button("一键合成", variant="primary", visible=False)
277
  btn_genshin_api = gr.Button("一键合成", variant="primary", visible=False)
278
  btn_recho_ai = gr.Button("一键合成", variant="primary", visible=False)
279
+ btn_volcengine = gr.Button("一键合成", variant="primary", visible=True)
280
 
281
  if using_local:
282
  btn_genshin_local = gr.Button("一键合成", variant="primary", visible=False)
 
290
  select_bar.change(change_config_page, inputs=[select_bar],
291
  outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai,
292
  config_genshin_local, btn_eleven,
293
+ btn_openai, btn_genshin_api, btn_recho_ai, btn_genshin_local, btn_volcengine, config_volcengine])
294
  else:
295
  select_bar.change(change_config_page, inputs=[select_bar],
296
  outputs=[config_eleven, config_openai, config_genshin_api, config_reecho_ai, btn_eleven,
297
  btn_openai,
298
+ btn_recho_ai, btn_genshin_api, btn_volcengine, config_volcengine])
299
 
300
  btn_eleven.click(elevenlabs_tts.merge_audio,
301
  inputs=[elevenlabs_api_key, text, audio, speaker_eleven, stability,
 
318
  audio, speaker_up, back_up],
319
  outputs=[text_output, ori_audio_output, mix_audio_output])
320
 
321
+ btn_volcengine.click(volcengine.tts,
322
+ inputs=[text, volcengine_appid, volcengine_access_token, voice_type, speed_ratio,
323
+ volume_ratio, pitch_ratio, audio, speaker_up, back_up],
324
+ outputs=[text_output, ori_audio_output, mix_audio_output])
325
+
326
  if using_local:
327
  btn_genshin_local.click(genshin_local_tts.func_genshin,
328
  inputs=[
requirements.txt CHANGED
@@ -32,3 +32,5 @@ g2p_en
32
  sentencepiece
33
  pykakasi
34
  langid
 
 
 
32
  sentencepiece
33
  pykakasi
34
  langid
35
+ pandas
36
+ openpyxl
requirements_light.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ librosa==0.9.2
2
+ matplotlib
3
+ elevenlabs
4
+ openai
5
+ numpy
6
+ numba
7
+ scipy
8
+ gradio
9
+ pandas
10
+ openpyxl