TTS_all_in_one / TTSs /elevenlabs_tts.py
jianuo
代码重构,将功能模块化,使代码高内聚,低耦合
14a6e16
raw
history blame
No virus
4.18 kB
import io
import os
import gradio as gr
from elevenlabs import voices, generate, set_api_key, VoiceSettings, Voice
from pydub import AudioSegment
from TTSs.base_tts import Base_TTS
class eleven_TTS(Base_TTS):
    """ElevenLabs text-to-speech backend together with its Gradio widgets.

    The voice catalogue is loaded once, when the instance is created, via
    ``get_eleven_spk``. If that load fails, the instance still comes up with a
    single placeholder dropdown entry so the UI can be built.
    """

    def __init__(self):
        # voices_eleven:   voice objects as returned by ``voices()``
        # speakers_eleven: dropdown label -> index into ``voices_eleven``
        # select_key:      dropdown labels, in listing order
        self.voices_eleven, self.speakers_eleven, self.select_key = self.get_eleven_spk()

    def get_name(self):
        """Return the display name of this backend."""
        return 'ElevenLabs'

    def _get_config_page(self):
        """Build the (initially hidden) configuration panel.

        :return: ``(config, inputs)`` where ``config`` is the enclosing
            ``gr.Group`` and ``inputs`` lists the widgets in the order
            ``_generate`` receives their values after ``text``.
        """
        # An account with no voices yields an empty label list; fall back to
        # "no selection" instead of raising IndexError on ``[0]``.
        default_speaker = self.select_key[0] if self.select_key else None

        with gr.Group(visible=False) as config:
            with gr.Row():
                elevenlabs_api_key = gr.Textbox(label="elevenlabs的API Key(默认为环境变量值)",
                                                placeholder="请输入elevenlabs的API Key",
                                                type="password",
                                                interactive=True,
                                                value=os.environ.get('ELEVENLABS_API_KEY', ''))
                speaker_eleven = gr.Dropdown(choices=self.select_key,
                                             value=default_speaker, label="音色选择",
                                             interactive=True)
            with gr.Blocks():
                with gr.Row():
                    stability = gr.Slider(minimum=0, maximum=1, value=0.5, label='stability', interactive=True)
                    similarity_boost = gr.Slider(minimum=0, maximum=1, value=0.75, label='similarity_boost',
                                                 interactive=True)
                with gr.Row():
                    style = gr.Slider(minimum=0, maximum=1, value=0, label='style', interactive=True)
                    use_speaker_boost = gr.Checkbox(label="Use Speaker Boost", value=True, interactive=True)
        inputs = [
            elevenlabs_api_key,
            speaker_eleven,
            stability,
            similarity_boost,
            style,
            use_speaker_boost
        ]
        return config, inputs

    def _get_submit_button(self):
        """Create the submit button for this backend.

        Must return exactly one value:
        btn: the ``gr.Button`` used to submit a request (created hidden).

        :return: btn
        """
        btn = gr.Button(value="ElevenLabs提交", variant="primary", interactive=True, visible=False)
        return btn

    def _generate(self, text, token, 音色选择, stability, similarity_boost, style, use_speaker_boost):
        """Synthesize ``text`` with the selected voice.

        :param text: text to synthesize
        :param token: ElevenLabs API key, passed to ``set_api_key``
        :param 音色选择: dropdown label of the voice (a key of ``speakers_eleven``)
        :param stability: forwarded to ``VoiceSettings``
        :param similarity_boost: forwarded to ``VoiceSettings``
        :param style: forwarded to ``VoiceSettings``
        :param use_speaker_boost: forwarded to ``VoiceSettings``
        :return: the generated audio decoded into a pydub ``AudioSegment``
        :raises ValueError: if the label does not map to a loaded voice
        """
        # Resolve the voice before touching the API. When the catalogue failed
        # to load, the placeholder label maps to a string rather than an index,
        # which previously surfaced as an opaque TypeError on list indexing.
        voice_index = self.speakers_eleven.get(音色选择)
        if not isinstance(voice_index, int):
            raise ValueError(
                f"ElevenLabs voice {音色选择!r} is not available "
                "(the voice list may have failed to load)"
            )
        voice_id = self.voices_eleven[voice_index].voice_id

        set_api_key(token)
        original_audio_bytes = generate(
            text=text,
            voice=Voice(voice_id=voice_id,
                        settings=VoiceSettings(stability=stability, similarity_boost=similarity_boost, style=style,
                                               use_speaker_boost=use_speaker_boost)),
            model="eleven_multilingual_v2",
        )
        # Decode the returned MP3 byte string into an AudioSegment.
        original_audio = AudioSegment.from_file(io.BytesIO(original_audio_bytes), format="mp3")
        return original_audio

    def get_eleven_spk(self):
        """Fetch the voice catalogue and build the dropdown labels.

        Each label has the form ``"<name>-<label dict>"``. Voices lacking some
        labels are still listed (with ``None`` or an omitted entry) rather than
        aborting the whole load.

        :return: ``(voices_eleven, speakers_eleven, select_key)``; on any
            failure, ``([], {placeholder: placeholder}, [placeholder])``.
        """
        try:
            voices_eleven = voices()
            speakers_eleven = {}
            for i, v in enumerate(voices_eleven):
                raw_labels = v.labels or {}
                label = {}
                label['口音'] = raw_labels.get('accent')
                # Both spellings are accepted; the first one carries a
                # trailing space.
                if 'description ' in raw_labels:
                    label['描述'] = raw_labels['description ']
                elif 'description' in raw_labels:
                    label['描述'] = raw_labels['description']
                label['年龄'] = raw_labels.get('age')
                label['性别'] = raw_labels.get('gender')
                if 'use case' in raw_labels:
                    label['用例'] = raw_labels['use case']
                elif 'usecase' in raw_labels:
                    label['用例'] = raw_labels['usecase']
                key = v.name + '-' + str(label)
                speakers_eleven[key] = i
            select_key = list(speakers_eleven.keys())
            return voices_eleven, speakers_eleven, select_key
        except Exception as e:
            # Best-effort: the UI must still build when the catalogue cannot
            # be fetched, so report the error and return a placeholder.
            print(e)
            return [], {'获取音色失败': '获取音色失败'}, ['获取音色失败']