|
import io |
|
import os |
|
|
|
import gradio as gr |
|
from elevenlabs import voices, generate, set_api_key, VoiceSettings, Voice |
|
from pydub import AudioSegment |
|
|
|
from TTSs.base_tts import Base_TTS |
|
|
|
|
|
class eleven_TTS(Base_TTS):
    """ElevenLabs text-to-speech backend with a Gradio configuration panel.

    The list of available voices is fetched once, when the instance is
    created, and is used both to fill the voice dropdown and to resolve the
    selected dropdown entry back to an ElevenLabs voice id at synthesis time.
    """

    # Placeholder entry used for the dropdown when the voice list cannot be loaded.
    _VOICE_LOAD_FAILED = '获取音色失败'

    def __init__(self):
        # NOTE(review): the base-class initializer is not called here (the
        # original did not call it either) -- confirm Base_TTS needs no setup.
        self.voices_eleven, self.speakers_eleven, self.select_key = self.get_eleven_spk()

    def get_name(self):
        """Return the display name of this backend."""
        return 'ElevenLabs'

    def _get_config_page(self):
        """Build the (initially hidden) ElevenLabs settings panel.

        Returns:
            A ``(config, inputs)`` tuple. ``config`` is the ``gr.Group`` that
            wraps the panel; ``inputs`` is the list of components in the order
            ``_generate`` expects them after ``text``: API key, voice,
            stability, similarity boost, style, speaker boost.
        """
        # An empty voice list must not crash UI construction with IndexError.
        default_speaker = self.select_key[0] if self.select_key else None

        with gr.Group(visible=False) as config:
            with gr.Row():
                elevenlabs_api_key = gr.Textbox(label="elevenlabs的API Key(默认为环境变量值)",
                                                placeholder="请输入elevenlabs的API Key",
                                                type="password",
                                                interactive=True,
                                                value=os.environ.get('ELEVENLABS_API_KEY', ''))
                speaker_eleven = gr.Dropdown(choices=self.select_key,
                                             value=default_speaker, label="音色选择",
                                             interactive=True)
            with gr.Blocks():
                with gr.Row():
                    stability = gr.Slider(minimum=0, maximum=1, value=0.5, label='stability', interactive=True)
                    similarity_boost = gr.Slider(minimum=0, maximum=1, value=0.75, label='similarity_boost',
                                                 interactive=True)

                with gr.Row():
                    style = gr.Slider(minimum=0, maximum=1, value=0, label='style', interactive=True)
                    use_speaker_boost = gr.Checkbox(label="Use Speaker Boost", value=True, interactive=True)

        inputs = [
            elevenlabs_api_key,
            speaker_eleven,
            stability,
            similarity_boost,
            style,
            use_speaker_boost,
        ]

        return config, inputs

    def _get_submit_button(self):
        """Create the submit button for this backend.

        Must return exactly one value: ``btn``.

        :return: btn, a ``gr.Button`` object that serves as the submit button
            (created hidden).
        """
        btn = gr.Button(value="ElevenLabs提交", variant="primary", interactive=True, visible=False)
        return btn

    def _generate(self, text, token, 音色选择, stability, similarity_boost, style, use_speaker_boost):
        """Synthesize ``text`` with the selected ElevenLabs voice.

        Args:
            text: Text to synthesize.
            token: ElevenLabs API key, passed to ``set_api_key``.
            音色选择: Dropdown entry naming the voice; must be a key of
                ``self.speakers_eleven``.
            stability, similarity_boost, style, use_speaker_boost: Values
                forwarded to ``VoiceSettings``.

        Returns:
            A ``pydub.AudioSegment`` decoded from the returned MP3 bytes.

        Raises:
            ValueError: If the selected entry cannot be resolved to a voice,
                e.g. because the voice list failed to load and only the
                placeholder entry is available.
        """
        set_api_key(token)

        # Resolve dropdown entry -> index -> voice. When the voice list failed
        # to load, the mapping holds a placeholder string instead of an index,
        # which would otherwise surface as an opaque TypeError.
        try:
            selected_voice = self.voices_eleven[self.speakers_eleven[音色选择]]
        except (KeyError, IndexError, TypeError) as err:
            raise ValueError(
                f"ElevenLabs voice {音色选择!r} is not available; the voice list "
                "may have failed to load (check the API key and network)"
            ) from err

        original_audio_bytes = generate(
            text=text,
            voice=Voice(voice_id=selected_voice.voice_id,
                        settings=VoiceSettings(stability=stability, similarity_boost=similarity_boost, style=style,
                                               use_speaker_boost=use_speaker_boost)),
            model="eleven_multilingual_v2",
        )

        original_audio = AudioSegment.from_file(io.BytesIO(original_audio_bytes), format="mp3")
        return original_audio

    @staticmethod
    def _eleven_voice_key(voice):
        """Build the dropdown entry ``"<name>-<labels dict>"`` for one voice."""
        labels = voice.labels or {}

        label = {'口音': labels.get('accent')}

        # Both spellings are accepted; the one with a trailing space wins.
        for name in ('description ', 'description'):
            if name in labels:
                label['描述'] = labels[name]
                break

        # Missing labels become None rather than raising, so one sparsely
        # labelled voice cannot discard the whole voice list.
        label['年龄'] = labels.get('age')
        label['性别'] = labels.get('gender')

        # The use-case entry is optional: it is omitted when neither
        # spelling is present.
        for name in ('use case', 'usecase'):
            if name in labels:
                label['用例'] = labels[name]
                break

        return f'{voice.name}-{label}'

    def get_eleven_spk(self):
        """Fetch the ElevenLabs voices and build the dropdown lookup tables.

        Returns:
            A ``(voices_eleven, speakers_eleven, select_key)`` tuple:
            the object returned by ``voices()``, a dict mapping each dropdown
            entry to the voice's position in that object, and the list of
            dropdown entries. If anything fails, the error is printed and a
            placeholder result is returned instead: ``[]``, a one-entry dict
            and a one-entry list, both holding the placeholder label.
        """
        try:
            voices_eleven = voices()
            speakers_eleven = {
                self._eleven_voice_key(voice): index
                for index, voice in enumerate(voices_eleven)
            }
        except Exception as e:
            # Deliberately best-effort: construction must not fail just
            # because the voice list is unreachable.
            print(e)
            failed = self._VOICE_LOAD_FAILED
            return [], {failed: failed}, [failed]

        return voices_eleven, speakers_eleven, list(speakers_eleven)
|
|