import io
import logging
import os
import traceback

import gradio as gr
import scipy.io.wavfile as wavfile
from pydub import AudioSegment

from TTSs.base_tts import Base_TTS


class genshin_local_TTS(Base_TTS):
    """Gradio front end for the local Genshin voice-synthesis backend.

    The backend module (``.genshin_bg``) is imported inside ``__init__`` and
    only when ``is_show()`` is true, i.e. when the ``USING_LOCAL`` environment
    variable equals ``"true"`` (case-insensitive).

    Attributes:
        speakers_genshin_local: Speaker names offered in the dropdown; empty
            when the backend is disabled or failed to import.
        languages: Language choices offered in the dropdown; empty when the
            backend is disabled or failed to import.
        genshin_tts_fn: Backend synthesis callable, or ``None`` if not loaded.
        change_dir: Backend callable returning a context manager that is
            entered around synthesis, or ``None`` if not loaded.
    """

    def __init__(self):
        # Define every attribute up front so that a disabled or broken backend
        # never surfaces later as an unrelated AttributeError.
        self.speakers_genshin_local = []
        self.languages = []
        self.genshin_tts_fn = None
        self.change_dir = None

        if not self.is_show():
            logging.info('未启用原神本地语音合成模块')
            return

        try:
            from .genshin_bg import speakers, languages, tts_fn, change_dir
        except Exception as e:
            # Deliberately broad: a failing backend import must not prevent
            # the rest of the application from starting.
            traceback.print_exc()
            logging.error('导入原神本地语音合成模块失败')
            logging.error(e)
            return

        self.speakers_genshin_local = speakers
        self.languages = languages
        self.genshin_tts_fn = tts_fn
        self.change_dir = change_dir
        logging.info('导入原神本地语音合成模块成功')

    def is_show(self) -> bool:
        """Return True when the ``USING_LOCAL`` environment variable is "true"."""
        return os.environ.get('USING_LOCAL', 'false').lower() == 'true'

    def get_name(self) -> str:
        """Return the display name of this TTS engine."""
        return '原神语音合成-local'

    def _get_submit_button(self):
        """Build the submit button for this engine.

        :return: btn, a ``gr.Button`` created hidden (``visible=False``) and
            interactive.
        """
        btn = gr.Button(value="原神语音合成(本地版)", variant="primary", interactive=True, visible=False)
        return btn

    def _get_config_page(self):
        """Build the (initially hidden) configuration panel.

        NOTE(review): the nesting of rows/columns below was reconstructed from
        a source file whose indentation had been lost; confirm the layout
        against the rendered UI.

        :return: ``(group, inputs)`` where ``group`` is the enclosing
            ``gr.Group`` and ``inputs`` lists the components whose values are
            passed, in order, to ``_generate`` after the text.
        """
        speakers = self.speakers_genshin_local
        languages = self.languages
        # Guard the default selection: the lists are empty when the backend
        # is unavailable, and indexing [0] would raise IndexError.
        default_speaker = speakers[0] if speakers else None
        default_language = languages[0] if languages else None

        with gr.Group(visible=False) as config_genshin_local:
            gr.Markdown('原作者:红血球AE3803\nhttps://space.bilibili.com/6589795/dynamic')
            with gr.Row():
                speaker_local = gr.Dropdown(choices=speakers, value=default_speaker,
                                            label="角色", interactive=True)
                search_local = gr.Textbox(label="搜索角色", lines=1, interactive=True)
                btn2 = gr.Button(value="搜索")
            with gr.Column():
                with gr.Row():
                    sdp_ratio_local = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1,
                                                label="SDP Ratio", interactive=True)
                    noise_scale_local = gr.Slider(minimum=0.1, maximum=2, value=0.6, step=0.1,
                                                  label="Noise", interactive=True)
                with gr.Row():
                    noise_scale_w_local = gr.Slider(minimum=0.1, maximum=2, value=0.9, step=0.1,
                                                    label="Noise_W", interactive=True)
                    length_scale_local = gr.Slider(minimum=0.1, maximum=2, value=1.0, step=0.1,
                                                   label="Length", interactive=True)
                language_local = gr.Dropdown(
                    choices=languages,
                    value=default_language,
                    label="Language",
                    interactive=True
                )
            # The search button replaces the speaker dropdown's value with the
            # result of search_speaker.
            btn2.click(self.search_speaker, inputs=[search_local], outputs=[speaker_local])

        inputs = [
            speaker_local,
            sdp_ratio_local,
            noise_scale_local,
            noise_scale_w_local,
            length_scale_local,
            language_local
        ]
        return config_genshin_local, inputs

    def _generate(self, text, speaker_local, sdp_ratio_local, noise_scale_local, noise_scale_w_local,
                  length_scale_local, language_local):
        """Synthesize ``text`` with the local backend and return the audio.

        The parameters after ``text`` are the values of the components listed
        in ``inputs`` by ``_get_config_page``, in the same order.

        :return: a ``pydub.AudioSegment`` decoded from an in-memory WAV.
        :raises RuntimeError: if the backend was not loaded (disabled or its
            import failed).
        """
        if self.genshin_tts_fn is None or self.change_dir is None:
            raise RuntimeError('genshin local TTS backend is not loaded')

        # NOTE(review): the extent of this `with` block was reconstructed from
        # a source file with lost indentation; the steps after it are purely
        # in-memory.
        with self.change_dir():
            # Element [1] of the backend result is used as a pair whose [0]
            # and [1] are passed to wavfile.write as rate and data.
            ori_audio_data = self.genshin_tts_fn(text, speaker_local, sdp_ratio_local, noise_scale_local,
                                                 noise_scale_w_local, length_scale_local, language_local,
                                                 None, 'Happy', 'Text prompt', 'style_text', 0.7)[1]

        # Round-trip through an in-memory WAV so pydub can parse the samples.
        wav_io = io.BytesIO()
        wavfile.write(wav_io, ori_audio_data[0], ori_audio_data[1])
        wav_io.seek(0)
        original_audio = AudioSegment.from_wav(wav_io)
        return original_audio

    def search_speaker(self, search_value):
        """Find a speaker matching ``search_value``.

        An exact match is preferred; otherwise the first speaker whose name
        contains ``search_value`` is returned.

        :return: the matching speaker name, or ``None`` when nothing matches.
        """
        speakers = self.speakers_genshin_local
        exact = next((s for s in speakers if search_value == s), None)
        if exact is not None:
            return exact
        return next((s for s in speakers if search_value in s), None)