import io
import json
import os

import gradio as gr
import requests
from pydub import AudioSegment

from TTSs.base_tts import Base_TTS


class genshin_api_TTS(Base_TTS):
    """TTS backend that synthesizes speech through a remote Genshin voice HTTP API.

    The speaker list is downloaded once when the instance is created; each
    synthesis request posts the text and voice parameters to the API and then
    downloads the resulting audio.
    """

    # (connect, read) timeouts in seconds applied to every HTTP request so a
    # stalled server cannot block the application forever. The read timeout is
    # generous because synthesis happens server-side before the reply is sent.
    _REQUEST_TIMEOUT = (10, 120)

    def __init__(self):
        self.genshin_api = "https://tirs.ai-lab.top"
        # Fetched over the network at construction time; see get_spk().
        self.speakers_genshin = self.get_spk()

    def get_name(self):
        """Return the display name of this TTS backend."""
        return '原神语音合成-api'

    def _get_submit_button(self):
        """Create the submit button for this backend.

        Must return exactly one value.

        :return: btn, a gr.Button object used as the submit button
                 (created hidden).
        """
        btn = gr.Button(value="原神提交", variant="primary", interactive=True, visible=False)
        return btn

    def _get_config_page(self):
        """Build the (initially hidden) configuration panel.

        :return: a tuple ``(group, inputs)`` where ``group`` is the gr.Group
                 holding the panel and ``inputs`` is the list of components
                 whose values are passed, in order, to ``_generate`` after
                 the text: token, appid, speaker, sdp_ratio, noise_scale,
                 noise_scale_w, length_scale.
        """
        # Guard against an empty speaker list: indexing [0] would raise
        # IndexError and prevent the whole UI from being built.
        default_speaker = self.speakers_genshin[0] if self.speakers_genshin else None

        with gr.Group(visible=False) as config_genshin_api:
            gr.Markdown('原作者:https://modelscope.cn/studios/erythrocyte/Bert-VITS2_Genshin_TTS/summary')
            with gr.Row():
                token = gr.Textbox(label="AccessToken", placeholder="请输入AccessToken", type="password",
                                   interactive=True, value=os.environ.get('GENSHIN_API_KEY', ''))
                appid = gr.Textbox(label="AppID", placeholder="请输入AppID", interactive=True,
                                   value='ig8t76x6036h3hpw')
            with gr.Row():
                gr.Markdown(
                    "AppID不要改,AccessToken最好换自己的。")
            with gr.Row():
                speaker = gr.Dropdown(choices=self.speakers_genshin, value=default_speaker,
                                      label="角色", interactive=True)
                search = gr.Textbox(label="搜索角色", lines=1, interactive=True)
                btn2 = gr.Button(value="搜索")
            with gr.Column():
                with gr.Row():
                    sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1,
                                          label="SDP/DP 混合比", interactive=True)
                    noise_scale = gr.Slider(minimum=0.1, maximum=2, value=0.6, step=0.1,
                                            label="感情", interactive=True)
                with gr.Row():
                    noise_scale_w = gr.Slider(minimum=0.1, maximum=2, value=0.8, step=0.1,
                                              label="音素长度", interactive=True)
                    length_scale = gr.Slider(minimum=-99, maximum=99, value=0, step=0.1,
                                             label="语速(%)", interactive=True)
            # The search button selects the matching speaker in the dropdown.
            btn2.click(self.search_speaker, inputs=[search], outputs=[speaker])

        inputs = [
            token, appid, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale
        ]
        return config_genshin_api, inputs

    def _generate(self, text, token, appid, speaker, sdp, noise, noisew, length):
        """Synthesize ``text`` through the remote API and return the audio.

        :param text: text to synthesize
        :param token: access token sent to the API
        :param appid: application id sent to the API
        :param speaker: speaker name
        :param sdp: value sent as ``sdp_ratio``
        :param noise: value sent as ``noise``
        :param noisew: value sent as ``noisew``
        :param length: speed slider value in percent (-99..99 in the UI)
        :return: a pydub ``AudioSegment`` decoded from the WAV data
        :raises requests.RequestException: on connection failure, timeout, or
                 a non-2xx HTTP status from either request
        :raises KeyError: if the API response has no ``"audio"`` field
        """
        # Convert the percent slider into the multiplier sent as 'length':
        # 0 -> 1.0, positive values shrink it, negative values grow it.
        # NOTE(review): presumably a smaller 'length' means faster speech -
        # confirm against the API.
        speed = (100 - length) / 100
        headers = {'Content-Type': 'application/json'}
        infer_info = {'lang': 'zh', 'appid': appid, 'token': token, 'speaker': speaker, 'text': text,
                      'sdp_ratio': sdp, 'noise': noise, 'noisew': noisew, 'length': speed}

        resp = requests.post(url=f"{self.genshin_api}/api/ex/vits", headers=headers,
                             data=json.dumps(infer_info), timeout=self._REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
        resp.raise_for_status()
        data = json.loads(resp.text)

        # The response carries a URL to the rendered audio, fetched separately.
        audio_resp = requests.get(data["audio"], timeout=self._REQUEST_TIMEOUT)
        # Without this check an error page would be handed to the WAV decoder.
        audio_resp.raise_for_status()
        wav_bytes = audio_resp.content

        original_audio = AudioSegment.from_wav(io.BytesIO(wav_bytes))
        return original_audio

    def get_spk(self):
        """Download and return the speaker list published by the API.

        :return: the parsed JSON content of ``spklist/spks.json``
                 (presumably a list of speaker-name strings - TODO confirm)
        :raises requests.RequestException: on connection failure, timeout, or
                 a non-2xx HTTP status
        """
        resp = requests.get(url=f"{self.genshin_api}/spklist/spks.json",
                            timeout=self._REQUEST_TIMEOUT)
        resp.raise_for_status()
        data = json.loads(resp.text)
        return data

    def search_speaker(self, search_value):
        """Find the speaker that best matches ``search_value``.

        An exact match wins; otherwise the first speaker whose name contains
        ``search_value`` as a substring is returned.

        :param search_value: text typed into the search box
        :return: the matching speaker name, or ``None`` if nothing matches
        """
        # First pass: exact match takes priority over any substring match.
        for s in self.speakers_genshin:
            if search_value == s:
                return s
        # Second pass: first speaker containing the search text.
        return next((s for s in self.speakers_genshin if search_value in s), None)