"""Gradio demo for MockingBird voice cloning.

On import this script downloads any missing model files into a ``data``
directory next to this file, loads the encoder and vocoders, and launches a
Gradio interface whose callback is :func:`inference`.
"""
import os
from pathlib import Path
from tempfile import NamedTemporaryFile

import gradio as gr
import httpx  # noqa: F401  (not referenced in this script; import kept as-is)
import torch
from mockingbirdforuse import MockingBird

mockingbird = MockingBird()
mockingbird_path = Path(os.path.dirname(__file__)) / "data"
base_url = "https://al.smoe.top/d/Home/source/mockingbird/"

# Checkpoints stored directly under ``mockingbird_path`` and shared by every
# voice model.
SHARED_CHECKPOINTS = ["encoder.pt", "g_hifigan.pt", "wavernn.pt"]
# Voice models; each has its own sub-directory holding ``record.wav`` and
# ``<name>.pt``. Also used for the model selector in the UI.
MODEL_NAMES = ["azusa", "nanmei", "ltyai", "tianyi"]


def _ensure_downloaded(remote_name: str, destination: Path) -> None:
    """Download ``remote_name`` (relative to ``base_url``) unless it exists.

    The parent directory of ``destination`` is created first, so the download
    also works on a fresh checkout where the data directory is missing.
    """
    if destination.exists():
        return
    destination.parent.mkdir(parents=True, exist_ok=True)
    # ``base_url`` ends with a slash; strip it so the URL has no "//".
    url = f"{base_url.rstrip('/')}/{remote_name}"
    torch.hub.download_url_to_file(url, str(destination))


for checkpoint in SHARED_CHECKPOINTS:
    _ensure_downloaded(checkpoint, mockingbird_path / checkpoint)

for model in MODEL_NAMES:
    model_path = mockingbird_path / model
    model_path.mkdir(parents=True, exist_ok=True)
    for file_name in ["record.wav", f"{model}.pt"]:
        _ensure_downloaded(f"{model}/{file_name}", model_path / file_name)

mockingbird.load_model(
    mockingbird_path / "encoder.pt",
    mockingbird_path / "g_hifigan.pt",
    mockingbird_path / "wavernn.pt",
)


def inference(
    text: str,
    model_name: str,
    vocoder_type: str = "HifiGan",
    style_idx: int = 0,
    min_stop_token: int = 9,
    steps: int = 2000,
) -> str:
    """Synthesize ``text`` with the selected voice model and return a WAV path.

    Args:
        text: Text to synthesize; converted with ``str()`` before use.
        model_name: Name of a sub-directory of ``mockingbird_path`` containing
            ``<model_name>.pt`` and ``record.wav``.
        vocoder_type: Forwarded unchanged to ``mockingbird.synthesize``.
        style_idx: Forwarded unchanged to ``mockingbird.synthesize``.
        min_stop_token: Forwarded unchanged to ``mockingbird.synthesize``.
        steps: Forwarded unchanged to ``mockingbird.synthesize``.

    Returns:
        Path of a newly created ``.wav`` temporary file holding the audio.
        The file is created with ``delete=False`` and is not removed here.
    """
    model_path = mockingbird_path / model_name
    mockingbird.set_synthesizer(model_path / f"{model_name}.pt")

    # Synthesize before creating the temp file so that a failure here does
    # not leave an empty orphan file behind.
    record = mockingbird.synthesize(
        text=str(text),
        input_wav=model_path / "record.wav",
        vocoder_type=vocoder_type,
        style_idx=style_idx,
        min_stop_token=min_stop_token,
        steps=steps,
    )

    # Write through the temp-file handle itself and close it on exit, rather
    # than leaking the handle and reopening the path a second time.
    # ``record`` is used via ``getvalue()`` — presumably an in-memory buffer.
    with NamedTemporaryFile(suffix=".wav", delete=False) as output:
        output.write(record.getvalue())
    return output.name


title = "MockingBird"
description = "🚀AI拟声: 5秒内克隆您的声音并生成任意语音内容 Clone a voice in 5 seconds to generate arbitrary speech in real-time"
# NOTE(review): this text looks like it may have lost surrounding link markup;
# confirm the intended article content.
article = "Github Repo\n\n"

gr.Interface(
    inference,
    [
        gr.Textbox(label="Input"),
        gr.Radio(
            MODEL_NAMES,
            label="model type",
            value="azusa",
        ),
        gr.Radio(
            ["HifiGan", "WaveRNN"],
            label="Vocoder type",
            value="HifiGan",
        ),
        gr.Slider(minimum=-1, maximum=9, step=1, label="style idx", value=0),
        gr.Slider(minimum=3, maximum=9, label="min stop token", value=9),
        gr.Slider(minimum=200, maximum=2000, label="steps", value=2000),
    ],
    gr.Audio(type="filepath", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=[
        ["阿梓不是你的电子播放器", "azusa", "HifiGan", 0, 9, 2000],
        ["不是", "nanmei", "HifiGan", 0, 9, 2000],
    ],
).launch()