MockingBird / app.py
gongqianshao's picture
Duplicate from Marne/MockingBird
ecdf01c
raw
history blame contribute delete
No virus
2.88 kB
import os
import httpx
import torch
import gradio as gr
from tempfile import NamedTemporaryFile
from pathlib import Path
from mockingbirdforuse import MockingBird
# Single shared MockingBird instance used by every inference request.
mockingbird = MockingBird()

# Model weights and reference recordings are stored in ./data next to this file.
mockingbird_path = Path(os.path.dirname(__file__)) / "data"
base_url = "https://al.smoe.top/d/Home/source/mockingbird/"

# base_url already ends with "/"; strip it once so the joined download URLs
# do not contain an accidental "//" path segment.
_download_root = base_url.rstrip("/")

# Create the data directory up front, the same way each per-voice directory
# is created below, so the first downloads have somewhere to land.
mockingbird_path.mkdir(parents=True, exist_ok=True)

# Shared weights (encoder and both vocoders): fetch only the missing ones.
for weight_name in ["encoder.pt", "g_hifigan.pt", "wavernn.pt"]:
    weight_path = mockingbird_path / weight_name
    if not weight_path.exists():
        torch.hub.download_url_to_file(
            f"{_download_root}/{weight_name}", str(weight_path)
        )

# Per-voice assets: a reference recording plus the synthesizer checkpoint
# named after the voice, stored in a sub-directory of the same name.
for model in ["azusa", "nanmei", "ltyai", "tianyi"]:
    model_path = mockingbird_path / model
    model_path.mkdir(parents=True, exist_ok=True)
    for file_name in ["record.wav", f"{model}.pt"]:
        target_path = model_path / file_name
        if not target_path.exists():
            torch.hub.download_url_to_file(
                f"{_download_root}/{model}/{file_name}", str(target_path)
            )

# Load the shared weights once at import time; synthesizer checkpoints are
# selected per request inside inference().
mockingbird.load_model(
    mockingbird_path / "encoder.pt",
    mockingbird_path / "g_hifigan.pt",
    mockingbird_path / "wavernn.pt",
)
def inference(
    text: str,
    model_name: str,
    vocoder_type: str = "HifiGan",
    style_idx: int = 0,
    min_stop_token: int = 9,
    steps: int = 2000,
) -> str:
    """Synthesize ``text`` with the selected voice and return a WAV file path.

    Args:
        text: Text to speak; converted with ``str()`` before synthesis.
        model_name: Name of the voice. Its sub-directory under
            ``mockingbird_path`` must contain ``<model_name>.pt`` and
            ``record.wav``.
        vocoder_type: Forwarded unchanged to ``mockingbird.synthesize``.
        style_idx: Forwarded unchanged to ``mockingbird.synthesize``.
        min_stop_token: Forwarded unchanged to ``mockingbird.synthesize``.
        steps: Forwarded unchanged to ``mockingbird.synthesize``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    model_path = mockingbird_path / model_name
    mockingbird.set_synthesizer(model_path / f"{model_name}.pt")

    # Synthesize before creating the temp file so a failed synthesis does not
    # leave an empty orphaned file behind.
    record = mockingbird.synthesize(
        text=str(text),
        input_wav=model_path / "record.wav",
        vocoder_type=vocoder_type,
        style_idx=style_idx,
        min_stop_token=min_stop_token,
        steps=steps,
    )

    # Write through the temp file's own handle and close it on exit, instead
    # of leaving that handle open and reopening the file by name.
    with NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(record.getvalue())
    return wav_file.name
# Static text shown around the demo.
title = "MockingBird"
description = "🚀AI拟声: 5秒内克隆您的声音并生成任意语音内容 Clone a voice in 5 seconds to generate arbitrary speech in real-time"
# Footer link, wrapped in a complete <p> element so the markup is balanced.
article = "<p><a href='https://github.com/babysor/MockingBird'>Github Repo</a></p>"

# Build the UI and start serving. The input components are listed in the same
# order as inference()'s parameters.
gr.Interface(
    inference,
    [
        gr.Textbox(label="Input"),
        gr.Radio(
            ["azusa", "nanmei", "ltyai", "tianyi"],
            label="model type",
            value="azusa",
        ),
        gr.Radio(
            ["HifiGan", "WaveRNN"],
            label="Vocoder type",
            value="HifiGan",
        ),
        gr.Slider(minimum=-1, maximum=9, step=1, label="style idx", value=0),
        # step=1 matches the "style idx" slider and keeps the value integral
        # for inference()'s int-annotated min_stop_token parameter.
        # NOTE(review): the default step when omitted depends on the installed
        # Gradio version - confirm it was fractional for this 3..9 range.
        gr.Slider(minimum=3, maximum=9, step=1, label="min stop token", value=9),
        gr.Slider(minimum=200, maximum=2000, label="steps", value=2000),
    ],
    gr.Audio(type="filepath", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=[
        ["阿梓不是你的电子播放器", "azusa", "HifiGan", 0, 9, 2000],
        ["不是", "nanmei", "HifiGan", 0, 9, 2000],
    ],
).launch()