import json
from pathlib import Path

import gradio as gr
import librosa
import numpy as np
import torch
from huggingface_hub import hf_hub_download, list_repo_files
from so_vits_svc_fork.hparams import HParams
from so_vits_svc_fork.inference.core import Svc

##########################################################
# REPLACE THESE VALUES TO CHANGE THE MODEL REPO/CKPT NAME
##########################################################
repo_id = "Shashashasha/yoshi"
ckpt_name = None  # None will pick the latest checkpoint
##########################################################

# Figure out the latest generator checkpoint by taking the highest-numbered
# one. E.g. if the repo has G_0.pth, G_100.pth, G_200.pth, we'd use G_200.pth.
if ckpt_name is None:
    latest_id = sorted(
        [
            int(Path(x).stem.split("_")[1])
            for x in list_repo_files(repo_id)
            if x.startswith("G_") and x.endswith(".pth")
        ]
    )[-1]
    ckpt_name = f"G_{latest_id}.pth"

generator_path = hf_hub_download(repo_id, ckpt_name)
config_path = hf_hub_download(repo_id, "config.json")
hparams = HParams(**json.loads(Path(config_path).read_text()))
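# HParams wraps the config dict with attribute access; its `spk` field maps
# speaker names to integer ids, which is where the dropdown choices come from.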
speakers = list(hparams.spk.keys())
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=None)
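# Note: cluster_model_path=None means no cluster model is loaded, so the
# "Cluster Infer Ratio" slider should stay at 0 (a non-zero ratio presumably
# needs a cluster model and may fail without one).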


def predict(
    speaker,
    audio,
    transpose: int = 0,
    auto_predict_f0: bool = False,
    cluster_infer_ratio: float = 0,
    noise_scale: float = 0.4,
    f0_method: str = "crepe",
    db_thresh: int = -40,
    pad_seconds: float = 0.5,
    chunk_seconds: float = 0.5,
    absolute_thresh: bool = False,
):
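    """Run voice conversion on `audio` and return (sample_rate, waveform).

    Parameters mirror so-vits-svc-fork's `Svc.infer_silence`: `transpose`
    shifts pitch in semitones, `auto_predict_f0` estimates pitch automatically
    (meant for speech rather than singing), `f0_method` selects the pitch
    extractor, `noise_scale` trades stability for variation, and `db_thresh`/
    `pad_seconds`/`chunk_seconds`/`absolute_thresh` control silence detection
    and chunked processing.
    """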
    audio, _ = librosa.load(audio, sr=model.target_sample)
    audio = model.infer_silence(
        audio.astype(np.float32),
        speaker=speaker,
        transpose=transpose,
        auto_predict_f0=auto_predict_f0,
        cluster_infer_ratio=cluster_infer_ratio,
        noise_scale=noise_scale,
        f0_method=f0_method,
        db_thresh=db_thresh,
        pad_seconds=pad_seconds,
        chunk_seconds=chunk_seconds,
        absolute_thresh=absolute_thresh,
    )
    return model.target_sample, audio
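
# Hypothetical local smoke test (assumes a mono "sample.wav" next to this
# script and the soundfile package; neither ships with this Space):
#   sr, out = predict(speakers[0], "sample.wav", transpose=0)
#   import soundfile as sf; sf.write("converted.wav", out, sr)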
| description=f""" | |
| Это йоши нейросеть! Настройки не крутите. А если вам пофиг крутите ломайте! | |
| А что тут написать? | |
| """.strip() | |
| article=""" | |
| <p style='text-align: center'> | |
| <a href='https://github.com/voicepaw/so-vits-svc-fork' target='_blank'>Github Repo</a> | |
| </p> | |
| """.strip() | |

interface_mic = gr.Interface(
    predict,
    inputs=[
        gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
        gr.Audio(type="filepath", source="microphone", label="Source Audio"),
        gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
        gr.Checkbox(False, label="Auto Predict F0"),
        gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Cluster Infer Ratio"),
        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="Noise Scale"),
        gr.Dropdown(choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], value="crepe", label="F0 Method"),
    ],
    outputs="audio",
    title="Voice Cloning",
    description=description,
    article=article,
)
interface_file = gr.Interface(
    predict,
    inputs=[
        gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
        gr.Audio(type="filepath", source="upload", label="Source Audio"),
        gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
        gr.Checkbox(False, label="Auto Predict F0"),
        gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Cluster Infer Ratio"),
        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="Noise Scale"),
        gr.Dropdown(choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], value="crepe", label="F0 Method"),
    ],
    outputs="audio",
    title="Yoshi Neural Network",
    description=description,
    article=article,
)
interface = gr.TabbedInterface(
    [interface_mic, interface_file],
    ["Microphone", "Audio File"],
)
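
# On Hugging Face Spaces this file is executed directly, so the guard below
# calls launch(); when running locally, launch(share=True) would additionally
# create a temporary public URL.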
if __name__ == '__main__':
    interface.launch()