VQMIVC / app.py
Ahsen Khaliq
Update app.py
3cb3114
raw
history blame
1.78 kB
import os
import shutil
import subprocess
import sys
import zipfile

import gradio as gr
os.system('gdown https://drive.google.com/uc?id=1Flw6Z0K2QdRrTn5F-gVt6HdR9TRPiaKy')
with zipfile.ZipFile('VQMIVC-pretrained models.zip', 'r') as zip_ref:
zip_ref.extractall('.')
shutil.move('VQMIVC-pretrained models/checkpoints/', '.')
shutil.move('VQMIVC-pretrained models/vocoder/', '.')
def inference(audio1, audio2):
os.system("python convert_example.py -s "+ audio1.name+" -r "+ audio2.name+ " -c converted -m 'checkpoints/useCSMITrue_useCPMITrue_usePSMITrue_useAmpTrue/VQMIVC-model.ckpt-500.pt'")
out = os.path.basename(str(audio1)).split(".")[0] + "_converted_gen.wav"
return out
inputs = [gr.inputs.Audio(label="Source Audio", type='file'),gr.inputs.Audio(label="Reference Audio", type='file')]
outputs = gr.outputs.Audio(label="Output Audio", type='file')
title = "VITS"
description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
examples = [
["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech."],
["Our method adopts variational inference augmented with normalizing flows and an adversarial training process, which improves the expressive power of generative modeling."]
]
gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()