import gradio as gr
import librosa
import pedalboard
import soundfile as sf


def inference(audio):
    # Load the uploaded file and resample to 44.1 kHz (librosa returns mono by default).
    y, sr = librosa.load(audio.name, sr=44100)

    # Apply a fully wet reverb to the signal.
    reverb = pedalboard.Reverb()
    reverb.wet_level = 1.0
    effected = reverb(y, sample_rate=sr)

    # Write the processed audio to disk and return the path for Gradio to serve.
    out_path = './processed-output-stereo.wav'
    with sf.SoundFile(out_path, 'w', samplerate=sr,
                      channels=len(effected.shape)) as f:
        f.write(effected)
    return out_path


inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Audio(label="Output Audio", type="file")

title = "VITS"
description = ("Demo for VITS: Conditional Variational Autoencoder with Adversarial "
               "Learning for End-to-End Text-to-Speech. To use it, simply upload your "
               "audio, or click one of the examples to load them. Read more at the "
               "links below.")
article = ("Conditional Variational Autoencoder with Adversarial Learning for "
           "End-to-End Text-to-Speech | Github Repo")

gr.Interface(inference, inputs, outputs, title=title, description=description,
             article=article).launch(debug=True)