# Page header captured together with this file: "Spaces: Running".
"""Gradio demo for denoisers."""
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import UNet1DModel, WaveUNetModel
from tqdm import tqdm

# Model identifiers offered in the UI dropdown. Each one is handed to
# `from_pretrained` by `denoise`; identifiers containing "unet1d" select
# `UNet1DModel`, all others select `WaveUNetModel`.
MODELS = [
    "wrice/unet1d-vctk-48khz",
    "wrice/waveunet-vctk-48khz",
    "wrice/waveunet-vctk-24khz",
]
def denoise(model_name: str, audio_path: str):
    """Denoise an audio file with a pretrained model and return the output path.

    The input is decoded as a single channel at the model's configured sample
    rate and processed in chunks of ``model.config.max_length`` frames. A
    short final chunk is zero-padded up to that length before inference and
    the padding is trimmed from the result, so the output contains as many
    frames as were read.

    Args:
        model_name: Pretrained model identifier. Identifiers containing
            ``"unet1d"`` are loaded with ``UNet1DModel``; anything else is
            loaded with ``WaveUNetModel``.
        audio_path: Path of the audio file to denoise.

    Returns:
        Path of a newly created ``.wav`` file holding the denoised audio.
        Every call writes to its own temporary file, so overlapping calls do
        not overwrite each other's output. The file is not deleted here; the
        caller owns it.

    Raises:
        ValueError: If ``audio_path`` is ``None`` or empty.
    """
    # Fail fast, before paying for a model load, when there is nothing to read.
    if not audio_path:
        raise ValueError("No input audio was provided.")

    model_cls = UNet1DModel if "unet1d" in model_name else WaveUNetModel
    model = model_cls.from_pretrained(model_name)

    # Device availability does not change mid-request; query it once.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    chunk_size = model.config.max_length
    sample_rate = model.config.sample_rate

    stream_reader = torchaudio.io.StreamReader(audio_path)
    stream_reader.add_basic_audio_stream(
        frames_per_chunk=chunk_size,
        sample_rate=sample_rate,
        num_channels=1,
    )

    # Reserve a unique output path. The handle is closed immediately so the
    # writer can reopen the file by name; the ".wav" suffix is kept so the
    # container format can still be inferred from the extension.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        output_path = tmp_file.name

    stream_writer = torchaudio.io.StreamWriter(output_path)
    stream_writer.add_audio_stream(sample_rate=sample_rate, num_channels=1)

    with stream_writer.open():
        for (audio_chunk,) in tqdm(stream_reader.stream()):
            if audio_chunk is None:
                break

            # Swap the two axes so time is last (the reader is documented to
            # yield (frames, channels) chunks).
            audio_chunk = audio_chunk.permute(1, 0)
            original_chunk_size = audio_chunk.size(-1)

            # Zero-pad a short (typically final) chunk to the fixed length
            # the model is fed.
            padding = chunk_size - original_chunk_size
            if padding > 0:
                audio_chunk = torch.nn.functional.pad(audio_chunk, (0, padding))

            if use_cuda:
                audio_chunk = audio_chunk.cuda()

            # Inference only: add a leading batch axis, skip autograd.
            with torch.no_grad():
                denoised_chunk = model(audio_chunk[None]).audio

            # Drop the frames that correspond to the padding added above.
            denoised_chunk = denoised_chunk[:, :, :original_chunk_size]

            # Remove the batch axis and swap back to the reader's axis order
            # before writing to output stream 0.
            stream_writer.write_audio_chunk(
                0, denoised_chunk.squeeze(0).permute(1, 0).cpu()
            )

    return output_path
# Web UI: a model dropdown (defaulting to the first entry of MODELS) and an
# audio input delivered to `denoise` as a file path; the path that `denoise`
# returns is rendered as the output audio.
iface = gr.Interface(
    fn=denoise,
    inputs=[gr.Dropdown(choices=MODELS, value=MODELS[0]), gr.Audio(type="filepath")],
    outputs=gr.Audio(type="filepath"),
)

# Start serving the interface as soon as the module is executed.
iface.launch()