import gradio as gr
import numpy as np
import torch
import torchaudio
from denoisers import WaveUNetModel
from tqdm import tqdm

MODEL = WaveUNetModel.from_pretrained("wrice/waveunet-vctk-24khz")
def denoise(inputs):
    sr, audio = inputs

    # Gradio passes audio as (sample_rate, int16 numpy array); this demo
    # assumes a mono signal. Add a channel dimension and rescale to [-1, 1].
    audio = torch.from_numpy(audio)[None]
    audio = audio / 32768.0
    print(f"Audio shape: {audio.shape}")
    print(f"Sample rate: {sr}")

    # Resample to the rate the model was trained on, if necessary.
    if sr != MODEL.config.sample_rate:
        audio = torchaudio.functional.resample(audio, sr, MODEL.config.sample_rate)

    # Zero-pad the waveform so it splits evenly into fixed-size chunks
    # (the extra samples are trimmed off after inference).
    chunk_size = MODEL.config.max_length
    padding = abs(audio.size(-1) % chunk_size - chunk_size)
    padded = torch.nn.functional.pad(audio, (0, padding))

    # Denoise one chunk at a time to keep memory usage bounded.
    clean = []
    for i in tqdm(range(0, padded.shape[-1], chunk_size)):
        audio_chunk = padded[:, i : i + chunk_size]
        with torch.no_grad():
            clean_chunk = MODEL(audio_chunk[None]).logits
        clean.append(clean_chunk.squeeze(0))

    # Stitch the chunks back together, drop the padding, and convert to int16.
    denoised = torch.concat(clean).flatten()[: audio.shape[-1]].clamp(-1.0, 1.0)
    denoised = (denoised * 32767.0).numpy().astype("int16")
    print(f"Denoised shape: {denoised.shape}")
    # Gradio expects (sample_rate, data) with a 1-D array for mono audio,
    # so return the flat int16 array rather than adding a leading axis.
    return MODEL.config.sample_rate, denoised
iface = gr.Interface(fn=denoise, inputs="audio", outputs="audio")
iface.launch()
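
For a quick sanity check outside the Gradio UI, denoise can be called directly with the same (sample_rate, int16 array) tuple the audio input component provides. The sketch below is not part of the Space itself, and the file names noisy_sample.wav and denoised_sample.wav are placeholders.

# Local test harness: load a WAV file, convert it to the tuple Gradio
# would pass in, run denoise(), and write the result back to disk.
waveform, sr = torchaudio.load("noisy_sample.wav")  # (channels, samples), float32 in [-1, 1]
mono = waveform.mean(dim=0)                          # downmix to mono
noisy_int16 = (mono.clamp(-1.0, 1.0) * 32767.0).numpy().astype("int16")

out_sr, clean_int16 = denoise((sr, noisy_int16))
clean_float = torch.from_numpy(clean_int16.astype("float32") / 32767.0)[None]
torchaudio.save("denoised_sample.wav", clean_float, out_sr)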