Spaces: Running
KanvaBhatia committed · 183b9ba
1 Parent(s): 2729fbc
Create app.py
app.py
ADDED
@@ -0,0 +1,73 @@
import gradio as gr

import torch
from torchaudio.backend.common import AudioMetaData
from df.enhance import enhance, load_audio, save_audio
from df.io import resample
from libdf import DF
from df.model import ModelParams
from df import config
import moviepy.editor as mp  # only needed by the commented-out audio-extraction path below
import numpy as np

try:
    config.load('config.ini')
except Exception as e:
    print(e)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the DeepFilterNet STFT/ERB feature pipeline from the model parameters.
p = ModelParams()
df = DF(
    sr=p.sr,
    fft_size=p.fft_size,
    hop_size=p.hop_size,
    nb_bands=p.nb_erb,
    min_nb_erb_freqs=p.min_nb_freqs,
)

print("Device - ", DEVICE)
model = torch.load("model.pth", map_location=torch.device('cpu'))
model.to(DEVICE)
model.eval()


def identity(x):
    print(x)
    # audio = mp.AudioFileClip(x)
    wav_file = x
    # audio.write_audiofile(wav_file)
    print("Wav stored.")
    meta = AudioMetaData(-1, -1, -1, -1, "")
    sr = config("sr", 48000, int, section="df")
    sample, meta = load_audio(wav_file, sr)
    # Length of the input in minutes; the file is processed in chunks of at
    # most one minute so that long recordings fit in memory even on CPU.
    len_audio = (meta.num_frames / meta.sample_rate) / 60
    max_min = 1
    if len_audio % max_min < 0.1:
        num_chunks = len_audio // max_min
    else:
        num_chunks = len_audio // max_min + 1
    num_chunks = max(int(num_chunks), 1)  # guard against zero chunks for very short clips
    print(f"Total length of audio = {len_audio} chunks = {num_chunks}")
    estimate = []
    split_tensors = torch.tensor_split(sample, int(num_chunks), dim=1)
    for i in range(len(split_tensors)):
        # Run the enhancement twice per chunk for a stronger denoising effect.
        enhanced = enhance(model, df, split_tensors[i])
        enhanced = enhance(model, df, enhanced)
        # Apply a 150 ms linear fade-in to soften artefacts at chunk boundaries.
        lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
        lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
        enhanced = enhanced * lim
        enhanced = resample(enhanced, sr, meta.sample_rate)
        estimate.append(enhanced)
    # Re-join the enhanced chunks and write the result at the original sample rate.
    estimate = tuple(estimate)
    enhanced = torch.cat(estimate, dim=-1)
    sr = meta.sample_rate
    save_audio("enhanced_aud.wav", enhanced, sr)
    return "enhanced_aud.wav"


demo = gr.Interface(
    fn=identity,
    title="Audio Denoiser using DeepFilterNet V3",
    description="Audio denoising using DeepFilterNet V3. Larger files can be processed even on CPU by splitting the audio into chunks of 1 minute each.",
    inputs=gr.Audio(type='filepath'),
    outputs=gr.Audio(label="Output Audio"),
)
demo.launch()
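For a quick local check outside the Gradio UI, the same enhancement function can be called directly on a WAV file. This is a minimal sketch, not part of the commit; it assumes model.pth and config.ini sit next to the script, and "noisy_sample.wav" is a hypothetical input path. As a worked example of the chunking logic above, a 3 min 30 s clip gives len_audio = 3.5 minutes; since 3.5 % 1 = 0.5 ≥ 0.1, the file is split into 3 + 1 = 4 chunks of roughly 52.5 s each.

# Hypothetical smoke test (not part of app.py): run the chunked
# DeepFilterNet pipeline directly, without launching the Gradio interface.
# "noisy_sample.wav" is a placeholder input path.
out_path = identity("noisy_sample.wav")
print("Denoised audio written to", out_path)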