Hendrik Schroeter committed on
Commit
0f79c5b
1 Parent(s): d446ca4

Initial working space

Browse files
app.py CHANGED
@@ -1,10 +1,16 @@
 
 
 
1
  import gradio
2
  import gradio.inputs
3
  import gradio.outputs
4
  import torch
5
- from df.enhance import enhance, init_df
 
6
 
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
8
 
9
 
10
  def mix_at_snr(clean, noise, snr, eps=1e-10):
@@ -21,22 +27,34 @@ def mix_at_snr(clean, noise, snr, eps=1e-10):
21
  mix: 1D Tensor with added clean and noise signals.
22
 
23
  """
24
- clean = torch.as_tensor(clean)
25
- noise = torch.as_tensor(noise)
 
 
 
26
  E_speech = torch.mean(clean.pow(2)) + eps
27
  E_noise = torch.mean(noise.pow(2))
28
  K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
29
  noise = noise / K
30
  mixture = clean + noise
31
- assert torch.isfinite(mixture)
32
  return clean, noise, mixture
33
 
 
 
 
34
 
35
  def mix_and_denoise(speech, noise, snr):
36
- model, df, _ = init_df()
 
 
 
37
  speech, noise, noisy = mix_at_snr(speech, noise, snr)
38
- enhanced = enhance(model.to(device=device).eval(), df, noisy)
39
- return speech, noisy, enhanced
 
 
 
40
 
41
 
42
  inputs = [
@@ -49,8 +67,7 @@ inputs = [
49
  gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
50
  ]
51
  examples = [
52
- [],
53
- ["samples/noise_freesound_2530.wav", "samples/noise_freesound_573577.wav"],
54
  ]
55
  outputs = [
56
  gradio.outputs.Audio(label="Clean"),
 
1
+ import math
2
+
3
+ import numpy as np
4
  import gradio
5
  import gradio.inputs
6
  import gradio.outputs
7
  import torch
8
+ from df import config
9
+ from df.enhance import enhance, init_df, load_audio, save_audio
10
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
+ model, df, _ = init_df()
13
+ model = model.to(device=device).eval()
14
 
15
 
def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean signal with noise at a target signal-to-noise ratio.

    Both inputs are converted to tensors and down-mixed to one channel by
    averaging over dimension 0. The noise is tiled and cropped to the length
    of the clean signal, then rescaled so that the ratio of mean clean power
    to mean noise power is approximately ``10 ** (snr / 10)``.

    Args:
        clean: Clean signal, array-like or Tensor of shape
            ``(channels, samples)``; a 1-D ``(samples,)`` signal is accepted
            and treated as a single channel.
        noise: Noise signal, same layout conventions as ``clean``. May be
            shorter or longer than ``clean``.
        snr: Target signal-to-noise ratio in dB.
        eps: Small constant added to the clean energy and to the scaling
            factor to avoid division by zero.

    Returns:
        Tuple ``(clean, noise, mixture)`` of Tensors with shape
        ``(1, samples)``: the mono clean signal, the length-matched and
        rescaled noise, and their sum.

    Raises:
        ValueError: If ``noise`` has no samples while ``clean`` does, so it
            cannot be tiled to the required length.
        AssertionError: If the mixture contains non-finite values.
    """
    clean = torch.as_tensor(clean)
    noise = torch.as_tensor(noise)
    # Promote 1-D signals to (1, samples); otherwise the channel average below
    # would collapse the time axis and the shape[1] lookups would fail.
    if clean.dim() == 1:
        clean = clean.unsqueeze(0)
    if noise.dim() == 1:
        noise = noise.unsqueeze(0)
    clean = clean.mean(0, keepdim=True)
    noise = noise.mean(0, keepdim=True)

    n_clean = clean.shape[1]
    n_noise = noise.shape[1]
    if n_noise < n_clean:
        if n_noise == 0:
            raise ValueError("noise must contain at least one sample")
        # Tile the noise until it is at least as long as the clean signal.
        noise = noise.repeat((1, math.ceil(n_clean / n_noise)))
    noise = noise[:, :n_clean]

    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Dividing the noise by K leaves E_speech / E_noise ~= 10 ** (snr / 10).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    assert torch.isfinite(mixture).all(), "mixture contains non-finite values"
    return clean, noise, mixture
42
 
def as_gradio_audio(x):
    """Convert an audio tensor into a ``(sample_rate, int16 ndarray)`` pair.

    Args:
        x: Audio tensor. Presumably floating point with samples in
            ``[-1, 1]`` -- TODO confirm against the caller.

    Returns:
        Tuple ``(sr, data)`` where ``sr`` is the value returned by
        ``config.get("sr", "df", int)`` and ``data`` is a NumPy ``int16``
        array with the same shape as ``x``.
    """
    sr = config.get("sr", "df", int)
    # Scale *up* to the int16 range. Dividing by 0x7FFF (as before) truncates
    # every sample within the int16 range to 0 or +/-1, i.e. silence.
    # Clamp first so that samples outside [-1, 1] saturate instead of wrapping
    # around in the integer cast.
    pcm = (x * 0x7FFF).clamp(-0x8000, 0x7FFF).to(torch.int16)
    # NOTE(review): the array keeps the layout of ``x``; verify that the
    # consumer accepts that layout (e.g. channels-first vs. samples-first).
    return sr, pcm.cpu().numpy()
46
 
def mix_and_denoise(speech, noise, snr):
    """Mix speech with noise at the given SNR, enhance it and save the results.

    Args:
        speech: Speech source handed to ``load_audio`` (presumably a file
            path -- the example rows use ``.wav`` paths).
        noise: Noise source handed to ``load_audio``.
        snr: Target signal-to-noise ratio forwarded to ``mix_at_snr``.

    Returns:
        Tuple of the three written file names:
        ``("clean.wav", "noisy.wav", "enhanced.wav")``.
    """
    print(speech, noise, snr)
    sample_rate = config.get("sr", "df", int)
    speech_audio = load_audio(speech, sample_rate)[0]
    noise_audio = load_audio(noise, sample_rate)[0]
    speech_audio, noise_audio, noisy_audio = mix_at_snr(speech_audio, noise_audio, snr)
    enhanced_audio = enhance(model, df, noisy_audio)
    # NOTE(review): the output names are fixed and relative to the working
    # directory, so every call overwrites the files of the previous one.
    tracks = (
        ("clean.wav", speech_audio),
        ("noisy.wav", noisy_audio),
        ("enhanced.wav", enhanced_audio),
    )
    for file_name, audio in tracks:
        save_audio(file_name, audio, sample_rate)
    return tuple(file_name for file_name, _ in tracks)
58
 
59
 
60
  inputs = [
 
67
  gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
68
  ]
69
  examples = [
70
+ ["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
 
71
  ]
72
  outputs = [
73
  gradio.outputs.Audio(label="Clean"),
samples/noise_freesound_573577.wav → clean.wav RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb367b36e4e9d72d112377dd57bf354e13f0b30f8402c9e841ac47639e773497
3
- size 474010
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
3
+ size 378612
enhanced.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ff9dc5db07e3a2410f0dd416d9bccdcdc9bd173ed46f415e405208a4105d04
3
+ size 378284
noisy.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b658209be05042ce017aa2b3db444e56e84c3cc6f58535599ff8887c9ee5f7
3
+ size 378612
samples/p232_013_clean.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
3
+ size 378612