kurianbenoy committed on
Commit
f21fcbc
1 Parent(s): 84ee359

Add demo on recording voice functionality to gradio

Browse files
Files changed (1) hide show
  1. app.py +37 -1
app.py CHANGED
@@ -22,6 +22,37 @@ interface_options = {
22
  "theme": "default",
23
  }
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def predict(img):
27
  img = PILImage.create(img)
@@ -30,8 +61,13 @@ def predict(img):
30
  return labels_probs
31
 
32
 
 
 
 
 
 
33
  demo = gradio.Interface(
34
- fn=predict,
35
  inputs=gradio.inputs.Image(shape=(512, 512)),
36
  outputs=gradio.outputs.Label(num_top_classes=5),
37
  **interface_options,
 
22
  "theme": "default",
23
  }
24
 
25
+ N_FFT = 2048
26
+ HOP_LEN = 1024
27
+
28
+
29
+ def create_spectrogram(filename):
30
+ audio, sr = torchaudio.load(filename)
31
+ specgram = torchaudio.transforms.MelSpectrogram(
32
+ sample_rate=sr,
33
+ n_fft=N_FFT,
34
+ win_length=N_FFT,
35
+ hop_length=HOP_LEN,
36
+ center=True,
37
+ pad_mode="reflect",
38
+ power=2.0,
39
+ norm="slaney",
40
+ onesided=True,
41
+ n_mels=224,
42
+ mel_scale="htk",
43
+ )(audio).mean(axis=0)
44
+ specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
45
+ specgram = specgram - specgram.min()
46
+ specgram = specgram / specgram.max()
47
+
48
+ return specgram
49
+
50
+
51
+ def create_image(filename):
52
+ specgram = create_spectrogram(filename)
53
+ dest = Path("temp.png")
54
+ save_image(specgram, "temp.png")
55
+
56
 
57
  def predict(img):
58
  img = PILImage.create(img)
 
61
  return labels_probs
62
 
63
 
64
+ def end2endpipeline(filename):
65
+ create_image(filename)
66
+ return predict("temp.png")
67
+
68
+
69
  demo = gradio.Interface(
70
+ fn=end2endpipeline,
71
  inputs=gradio.inputs.Image(shape=(512, 512)),
72
  outputs=gradio.outputs.Label(num_top_classes=5),
73
  **interface_options,