Spaces:
Runtime error
Runtime error
kurianbenoy
committed on
Commit
•
f21fcbc
1
Parent(s):
84ee359
Add demo on recording voice functionality to gradio
Browse files
app.py
CHANGED
@@ -22,6 +22,37 @@ interface_options = {
|
|
22 |
"theme": "default",
|
23 |
}
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def predict(img):
|
27 |
img = PILImage.create(img)
|
@@ -30,8 +61,13 @@ def predict(img):
|
|
30 |
return labels_probs
|
31 |
|
32 |
|
|
|
|
|
|
|
|
|
|
|
33 |
demo = gradio.Interface(
|
34 |
-
fn=
|
35 |
inputs=gradio.inputs.Image(shape=(512, 512)),
|
36 |
outputs=gradio.outputs.Label(num_top_classes=5),
|
37 |
**interface_options,
|
|
|
22 |
"theme": "default",
|
23 |
}
|
24 |
|
25 |
+
# FFT size in samples; create_spectrogram passes it as both n_fft and win_length.
N_FFT = 2048
|
26 |
+
# Hop between successive frames, in samples; create_spectrogram passes it as hop_length.
HOP_LEN = 1024
|
27 |
+
|
28 |
+
|
29 |
+
def create_spectrogram(filename):
    """Load an audio file and return its mel spectrogram scaled to [0, 1].

    The audio is read with ``torchaudio.load``, turned into a 224-band mel
    power spectrogram, averaged over axis 0, converted to decibels and then
    min-max normalised.

    Args:
        filename: Location of the audio file; forwarded unchanged to
            ``torchaudio.load``.

    Returns:
        A tensor whose smallest value is 0. Its largest value is 1 unless
        the decibel spectrogram is constant (for example digital silence),
        in which case the result is all zeros.
    """
    audio, sr = torchaudio.load(filename)

    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    # Axis 0 is presumably the channel axis (torchaudio.load's default
    # channels-first layout), so this collapses multi-channel audio into a
    # single spectrogram -- confirm if the loader arguments ever change.
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Min-max normalisation: shift so the minimum is 0, then scale by the peak.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # A constant spectrogram is all zeros after the shift; dividing by its
    # peak would be 0/0 and fill the image with NaN, so leave it as zeros.
    if peak > 0:
        specgram = specgram / peak

    return specgram
|
49 |
+
|
50 |
+
|
51 |
+
def create_image(filename):
    """Render the spectrogram of an audio file to ``temp.png``.

    Args:
        filename: Audio file location, forwarded unchanged to
            ``create_spectrogram``.

    Returns:
        None. The result is the image file ``temp.png``, written relative to
        the current working directory as a side effect.
    """
    specgram = create_spectrogram(filename)
    # The output name is fixed; end2endpipeline reads this same file back.
    # (The previous unused ``dest = Path("temp.png")`` local was dropped: it
    # duplicated this literal and was never passed to anything.)
    save_image(specgram, "temp.png")
|
55 |
+
|
56 |
|
57 |
def predict(img):
|
58 |
img = PILImage.create(img)
|
|
|
61 |
return labels_probs
|
62 |
|
63 |
|
64 |
+
def end2endpipeline(filename):
    """Classify an audio file via its spectrogram image.

    First calls ``create_image`` on ``filename``, then hands the fixed path
    ``temp.png`` to ``predict`` and returns whatever ``predict`` returns.

    Args:
        filename: Audio file location, forwarded to ``create_image``.

    Returns:
        The value produced by ``predict("temp.png")``.
    """
    # Step 1: produce the spectrogram image on disk.
    create_image(filename)
    # Step 2: run the image classifier on the file just written.
    prediction = predict("temp.png")
    return prediction
|
67 |
+
|
68 |
+
|
69 |
demo = gradio.Interface(
|
70 |
+
fn=end2endpipeline,
|
71 |
inputs=gradio.inputs.Image(shape=(512, 512)),
|
72 |
outputs=gradio.outputs.Label(num_top_classes=5),
|
73 |
**interface_options,
|