DrSnake committed on
Commit
c93a9e4
·
1 Parent(s): f1c57cb

feat: add audio classification model and app code

Browse files
Files changed (2) hide show
  1. app.py +54 -4
  2. sample-classifier-model-01.pkl +3 -0
app.py CHANGED
@@ -1,7 +1,57 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ # Import libraries
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import librosa
6
+ import librosa.display
7
+ import os
8
+ from fastai.vision.all import *
9
+ from PIL import Image
10
 
11
def fig2img(fig):
    """Render a Matplotlib figure into memory and open the result with PIL.

    Args:
        fig: Object exposing ``savefig``; it is asked to write itself into an
            in-memory byte buffer using its default save settings.

    Returns:
        The object returned by ``Image.open`` on that buffer.
    """
    from io import BytesIO

    stream = BytesIO()
    fig.savefig(stream)
    # Rewind: after saving, the position sits at the end of the written data.
    stream.seek(0)
    # The buffer is intentionally left open; the returned image keeps using it.
    return Image.open(stream)
19
 
20
+ # Define a function that converts a given audio file to a spectrogram
21
def audio_to_spectogram(audio_path, save_path=None):
    """Render the dB-scaled spectrogram of an audio file.

    Args:
        audio_path: Audio file handed to ``librosa.load`` with ``sr=None``
            (no target sample rate is requested).
        save_path: Destination for the rendered figure. When ``None`` the
            figure is converted with ``fig2img`` and returned instead.

    Returns:
        ``None`` when ``save_path`` is given; otherwise the image produced by
        ``fig2img`` for the rendered figure.
    """
    y, sr = librosa.load(audio_path, sr=None)

    # Magnitude of the STFT, converted to dB with the maximum as reference.
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Keep an explicit handle so the figure can always be released below.
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Spectrogram')

        if save_path is not None:
            # Save the figure as an image
            fig.savefig(save_path)
            return None
        return fig2img(fig)
    finally:
        # pyplot keeps figures registered until they are closed. Closing here
        # covers the in-memory branch and any rendering error, both of which
        # previously left the figure open.
        plt.close(fig)
40
+
41
def label_fn(x):
    """Return the name of the directory that directly contains ``x``.

    Args:
        x: Path-like object exposing ``.parent.name``.
    """
    containing_dir = x.parent
    return containing_dir.name
42
+
43
# Class labels; classify_image pairs them positionally with the probabilities
# returned by the learner, so the order here matters.
categories = (
    'claps',
    'click',
    'cymbals',
    'hats_closed',
    'hats_open',
    'kicks',
    'percussion',
    'rides',
    'rimshot',
    'shakers',
    'snaps',
    'snares',
    'tambourines',
    'toms',
)

# NOTE(review): the .pkl export is presumably pickle-based, so only load
# model files from a trusted source - confirm against the fastai docs.
learn = load_learner('sample-classifier-model-01.pkl')
47
+
48
def classify_image(audio):
    """Classify an audio file by running the learner on its spectrogram image.

    Args:
        audio: Audio file path, forwarded to ``audio_to_spectogram``.

    Returns:
        Dict mapping each entry of ``categories`` to the corresponding
        probability from ``learn.predict``, converted to ``float``.
    """
    import tempfile

    # Each call renders into its own temporary directory. A fixed "spect.png"
    # in the working directory would be shared by every request, so two
    # concurrent calls could overwrite each other's image before prediction.
    with tempfile.TemporaryDirectory() as tmp_dir:
        spect_path = os.path.join(tmp_dir, "spect.png")
        audio_to_spectogram(audio, save_path=spect_path)
        # Predict while the file still exists; only the probabilities are used.
        _, _, probs = learn.predict(PILImage.create(spect_path))
    return dict(zip(categories, map(float, probs)))
52
+
53
# Gradio wiring: the audio input is delivered to classify_image as a file
# path, and the returned label-to-probability dict is shown in a Label output.
audio = gr.components.Audio(type='filepath')
# Use the same gr.components namespace as the input component above instead
# of the legacy gr.outputs one.
label = gr.components.Label()

iface = gr.Interface(fn=classify_image, inputs=audio, outputs=label)
iface.launch(inline=False)
sample-classifier-model-01.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40115239f2d8b54fff2bed8903f79a4f22a2c39a19bd2fcd79e1eec3ed381d8c
3
+ size 87521410