File size: 1,008 Bytes
6215757
 
 
fa38146
e78c067
 
 
 
6215757
 
e78c067
6215757
 
 
fa38146
223dc9c
6215757
 
 
 
fa38146
63cf66c
 
6215757
 
 
e78c067
 
 
 
 
 
33b0769
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from fastai.vision.all import *
import librosa
import gradio as gr
import numpy as np
import wandb
from fastai.callback.wandb import *

# Start a Weights & Biases run as soon as the module is loaded.
# NOTE(review): presumably required because the exported learner was trained
# with the W&B callback imported above -- confirm it is needed for inference.
wandb.init()

# Load the exported fastai learner from the current working directory.
# NOTE(review): load_learner unpickles the file, so only trusted .pkl files
# should ever be placed at this path.
learn = load_learner('audio_mnist_classifier_v1.pkl')
# Class labels taken from the learner's DataLoaders vocabulary; classify()
# pairs them with the predicted probabilities.
categories = learn.dls.vocab

def mel_spectrogram_tfm(file):
    """Turn an audio file into a trimmed, dB-scaled mel spectrogram.

    The clip is loaded with librosa's default settings, passed through
    ``librosa.effects.trim``, converted to a mel spectrogram
    (``n_fft=2048``, ``hop_length=512``) and finally scaled to decibels
    relative to the spectrogram's maximum value.

    Args:
        file: Audio source accepted by ``librosa.load`` (e.g. a file path).

    Returns:
        The array produced by ``librosa.amplitude_to_db`` for the clip.
    """
    samples, sample_rate = librosa.load(file)
    # trim() returns (trimmed_signal, interval_index); only the signal is used.
    trimmed = librosa.effects.trim(samples)[0]
    mel = librosa.feature.melspectrogram(
        y=trimmed, sr=sample_rate, n_fft=2048, hop_length=512
    )
    # NOTE(review): melspectrogram is called with its default settings, which
    # presumably yield a power spectrogram; power_to_db would be the usual
    # conversion. Kept as-is because the trained model presumably expects
    # images produced by exactly this transform -- confirm before changing.
    return librosa.amplitude_to_db(mel, ref=np.max)

def classify(audio):
    """Predict the class probabilities for a recorded audio clip.

    The clip is converted to a mel spectrogram, rendered to a PNG image and
    passed to the loaded learner.

    Args:
        audio: Audio source forwarded to ``mel_spectrogram_tfm`` (the Gradio
            input in this file is configured with ``type="filepath"``).

    Returns:
        dict: Maps each entry of ``categories`` to its predicted probability
        as a Python ``float``.
    """
    # Imported locally so this function does not depend on the ordering of
    # the file-level (star) imports.
    import os
    import tempfile

    spec_db = mel_spectrogram_tfm(audio)
    # Each call renders into its own temporary directory: a fixed 'tmp.png'
    # in the working directory could be overwritten by a concurrent request
    # between imsave() and predict(), and was never cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img = os.path.join(tmp_dir, 'tmp.png')
        plt.imsave(img, spec_db)
        # predict() returns (decoded label, label index, probabilities);
        # only the probabilities are needed here.
        _, _, probs = learn.predict(img)
    return dict(zip(categories, map(float, probs)))

# Gradio UI: records audio from the microphone, hands classify() the path of
# the recorded file, and shows the per-class probabilities it returns.
interface = gr.Interface(
    fn=classify,
    inputs=gr.Audio(source="microphone", type="filepath"),
    # gr.Label is the component-API equivalent of the deprecated
    # gr.outputs.Label and matches the gr.Audio usage above.
    outputs=gr.Label(num_top_classes=10),
    title='Audio MNIST Classification',
    description='Identifying digits (from 0 to 9) from an audio clip',
)

# Start the web server as soon as the script is executed.
interface.launch()