|
|
|
|
|
|
|
# Public names exposed by a star-import of this module.
__all__ = [
    'learn',
    'iface',
    'create_spectogram',
    'spectogram_to_image',
    'predict',
]
|
|
|
|
|
import PIL |
|
import gradio as gr |
|
from fastai.vision.all import * |
|
import librosa |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import shutil |
|
import os |
|
|
|
|
|
# Load the exported learner once, at import time, from a path relative to the
# current working directory; `predict` reuses this object for every request.
# NOTE(review): `load_learner` presumably unpickles the file -- only ship a
# model.pkl that comes from a trusted source.
learn = load_learner('./model.pkl')
|
|
|
|
|
def create_spectogram(path):
    """Compute a mel spectrogram for the start of an audio file.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        The array produced by ``librosa.feature.melspectrogram`` for the
        loaded signal.
    """
    # Only the first 5 seconds are requested; no explicit sample rate is
    # given, so librosa's own default applies.
    waveform, rate = librosa.load(path, duration=5.0)
    return librosa.feature.melspectrogram(y=waveform, sr=rate)
|
|
|
def spectogram_to_image(spec):
    """Render a spectrogram as a small, axis-free RGB image.

    The spectrogram is converted to decibels relative to its own maximum,
    drawn with ``librosa.display.specshow`` on a 0.72 x 0.72 inch figure with
    axes and frame hidden, and the rendered canvas is returned as an image.

    Args:
        spec: Power spectrogram array, e.g. the output of
            ``create_spectogram``.

    Returns:
        A ``PIL.Image.Image`` in ``'RGB'`` mode containing the rendered
        figure.
    """
    # Import the submodules explicitly: a bare ``import librosa`` /
    # ``import PIL`` does not guarantee that ``librosa.display`` and
    # ``PIL.Image`` have been loaded (older librosa raises AttributeError).
    import librosa.display
    import PIL.Image

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)

        # Draw on the axes created above instead of relying on pyplot's
        # implicit "current axes" state.
        librosa.display.specshow(librosa.power_to_db(spec, ref=np.max), ax=ax)
        fig.canvas.draw()

        # ``tostring_rgb`` was deprecated in Matplotlib 3.8 and later removed;
        # ``buffer_rgba`` is the supported accessor. Converting to 'RGB' drops
        # the alpha channel and copies the pixels, so the image stays valid
        # after the figure is closed.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        return PIL.Image.fromarray(rgba).convert('RGB')
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running server leaks one figure per prediction.
        plt.close(fig)
|
|
|
def predict(audio_file_path):
    """Classify an audio file using the module-level ``learn`` model.

    The file is turned into a spectrogram, rendered to an image, and that
    image is handed to ``learn.predict``.

    Args:
        audio_file_path: Path of the audio file to classify.

    Returns:
        The first element of the 3-tuple returned by ``learn.predict``.
    """
    image = spectogram_to_image(create_spectogram(audio_file_path))
    # Only the first item of the result is used; the other two are discarded.
    label, _, _ = learn.predict(image)
    return label
|
|
|
|
|
# Gradio UI: a single audio input, delivered to `predict` as a file path,
# and a plain-text output showing the returned value.
iface = gr.Interface(
    fn=predict,
    inputs=[gr.components.Audio(type='filepath')],
    outputs="text",
)

# Start the app as soon as the module is executed.
iface.launch()
|
|