|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio |
|
|
|
from fastai.vision.all import * |
|
from fastai.data.all import * |
|
from pathlib import Path |
|
import pandas as pd |
|
from matplotlib.pyplot import specgram |
|
import librosa |
|
import librosa.display |
|
from huggingface_hub import hf_hub_download |
|
from fastai.learner import load_learner |
|
|
|
|
|
|
|
|
|
|
|
# Fetch the dataset metadata CSV and the exported fastai learner from the
# Hugging Face Hub (cached locally after the first download).
ref_file = hf_hub_download("gputrain/UrbanSound8K-model", "UrbanSound8K.csv")

model_file = hf_hub_download("gputrain/UrbanSound8K-model", "model.pkl")

df = pd.read_csv(ref_file)

# The model was trained on mel-spectrogram PNGs named after the audio
# slices, so swap the ".wav" extension for ".png".  The original code used
# a two-column row-wise apply that read only one column and called
# .strip() on the '.png' literal (a no-op); this vectorized form produces
# identical values.
df['fname'] = df['slice_file_name'].str[:-4] + '.png'

# Lookup from spectrogram image filename -> sound class label, used by
# label_func (which the exported learner needs at load time).
my_dict = dict(zip(df.fname, df['class']))
|
def label_func(f_name):
    """Return the sound class for a spectrogram file path.

    Only the basename matters: my_dict is keyed on bare .png filenames.
    """
    basename = str(f_name).split('/')[-1]
    return my_dict[basename]
|
# Load the exported fastai learner.  label_func must already be defined in
# this module: the learner was exported referencing it, and unpickling
# resolves it by name.
model = load_learner (model_file)

# Directory holding the bundled example .wav files listed (by bare name)
# in interface_options["examples"].
EXAMPLES_PATH = Path("./examples")

# Ordered class vocabulary from the learner's DataLoaders; index-aligned
# with the probability tensor that model.predict returns.
labels = model.dls.vocab
|
|
|
|
|
|
|
|
|
|
|
# Long-form markdown shown below the Gradio interface.
article = Path("article.md").read_text()
|
|
|
|
|
|
|
|
|
|
|
# Keyword arguments expanded into the gradio.Interface(...) constructor.
interface_options = {

    "title": "Urban Sound 8K Classification",

    "description": "Fast AI example of using a pre-trained Resnet34 vision model for an audio classification task on the [Urban Sounds](https://urbansounddataset.weebly.com/urbansound8k.html) dataset. ",

    # Markdown article loaded from article.md above.
    "article": article,

    "interpretation": "default",

    "layout": "horizontal",

    # Bare filenames resolved relative to the app's examples directory.
    "examples": ["dog_bark.wav", "children_playing.wav", "air_conditioner.wav", "street_music.wav", "engine_idling.wav",

                 "jackhammer.wav", "drilling.wav", "siren.wav","car_horn.wav","gun_shot.wav"],

    "allow_flagging": "never"

}
|
|
|
|
|
|
|
|
|
|
|
def convert_sounds_melspectogram(audio_file, output_path='temp.png'):
    """Render *audio_file* as a mel-spectrogram PNG saved to *output_path*.

    Matches the training pipeline's image format: a small square figure
    with axes and frame hidden, saved tightly cropped at 400 dpi.

    Args:
        audio_file: path to an audio file loadable by librosa (.wav here).
        output_path: where to write the PNG.  Defaults to 'temp.png',
            which predict() reads.

    Returns:
        None.  The spectrogram image is written as a side effect.
    """
    samples, sample_rate = librosa.load(audio_file)

    # Border-less canvas so only spectrogram pixels end up in the file.
    fig = plt.figure(figsize=[0.72, 0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)

    melS = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
    # Power -> dB relative to the spectrogram's peak, as used in training.
    librosa.display.specshow(librosa.power_to_db(melS, ref=np.max))

    plt.savefig(output_path, dpi=400, bbox_inches='tight', pad_inches=0)
    # Close all figures so repeated requests don't leak matplotlib state.
    plt.close('all')

    return None
|
|
|
|
|
|
|
|
|
|
|
def predict(image_path='temp.png'):
    """Classify the spectrogram image at *image_path*.

    Returns:
        dict mapping every class label to its predicted probability --
        the format the Gradio Label output component expects.
    """
    img = PILImage.create(image_path)
    # model.predict returns (decoded class, class index, probability tensor).
    pred, pred_idx, probs = model.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
    # NOTE: the original had an unreachable `return labels_probs` here that
    # referenced an undefined name; removed as dead code.
|
|
|
|
|
|
|
|
|
|
|
def end2endpipeline(filename):
    """Full inference pipeline: uploaded audio -> spectrogram -> label probs.

    Renders the audio file to temp.png as a side effect, then classifies
    that image.
    """
    convert_sounds_melspectogram(filename)
    predictions = predict()
    return predictions
|
|
|
|
|
|
|
|
|
|
|
# Assemble the Gradio app: audio upload in, top-10 label probabilities out.
# NOTE(review): gradio.inputs / gradio.outputs are the pre-3.x namespaces
# (removed in Gradio 3+) -- confirm the pinned gradio version before
# upgrading; the modern equivalents are gradio.Audio / gradio.Label.
demo = gradio.Interface(

    fn=end2endpipeline,

    inputs=gradio.inputs.Audio(source="upload", type="filepath"),

    outputs=gradio.outputs.Label(num_top_classes=10),

    # Title, description, article, examples, etc. (defined above).
    **interface_options,

)
|
|
|
|
|
|
|
|
|
|
|
# Start the app: queue concurrent requests; serve locally only (no public
# share link).  The single-use options dict is inlined into the call.
demo.launch(enable_queue=True, share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|