#!/usr/bin/env python
# coding: utf-8

# In[1]:

import gradio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path

from fastai.vision.all import *
from fastai.learner import load_learner
from huggingface_hub import hf_hub_download

# In[9]:

# Download the reference metadata and the trained model from the Hugging Face Hub.
ref_file = hf_hub_download("gputrain/UrbanSound8K-model", "UrbanSound8K.csv")
model_file = hf_hub_download("gputrain/UrbanSound8K-model", "model.pkl")

# In[10]:

# Map each spectrogram image name (e.g. "100032-3-0-0.png") to its class label.
df = pd.read_csv(ref_file)
df['fname'] = df['slice_file_name'].str[:-4] + '.png'
my_dict = dict(zip(df.fname, df['class']))

def label_func(f_name):
    """Label function used at training time; load_learner needs it defined here to unpickle the model."""
    return my_dict[Path(f_name).name]

model = load_learner(model_file)
EXAMPLES_PATH = Path("./examples")
labels = model.dls.vocab

# In[11]:

with open("article.md") as f:
    article = f.read()

# In[12]:

interface_options = {
    "title": "Urban Sound 8K Classification",
    "description": "fastai example of using a pre-trained ResNet34 vision model for an audio "
                   "classification task on the "
                   "[UrbanSound8K](https://urbansounddataset.weebly.com/urbansound8k.html) dataset.",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio clips taken from the validation set (assumed to live in ./examples).
    "examples": [str(EXAMPLES_PATH / fname) for fname in [
        "dog_bark.wav", "children_playing.wav", "air_conditioner.wav",
        "street_music.wav", "engine_idling.wav", "jackhammer.wav",
        "drilling.wav", "siren.wav", "car_horn.wav", "gun_shot.wav",
    ]],
    "allow_flagging": "never",
}

# In[13]:

def convert_sounds_melspectrogram(audio_file):
    """Render an audio file as a mel spectrogram image, matching the training preprocessing."""
    samples, sample_rate = librosa.load(audio_file)  # load the audio with librosa
    fig = plt.figure(figsize=[0.72, 0.72])  # same small canvas used to create the training images
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    melS = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(melS, ref=np.max))
    plt.savefig('temp.png', dpi=400, bbox_inches='tight', pad_inches=0)
    plt.close('all')

# In[14]:

def predict():
    """Classify the spectrogram written by convert_sounds_melspectrogram."""
    img = PILImage.create('temp.png')
    pred, pred_idx, probs = model.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}

# In[20]:

def end2endpipeline(filename):
    """Full pipeline: audio file -> mel spectrogram image -> class probabilities."""
    convert_sounds_melspectrogram(filename)
    return predict()

# In[16]:

# Note: gradio.inputs/gradio.outputs are the pre-3.x component namespaces;
# on Gradio 3+ use gradio.Audio(...) and gradio.Label(...) instead.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=10),
    **interface_options,
)

# In[19]:

launch_options = {
    "enable_queue": True,
    "share": False,
    # "cache_examples": True,
}
demo.launch(**launch_options)