Spaces:
Runtime error
Runtime error
import gradio | |
import torchaudio | |
from fastai.vision.all import * | |
from fastai.learner import load_learner | |
from torchvision.utils import save_image | |
from huggingface_hub import hf_hub_download | |
# Download the exported fastai learner from the Hugging Face Hub and load it.
# NOTE(security): load_learner unpickles the downloaded file, which can run
# arbitrary code -- only point this at a repository you trust.
model = load_learner(
    hf_hub_download("kurianbenoy/music_genre_classification_baseline", "model.pkl")
)

# NOTE(review): not referenced by the code visible in this file; the example
# file names below are given without this prefix -- confirm where they live.
EXAMPLES_PATH = Path("./examples")

# Class names; predict() indexes them in parallel with the model's
# probability vector.
labels = model.dls.vocab

# Long-form text for the interface, passed as the "article" option below.
# Read explicitly as UTF-8 so the result does not depend on the host's
# locale default encoding.
with open("article.md", encoding="utf-8") as f:
    article = f.read()

# Keyword arguments forwarded verbatim to gradio.Interface.
interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio from validation file
    "examples": ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    "allow_flagging": "never",
}
## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
# FFT size (also used as the window length) and hop length, in samples,
# for the mel spectrogram transform below.
N_FFT = 2048
HOP_LEN = 1024


def create_spectrogram(filename):
    """Load an audio file and return its normalised mel spectrogram in dB.

    The audio is loaded with torchaudio, converted to a 224-band mel power
    spectrogram, averaged over its first axis, converted to decibels and
    finally min-max scaled.

    Args:
        filename: Path of the audio file, in any format torchaudio.load
            accepts.

    Returns:
        A tensor whose smallest value is 0 and whose largest value is 1.
        When the dB spectrogram is constant (e.g. digital silence) the
        result is all zeros rather than NaN.
    """
    audio, sr = torchaudio.load(filename)
    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    # Average over axis 0 -- presumably the channel axis of the loaded
    # waveform, which collapses multi-channel audio to a single spectrogram.
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Min-max scale. After subtracting the minimum, a constant spectrogram
    # has a peak of 0; dividing by it would fill the image with NaN, so the
    # division is skipped in that case and the zeros are returned as-is.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename, dest="temp.png"):
    """Render the mel spectrogram of an audio file to an image file.

    Args:
        filename: Path of the audio file to convert.
        dest: Path the image is written to. Defaults to "temp.png" in the
            current working directory, which is the file end2endpipeline
            subsequently passes to predict.

    Returns:
        None. The image is written to ``dest`` as a side effect.
    """
    # NOTE(review): with the default ``dest`` every call overwrites the same
    # file; pass a unique path if calls can ever overlap.
    specgram = create_spectrogram(filename)
    save_image(specgram, dest)
# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    """Classify an image and return a probability for every label.

    Args:
        img: Anything PILImage.create accepts; the app passes the path of
            the spectrogram PNG.

    Returns:
        A dict mapping each entry of the module-level ``labels`` to the
        model's probability for it, as a plain Python float.
    """
    image = PILImage.create(img)
    # model.predict returns three values; only the probabilities are needed.
    _, _, probs = model.predict(image)
    # gradio doesn't support tensors, so converting to float
    return {label: float(prob) for label, prob in zip(labels, probs)}
def end2endpipeline(filename):
    """Run the full audio-to-genre pipeline for a single file.

    Args:
        filename: Path of the uploaded audio file.

    Returns:
        Whatever predict returns for the generated spectrogram image
        (a label-to-probability dict).
    """
    # Step 1: audio -> spectrogram image, written to "temp.png".
    create_image(filename)
    # Step 2: classify the image that was just written.
    genre_probs = predict("temp.png")
    return genre_probs
# Build the web UI: an uploaded audio file (handed over as a file path) goes
# through end2endpipeline, and the five most probable labels are displayed.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)

# Keyword arguments forwarded verbatim to demo.launch.
launch_options = dict(
    enable_queue=True,
    share=False,
    # thanks Alex for pointing this option to cache examples
    cache_examples=True,
)

demo.launch(**launch_options)