# Page header captured together with the file (not part of the program):
# pruth23's picture
# Duplicate from kurianbenoy/audioclassification
# 64df49c
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download
# Download the exported learner from the Hugging Face Hub, then load it.
_model_file = hf_hub_download(
    "kurianbenoy/music_genre_classification_baseline", "model.pkl"
)
model = load_learner(_model_file)

# Path of the examples directory (not referenced by the code visible here).
EXAMPLES_PATH = Path("./examples")

# Vocabulary of the learner's dataloaders; predict() indexes it in parallel
# with the probabilities returned by the model.
labels = model.dls.vocab
# Text passed to the interface as its "article" option. Read explicitly as
# UTF-8 so decoding does not depend on the platform's default locale encoding.
with open("article.md", encoding="utf-8") as f:
    article = f.read()
# Keyword arguments unpacked into gradio.Interface when the demo is built.
interface_options = dict(
    title="Music Genre Classification",
    description="A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    article=article,
    interpretation="default",
    layout="horizontal",
    # Audio from validation file
    examples=["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    allow_flagging="never",
)
## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
N_FFT = 2048  # used as both the FFT size and the window length below
HOP_LEN = 1024  # hop length between successive frames


def create_spectrogram(filename):
    """Return a mel spectrogram of an audio file, rescaled to start at 0.

    The audio is loaded with ``torchaudio.load``, converted to a 224-band mel
    power spectrogram at the file's own sample rate, averaged over axis 0
    (presumably the channel axis of the loaded waveform), converted to
    decibels and finally min-max normalised.

    Args:
        filename: Path (or other source accepted by ``torchaudio.load``) of
            the audio file.

    Returns:
        A tensor whose minimum is 0 and whose maximum is 1. If the decibel
        spectrogram is constant (for example, silent audio) the result is all
        zeros instead of the NaNs a division by zero would produce.
    """
    audio, sr = torchaudio.load(filename)

    # The transform depends on the file's sample rate, so it is built per call.
    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Shift so the smallest value is 0, then scale so the largest is 1.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # A constant spectrogram has peak == 0 after the shift; dividing would
    # fill the image with NaN, so leave it as all zeros in that case.
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename):
    """Write the spectrogram of an audio file to ``temp.png``.

    Args:
        filename: Audio file handed on to ``create_spectrogram``.

    Returns:
        The ``Path`` of the image that was written (``temp.png`` in the
        current working directory).
    """
    specgram = create_spectrogram(filename)
    # NOTE: the output path is fixed, so every call overwrites the previous
    # image; overlapping calls would clobber each other's file.
    dest = Path("temp.png")
    save_image(specgram, dest)
    return dest
# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    """Classify an image and return a mapping of label to probability.

    Args:
        img: Anything accepted by ``PILImage.create`` (the pipeline passes a
            file path).

    Returns:
        A dict with one entry per item in ``labels``, each value being the
        corresponding model probability as a plain float.
    """
    image = PILImage.create(img)
    _, _, probabilities = model.predict(image)
    # gradio doesn't support tensors, so converting to float
    return {
        label: float(probabilities[idx]) for idx, label in enumerate(labels)
    }
def end2endpipeline(filename):
    """Run the whole pipeline: audio file -> spectrogram image -> prediction.

    Args:
        filename: Path of the audio file to classify.

    Returns:
        The label-to-probability dict produced by ``predict``.
    """
    # create_image writes the spectrogram to temp.png; predict reads it back.
    create_image(filename)
    prediction = predict("temp.png")
    return prediction
# Input component: an uploaded audio clip, passed to the handler as a file path.
audio_input = gradio.inputs.Audio(source="upload", type="filepath")
# Output component: a label widget limited to the five top classes.
label_output = gradio.outputs.Label(num_top_classes=5)

demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=audio_input,
    outputs=label_output,
    **interface_options,
)

# Keyword arguments unpacked into demo.launch().
launch_options = dict(
    enable_queue=True,
    share=False,
    # thanks Alex for pointing this option to cache examples
    cache_examples=True,
)

demo.launch(**launch_options)