Spaces:
Runtime error
Runtime error
File size: 2,578 Bytes
c165076 ac1ef50 c165076 cef4bbb c165076 ac1ef50 b7e6938 c165076 bbdbdcc f21fcbc c165076 bbdbdcc c165076 a83c171 c165076 f21fcbc c165076 f21fcbc b8c0bc8 c165076 d2ef383 c165076 a83c171 b7e6938 c165076 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download
# Download the exported fastai learner from the Hugging Face Hub and unpickle it.
# NOTE(review): load_learner unpickles arbitrary code — acceptable only because
# the Hub repo is trusted; do not point this at untrusted checkpoints.
model = load_learner(
    hf_hub_download("kurianbenoy/music_genre_classification_baseline", "model.pkl")
)
# Local folder with sample audio clips (presumably where the relative example
# filenames below live — TODO confirm; this constant is not used elsewhere in view).
EXAMPLES_PATH = Path("./examples")
# Genre class names come from the DataLoaders vocabulary baked into the learner.
labels = model.dls.vocab
# Long-form markdown rendered under the gradio interface.
with open("article.md") as f:
    article = f.read()
# Keyword arguments splatted into gradio.Interface(...) further down.
interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio from validation file
    "examples": ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    "allow_flagging": "never"
}
## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
N_FFT = 2048  # FFT window size in samples
HOP_LEN = 1024  # hop between successive STFT frames in samples


def create_spectrogram(filename):
    """Return the mel spectrogram of an audio file, min-max normalised to [0, 1].

    Parameters
    ----------
    filename : str or Path
        Path to an audio file readable by ``torchaudio.load``.

    Returns
    -------
    torch.Tensor
        2-D tensor (n_mels=224 x frames) of dB-scaled, [0, 1]-normalised values.
    """
    audio, sr = torchaudio.load(filename)
    # Power mel spectrogram at the file's native sample rate, averaged across
    # channels so stereo and mono inputs both yield a single 2-D image.
    specgram = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )(audio).mean(axis=0)
    # Convert power to decibels, then min-max normalise so the image is in [0, 1].
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # Guard: a constant (e.g. silent) spectrogram has peak == 0 after the min
    # subtraction; dividing would produce NaNs and poison the model input.
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename):
    """Render the mel spectrogram of *filename* to disk as ``temp.png``.

    Returns the ``Path`` of the written image so callers can use it directly
    instead of hard-coding the filename. (Previously ``dest`` was computed
    but never used — the literal string was passed to ``save_image``.)
    """
    specgram = create_spectrogram(filename)
    dest = Path("temp.png")
    save_image(specgram, str(dest))
    return dest
# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    """Classify a spectrogram image, returning {genre label: probability}."""
    image = PILImage.create(img)
    _pred, _pred_w_idx, probs = model.predict(image)
    # gradio doesn't support tensors, so cast each probability to a plain float
    return {label: float(prob) for label, prob in zip(labels, probs)}
def end2endpipeline(filename):
    """Full inference pipeline: audio file -> spectrogram PNG -> genre probabilities."""
    # Materialise the spectrogram image to disk, then classify the rendered file.
    create_image(filename)
    return predict("temp.png")
# Wire the end-to-end pipeline into a gradio Interface.
# NOTE(review): the `gradio.inputs` / `gradio.outputs` namespaces and the
# `source=` kwarg were deprecated in gradio 3.x and removed in 4.x (modern
# equivalents: gradio.Audio(sources=["upload"], ...) and gradio.Label) —
# confirm the Space's pinned gradio version still supports this API.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)
launch_options = {
    # NOTE(review): `enable_queue` was removed in gradio 4 (queuing is on by
    # default there) — verify against the installed version.
    "enable_queue": True,
    "share": False,
    # thanks Alex for pointing this option to cache examples
    "cache_examples": True,
}
demo.launch(**launch_options)
|