|
import gradio |
|
import torchaudio |
|
from fastai.vision.all import * |
|
from fastai.learner import load_learner |
|
from torchvision.utils import save_image |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Download the exported learner file from the Hugging Face Hub, then load it
# with fastai. Both steps run at import time.
_model_path = hf_hub_download(
    "kurianbenoy/music_genre_classification_baseline", "model.pkl"
)
model = load_learner(_model_path)

# NOTE(review): EXAMPLES_PATH is not referenced anywhere else in the visible
# code; confirm whether the example clips are meant to be resolved against it.
EXAMPLES_PATH = Path("./examples")

# Class vocabulary of the learner's DataLoaders; predict() indexes it in
# parallel with the probabilities returned by the model.
labels = model.dls.vocab
|
|
|
# Long-form text rendered with the demo. Read it with an explicit encoding so
# that decoding does not depend on the platform's locale default.
with open("article.md", encoding="utf-8") as f:
    article = f.read()

# Keyword arguments unpacked into gradio.Interface(...) when the demo is built.
interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Example clips are listed as bare file names, i.e. relative to the
    # working directory the app is started from.
    "examples": ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    "allow_flagging": "never",
}
|
|
|
|
|
# FFT size; also used as the window length of the mel spectrogram transform.
N_FFT = 2048
# Hop between successive analysis windows, in samples (half of N_FFT).
HOP_LEN = 1024


def create_spectrogram(filename):
    """Return a mel spectrogram of an audio file, in dB, scaled to [0, 1].

    The file is loaded with ``torchaudio.load``, converted to a 224-band mel
    power spectrogram, averaged over the first axis of the transform output
    (the channel axis of the loaded waveform, per the torchaudio docs),
    converted to decibels and min-max normalised.

    Args:
        filename: Path of the audio file to load.

    Returns:
        The normalised spectrogram tensor. Its minimum is 0 and, unless the
        spectrogram is constant, its maximum is 1. A constant spectrogram
        (for example digital silence) is returned as all zeros.
    """
    audio, sr = torchaudio.load(filename)
    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Shift so the smallest value is 0, then scale so the largest is 1.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # After the shift a constant spectrogram has peak == 0; dividing by it
    # would turn every value into NaN, so leave the zeros untouched instead.
    if peak > 0:
        specgram = specgram / peak

    return specgram
|
|
|
|
|
def create_image(filename, dest="temp.png"):
    """Write the spectrogram of an audio file to disk as an image.

    Args:
        filename: Path of the audio file to convert.
        dest: Where to write the image. Defaults to ``"temp.png"`` in the
            current working directory, which is the file that
            ``end2endpipeline`` reads back.

    Returns:
        None. The image file is the only output.
    """
    specgram = create_spectrogram(filename)
    save_image(specgram, dest)
|
|
|
|
|
|
|
def predict(img):
    """Classify a spectrogram image and return a probability for every label.

    Args:
        img: Anything accepted by ``PILImage.create`` (the pipeline passes a
            file path).

    Returns:
        A dict mapping each entry of the module-level ``labels`` vocabulary
        to its predicted probability as a Python float.
    """
    image = PILImage.create(img)
    # model.predict returns three values; only the probabilities are used.
    _decoded, _class_idx, probs = model.predict(image)

    scores = {}
    for position, label in enumerate(labels):
        scores[label] = float(probs[position])
    return scores
|
|
|
|
|
def end2endpipeline(filename):
    """Run the full audio-to-genre pipeline for one audio file.

    The spectrogram image is first written to ``temp.png`` by
    ``create_image`` and then read back and classified by ``predict``.

    Args:
        filename: Path of the audio file to classify.

    Returns:
        The label-to-probability dict produced by ``predict``.
    """
    create_image(filename)
    genre_scores = predict("temp.png")
    return genre_scores
|
|
|
|
|
# Input widget: an uploaded audio clip, handed to the callback as a file path.
_audio_input = gradio.inputs.Audio(source="upload", type="filepath")
# Output widget: a label component showing the five top-scoring classes.
_label_output = gradio.outputs.Label(num_top_classes=5)

# Wire the end-to-end pipeline into a Gradio interface; title, description,
# examples and the other presentation settings come from interface_options.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=_audio_input,
    outputs=_label_output,
    **interface_options,
)

# Keyword arguments forwarded to demo.launch().
launch_options = dict(
    enable_queue=True,
    share=False,
    cache_examples=True,
)

demo.launch(**launch_options)
|
|