Spaces:
Runtime error
Runtime error
File size: 2,578 Bytes
c165076 ac1ef50 c165076 cef4bbb c165076 ac1ef50 b7e6938 c165076 bbdbdcc f21fcbc c165076 bbdbdcc c165076 a83c171 c165076 f21fcbc c165076 f21fcbc b8c0bc8 c165076 d2ef383 c165076 a83c171 b7e6938 c165076 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download
# Download the exported fastai learner from the Hugging Face Hub and unpickle it.
# NOTE(review): load_learner unpickles arbitrary code — acceptable only because
# the Hub repo is trusted; do not point this at untrusted checkpoints.
model = load_learner(
    hf_hub_download("kurianbenoy/music_genre_classification_baseline", "model.pkl")
)
# Local folder with sample audio clips (presumably where the relative example
# filenames below live — TODO confirm; this constant is not used elsewhere in view).
EXAMPLES_PATH = Path("./examples")
# Genre class names come from the DataLoaders vocabulary baked into the learner.
labels = model.dls.vocab
# Long-form markdown rendered under the gradio interface.
with open("article.md") as f:
    article = f.read()
# Keyword arguments splatted into gradio.Interface(...) further down.
interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio from validation file
    "examples": ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"],
    "allow_flagging": "never"
}
## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
N_FFT = 2048  # FFT window size in samples
HOP_LEN = 1024  # hop between successive STFT frames in samples


def create_spectrogram(filename):
    """Return the mel spectrogram of an audio file, min-max normalised to [0, 1].

    Parameters
    ----------
    filename : str or Path
        Path to an audio file readable by ``torchaudio.load``.

    Returns
    -------
    torch.Tensor
        2-D tensor (n_mels=224 x frames) of dB-scaled, [0, 1]-normalised values.
    """
    audio, sr = torchaudio.load(filename)
    # Power mel spectrogram at the file's native sample rate, averaged across
    # channels so stereo and mono inputs both yield a single 2-D image.
    specgram = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )(audio).mean(axis=0)
    # Convert power to decibels, then min-max normalise so the image is in [0, 1].
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # Guard: a constant (e.g. silent) spectrogram has peak == 0 after the min
    # subtraction; dividing would produce NaNs and poison the model input.
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename):
    """Render the mel spectrogram of *filename* to disk as ``temp.png``.

    Returns the ``Path`` of the written image so callers can use it directly
    instead of hard-coding the filename. (Previously ``dest`` was computed
    but never used — the literal string was passed to ``save_image``.)
    """
    specgram = create_spectrogram(filename)
    dest = Path("temp.png")
    save_image(specgram, str(dest))
    return dest
# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    """Classify a spectrogram image, returning {genre label: probability}."""
    image = PILImage.create(img)
    _pred, _pred_w_idx, probs = model.predict(image)
    # gradio doesn't support tensors, so cast each probability to a plain float
    return {label: float(prob) for label, prob in zip(labels, probs)}
def end2endpipeline(filename):
    """Full inference pipeline: audio file -> spectrogram PNG -> genre probabilities."""
    # Materialise the spectrogram image to disk, then classify the rendered file.
    create_image(filename)
    return predict("temp.png")
# Wire the end-to-end pipeline into a gradio Interface.
# NOTE(review): the `gradio.inputs` / `gradio.outputs` namespaces and the
# `source=` kwarg were deprecated in gradio 3.x and removed in 4.x (modern
# equivalents: gradio.Audio(sources=["upload"], ...) and gradio.Label) —
# confirm the Space's pinned gradio version still supports this API.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)
launch_options = {
    # NOTE(review): `enable_queue` was removed in gradio 4 (queuing is on by
    # default there) — verify against the installed version.
    "enable_queue": True,
    "share": False,
    # thanks Alex for pointing this option to cache examples
    "cache_examples": True,
}
demo.launch(**launch_options)
|