audioclassification-duplicated

Sleeping

App Files Files Community

audioclassification-duplicated / app.py

pruth23

Duplicate from kurianbenoy/audioclassification

64df49c about 2 years ago

raw

history blame contribute delete

2.58 kB

	import gradio
	import torchaudio
	from fastai.vision.all import *
	from fastai.learner import load_learner
	from torchvision.utils import save_image
	from huggingface_hub import hf_hub_download


	# Download the exported learner from the Hugging Face Hub, then load it.
	# NOTE(review): "model.pkl" is presumably a pickle, and loading a pickle can
	# execute code from the file -- confirm the hub repository is trusted.
	_model_file = hf_hub_download(
	    "kurianbenoy/music_genre_classification_baseline",
	    "model.pkl",
	)
	model = load_learner(_model_file)

	# Vocabulary of the learner's data loaders; predict() indexes it in
	# parallel with the predicted probabilities.
	labels = model.dls.vocab
	# Path to the ./examples directory (not referenced elsewhere in this file).
	EXAMPLES_PATH = Path("./examples")

	# Text of article.md, passed to the interface under the "article" option.
	# Decode explicitly as UTF-8 so the result does not depend on the host's
	# locale-specific default encoding.
	with open("article.md", encoding="utf-8") as f:
	    article = f.read()

	# Keyword arguments unpacked into gradio.Interface(...) further down.
	interface_options = {
	    "title": "Music Genre Classification",
	    # Adjacent literals are concatenated into one description string.
	    "description": (
	        "A simple baseline model for classifying music genres with fast.ai on "
	        "[Kaggle competition data]"
	        "(https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)"
	    ),
	    "article": article,
	    "interpretation": "default",
	    "layout": "horizontal",
	    # Audio from validation file
	    "examples": [
	        "000003.ogg",
	        "000032.ogg",
	        "000038.ogg",
	        "000050.ogg",
	        "000103.ogg",
	    ],
	    "allow_flagging": "never",
	}

	## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
	# FFT size; create_spectrogram() also passes it as the window length.
	N_FFT = 2048
	# Hop length passed to the mel spectrogram transform in create_spectrogram().
	HOP_LEN = 1024


	def create_spectrogram(filename):
	    """Load an audio file and return its mel spectrogram scaled to [0, 1].

	    The waveform is turned into a mel spectrogram, averaged over axis 0 of
	    the result (presumably the channel axis -- TODO confirm), converted to
	    decibels and then min-max normalised.

	    Args:
	        filename: Audio file path, passed straight to ``torchaudio.load``.

	    Returns:
	        The normalised spectrogram tensor. A constant spectrogram (for
	        example from silent audio) yields all zeros instead of NaNs.
	    """
	    audio, sr = torchaudio.load(filename)
	    to_mel = torchaudio.transforms.MelSpectrogram(
	        sample_rate=sr,
	        n_fft=N_FFT,
	        win_length=N_FFT,
	        hop_length=HOP_LEN,
	        center=True,
	        pad_mode="reflect",
	        power=2.0,
	        norm="slaney",
	        onesided=True,
	        n_mels=224,
	        mel_scale="htk",
	    )
	    specgram = to_mel(audio).mean(axis=0)
	    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

	    # Min-max normalise: shift the minimum to 0, then scale the maximum to 1.
	    specgram = specgram - specgram.min()
	    peak = specgram.max()
	    # When every value is identical the shifted maximum is 0; dividing would
	    # turn the whole spectrogram into NaN (0 / 0), so skip the scaling.
	    if peak > 0:
	        specgram = specgram / peak

	    return specgram


	def create_image(filename, dest="temp.png"):
	    """Write the spectrogram of an audio file to an image file.

	    Args:
	        filename: Audio file path, forwarded to ``create_spectrogram``.
	        dest: Where the image is written. Defaults to ``"temp.png"``, the
	            location this function has always written to, so existing
	            callers that pass only ``filename`` are unaffected.

	    Returns:
	        ``dest``, so a caller can hand the written path on without
	        repeating the literal.
	    """
	    specgram = create_spectrogram(filename)
	    save_image(specgram, dest)
	    return dest


	# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
	def predict(img):
	    """Classify an image and return a ``{label: probability}`` dict.

	    Args:
	        img: Anything accepted by ``PILImage.create``.

	    Returns:
	        A dict with one entry per item of ``labels``; each value is the
	        matching element of the model's probabilities as a plain float.
	    """
	    image = PILImage.create(img)
	    # model.predict returns three values; only the last one is needed here.
	    _, _, probs = model.predict(image)
	    # gradio doesn't support tensors, so every probability becomes a float
	    return {name: float(probs[idx]) for idx, name in enumerate(labels)}


	def end2endpipeline(filename):
	    """Run the whole pipeline: audio file -> spectrogram image -> prediction.

	    Args:
	        filename: Audio file path, forwarded to ``create_image``.

	    Returns:
	        Whatever ``predict`` returns for the rendered spectrogram image.
	    """
	    # create_image() leaves the rendered spectrogram at "temp.png".
	    create_image(filename)
	    spectrogram_png = "temp.png"
	    return predict(spectrogram_png)


	# Input: an uploaded audio file, handed to the function as a file path.
	# Output: a label component limited to the five top classes.
	# NOTE(review): the gradio.inputs / gradio.outputs namespaces depend on the
	# installed gradio version -- confirm against the pinned release.
	audio_input = gradio.inputs.Audio(source="upload", type="filepath")
	label_output = gradio.outputs.Label(num_top_classes=5)

	demo = gradio.Interface(
	    fn=end2endpipeline,
	    inputs=audio_input,
	    outputs=label_output,
	    **interface_options,
	)

	# Keyword arguments unpacked into demo.launch() below.
	# NOTE(review): which keywords launch() accepts depends on the installed
	# gradio version -- confirm against the pinned release.
	launch_options = {
	    "share": False,
	    "enable_queue": True,
	    # thanks Alex for pointing this option to cache examples
	    "cache_examples": True,
	}

	demo.launch(**launch_options)