# demo_apps/app.py
import gradio as gr
import librosa
import matplotlib.pyplot as plt
from radar_chart import radar_factory
from keras.models import load_model
import os
import numpy as np
model = load_model(os.path.join("model", "Emotion_Voice_Detection_Model_tuned_2.h5"))
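# The model is assumed (inferred from the preprocessing in make_predictions
# below) to take a single 40-dimensional MFCC feature vector shaped
# (batch, 40, 1) and to output one probability per word class.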
def convert_class_to_emotion(pred):
    """
    Convert an integer class prediction into a human-readable label.

    The underlying network was originally an eight-class emotion detector
    (neutral, calm, happy, sad, angry, fearful, disgust, surprised) and has
    been retuned to predict four Indonesian word classes.
    """
    label_conversion = {0: 'kata_sifat',       # adjective
                        1: 'kata_benda',       # noun
                        2: 'kata_kerja',       # verb
                        3: 'kata_keterangan'}  # adverb
    return label_conversion[int(pred)]
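# For example, a predicted class index of 1 maps to the noun label:
#   convert_class_to_emotion(1)  # -> 'kata_benda'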
def make_predictions(file, micro=None):
    """
    Load the selected audio source, extract MFCC features, run the model and
    build the two output figures.
    """
    if file is not None and micro is None:
        input_audio = file
    elif file is None and micro is not None:
        input_audio = micro
    else:
        print("Both or neither input was provided; falling back to the file input")
        input_audio = file
    data, sampling_rate = librosa.load(input_audio)
    print(f"The sampling rate is {sampling_rate}")
    # Average the 40 MFCC coefficients over time to obtain one fixed-size
    # feature vector per clip, then reshape it to (batch, n_mfcc, 1).
    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
    x = np.expand_dims(mfccs, axis=1)
    x = np.expand_dims(x, axis=0)
    # Run the model once and reuse the probabilities for both the predicted
    # label and the radar chart.
    probabilities = model.predict(x)[0]
    prediction = np.argmax(probabilities)
    # Radar chart of the class probabilities, one spoke per class. The spoke
    # labels follow the class order defined in convert_class_to_emotion.
    N = 4
    theta = radar_factory(N, frame='polygon')
    spoke_labels = np.array(['kata_sifat',
                             'kata_benda',
                             'kata_kerja',
                             'kata_keterangan'])
    fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
                                  subplot_kw=dict(projection='radar'))
    axs.plot(theta, probabilities, color="b")
    axs.fill(theta, probabilities, alpha=0.3)
    axs.set_varlabels(spoke_labels)
    # Waveform plot of the raw audio signal.
    fig = plt.figure()
    plt.plot(data, alpha=0.8)
    plt.xlabel("time (samples)")
    plt.ylabel("amplitude")
    return convert_class_to_emotion(prediction), fig, fig_radar
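# A quick local smoke test, assuming a WAV file exists at the hypothetical
# path "examples/sample.wav":
#   label, fig, fig_radar = make_predictions("examples/sample.wav")
#   print(label)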
# Build the Gradio interface: two optional audio inputs (file upload or
# microphone), plus a text label and two matplotlib figures as outputs.
iface = gr.Interface(
    fn=make_predictions,
    title="Identify the emotion of a chunk of audio speech",
    description="A simple interface to perform emotion recognition from an audio file.",
    article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
    inputs=[gr.Audio(source="upload", type="filepath", label="File"),
            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
    # Each example fills only the file input; the microphone input stays empty.
    examples=[[os.path.join("examples", filename), None] for filename in os.listdir("examples")],
    outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
)
iface.launch(debug=True)
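# Running this script starts a local Gradio server (http://127.0.0.1:7860 by
# default); debug=True keeps the process attached and prints errors to the
# console.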