Spaces:

miracle01
/

gbengaadewuyi

Sleeping

App Files Files Community

gbengaadewuyi / app.py

miracle01

Update app.py

4d55c37 verified 11 months ago

raw

history blame contribute delete

19.3 kB

	import numpy as np
	import streamlit as st
	import cv2
	import librosa
	import librosa.display
	from tensorflow.keras.models import load_model
	import os
	from datetime import datetime
	import streamlit.components.v1 as components
	import matplotlib.pyplot as plt
	from PIL import Image
	from melspec import plot_colored_polar, plot_melspec

	# page settings
	# Kept ahead of every other Streamlit call in this file (including the
	# cached loader below) so it remains the first Streamlit command executed.
	st.set_page_config(page_title="SER web-app", page_icon=":speech_balloon:", layout="wide")


	@st.cache_resource(show_spinner=False)
	def _load_cached_model(path):
	    """Load the Keras model stored at *path*, reusing it across script reruns.

	    Streamlit re-executes this script on every widget interaction; without
	    caching the model file would be read and rebuilt each time.
	    """
	    return load_model(path)


	# load models
	model = _load_cached_model("model3.h5")

	# constants
	starttime = datetime.now()

	# Label sets; the position of a label is the index of the matching
	# model output (see how main() indexes `pred`).
	CAT6 = ['fear', 'angry', 'neutral', 'happy', 'sad', 'surprise']
	CAT7 = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
	CAT3 = ["positive", "neutral", "negative"]

	# Plot colour for every label used by the three label sets above.
	COLOR_DICT = {
	    "neutral": "grey",
	    "positive": "green",
	    "happy": "green",
	    "surprise": "orange",
	    "fear": "purple",
	    "negative": "red",
	    "angry": "red",
	    "sad": "lightblue",
	    "disgust": "brown",
	}

	# Dummy data used as default arguments of plot_polar().
	TEST_CAT = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
	TEST_PRED = np.array([.3, .3, .4, .1, .6, .9, .1])
	# COLOR = "#1f1f2e"
	# BACKGROUND_COLOR = "#d1d1e0"


	# @st.cache(hash_funcs={tf_agents.utils.object_identity.ObjectIdentityDictionary: load_model})
	# def load_model_cache(model):
	# return load_model(model)

	# @st.cache
	def log_file(txt=None):
	    """Append one line ``"<txt> - <dd/mm/YYYY HH:MM:SS>;"`` to ``log.txt``.

	    The file is opened in append mode in the current working directory and
	    is created if it does not exist yet.
	    """
	    with open("log.txt", mode="a") as log:
	        datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
	        entry = f"{txt} - {datetoday};\n"
	        log.write(entry)


	# @st.cache
	def save_audio(file):
	    """Store an uploaded file in the ``audio`` folder, replacing older files.

	    Args:
	        file: upload object exposing ``size``, ``name`` and ``getbuffer()``.

	    Returns:
	        1 if ``file.size`` exceeds 4000000 (nothing is written),
	        0 once the file has been saved.
	    """
	    if file.size > 4000000:
	        return 1

	    folder = "audio"
	    # Listing a missing directory raises, so do not rely on the caller
	    # having created it beforehand.
	    os.makedirs(folder, exist_ok=True)
	    datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

	    # clear the folder to avoid storage overload
	    for filename in os.listdir(folder):
	        file_path = os.path.join(folder, filename)
	        try:
	            if os.path.isfile(file_path) or os.path.islink(file_path):
	                os.unlink(file_path)
	        except OSError as e:
	            print('Failed to delete %s. Reason: %s' % (file_path, e))

	    # The upload log is best effort: a failure here must not block the save.
	    try:
	        with open("log0.txt", "a") as f:
	            f.write(f"{file.name} - {file.size} - {datetoday};\n")
	    except OSError as e:
	        print('Failed to write log0.txt. Reason: %s' % e)

	    # The name is supplied by the client; keep only its last component so
	    # it cannot point outside the audio folder.
	    safe_name = os.path.basename(file.name)
	    with open(os.path.join(folder, safe_name), "wb") as f:
	        f.write(file.getbuffer())
	    return 0


	# @st.cache
	def get_melspec(audio):
	    """Build a 224x224 three-channel spectrogram image for *audio*.

	    The file is loaded at 44100 Hz, transformed with an STFT and converted
	    to decibels.

	    Returns:
	        Tuple ``(rgbImage, Xdb)``: the resized uint8 image with its single
	        grey channel repeated three times, and the unresized dB matrix.
	    """
	    signal, _ = librosa.load(audio, sr=44100)
	    Xdb = librosa.amplitude_to_db(np.abs(librosa.stft(signal)))

	    # OpenCV's BGR->gray conversion needs a three-channel uint8 image.
	    stacked = np.stack([Xdb, Xdb, Xdb], axis=-1).astype(np.uint8)
	    gray = cv2.cvtColor(stacked, cv2.COLOR_BGR2GRAY)
	    gray = cv2.resize(gray, (224, 224))

	    rgbImage = np.repeat(gray[..., np.newaxis], 3, axis=-1)
	    return (rgbImage, Xdb)


	# @st.cache
	def get_mfccs(audio, limit):
	    """Return 40 MFCCs for *audio*, exactly *limit* frames wide.

	    The file is loaded with librosa's default sampling rate. Longer
	    results are truncated to the first *limit* frames; shorter ones are
	    right-padded with zeros.

	    Args:
	        audio: path (or file-like object) accepted by ``librosa.load``.
	        limit: number of frames (columns) in the returned matrix.

	    Returns:
	        Array of shape ``(40, limit)``.
	    """
	    y, sr = librosa.load(audio)
	    a = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

	    # ">=" also covers an exact-length input, which previously matched
	    # neither branch and left the result unassigned.
	    if a.shape[1] >= limit:
	        return a[:, :limit]

	    mfccs = np.zeros((a.shape[0], limit))
	    mfccs[:, :a.shape[1]] = a
	    return mfccs


	@st.cache_data
	def get_title(predictions, categories=CAT6):
	    """Build the plot title naming the highest-scoring category.

	    Args:
	        predictions: array of scores, one per entry of *categories*.
	        categories: labels indexed like *predictions*.

	    Returns:
	        ``"Detected emotion: <label> - <max score * 100, 2 decimals>%"``.
	    """
	    # Two adjacent f-strings instead of a backslash continuation: the
	    # continuation pulled the next line's indentation into the title text.
	    title = (f"Detected emotion: {categories[predictions.argmax()]} "
	             f"- {predictions.max() * 100:.2f}%")
	    return title


	@st.cache_data
	def color_dict(coldict=COLOR_DICT):
	    """Return the label-to-colour mapping to use for the plots.

	    Args:
	        coldict: mapping to return; defaults to the module-level COLOR_DICT.

	    Returns:
	        The mapping given as *coldict* (previously the argument was ignored
	        and COLOR_DICT was always returned).
	    """
	    return coldict


	@st.cache_data
	def plot_polar(fig, predictions=TEST_PRED, categories=TEST_CAT,
	               title="TEST", colors=COLOR_DICT):
	    """Draw a polar chart of *predictions* coloured by the winning category.

	    Only the face colour is set on *fig* directly; the axes and all drawing
	    calls go through pyplot (``plt.subplot`` etc.), i.e. they act on
	    pyplot's current figure.

	    Args:
	        fig: figure whose background colour is set.
	        predictions: array of scores, one per category.
	        categories: labels, used as angular tick labels.
	        title: text for the figure's super-title.
	        colors: mapping from category label to colour.
	    """
	    N = len(predictions)
	    ind = predictions.argmax()
	    accent = colors[categories[ind]]

	    # A single faint bar marks the sector of the top-scoring category.
	    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
	    radii = np.zeros_like(predictions)
	    radii[ind] = predictions[ind] * 10
	    width = np.pi / 1.8 * predictions
	    fig.set_facecolor("#d1d1e0")
	    ax = plt.subplot(111, polar="True")
	    ax.bar(theta, radii, width=width, bottom=0.0, color=accent, alpha=0.25)

	    # Repeat the first point at the end so the outline closes.
	    angles = [i / float(N) * 2 * np.pi for i in range(N)]
	    angles = angles + angles[:1]
	    data = list(predictions)
	    data = data + data[:1]
	    plt.polar(angles, data, color=accent, linewidth=2)
	    plt.fill(angles, data, facecolor=accent, alpha=0.25)

	    # Axis cosmetics.
	    ax.spines['polar'].set_color('lightgrey')
	    ax.set_theta_offset(np.pi / 3)
	    ax.set_theta_direction(-1)
	    plt.xticks(angles[:-1], categories)
	    ax.set_rlabel_position(0)
	    plt.yticks([0, .25, .5, .75, 1], color="grey", size=8)
	    plt.suptitle(title, color="darkblue", size=12)
	    plt.title(f"BIG {N}\n", color=accent)
	    plt.ylim(0, 1)
	    plt.subplots_adjust(top=0.75)


	def main():
	    """Render the app: a sidebar menu and the page selected in it.

	    "Emotion Recognition" lets the user upload an audio file (or use
	    ``test.wav``), shows its waveform, MFCCs and spectrogram, and plots the
	    predictions of the models enabled in the sidebar.
	    "Project description" shows static text and a chart built from
	    ``df_audio.csv``.
	    """
	    side_img = Image.open("images/emotion3.jpg")
	    with st.sidebar:
	        st.image(side_img, width=300)
	    st.sidebar.subheader("Menu")
	    website_menu = st.sidebar.selectbox("Menu", ("Emotion Recognition", "Project description"))
	    st.set_option('deprecation.showfileUploaderEncoding', False)

	    if website_menu == "Emotion Recognition":
	        st.sidebar.subheader("Model")
	        model_type = st.sidebar.selectbox("How would you like to predict?", ("mfccs", "mel-specs"))
	        em3 = em6 = em7 = gender = False
	        st.sidebar.subheader("Settings")

	        st.markdown("## Upload the file")
	        with st.container():
	            col1, col2, col3 = st.columns(3)
	            with col1:
	                audio_file = st.file_uploader("Upload audio file", type=['wav', 'mp3', 'ogg'])
	                if audio_file is not None:
	                    os.makedirs("audio", exist_ok=True)
	                    # The file name comes from the client; use only its
	                    # last component when building the local path.
	                    path = os.path.join("audio", os.path.basename(audio_file.name))
	                    if_save_audio = save_audio(audio_file)
	                    if if_save_audio == 1:
	                        st.warning("File size is too large. Try another file.")
	                        # Nothing was loaded: clear the marker so the
	                        # sections below do not touch wav/mfccs/Xdb.
	                        audio_file = None
	                    elif if_save_audio == 0:
	                        # display audio
	                        st.audio(audio_file, format='audio/wav', start_time=0)
	                        # extract features
	                        try:
	                            wav, sr = librosa.load(path, sr=44100)
	                            Xdb = get_melspec(path)[1]
	                            mfccs = librosa.feature.mfcc(y=wav, sr=sr)
	                        except Exception as e:
	                            audio_file = None
	                            st.error(f"Error {e} - wrong format of the file. Try another .wav file.")
	                    else:
	                        st.error("Unknown error")
	                        audio_file = None
	                else:
	                    if st.button("Try test file"):
	                        wav, sr = librosa.load("test.wav", sr=44100)
	                        Xdb = get_melspec("test.wav")[1]
	                        mfccs = librosa.feature.mfcc(y=wav, sr=sr)
	                        # display audio
	                        st.audio("test.wav", format='audio/wav', start_time=0)
	                        path = "test.wav"
	                        # The string "test" marks the bundled sample file.
	                        audio_file = "test"
	            with col2:
	                if audio_file is not None:
	                    fig = plt.figure(figsize=(10, 2))
	                    fig.set_facecolor('#d1d1e0')
	                    plt.title("Wave-form")
	                    librosa.display.waveshow(wav, sr=44100, color="blue")
	                    plt.gca().axes.get_yaxis().set_visible(False)
	                    plt.gca().axes.get_xaxis().set_visible(False)
	                    plt.gca().axes.spines["right"].set_visible(False)
	                    plt.gca().axes.spines["left"].set_visible(False)
	                    plt.gca().axes.spines["top"].set_visible(False)
	                    plt.gca().axes.spines["bottom"].set_visible(False)
	                    plt.gca().axes.set_facecolor('#d1d1e0')
	                    st.write(fig)
	                    # pyplot keeps every figure alive until it is closed;
	                    # close it so figures do not pile up across reruns.
	                    plt.close(fig)
	            with col3:
	                st.title("Convert any MP3 audio file to .WAV")
	                st.subheader("Convert audio file")

	                link = '[File conversion]' \
	                       '(https://cloudconvert.com/mp3-to-wav)'
	                st.markdown(link, unsafe_allow_html=True)

	        if model_type == "mfccs":
	            em3 = st.sidebar.checkbox("3 emotions", True)
	            em6 = st.sidebar.checkbox("6 emotions", True)
	            em7 = st.sidebar.checkbox("7 emotions")
	            gender = st.sidebar.checkbox("gender")
	        else:
	            # "mel-specs" (and anything else) has no active model.
	            st.sidebar.warning("This model is temporarily disabled")

	        if audio_file is not None:
	            st.markdown("## Analyzing...")
	            if not audio_file == "test":
	                st.sidebar.subheader("Audio file")
	                file_details = {"Filename": audio_file.name, "FileSize": audio_file.size}
	                st.sidebar.write(file_details)

	            with st.container():
	                col1, col2 = st.columns(2)
	                with col1:
	                    fig = plt.figure(figsize=(10, 2))
	                    fig.set_facecolor('#d1d1e0')
	                    plt.title("MFCCs")
	                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
	                    plt.gca().axes.get_yaxis().set_visible(False)
	                    plt.gca().axes.spines["right"].set_visible(False)
	                    plt.gca().axes.spines["left"].set_visible(False)
	                    plt.gca().axes.spines["top"].set_visible(False)
	                    st.write(fig)
	                    plt.close(fig)
	                with col2:
	                    fig2 = plt.figure(figsize=(10, 2))
	                    fig2.set_facecolor('#d1d1e0')
	                    plt.title("Mel-log-spectrogram")
	                    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
	                    plt.gca().axes.get_yaxis().set_visible(False)
	                    plt.gca().axes.spines["right"].set_visible(False)
	                    plt.gca().axes.spines["left"].set_visible(False)
	                    plt.gca().axes.spines["top"].set_visible(False)
	                    st.write(fig2)
	                    plt.close(fig2)

	            if model_type == "mfccs":
	                st.markdown("## Predictions")
	                with st.container():
	                    col1, col2, col3, col4 = st.columns(4)
	                    # The main model takes the MFCC matrix padded/cut to
	                    # the length of its last input dimension.
	                    mfccs = get_mfccs(path, model.input_shape[-1])
	                    mfccs = mfccs.reshape(1, *mfccs.shape)
	                    pred = model.predict(mfccs)[0]

	                    with col1:
	                        if em3:
	                            # Fold the six CAT6 scores into three groups;
	                            # indices follow CAT6 (0 fear, 1 angry,
	                            # 2 neutral, 3 happy, 4 sad, 5 surprise).
	                            pos = pred[3] + pred[5] * .5
	                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
	                            neg = pred[0] + pred[1] + pred[4] * .5
	                            data3 = np.array([pos, neu, neg])
	                            txt = "MFCCs\n" + get_title(data3, CAT3)
	                            fig = plt.figure(figsize=(5, 5))
	                            COLORS = color_dict(COLOR_DICT)
	                            plot_colored_polar(fig, predictions=data3, categories=CAT3,
	                                               title=txt, colors=COLORS)
	                            st.write(fig)
	                            plt.close(fig)
	                    with col2:
	                        if em6:
	                            txt = "MFCCs\n" + get_title(pred, CAT6)
	                            fig2 = plt.figure(figsize=(5, 5))
	                            COLORS = color_dict(COLOR_DICT)
	                            plot_colored_polar(fig2, predictions=pred, categories=CAT6,
	                                               title=txt, colors=COLORS)
	                            st.write(fig2)
	                            plt.close(fig2)
	                    with col3:
	                        if em7:
	                            model_ = load_model("model4.h5")
	                            # This model is fed the transposed MFCC matrix,
	                            # sized by its second-to-last input dimension.
	                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
	                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
	                            pred_ = model_.predict(mfccs_)[0]
	                            txt = "MFCCs\n" + get_title(pred_, CAT7)
	                            fig3 = plt.figure(figsize=(5, 5))
	                            COLORS = color_dict(COLOR_DICT)
	                            plot_colored_polar(fig3, predictions=pred_, categories=CAT7,
	                                               title=txt, colors=COLORS)
	                            st.write(fig3)
	                            plt.close(fig3)
	                    with col4:
	                        if gender:
	                            with st.spinner('Wait for it...'):
	                                gmodel = load_model("model_mw.h5")
	                                gmfccs = get_mfccs(path, gmodel.input_shape[-1])
	                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
	                                gpred = gmodel.predict(gmfccs)[0]
	                                gdict = [["female", "woman.png"], ["male", "man.png"]]
	                                ind = gpred.argmax()
	                                txt = "Predicted gender: " + gdict[ind][0]
	                                img = Image.open("images/" + gdict[ind][1])

	                            fig4 = plt.figure(figsize=(3, 3))
	                            fig4.set_facecolor('#d1d1e0')
	                            plt.title(txt)
	                            plt.imshow(img)
	                            plt.axis("off")
	                            st.write(fig4)
	                            plt.close(fig4)

	    elif website_menu == "Project description":
	        import pandas as pd
	        import plotly.express as px
	        st.title("Project description")
	        st.subheader("Student Details")
	        txt = """
	Student information include;
	* Student Name: Adewuyi Gbenga Kolawole
	* Student Matric No: HNDCOM/22/035
	* Session: 2022/2023
	* Class: HND 2
	* Level: 400L

	This machine learning web-application PROJECT is a partial fulfillment of requirement in Higher National Diploma (HND) computer science The Federal College of Animal Health and Production Technology FCAHPTIB, 2023.
	"""
	        st.markdown(txt, unsafe_allow_html=True)

	        st.subheader("Theory")
	        link = '[Theory behind - the project(emotion recognition) ]'
	        st.markdown(link + ":clap::clap::clap:", unsafe_allow_html=True)
	        with st.expander("See Wikipedia definition"):
	            components.iframe("https://en.wikipedia.org/wiki/Emotion_recognition",
	                              height=320, scrolling=True)

	        st.subheader("Dataset")
	        txt = """

	Datasets used in this project
	* Crowd-sourced Emotional Mutimodal Actors Dataset (Crema-D) ("https://www.kaggle.com/code/ejlok1/audio-emotion-part-1-explore-data")
	* Ryerson Audio-Visual Database of Emotional Speech and Song (Ravdess) ("https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio")
	* Surrey Audio-Visual Expressed Emotion (Savee) ("https://www.kaggle.com/datasets/ejlok1/surrey-audiovisual-expressed-emotion-savee")
	* Toronto emotional speech set (Tess)

	All datasets used can be found on Kaggle

	The above datasets was used in the model training of this software before deployment
	"""
	        st.markdown(txt, unsafe_allow_html=True)

	        df = pd.read_csv("df_audio.csv")
	        fig = px.violin(df, y="source", x="emotion4", color="actors", box=True, points="all", hover_data=df.columns)
	        st.plotly_chart(fig, use_container_width=True)


	# Build the page only when this file is executed as the main script.
	if __name__ == "__main__":
	    main()