{
2 |
"cells": [
3 |
4 |
"cell_type": "code",
5 |
"execution_count": 52,
6 |
"id": "c8f31382-77ac-47f8-bd3a-1c805b2d3e75",
7 |
"metadata": {},
8 |
"outputs": [],
9 |
"source": [
10 |
"import librosa\n",
11 |
"import soundfile\n",
12 |
"import os, glob, pickle\n",
13 |
"import numpy as np\n",
14 |
"from sklearn.model_selection import train_test_split\n",
15 |
"from sklearn.neural_network import MLPClassifier\n",
16 |
"from sklearn.metrics import accuracy_score"
17 |
18 |
19 |
20 |
"cell_type": "code",
21 |
"execution_count": 57,
22 |
"id": "b0510279-2195-4784-a52b-20b6c18e216c",
23 |
"metadata": {},
24 |
"outputs": [],
25 |
"source": [
26 |
"# Extract features (mfcc, chroma, mel) from a sound file\n",
27 |
"def extract_feature(file_name, mfcc, chroma, mel):\n",
"    \"\"\"Return a 1-D feature vector for one audio file.\n",
"\n",
"    file_name: path of an audio file opened with soundfile.SoundFile.\n",
"    mfcc, chroma, mel: flags selecting which time-averaged feature groups\n",
"    (40 MFCCs, chroma of the STFT magnitude, mel spectrogram) are appended,\n",
"    in that order, to the returned array.\n",
"    \"\"\"\n",
"    with soundfile.SoundFile(file_name) as sound_file:\n",
"        X = sound_file.read(dtype=\"float32\")\n",
"        sample_rate = sound_file.samplerate\n",
"    # Multi-channel files are read as (frames, channels); librosa wants the\n",
"    # sample axis last, so average the channels down to a mono signal.\n",
"    if X.ndim > 1:\n",
"        X = X.mean(axis=1)\n",
"    result = np.array([])\n",
"    if mfcc:\n",
"        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)\n",
"        result = np.hstack((result, mfccs))\n",
"    if chroma:\n",
"        # The STFT magnitude is only needed for the chroma features.\n",
"        stft = np.abs(librosa.stft(X))\n",
"        chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)\n",
"        result = np.hstack((result, chroma_mean))\n",
"    if mel:\n",
"        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)\n",
"        result = np.hstack((result, mel_mean))\n",
"    return result"
44 |
45 |
46 |
47 |
"cell_type": "code",
48 |
"execution_count": 58,
49 |
"id": "d84a7785-e5b3-44ee-b484-a45fe61aa2af",
50 |
"metadata": {},
51 |
"outputs": [],
52 |
"source": [
53 |
"#Emotions in the RAVDESS dataset\n",
54 |
55 |
" '01':'neutral',\n",
56 |
" '02':'calm',\n",
57 |
" '03':'happy',\n",
58 |
" '04':'sad',\n",
59 |
" '05':'angry',\n",
60 |
" '06':'fearful',\n",
61 |
" '07':'disgust',\n",
62 |
" '08':'surprised'\n",
63 |
64 |
65 |
"# Emotions to observe\n",
66 |
"observed_emotions=['calm', 'happy', 'fearful', 'disgust']"
67 |
68 |
69 |
70 |
"cell_type": "code",
71 |
"execution_count": 59,
72 |
"id": "5ebdbf11-1c7d-4bbf-9ff7-41b04cfbc902",
73 |
"metadata": {},
74 |
"outputs": [],
75 |
"source": [
76 |
"#Load the data and extract features for each sound file\n",
77 |
"def load_data(test_size=0.2, data_glob=None):\n",
"    \"\"\"Extract features for every observed-emotion file and split them.\n",
"\n",
"    test_size: fraction of samples held out, passed to train_test_split.\n",
"    data_glob: glob pattern of the .wav files; None keeps the original\n",
"    hard-coded dataset location.\n",
"    Returns the x_train, x_test, y_train, y_test sequence produced by\n",
"    train_test_split with random_state=9.\n",
"    Raises ValueError when no matching file has an observed emotion.\n",
"    \"\"\"\n",
"    if data_glob is None:\n",
"        data_glob = \"C:\\\\Users\\\\Abhay\\\\Downloads\\\\dataset\\\\Actor_*\\\\*.wav\"\n",
"    x, y = [], []\n",
"    for file in glob.glob(data_glob):\n",
"        parts = os.path.basename(file).split(\"-\")\n",
"        # The third dash-separated field of the file name is the emotion code;\n",
"        # files that do not follow that naming are skipped instead of crashing.\n",
"        emotion = emotions.get(parts[2]) if len(parts) > 2 else None\n",
"        if emotion not in observed_emotions:\n",
"            continue\n",
"        x.append(extract_feature(file, mfcc=True, chroma=True, mel=True))\n",
"        y.append(emotion)\n",
"    if not x:\n",
"        raise ValueError(f\"no usable audio files matched {data_glob!r}\")\n",
"    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)"
88 |
89 |
90 |
91 |
"cell_type": "code",
92 |
"execution_count": 61,
93 |
"id": "17e9421d-b474-4fc8-8321-435a2093c0cb",
94 |
"metadata": {},
95 |
"outputs": [],
96 |
"source": [
97 |
"#Split the dataset\n",
98 |
"x_train,x_test,y_train,y_test = load_data(test_size=0.25)"
99 |
100 |
101 |
102 |
"cell_type": "code",
103 |
"execution_count": 62,
104 |
"id": "eb1d0e4a-1766-4d3d-85ea-f69d88b6a007",
105 |
"metadata": {},
106 |
"outputs": [
107 |
108 |
"name": "stdout",
109 |
"output_type": "stream",
110 |
"text": [
111 |
(576, 192)
112 |
113 |
114 |
115 |
"source": [
116 |
"#Get the shape of the training and testing datasets\n",
117 |
"print((x_train.shape[0], x_test.shape[0]))"
118 |
119 |
120 |
121 |
"cell_type": "code",
122 |
"execution_count": 63,
123 |
"id": "5a765afc-663d-48c0-9dbd-d58caf9069cc",
124 |
"metadata": {},
125 |
"outputs": [
126 |
127 |
"name": "stdout",
128 |
"output_type": "stream",
129 |
"text": [
130 |
Features extracted: 180
131 |
132 |
133 |
134 |
"source": [
135 |
"# Get the number of features extracted\n",
136 |
"print(f'Features extracted: {x_train.shape[1]}')"
137 |
138 |
139 |
140 |
"cell_type": "code",
141 |
"execution_count": 64,
142 |
"id": "29c258f3-dbb6-4214-aea4-590487f5c68a",
143 |
"metadata": {},
144 |
"outputs": [],
145 |
"source": [
146 |
"#Initialize the Multi Layer Perceptron Classifier\n",
147 |
"model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)"
148 |
149 |
150 |
151 |
"cell_type": "code",
152 |
"execution_count": 65,
153 |
"id": "76939a33-c7fb-4ee3-b25f-af609dd3a5ce",
154 |
"metadata": {},
155 |
"outputs": [
156 |
157 |
"data": {
158 |
"text/html": [
159 |
MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
 learning_rate='adaptive', max_iter=500)
160 |
" learning_rate='adaptive', max_iter=500)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MLPClassifier</label><div class=\"sk-toggleable__content\"><pre>MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),\n",
161 |
" learning_rate='adaptive', max_iter=500)</pre></div></div></div></div></div>"
162 |
163 |
"text/plain": [
164 |
"MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),\n",
165 |
" learning_rate='adaptive', max_iter=500)"
166 |
167 |
168 |
"execution_count": 65,
169 |
"metadata": {},
170 |
"output_type": "execute_result"
171 |
172 |
173 |
"source": [
174 |
"#Train the model\n",
175 |
176 |
177 |
178 |
179 |
"cell_type": "code",
180 |
"execution_count": 66,
181 |
"id": "41976825-55d6-46eb-a389-eba2cacc540d",
182 |
"metadata": {},
183 |
"outputs": [],
184 |
"source": [
185 |
"# Predict for the test set\n",
186 |
187 |
188 |
189 |
190 |
"cell_type": "code",
191 |
"execution_count": 67,
192 |
"id": "2401ce73-6268-4751-9d68-3aa15f870f99",
193 |
"metadata": {},
194 |
"outputs": [
195 |
196 |
"name": "stdout",
197 |
"output_type": "stream",
198 |
"text": [
199 |
Accuracy: 66.67%
200 |
201 |
202 |
203 |
"source": [
204 |
"# Calculate the accuracy of our model\n",
205 |
"accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)\n",
206 |
207 |
"# Print the accuracy\n",
208 |
"print(\"Accuracy: {:.2f}%\".format(accuracy*100))"
209 |
210 |
211 |
212 |
"cell_type": "code",
213 |
"execution_count": 68,
214 |
"id": "568ff907-2558-4f2b-bf4a-f10b889233cc",
215 |
"metadata": {},
216 |
"outputs": [],
217 |
"source": [
218 |
"with open('ser_model.pickle','wb') as f:\n",
219 |
" pickle.dump(model,f)"
220 |
221 |
222 |
223 |
"cell_type": "code",
224 |
"execution_count": 69,
225 |
"id": "19d865fc-504e-40dd-9822-a49ae0f3e568",
226 |
"metadata": {},
227 |
"outputs": [],
228 |
"source": [
229 |
"with open('ser_model.pickle','rb') as f:\n",
230 |
" mod = pickle.load(f)"
231 |
232 |
233 |
234 |
"cell_type": "code",
235 |
"execution_count": 71,
236 |
"id": "7cecff7e-060b-461d-a597-2b11ee731d97",
237 |
"metadata": {},
238 |
"outputs": [
239 |
240 |
"data": {
241 |
"text/plain": [
242 |
243 |
244 |
245 |
"execution_count": 71,
246 |
"metadata": {},
247 |
"output_type": "execute_result"
248 |
249 |
250 |
"source": [
251 |
252 |
253 |
254 |
255 |
"cell_type": "code",
256 |
"execution_count": null,
257 |
"id": "bd184951-3715-4256-86ae-20d00a17a57b",
258 |
"metadata": {},
259 |
"outputs": [],
260 |
"source": []
261 |
262 |
263 |
"metadata": {
264 |
"kernelspec": {
265 |
"display_name": "Python 3 (ipykernel)",
266 |
"language": "python",
267 |
"name": "python3"
268 |
269 |
"language_info": {
270 |
"codemirror_mode": {
271 |
"name": "ipython",
272 |
"version": 3
273 |
274 |
"file_extension": ".py",
275 |
"mimetype": "text/x-python",
276 |
"name": "python",
277 |
"nbconvert_exporter": "python",
278 |
"pygments_lexer": "ipython3",
279 |
"version": "3.8.16"
280 |
281 |
282 |
"nbformat": 4,
283 |
"nbformat_minor": 5
284 |
1 |
version https://git-lfs.github.com/spec/v1
2 |
oid sha256:682d2b3749ad1132a8e11d21cd7c77479dedbcae9367478cf8f634c6bdbe37c8
3 |
size 1338552
1 |
<!DOCTYPE html>
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
1 |
<!DOCTYPE html>
2 |
<html lang="en">
3 |
4 |
5 |
<meta charset="UTF-8">
6 |
<meta http-equiv="X-UA-Compatible" content="IE=edge">
7 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
8 |
<link rel="stylesheet" href="style.css">
9 |
10 |
11 |
12 |
13 |
<div class="cont">
14 |
<div class="body">
15 |
<h1> Speech Emotion Detection </h1>
16 |
<h2> Select a File from list to Predict Emotion</h2>
17 |
<form id="get_emotion" method="post">
18 |
<select name="file_name" id="file-sel" required>
19 |
<option value=""> -- Select file for Emotion Detection -- </option>
</select>
20 |
21 |
<div class="audio" id="audio"></div>
22 |
<button type="submit">Predict Emotion</button>
23 |
<textarea name="emotion" id="emotion" cols="5" rows="1" disabled placeholder="Predicted Emotion"></textarea>
</form>
</div>
</div>
24 |
25 |
26 |
27 |
28 |
29 |
<script src="https://code.jquery.com/jquery-3.6.4.min.js"
30 |
integrity="sha256-oP6HI9z1XaZNBrJURtCoUT5SUnxFr8s3BzRl+cbzUq8=" crossorigin="anonymous"></script>
<script>
31 |
32 |
33 |
34 |
/**
 * Append one <option> per file name to the "file-sel" dropdown.
 *
 * Options are built through the DOM instead of an HTML string, so names
 * containing spaces, quotes or markup cannot break the attribute or inject
 * elements.
 *
 * @param {string[]} arr - File names to offer; each is used as both the
 *     option value and its label.
 */
function setFileNames(arr) {
    const select = document.getElementById("file-sel");
    arr.forEach((name) => {
        const option = document.createElement('option');
        option.value = name;
        // Keep the leading space the original label carried.
        option.textContent = ` ${name}`;
        select.appendChild(option);
    });
}
39 |
40 |
41 |
42 |
43 |
.then((response) => response.json())
44 |
45 |
46 |
document.forms['get_emotion'].addEventListener('submit', (event) => {
47 |
48 |
fetch('', {
49 |
method: 'POST',
50 |
body: new URLSearchParams(new FormData(event.target))
}).then((response) => {
51 |
}).then((response) => {
52 |
if (!response.ok) {
53 |
throw new Error(`HTTP error! Status: ${response.status}`);
}
54 |
55 |
return response.text();
56 |
}).then((body) => {
57 |
document.getElementById("emotion").innerText = ` ${body.toString()}`;
58 |
59 |
}).catch((error) => {
60 |
// TODO handle error
61 |
62 |
63 |
64 |
65 |
/**
 * POST `data` as JSON to `url` and resolve with the response body as a Blob.
 *
 * @param {string} url - Endpoint to call.
 * @param {*} data - Value serialized with JSON.stringify as the request body.
 * @returns {Promise<Blob>} Body of a successful response.
 * @throws {Error} When the server answers with a non-2xx status, so an error
 *     page is never handed to the caller as if it were audio data.
 */
async function postData(url , data) {
    const response = await fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(data)
    });
    // Same failure convention as the form submit handler on this page.
    if (!response.ok) {
        throw new Error(`HTTP error! Status: ${response.status}`);
    }
    return response.blob();
}
74 |
75 |
76 |
function load_sound_file(child){
77 |
postData('', { "file_name": this.value })
.then(blob => {
78 |
.then(blob => {
79 |
const audioURL = URL.createObjectURL(blob);
80 |
const audioElement = document.createElement('audio');
81 |
audioElement.src = audioURL;
82 |
audioElement.controls = true;
83 |
ad= document.getElementById("audio");
84 |
ad.innerHTML = "";
85 |
86 |
87 |
88 |
89 |
document.getElementById("file-sel").addEventListener('change', load_sound_file);
</script>
</body>
</html>
90 |
91 |
92 |
93 |
1 |
import requests
2 |
import util
3 |
import glob
4 |
import json
5 |
import os.path
6 |
import pickle
7 |
8 |
from flask import Flask, request, jsonify, send_file, send_from_directory
9 |
app = Flask(__name__)
10 |
11 |
white = ['']
12 |
13 |
def add_cors_headers(response):
14 |
15 |
r = request.referrer[:-1]
16 |
if r in white:
17 |
response.headers.add('Access-Control-Allow-Origin', r)
18 |
response.headers.add('Access-Control-Allow-Credentials', 'true')
19 |
response.headers.add('Access-Control-Allow-Headers', 'Content-Type')
20 |
response.headers.add('Access-Control-Allow-Headers', 'Cache-Control')
21 |
response.headers.add('Access-Control-Allow-Headers', 'X-Requested-With')
22 |
response.headers.add('Access-Control-Allow-Headers', 'Authorization')
23 |
response.headers.add('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, DELETE')
return response
24 |
return response
25 |
26 |
27 |
28 |
def hello():
    """Return the fixed greeting string "hi"."""
    greeting = "hi"
    return greeting
30 |
31 |
32 |
33 |
def get_file_names():
34 |
folder_path = util.__folder_path__ + "Actor_*\\*.wav"
35 |
file_names = []
36 |
for file in glob.glob(folder_path):
37 |
file_name = os.path.basename(file)
38 |
file_path = os.path.dirname(file).split('\\')[-1]
39 |
rel_file_name = file_path+"/"+file_name
40 |
41 |
return file_names
42 |
43 |
44 |
@app.route('/get_file', methods=['POST'])
def get_file():
    """Send the requested audio file from the dataset folder.

    Expects a JSON body of the form {"file_name": "<path relative to the
    dataset folder>"}.

    Returns:
        The file as a response, or a JSON error with status 400 when the
        body is missing, is not a JSON object, or has no usable "file_name".
    """
    payload = request.get_json(silent=True)
    file_name = payload.get('file_name') if isinstance(payload, dict) else None
    if not isinstance(file_name, str) or not file_name:
        return jsonify(error="file_name is required"), 400
    # file_name comes from the client: send_from_directory joins it safely and
    # answers 404 for paths that would escape the dataset folder ("../..").
    return send_from_directory(util.__folder_path__, file_name)
49 |
50 |
51 |
@app.route('/get_emotion', methods=['POST'])
def get_emotion():
    """Predict the emotion of a dataset audio file chosen in the form.

    Reads the "file_name" form field, resolves it inside the dataset folder
    and returns whatever util.predict_emotion yields for that file.

    Returns:
        The predicted label, or a plain-text error with status 400 when the
        requested path resolves outside the dataset folder.
    """
    file_name = request.form['file_name']
    base_dir = os.path.realpath(util.__folder_path__)
    file_path = os.path.realpath(os.path.join(base_dir, file_name))
    # file_name is client-controlled: reject anything that escapes base_dir.
    try:
        common = os.path.commonpath([base_dir, file_path])
    except ValueError:
        # Raised e.g. when the two paths live on different drives.
        common = None
    if common is None or os.path.normcase(common) != os.path.normcase(base_dir):
        return "invalid file_name", 400

    emotion = util.predict_emotion(file_path)
    return emotion
58 |
59 |
60 |
if __name__ == "__main__":
61 |
62 |
# app.debug = True
63 |
64 |
65 |
1 |
body {
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
margin: 0 auto;
21 |
padding: 16px;
22 |
border: 1px solid lightgray;
23 |
border-radius: 16px;
24 |
25 |
26 |
27 |
28 |
1 |
body {
2 |
font-family: Arial, sans-serif;
3 |
margin: 0;
4 |
padding: 0;
5 |
text-align: center;
6 |
height: 100%;
7 |
8 |
9 |
form {
10 |
display: flex;
11 |
flex-direction: column;
12 |
align-items: left;
13 |
margin-top: 50px;
14 |
15 |
16 |
h2 {
17 |
text-align: left;
18 |
19 |
20 |
21 |
22 |
button {
23 |
font-size: 18px;
24 |
padding: 10px;
25 |
margin-bottom: 20px;
26 |
27 |
28 |
29 |
button {
30 |
width: 400px;
31 |
32 |
33 |
textarea {
34 |
width: 40%;
35 |
resize: none;
36 |
37 |
38 |
button {
39 |
background-color: rgba(26, 25, 25, 0.811);
40 |
color: white;
41 |
border: none;
42 |
cursor: pointer;
43 |
44 |
45 |
button:hover {
46 |
background-color: black;
47 |
48 |
49 |
.audio {
50 |
/* display: flex;
51 |
justify-content: center; */
52 |
text-align: left;
53 |
margin-left: 0%;
54 |
padding-left: 0%;
55 |
56 |
57 |
h1 {
58 |
font-size: 3rem;
59 |
text-align: center;
60 |
margin-top: 0;
61 |
62 |
63 |
option {
64 |
margin: 5px 0;
65 |
padding: 5px 10px !important;
66 |
67 |
68 |
.body {
69 |
margin: auto;
70 |
align-self: center !important;
71 |
margin: auto;
72 |
width: fit-content;
73 |
74 |
75 |
.cont {
76 |
77 |
display: flex;
78 |
justify-content: center;
79 |
height: 100vh;
80 |
1 |
import pickle
2 |
import numpy as np
3 |
import librosa
4 |
import soundfile
5 |
6 |
__model__ = None
7 |
__folder_path__ = "C:\\Users\\Abhay\\Downloads\\dataset\\"
8 |
9 |
10 |
def load_model(model_path="assets/ser_model.pickle"):
    """Load the pickled classifier and cache it in the module-level ``__model__``.

    Args:
        model_path: Path of the pickle file. The default keeps the original
            location, which is resolved against the current working directory.

    Returns:
        The unpickled object, which is also stored in ``__model__``.

    Raises:
        OSError: If the file cannot be opened.
    """
    global __model__
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at model files from a trusted source.
    with open(model_path, 'rb') as f:
        __model__ = pickle.load(f)
    return __model__
15 |
16 |
17 |
# Extract features (mfcc, chroma, mel) from a sound file
18 |
def extract_feature(file_name, mfcc, chroma, mel):
    """Return a 1-D feature vector for one audio file.

    Args:
        file_name: Path of an audio file opened with ``soundfile.SoundFile``.
        mfcc: When truthy, append the time-averaged 40 MFCC coefficients.
        chroma: When truthy, append the time-averaged chroma features
            computed from the STFT magnitude.
        mel: When truthy, append the time-averaged mel spectrogram.

    Returns:
        A NumPy array holding the selected feature groups concatenated in the
        order mfcc, chroma, mel (empty when every flag is falsy).
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    # Multi-channel files are read as (frames, channels); librosa wants the
    # sample axis last, so average the channels down to a mono signal.
    if X.ndim > 1:
        X = X.mean(axis=1)
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        # The STFT magnitude is only needed for the chroma features.
        stft = np.abs(librosa.stft(X))
        chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma_mean))
    if mel:
        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_mean))
    return result
35 |
36 |
def predict_emotion(file):
    """Predict the emotion label for one audio file.

    Args:
        file: Path of the audio file, forwarded to ``extract_feature`` with
            all three feature groups enabled.

    Returns:
        The first (only) element of the model's prediction for that file.
    """
    # __model__ stays None until load_model() has run; load it on first use
    # instead of failing with an AttributeError on None.
    if __model__ is None:
        load_model()
    feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
    emo = __model__.predict([feature])
    return emo[0]
40 |
41 |
42 |
43 |