Upload 6 files
- assets/ser.ipynb +284 -0
- assets/ser_model.pickle +3 -0
- index.html +92 -18
- server.py +65 -0
- style.css +70 -18
- util.py +43 -0
assets/ser.ipynb
ADDED
@@ -0,0 +1,284 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "c8f31382-77ac-47f8-bd3a-1c805b2d3e75",
   "metadata": {},
   "outputs": [],
   "source": [
    "import librosa\n",
    "import soundfile\n",
    "import os, glob, pickle\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "b0510279-2195-4784-a52b-20b6c18e216c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract features (mfcc, chroma, mel) from a sound file\n",
    "def extract_feature(file_name, mfcc, chroma, mel):\n",
    "    with soundfile.SoundFile(file_name) as sound_file:\n",
    "        X = sound_file.read(dtype=\"float32\")\n",
    "        sample_rate=sound_file.samplerate\n",
    "        if chroma:\n",
    "            stft=np.abs(librosa.stft(X))\n",
    "        result=np.array([])\n",
    "        if mfcc:\n",
    "            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)\n",
    "            result=np.hstack((result, mfccs))\n",
    "        if chroma:\n",
    "            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)\n",
    "            result=np.hstack((result, chroma))\n",
    "        if mel:\n",
    "            mel=np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)\n",
    "            result=np.hstack((result, mel))\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "d84a7785-e5b3-44ee-b484-a45fe61aa2af",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Emotions in the RAVDESS dataset\n",
    "emotions={\n",
    "  '01':'neutral',\n",
    "  '02':'calm',\n",
    "  '03':'happy',\n",
    "  '04':'sad',\n",
    "  '05':'angry',\n",
    "  '06':'fearful',\n",
    "  '07':'disgust',\n",
    "  '08':'surprised'\n",
    "}\n",
    "\n",
    "# Emotions to observe\n",
    "observed_emotions=['calm', 'happy', 'fearful', 'disgust']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "5ebdbf11-1c7d-4bbf-9ff7-41b04cfbc902",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Load the data and extract features for each sound file\n",
    "def load_data(test_size=0.2):\n",
    "    x,y=[],[]\n",
    "    for file in glob.glob(\"C:\\\\Users\\\\Abhay\\\\Downloads\\\\dataset\\\\Actor_*\\\\*.wav\"):\n",
    "        file_name = os.path.basename(file)\n",
    "        emotion=emotions[file_name.split(\"-\")[2]]\n",
    "        if emotion not in observed_emotions:\n",
    "            continue\n",
    "        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)\n",
    "        x.append(feature)\n",
    "        y.append(emotion)\n",
    "    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "17e9421d-b474-4fc8-8321-435a2093c0cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Split the dataset\n",
    "x_train,x_test,y_train,y_test = load_data(test_size=0.25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "eb1d0e4a-1766-4d3d-85ea-f69d88b6a007",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(576, 192)\n"
     ]
    }
   ],
   "source": [
    "#Get the shape of the training and testing datasets\n",
    "print((x_train.shape[0], x_test.shape[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "5a765afc-663d-48c0-9dbd-d58caf9069cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Features extracted: 180\n"
     ]
    }
   ],
   "source": [
    "# Get the number of features extracted\n",
    "print(f'Features extracted: {x_train.shape[1]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "29c258f3-dbb6-4214-aea4-590487f5c68a",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Initialize the Multi Layer Perceptron Classifier\n",
    "model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "76939a33-c7fb-4ee3-b25f-af609dd3a5ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),\n",
       "              learning_rate='adaptive', max_iter=500)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MLPClassifier</label><div class=\"sk-toggleable__content\"><pre>MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),\n",
       "              learning_rate='adaptive', max_iter=500)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),\n",
       "              learning_rate='adaptive', max_iter=500)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Train the model\n",
    "model.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "41976825-55d6-46eb-a389-eba2cacc540d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict for the test set\n",
    "y_pred=model.predict(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "2401ce73-6268-4751-9d68-3aa15f870f99",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 66.67%\n"
     ]
    }
   ],
   "source": [
    "# Calculate the accuracy of our model\n",
    "accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)\n",
    "\n",
    "# Print the accuracy\n",
    "print(\"Accuracy: {:.2f}%\".format(accuracy*100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "568ff907-2558-4f2b-bf4a-f10b889233cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('ser_model.pickle','wb') as f:\n",
    "    pickle.dump(model,f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "19d865fc-504e-40dd-9822-a49ae0f3e568",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('ser_model.pickle','rb') as f:\n",
    "    mod = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "7cecff7e-060b-461d-a597-2b11ee731d97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6666666666666666"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mod.score(x_test,y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd184951-3715-4256-86ae-20d00a17a57b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
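The notebook's "Features extracted: 180" output is the sum of the three feature blocks: 40 MFCC coefficients, 12 chroma bins, and 128 mel bands. A minimal sketch of that layout, assuming librosa's defaults of 12 chroma bins and 128 mel bands alongside the notebook's n_mfcc=40 (the one-second noise clip is only a stand-in, not part of the project):

# Sketch: how the 180-dimensional feature vector is composed.
import numpy as np
import librosa

sr = 22050
y = np.random.randn(sr).astype(np.float32)  # stand-in audio, one second of noise

mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)   # 40 values
chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)      # 12 values (librosa default)
mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)      # 128 values (librosa default)

print(np.hstack((mfccs, chroma, mel)).shape)  # (180,), matching "Features extracted: 180"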
assets/ser_model.pickle
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:682d2b3749ad1132a8e11d21cd7c77479dedbcae9367478cf8f634c6bdbe37c8
size 1338552
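Because the model pickle is tracked with Git LFS, a clone made without `git lfs pull` contains only the small text pointer above rather than the 1.3 MB binary, and `pickle.load` will then fail. A hypothetical guard one could run before loading (not part of this commit):

# Hypothetical check: an un-fetched LFS file still starts with the pointer text.
from pathlib import Path

pickle_path = Path("assets/ser_model.pickle")
if pickle_path.read_bytes().startswith(b"version https://git-lfs"):
    raise RuntimeError("ser_model.pickle is still an LFS pointer; run `git lfs pull` first")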
index.html
CHANGED
@@ -1,19 +1,93 @@
 <!DOCTYPE html>
-<html>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-</html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <link rel="stylesheet" href="style.css">
+    <title>Practicum-SER</title>
+</head>
+
+<body>
+    <div class="cont">
+        <div class="body">
+            <h1> Speech Emotion Detection </h1>
+            <h2> Select a File from list to Predict Emotion</h2>
+            <form id="get_emotion" method="post">
+                <select name="file_name" id="file-sel" required>
+                    <option value=""> -- Select file for Emotion Detection -- </option>
+                </select>
+                <div class="audio" id="audio"></div>
+                <button type="submit">Predict Emotion</button>
+                <textarea name="emotion" id="emotion" cols="5" rows="1" disabled placeholder="Predicted Emotion"></textarea>
+            </form>
+        </div>
+    </div>
+
+    <script src="https://code.jquery.com/jquery-3.6.4.min.js"
+        integrity="sha256-oP6HI9z1XaZNBrJURtCoUT5SUnxFr8s3BzRl+cbzUq8=" crossorigin="anonymous"></script>
+    <script>
+        // Populate the <select> with file names fetched from the backend.
+        function setFileNames(arr) {
+            const file = document.getElementById("file-sel");
+            arr.forEach(element => {
+                const opt_list = `<option value=${element}> ${element}</option>`;
+                file.insertAdjacentHTML('beforeend', opt_list);
+            });
+        }
+
+        fetch("http://127.0.0.1:8000/files")
+            .then((response) => response.json())
+            .then(setFileNames);
+
+        // Submit the selected file name as form data and show the predicted emotion.
+        document.forms['get_emotion'].addEventListener('submit', (event) => {
+            event.preventDefault();
+            fetch('http://127.0.0.1:8000/get_emotion', {
+                method: 'POST',
+                body: new URLSearchParams(new FormData(event.target))
+            }).then((response) => {
+                if (!response.ok) {
+                    throw new Error(`HTTP error! Status: ${response.status}`);
+                }
+                return response.text();
+            }).then((body) => {
+                document.getElementById("emotion").value = ` ${body.toString()}`;
+            }).catch((error) => {
+                // TODO handle error
+                console.log(error);
+            });
+        });
+
+        // POST JSON and resolve with the response body as a Blob (the audio file).
+        async function postData(url, data) {
+            const response = await fetch(url, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                body: JSON.stringify(data)
+            });
+            return response.blob();
+        }
+
+        // Load the chosen clip into an <audio> player.
+        function load_sound_file() {
+            postData('http://127.0.0.1:8000/get_file', { "file_name": this.value })
+                .then(blob => {
+                    const audioURL = URL.createObjectURL(blob);
+                    const audioElement = document.createElement('audio');
+                    audioElement.src = audioURL;
+                    audioElement.controls = true;
+                    const ad = document.getElementById("audio");
+                    ad.innerHTML = "";
+                    ad.appendChild(audioElement);
+                });
+        }
+
+        document.getElementById("file-sel").addEventListener('change', load_sound_file);
+    </script>
+</body>
+
+</html>
server.py
ADDED
@@ -0,0 +1,65 @@
import requests
import util
import glob
import json
import os.path
import pickle

from flask import Flask, request, jsonify, send_file, send_from_directory

app = Flask(__name__)

# Origins allowed to call this API (the front end served by Live Server).
white = ['http://127.0.0.1:5500']


@app.after_request
def add_cors_headers(response):
    if request.referrer:
        # Strip the trailing slash before checking against the whitelist.
        r = request.referrer[:-1]
        if r in white:
            response.headers.add('Access-Control-Allow-Origin', r)
            response.headers.add('Access-Control-Allow-Credentials', 'true')
            response.headers.add('Access-Control-Allow-Headers', 'Content-Type')
            response.headers.add('Access-Control-Allow-Headers', 'Cache-Control')
            response.headers.add('Access-Control-Allow-Headers', 'X-Requested-With')
            response.headers.add('Access-Control-Allow-Headers', 'Authorization')
            response.headers.add('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, DELETE')
    return response


@app.route('/h')
def hello():
    return "hi"


@app.route('/files')
def get_file_names():
    # List every clip as "Actor_XX/<file>.wav" relative to the dataset folder.
    folder_path = util.__folder_path__ + "Actor_*\\*.wav"
    file_names = []
    for file in glob.glob(folder_path):
        file_name = os.path.basename(file)
        file_path = os.path.dirname(file).split('\\')[-1]
        rel_file_name = file_path + "/" + file_name
        file_names.append(rel_file_name)
    return file_names


@app.route('/get_file', methods=['POST'])
def get_file():
    # Stream the requested .wav file back to the browser.
    file_name = request.json['file_name']
    file_path = util.__folder_path__ + file_name
    return send_file(file_path)


@app.route('/get_emotion', methods=['POST'])
def get_emotion():
    file_name = request.form['file_name']
    file_path = util.__folder_path__ + file_name
    util.load_model()
    emotion = util.predict_emotion(file_path)
    return emotion


if __name__ == "__main__":
    print("starting")
    # app.debug = True
    app.run(port=8000, debug=True)
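A small end-to-end check of the three routes, assuming the server is running locally on port 8000, the dataset folder exists on disk, and a Flask version recent enough (2.2+) to JSON-encode the list returned by /files. This script is illustrative only, not part of the commit:

# Hypothetical smoke test; request shapes follow the handlers in server.py.
import requests

base = "http://127.0.0.1:8000"

files = requests.get(f"{base}/files").json()   # e.g. ["Actor_01/03-01-01-01-01-01-01.wav", ...]
name = files[0]

audio = requests.post(f"{base}/get_file", json={"file_name": name})      # /get_file reads request.json
print(len(audio.content), "bytes of audio")

emotion = requests.post(f"{base}/get_emotion", data={"file_name": name})  # /get_emotion reads request.form
print("predicted:", emotion.text)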
style.css
CHANGED
@@ -1,28 +1,80 @@
 body {
-
-
+    font-family: Arial, sans-serif;
+    margin: 0;
+    padding: 0;
+    text-align: center;
+    height: 100%;
 }
 
-
-
-
+form {
+    display: flex;
+    flex-direction: column;
+    align-items: left;
+    margin-top: 50px;
+}
+
+h2 {
+    text-align: left;
+}
+
+select,
+textarea,
+button {
+    font-size: 18px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+
+select,
+button {
+    width: 400px;
+}
+
+textarea {
+    width: 40%;
+    resize: none;
 }
 
-
-
-
-
-
+button {
+    background-color: rgba(26, 25, 25, 0.811);
+    color: white;
+    border: none;
+    cursor: pointer;
 }
 
-
-
-    margin: 0 auto;
-    padding: 16px;
-    border: 1px solid lightgray;
-    border-radius: 16px;
+button:hover {
+    background-color: black;
 }
 
-.
-
+.audio {
+    /* display: flex;
+    justify-content: center; */
+    text-align: left;
+    margin-left: 0%;
+    padding-left: 0%;
 }
+
+h1 {
+    font-size: 3rem;
+    text-align: center;
+    margin-top: 0;
+}
+
+option {
+    margin: 5px 0;
+    padding: 5px 10px !important;
+}
+
+.body {
+    margin: auto;
+    align-self: center !important;
+    margin: auto;
+    width: fit-content;
+}
+
+.cont {
+
+    display: flex;
+    justify-content: center;
+    height: 100vh;
+}
util.py
ADDED
@@ -0,0 +1,43 @@
import pickle
import numpy as np
import librosa
import soundfile

__model__ = None
__folder_path__ = "C:\\Users\\Abhay\\Downloads\\dataset\\"


def load_model():
    with open("assets/ser_model.pickle", 'rb') as f:
        model = pickle.load(f)
    global __model__
    __model__ = model


# Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
    return result


def predict_emotion(file):
    feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
    emo = __model__.predict([feature])
    return emo[0]


if __name__ == "__main__":
    # Quick manual test; guarded so it runs only when util.py is executed
    # directly, not every time server.py imports this module.
    load_model()
    print(predict_emotion("C:\\Users\\Abhay\\Downloads\\dataset\\Actor_18\\03-01-03-01-02-02-18.wav"))
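Because predict_emotion assumes load_model() has already populated __model__, and the pickled MLP was fit on 180-dimensional vectors, a caller can sanity-check both before predicting. A sketch under those assumptions; "clip.wav" is a placeholder path, not a file from the dataset:

# Hypothetical caller-side check, not part of util.py.
import util

util.load_model()
feature = util.extract_feature("clip.wav", mfcc=True, chroma=True, mel=True)  # placeholder .wav
assert feature.shape == (180,), f"unexpected feature length {feature.shape}"
print(util.__model__.predict([feature])[0])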
|