{ "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "edc5629a-00ee-47dc-a271-e42d01e4f94e", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "\u001B[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.\u001B[0m\nLooking in indexes: https://aws:****@adi-dp-pypi-232090812308.d.codeartifact.ap-southeast-1.amazonaws.com/pypi/adi-dp-pypi/simple/\nCollecting git+https://github.com/openai/whisper.git\n Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-ou0jayhp\n Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-ou0jayhp\n Resolved https://github.com/openai/whisper.git to commit e8622f9afc4eba139bf796c210f5c01081000472\n Installing build dependencies: started\n Installing build dependencies: finished with status 'done'\n Getting requirements to build wheel: started\n Getting requirements to build wheel: finished with status 'done'\n Preparing metadata (pyproject.toml): started\n Preparing metadata (pyproject.toml): finished with status 'done'\nRequirement already satisfied: numba in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (0.55.1)\nRequirement already satisfied: tqdm in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (4.64.1)\nRequirement already satisfied: triton==2.0.0 in /local_disk0/.ephemeral_nfs/envs/pythonEnv-92b71cb9-7d4b-4828-a60e-efe79e60c1e5/lib/python3.10/site-packages (from openai-whisper==20230314) (2.0.0)\nRequirement already satisfied: numpy in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (1.21.5)\nRequirement already satisfied: more-itertools in /usr/lib/python3/dist-packages (from 
openai-whisper==20230314) (8.10.0)\nRequirement already satisfied: torch in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (1.13.1+cu117)\nRequirement already satisfied: tiktoken==0.3.3 in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (0.3.3)\nRequirement already satisfied: requests>=2.26.0 in /databricks/python3/lib/python3.10/site-packages (from tiktoken==0.3.3->openai-whisper==20230314) (2.28.1)\nRequirement already satisfied: regex>=2022.1.18 in /databricks/python3/lib/python3.10/site-packages (from tiktoken==0.3.3->openai-whisper==20230314) (2022.7.9)\nRequirement already satisfied: cmake in /local_disk0/.ephemeral_nfs/envs/pythonEnv-92b71cb9-7d4b-4828-a60e-efe79e60c1e5/lib/python3.10/site-packages (from triton==2.0.0->openai-whisper==20230314) (3.27.2)\nRequirement already satisfied: filelock in /databricks/python3/lib/python3.10/site-packages (from triton==2.0.0->openai-whisper==20230314) (3.6.0)\nRequirement already satisfied: lit in /local_disk0/.ephemeral_nfs/envs/pythonEnv-92b71cb9-7d4b-4828-a60e-efe79e60c1e5/lib/python3.10/site-packages (from triton==2.0.0->openai-whisper==20230314) (16.0.6)\nRequirement already satisfied: setuptools in /databricks/python3/lib/python3.10/site-packages (from numba->openai-whisper==20230314) (63.4.1)\nRequirement already satisfied: llvmlite<0.39,>=0.38.0rc1 in /databricks/python3/lib/python3.10/site-packages (from numba->openai-whisper==20230314) (0.38.0)\nRequirement already satisfied: typing-extensions in /databricks/python3/lib/python3.10/site-packages (from torch->openai-whisper==20230314) (4.3.0)\nRequirement already satisfied: idna<4,>=2.5 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (3.3)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) 
(1.26.11)\nRequirement already satisfied: certifi>=2017.4.17 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (2022.9.14)\nRequirement already satisfied: charset-normalizer<3,>=2 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (2.0.4)\n\u001B[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.\u001B[0m\n" ] } ], "source": [ "pip install git+https://github.com/openai/whisper.git" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "875ddbae-7375-49e2-8f0c-e80135b796c2", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "import whisper" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "60477e79-5151-4316-b3e7-ad91b525cb2f", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "device(type='cuda')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import re\n", "import glob\n", "import librosa\n", "import librosa.display\n", "import IPython.display as ipd\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import numpy as np\n", "import soundfile\n", "from tqdm import tqdm\n", "from sklearn.preprocessing import StandardScaler,LabelEncoder\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import StratifiedShuffleSplit\n", "from sklearn.compose import ColumnTransformer\n", "import tensorflow\n", "from sklearn.decomposition import PCA \n", "import seaborn as sns\n", "from PIL import Image\n", "from matplotlib import cm\n", "from 
collections import Counter\n", "from torchvision import datasets, transforms, models\n", "import torch.optim as optim\n", "\n", "from sklearn.utils.multiclass import unique_labels\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split, cross_val_score\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,balanced_accuracy_score\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "# import lightgbm as lgb\n", "# import xgboost as xgb\n", "\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras.utils import to_categorical\n", "from keras.models import Model,load_model\n", "from keras.layers import Input,Dense,Flatten\n", "from keras.layers import Conv2D,MaxPooling2D,Dropout,BatchNormalization,Conv1D,MaxPooling1D\n", "from keras.models import load_model\n", "from sklearn.model_selection import StratifiedShuffleSplit\n", "import torchvision.transforms as transforms\n", "from torchvision.datasets import ImageFolder\n", "\n", "from datasets import load_dataset\n", "import os\n", "import pandas as pd\n", "#pd.set_option('display.max_colwidth', None)\n", "import logging\n", "import librosa\n", "import torch\n", "from tqdm import tqdm\n", "import numpy as np\n", "\n", "from datasets import DatasetDict, load_dataset, load_metric\n", "\n", "from transformers import (\n", " HubertForSequenceClassification,\n", " PretrainedConfig,\n", " Trainer,\n", " TrainingArguments,\n", " Wav2Vec2FeatureExtractor,\n", ")\n", "from torch.utils.data import DataLoader\n", "from torch.cuda.amp import GradScaler, autocast\n", "from transformers import AdamW\n", "from torch.nn.utils.rnn import pad_sequence\n", "import torch.nn as nn\n", "from transformers import Wav2Vec2Processor,AutoTokenizer\n", 
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "device\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "06a125f7-a71b-435c-b25a-95bb1b9aca0f", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/Actor_01/03-01-01-01-01-01-01.wav',\n", " '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/Actor_01/03-01-01-01-01-02-01.wav',\n", " '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/Actor_01/03-01-01-01-02-01-01.wav',\n", " '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/Actor_01/03-01-01-01-02-02-01.wav',\n", " '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/Actor_01/03-01-02-01-01-01-01.wav']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "directory_path = '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/'\n", "directory_path_with_forward_slashes = directory_path.replace(\"\\\\\", \"/\")\n", "\n", "tempwavfiles = sorted(glob.glob(directory_path+'/**/*.wav', recursive=True))\n", "tempwavfiles[0:5]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "6a719e56-2eed-446e-b867-5ecfc45468e5", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "## Defining a general purpose function\n", "def get_file_name(link):\n", " '''\n", " General purpose function to get absolute filename from relative path\n", " Parameters: Filename with relative path\n", " Returns: Filename\n", " '''\n", " newPath = link.replace(os.sep, '/')\n", " filename = newPath.split('/')[::-1][0]\n", " return filename" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { 
"application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "47ff51f4-b07c-49a5-8597-45345fe11a42", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Get labels from filename\n", "def get_label_from_filename(file_list):\n", " '''\n", " Function to extract label from the filename.\n", " Argument: Full file path.\n", " '''\n", " feeling_list=[]\n", " item_list = []\n", " for fullfilename in file_list:\n", " item = get_file_name(fullfilename)\n", " item_list.append(fullfilename)\n", " #print(f'item is {item}')\n", " if (item[6:-16]=='02' or item[6:-16]=='01'):\n", " feeling_list.append('calm')\n", " elif item[6:-16]=='03':\n", " feeling_list.append('happy')\n", " elif item[6:-16]=='04':\n", " feeling_list.append('sad')\n", " elif item[6:-16]=='05':\n", " feeling_list.append('angry')\n", " elif item[6:-16]=='06':\n", " feeling_list.append('fearful')\n", " elif item[6:-16]=='07':\n", " feeling_list.append('disgust')\n", " elif item[6:-16]=='08':\n", " feeling_list.append('surprised') \n", " return feeling_list,item_list" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "30d054bb-ce6e-43ca-afb8-a325400e8871", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'angry', 'calm', 'disgust', 'fearful', 'happy', 'sad', 'surprised'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(get_label_from_filename(tempwavfiles)[0])" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "afa454c9-2cb7-4aa9-806d-32cfe24b7c61", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": 
# Parse the file list once and reuse both returned lists; the original called
# get_label_from_filename(tempwavfiles) three times to build a single frame.
ravdess_emotions, ravdess_paths = get_label_from_filename(tempwavfiles)

# One row per audio file: full path, emotion label, and a constant dataset tag.
df_ravdess = pd.DataFrame({
    'Filenames': ravdess_paths,
    'Emotions': ravdess_emotions,
    'Dataset': ['RAVDESS'] * len(ravdess_emotions),
})
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESS
1/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESS
2/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESS
3/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESS
4/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESS
............
2875/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESS
2876/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESS
2877/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESS
2878/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESS
2879/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESS
\n", "

2880 rows × 3 columns

\n", "
" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/RAV... calm RAVDESS\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/RAV... calm RAVDESS\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/RAV... calm RAVDESS\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/RAV... calm RAVDESS\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/RAV... calm RAVDESS\n", "... ... ... ...\n", "2875 /dbfs/FileStore/wav-files/FinalExperiments/RAV... surprised RAVDESS\n", "2876 /dbfs/FileStore/wav-files/FinalExperiments/RAV... surprised RAVDESS\n", "2877 /dbfs/FileStore/wav-files/FinalExperiments/RAV... surprised RAVDESS\n", "2878 /dbfs/FileStore/wav-files/FinalExperiments/RAV... surprised RAVDESS\n", "2879 /dbfs/FileStore/wav-files/FinalExperiments/RAV... surprised RAVDESS\n", "\n", "[2880 rows x 3 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ravdess" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "e7fc694c-0244-46ea-8a28-20626d5497da", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "\u001B[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.\u001B[0m\nLooking in indexes: https://aws:****@adi-dp-pypi-232090812308.d.codeartifact.ap-southeast-1.amazonaws.com/pypi/adi-dp-pypi/simple/\nCollecting git+https://github.com/openai/whisper.git\n Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-9qdu9iba\n Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-9qdu9iba\n Resolved https://github.com/openai/whisper.git to commit e8622f9afc4eba139bf796c210f5c01081000472\n Installing build dependencies: started\n Installing build dependencies: 
finished with status 'done'\n Getting requirements to build wheel: started\n Getting requirements to build wheel: finished with status 'done'\n Preparing metadata (pyproject.toml): started\n Preparing metadata (pyproject.toml): finished with status 'done'\nRequirement already satisfied: more-itertools in /usr/lib/python3/dist-packages (from openai-whisper==20230314) (8.10.0)\nRequirement already satisfied: torch in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (1.13.1+cu117)\nCollecting triton==2.0.0\n Downloading https://adi-dp-pypi-232090812308.d.codeartifact.ap-southeast-1.amazonaws.com/pypi/adi-dp-pypi/simple/triton/2.0.0/triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (63.3 MB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 63.3/63.3 MB 15.7 MB/s eta 0:00:00\nRequirement already satisfied: numba in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (0.55.1)\nRequirement already satisfied: numpy in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (1.21.5)\nRequirement already satisfied: tqdm in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (4.64.1)\nRequirement already satisfied: tiktoken==0.3.3 in /databricks/python3/lib/python3.10/site-packages (from openai-whisper==20230314) (0.3.3)\nRequirement already satisfied: requests>=2.26.0 in /databricks/python3/lib/python3.10/site-packages (from tiktoken==0.3.3->openai-whisper==20230314) (2.28.1)\nRequirement already satisfied: regex>=2022.1.18 in /databricks/python3/lib/python3.10/site-packages (from tiktoken==0.3.3->openai-whisper==20230314) (2022.7.9)\nRequirement already satisfied: filelock in /databricks/python3/lib/python3.10/site-packages (from triton==2.0.0->openai-whisper==20230314) (3.6.0)\nCollecting cmake\n Downloading 
https://adi-dp-pypi-232090812308.d.codeartifact.ap-southeast-1.amazonaws.com/pypi/adi-dp-pypi/simple/cmake/3.27.2/cmake-3.27.2-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (26.1 MB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.1/26.1 MB 33.2 MB/s eta 0:00:00\nCollecting lit\n Downloading https://adi-dp-pypi-232090812308.d.codeartifact.ap-southeast-1.amazonaws.com/pypi/adi-dp-pypi/simple/lit/16.0.6/lit-16.0.6.tar.gz (153 kB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 153.7/153.7 kB 28.5 MB/s eta 0:00:00\n Installing build dependencies: started\n Installing build dependencies: finished with status 'done'\n Getting requirements to build wheel: started\n Getting requirements to build wheel: finished with status 'done'\n Installing backend dependencies: started\n Installing backend dependencies: finished with status 'done'\n Preparing metadata (pyproject.toml): started\n Preparing metadata (pyproject.toml): finished with status 'done'\nRequirement already satisfied: llvmlite<0.39,>=0.38.0rc1 in /databricks/python3/lib/python3.10/site-packages (from numba->openai-whisper==20230314) (0.38.0)\nRequirement already satisfied: setuptools in /databricks/python3/lib/python3.10/site-packages (from numba->openai-whisper==20230314) (63.4.1)\nRequirement already satisfied: typing-extensions in /databricks/python3/lib/python3.10/site-packages (from torch->openai-whisper==20230314) (4.3.0)\nRequirement already satisfied: idna<4,>=2.5 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (3.3)\nRequirement already satisfied: charset-normalizer<3,>=2 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (2.0.4)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (1.26.11)\nRequirement already satisfied: certifi>=2017.4.17 in 
/databricks/python3/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken==0.3.3->openai-whisper==20230314) (2022.9.14)\nBuilding wheels for collected packages: openai-whisper, lit\n Building wheel for openai-whisper (pyproject.toml): started\n Building wheel for openai-whisper (pyproject.toml): finished with status 'done'\n Created wheel for openai-whisper: filename=openai_whisper-20230314-py3-none-any.whl size=798395 sha256=0cb784f67ad93c8c9a11770d000a7395190446567eac4267417af48b91103c11\n Stored in directory: /tmp/pip-ephem-wheel-cache-_k32lioi/wheels/8b/6c/d0/622666868c179f156cf595c8b6f06f88bc5d80c4b31dccaa03\n Building wheel for lit (pyproject.toml): started\n Building wheel for lit (pyproject.toml): finished with status 'done'\n Created wheel for lit: filename=lit-16.0.6-py3-none-any.whl size=93589 sha256=47e926de3cf93bfce2b42fb10fb061d63e6012d0850673629b1bec50278c6585\n Stored in directory: /root/.cache/pip/wheels/d6/07/d6/1cd2e698633b5d603ab2aa3137ff42349a51edf71da7a50bb5\nSuccessfully built openai-whisper lit\nInstalling collected packages: lit, cmake, triton, openai-whisper\nSuccessfully installed cmake-3.27.2 lit-16.0.6 openai-whisper-20230314 triton-2.0.0\n\u001B[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.\u001B[0m\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "22d5d79d-efa4-4dbc-ad0d-d06610474eed", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stderr", "output_type": "stream", "text": [ "\r 0%| | 0.00/139M [00:00\n", " .ansiout {\n", " display: block;\n", " unicode-bidi: embed;\n", " white-space: pre-wrap;\n", " word-wrap: break-word;\n", " word-break: break-all;\n", " font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n", " font-size: 13px;\n", " color: #555;\n", " 
import shutil  # not among the notebook's visible imports, so bring it in here

destination_dir = '/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/files'

# Without an existing directory, shutil.copy(f, destination_dir) would write
# every source to one regular file named 'files', each overwriting the last.
os.makedirs(destination_dir, exist_ok=True)

destination_abs = os.path.abspath(destination_dir)
for f in tempwavfiles:
    # destination_dir sits inside the tree that tempwavfiles is recursively
    # globbed from, so after a first run the list also contains the copies;
    # copying one of those onto itself raises shutil.SameFileError.
    if os.path.dirname(os.path.abspath(f)) == destination_abs:
        continue
    shutil.copy(f, destination_dir)
false, "title": "" } }, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "" ] }, "metadata": { "application/vnd.databricks.v1+output": { "arguments": {}, "data": "", "errorSummary": "Cancelled", "errorTraceType": "html", "metadata": {}, "type": "ipynbError" } }, "output_type": "display_data" } ], "source": [ "df_ravdess.to_csv('/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "1f3d687e-1c98-4f80-a3c9-b7844b725878", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "df_ravdess= pd.read_csv('/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS.csv')" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "ef693eb2-8e05-4088-be22-b9f925a3f5f2", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionDataBricksPathFilenameOnly
0/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESSKids are talking by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-01-01-01-01-01.wav
1/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESSKids are talking by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-01-01-01-02-01.wav
2/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-01-01-02-01-01.wav
3/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-01-01-02-02-01.wav
4/dbfs/FileStore/wav-files/FinalExperiments/RAV...calmRAVDESSKids, talk about it./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-02-01-01-01-01.wav
.....................
1435/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-08-01-02-02-24.wav
1436/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESSKids are talking by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-08-02-01-01-24.wav
1437/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESSKids are talking by the door?/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-08-02-01-02-24.wav
1438/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-08-02-02-01-24.wav
1439/dbfs/FileStore/wav-files/FinalExperiments/RAV...surprisedRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-08-02-02-02-24.wav
\n", "

1440 rows × 6 columns

\n", "
" ], "text/plain": [ " Filenames ... FilenameOnly\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-01-01-01-01-01.wav\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-01-01-01-02-01.wav\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-01-01-02-01-01.wav\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-01-01-02-02-01.wav\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-02-01-01-01-01.wav\n", "... ... ... ...\n", "1435 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-08-01-02-02-24.wav\n", "1436 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-08-02-01-01-24.wav\n", "1437 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-08-02-01-02-24.wav\n", "1438 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-08-02-02-01-24.wav\n", "1439 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-08-02-02-02-24.wav\n", "\n", "[1440 rows x 6 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ravdess" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "52db22c3-863c-4a57-98cf-0b937253fd9e", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "/dbfs/FileStore/wav-files/FinalExperiments/FinalExperiments/Augmented-HubertModel7Epochs\n" ] }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "af814cdd4d644fdf9739414a2a7f779b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)rocessor_config.json: 0%| | 0.00/213 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionDataBricksPathFilenameOnly
940/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSKids are talking by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-01-01-16.wav
941/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSKids are talking by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-01-02-16.wav
942/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSDogs are sitting by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-02-01-16.wav
943/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSDogs are sitting by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-02-02-16.wav
944/dbfs/FileStore/wav-files/FinalExperiments/RAV...disgustRAVDESSKids are talking by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-07-01-01-01-16.wav
.....................
999/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSDogs are sitting by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-01-02-02-17.wav
1000/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSKids are talking by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-01-01-17.wav
1001/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSKids are talking by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-01-02-17.wav
1002/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSDogs are sitting by the door!/dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-02-01-17.wav
1003/dbfs/FileStore/wav-files/FinalExperiments/RAV...fearfulRAVDESSDogs are sitting by the door./dbfs/FileStore/wav-files/FinalExperiments/RAV...03-01-06-02-02-02-17.wav
\n", "

64 rows × 6 columns

\n", "" ], "text/plain": [ " Filenames ... FilenameOnly\n", "940 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-01-01-16.wav\n", "941 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-01-02-16.wav\n", "942 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-02-01-16.wav\n", "943 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-02-02-16.wav\n", "944 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-07-01-01-01-16.wav\n", "... ... ... ...\n", "999 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-01-02-02-17.wav\n", "1000 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-01-01-17.wav\n", "1001 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-01-02-17.wav\n", "1002 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-02-01-17.wav\n", "1003 /dbfs/FileStore/wav-files/FinalExperiments/RAV... ... 03-01-06-02-02-02-17.wav\n", "\n", "[64 rows x 6 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ravdess.iloc[940:1004]\n", "#/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/files/03-01-06-02-01-02-16.wav\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "533383c3-47aa-4a4b-a973-ef5f93614d58", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([ 1.4403500e-05, 2.7266444e-05, 3.2287273e-05, ...,\n", " -9.8497458e-06, 5.6316446e-07, -1.8365588e-05], dtype=float32)" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "librosa.load('/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/files/03-01-01-01-02-01-01.wav',sr=16000, mono=False)[0]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { 
"cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "b7b0c798-c99e-49eb-ac63-40343569692c", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([-5.6843419e-14, -1.7053026e-13, 0.0000000e+00, ...,\n", " -1.1841621e-09, 9.6542863e-10, 0.0000000e+00], dtype=float32)" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "librosa.load('/dbfs/FileStore/wav-files/FinalExperiments/RAVDESS/files/03-01-02-01-01-02-01.wav', sr=16000, mono=True)[0]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "2143e624-b43b-490a-82a4-5100754e5e34", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9a6bec1dd5a24a78a30f8e9a5cd0f066", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/1440 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/SAV...aSAVEE
1/dbfs/FileStore/wav-files/FinalExperiments/SAV...aSAVEE
2/dbfs/FileStore/wav-files/FinalExperiments/SAV...aSAVEE
3/dbfs/FileStore/wav-files/FinalExperiments/SAV...aSAVEE
4/dbfs/FileStore/wav-files/FinalExperiments/SAV...aSAVEE
............
475/dbfs/FileStore/wav-files/FinalExperiments/SAV...suSAVEE
476/dbfs/FileStore/wav-files/FinalExperiments/SAV...suSAVEE
477/dbfs/FileStore/wav-files/FinalExperiments/SAV...suSAVEE
478/dbfs/FileStore/wav-files/FinalExperiments/SAV...suSAVEE
479/dbfs/FileStore/wav-files/FinalExperiments/SAV...suSAVEE
\n", "

480 rows × 3 columns

\n", "" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/SAV... a SAVEE\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/SAV... a SAVEE\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/SAV... a SAVEE\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/SAV... a SAVEE\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/SAV... a SAVEE\n", ".. ... ... ...\n", "475 /dbfs/FileStore/wav-files/FinalExperiments/SAV... su SAVEE\n", "476 /dbfs/FileStore/wav-files/FinalExperiments/SAV... su SAVEE\n", "477 /dbfs/FileStore/wav-files/FinalExperiments/SAV... su SAVEE\n", "478 /dbfs/FileStore/wav-files/FinalExperiments/SAV... su SAVEE\n", "479 /dbfs/FileStore/wav-files/FinalExperiments/SAV... su SAVEE\n", "\n", "[480 rows x 3 columns]" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_SAVEE" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "eabe4277-1023-457e-9b4e-c73a75c75324", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "## Standardizing the label to be consistent across datasets!\n", "# Lookup table from dataset-specific emotion codes to the shared label names.\n", "# Each key appears exactly once; entries are grouped by the label they map to.\n", "emo_dict = {\n", "    # angry\n", "    'fru': 'angry',\n", "    'a': 'angry',\n", "    'ANG': 'angry',\n", "    'ang': 'angry',\n", "    # calm\n", "    'neu': 'calm',\n", "    'n': 'calm',\n", "    'NEU': 'calm',\n", "    'neutral': 'calm',\n", "    # happy\n", "    'exc': 'happy',\n", "    'h': 'happy',\n", "    'HAP': 'happy',\n", "    'hap': 'happy',\n", "    # sad\n", "    'sad': 'sad',\n", "    'SAD': 'sad',\n", "    'sa': 'sad',\n", "    # surprised\n", "    'sur': 'surprised',\n", "    'su': 'surprised',\n", "    'ps': 'surprised',\n", "    # fearful\n", "    'fea': 'fearful',\n", "    'fear': 'fearful',\n", "    'f': 'fearful',\n", "    'FEA': 'fearful',\n", "    # disgust\n", "    'dis': 'disgust',\n", "    'DIS': 'disgust',\n", "    'd': 'disgust',\n", "}" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 },
"inputWidgets": {}, "nuid": "a42d8b7f-bf7b-4f54-aa52-6bf11a7c1d38", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Translate each label through emo_dict; labels without an entry are kept unchanged.\n", "df_SAVEE['Emotions'] = df_SAVEE['Emotions'].map(lambda label: emo_dict.get(label, label))\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "7f608ee4-09eb-45b0-859e-13ebe0f29179", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEE
1/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEE
2/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEE
3/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEE
4/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEE
............
475/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEE
476/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEE
477/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEE
478/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEE
479/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEE
\n", "

480 rows × 3 columns

\n", "
" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/SAV... angry SAVEE\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/SAV... angry SAVEE\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/SAV... angry SAVEE\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/SAV... angry SAVEE\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/SAV... angry SAVEE\n", ".. ... ... ...\n", "475 /dbfs/FileStore/wav-files/FinalExperiments/SAV... surprised SAVEE\n", "476 /dbfs/FileStore/wav-files/FinalExperiments/SAV... surprised SAVEE\n", "477 /dbfs/FileStore/wav-files/FinalExperiments/SAV... surprised SAVEE\n", "478 /dbfs/FileStore/wav-files/FinalExperiments/SAV... surprised SAVEE\n", "479 /dbfs/FileStore/wav-files/FinalExperiments/SAV... surprised SAVEE\n", "\n", "[480 rows x 3 columns]" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_SAVEE" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "677eaa93-b32f-4022-b00c-f00f3c847586", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "df_SAVEE.to_csv('/dbfs/FileStore/wav-files/FinalExperiments/SAVEE/SAVEE_Baseline.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "96844ae9-ffd1-4149-9738-ed44b80da7a2", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'/dbfs/FileStore/wav-files/FinalExperiments/SAVEE/ALL/DC_a01.wav'" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_SAVEE.Filenames.iloc[0]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 
2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "959edaae-8195-4235-aee1-c5a3eef660d0", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stderr", "output_type": "stream", "text": [ "\r 0%| | 0/480 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionFilenameOnly
0/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEShall you dark suit in greasy wash water all ...DC_a01.wav
1/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDon't ask me to carry an oily rag like that.DC_a02.wav
2/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWill you tell me why?DC_a03.wav
3/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWho authorised your limited expense account?DC_a04.wav
4/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDestroy every file related to my audits.DC_a05.wav
..................
475/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEESalvation reconsidered.KL_su11.wav
476/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEproperly used the prison book as an excellent...KL_su12.wav
477/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEELighting windows glowed dual bright through t...KL_su13.wav
478/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEBut this doesn't distract from its merit as a...KL_su14.wav
479/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEHe further proposed grant of an unsubesified ...KL_su15.wav
\n", "

480 rows × 5 columns

\n", "" ], "text/plain": [ " Filenames ... FilenameOnly\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a01.wav\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a02.wav\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a03.wav\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a04.wav\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a05.wav\n", ".. ... ... ...\n", "475 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su11.wav\n", "476 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su12.wav\n", "477 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su13.wav\n", "478 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su14.wav\n", "479 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su15.wav\n", "\n", "[480 rows x 5 columns]" ] }, "execution_count": 158, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_SAVEE" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "c45a8b62-25e3-4c8b-ba9b-41c63f6c43ee", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "df_SAVEE.to_csv('/dbfs/FileStore/wav-files/FinalExperiments/SAVEE_Baseline.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "4aecb8d1-5bbe-4438-ad03-cb70ca111105", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionFilenameOnly
0/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEShall you dark suit in greasy wash water all ...DC_a01.wav
1/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDon't ask me to carry an oily rag like that.DC_a02.wav
2/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWill you tell me why?DC_a03.wav
3/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWho authorised your limited expense account?DC_a04.wav
4/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDestroy every file related to my audits.DC_a05.wav
..................
475/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEESalvation reconsidered.KL_su11.wav
476/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEproperly used the prison book as an excellent...KL_su12.wav
477/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEELighting windows glowed dual bright through t...KL_su13.wav
478/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEBut this doesn't distract from its merit as a...KL_su14.wav
479/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEHe further proposed grant of an unsubesified ...KL_su15.wav
\n", "

480 rows × 5 columns

\n", "
" ], "text/plain": [ " Filenames ... FilenameOnly\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a01.wav\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a02.wav\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a03.wav\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a04.wav\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a05.wav\n", ".. ... ... ...\n", "475 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su11.wav\n", "476 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su12.wav\n", "477 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su13.wav\n", "478 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su14.wav\n", "479 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su15.wav\n", "\n", "[480 rows x 5 columns]" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('/dbfs/FileStore/wav-files/FinalExperiments/SAVEE_Baseline.csv')\n", "df" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "2e8793cd-fb7a-4e6f-96b3-b263f298176a", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionFilenameOnly
0/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEShall you dark suit in greasy wash water all ...DC_a01.wav
1/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDon't ask me to carry an oily rag like that.DC_a02.wav
2/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWill you tell me why?DC_a03.wav
3/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEWho authorised your limited expense account?DC_a04.wav
4/dbfs/FileStore/wav-files/FinalExperiments/SAV...angrySAVEEDestroy every file related to my audits.DC_a05.wav
..................
475/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEESalvation reconsidered.KL_su11.wav
476/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEproperly used the prison book as an excellent...KL_su12.wav
477/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEELighting windows glowed dual bright through t...KL_su13.wav
478/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEBut this doesn't distract from its merit as a...KL_su14.wav
479/dbfs/FileStore/wav-files/FinalExperiments/SAV...surprisedSAVEEHe further proposed grant of an unsubesified ...KL_su15.wav
\n", "

480 rows × 5 columns

\n", "
" ], "text/plain": [ " Filenames ... FilenameOnly\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a01.wav\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a02.wav\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a03.wav\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a04.wav\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... DC_a05.wav\n", ".. ... ... ...\n", "475 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su11.wav\n", "476 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su12.wav\n", "477 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su13.wav\n", "478 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su14.wav\n", "479 /dbfs/FileStore/wav-files/FinalExperiments/SAV... ... KL_su15.wav\n", "\n", "[480 rows x 5 columns]" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_SAVEE" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "1e4b2eaa-19fc-4497-ade2-b4114fa56288", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "Index(['Filenames', 'Emotions', 'Dataset', 'Transcription', 'FilenameOnly'], dtype='object')\nDownloading and preparing dataset savee_benchmarking/clean to /dbfs/FileStore/wav-files/FinalExperiments/cached_emo/savee_benchmarking/clean-9b66cbcbd5068a74/0.0.0/0ec8621888fd47f93241a3889ecc6d354cf9cc628e2460507333c36c695eb8c1...\n" ] }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3b0def8309634df684c0e2514e64d099", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0 examples [00:00, ? 
examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "files is /dbfs/FileStore/wav-files/FinalExperiments\n\naudio dir is /dbfs/FileStore/wav-files/FinalExperiments/SAVEE/\n\nDataset savee_benchmarking downloaded and prepared to /dbfs/FileStore/wav-files/FinalExperiments/cached_emo/savee_benchmarking/clean-9b66cbcbd5068a74/0.0.0/0ec8621888fd47f93241a3889ecc6d354cf9cc628e2460507333c36c695eb8c1. Subsequent calls will reuse this data.\n" ] }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c6340bcbcc704ea9876e952676b7d823", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/TES...angryTESS
1/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESS
2/dbfs/FileStore/wav-files/FinalExperiments/TES...fearTESS
3/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
4/dbfs/FileStore/wav-files/FinalExperiments/TES...neutralTESS
............
2795/dbfs/FileStore/wav-files/FinalExperiments/TES...fearTESS
2796/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
2797/dbfs/FileStore/wav-files/FinalExperiments/TES...neutralTESS
2798/dbfs/FileStore/wav-files/FinalExperiments/TES...psTESS
2799/dbfs/FileStore/wav-files/FinalExperiments/TES...sadTESS
\n", "

2800 rows × 3 columns

\n", "" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/TES... angry TESS\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/TES... disgust TESS\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/TES... fear TESS\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/TES... neutral TESS\n", "... ... ... ...\n", "2795 /dbfs/FileStore/wav-files/FinalExperiments/TES... fear TESS\n", "2796 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "2797 /dbfs/FileStore/wav-files/FinalExperiments/TES... neutral TESS\n", "2798 /dbfs/FileStore/wav-files/FinalExperiments/TES... ps TESS\n", "2799 /dbfs/FileStore/wav-files/FinalExperiments/TES... sad TESS\n", "\n", "[2800 rows x 3 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_TESS" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "867e41cf-b0ef-4779-9c12-457c8993686f", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "## Standardizing the label to be consistent across datasets!\n", "# Lookup table from dataset-specific emotion codes to the shared label names.\n", "# Each key appears exactly once; entries are grouped by the label they map to.\n", "# NOTE(review): 'neutral' has no entry here, so that label is left as-is by a\n", "# dictionary lookup with fallback - confirm this is intended.\n", "emo_dict = {\n", "    # angry\n", "    'fru': 'angry',\n", "    'a': 'angry',\n", "    'ANG': 'angry',\n", "    'ang': 'angry',\n", "    # calm\n", "    'neu': 'calm',\n", "    'n': 'calm',\n", "    'NEU': 'calm',\n", "    # happy\n", "    'exc': 'happy',\n", "    'h': 'happy',\n", "    'HAP': 'happy',\n", "    'hap': 'happy',\n", "    # sad\n", "    'sad': 'sad',\n", "    'SAD': 'sad',\n", "    'sa': 'sad',\n", "    # surprised\n", "    'sur': 'surprised',\n", "    'su': 'surprised',\n", "    'ps': 'surprised',\n", "    # fearful\n", "    'fea': 'fearful',\n", "    'fear': 'fearful',\n", "    'f': 'fearful',\n", "    'FEA': 'fearful',\n", "    # disgust\n", "    'dis': 'disgust',\n", "    'DIS': 'disgust',\n", "    'd': 'disgust',\n", "}" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 },
"inputWidgets": {}, "nuid": "3c42e386-e508-4ace-aee9-edec032dc964", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Translate each label through emo_dict; labels without an entry are kept unchanged.\n", "df_TESS['Emotions'] = df_TESS['Emotions'].map(lambda label: emo_dict.get(label, label))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "7f7029d4-bdd5-4bb3-b4d1-e23dedce3097", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/TES...angryTESS
1/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESS
2/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESS
3/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
4/dbfs/FileStore/wav-files/FinalExperiments/TES...neutralTESS
............
2795/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESS
2796/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
2797/dbfs/FileStore/wav-files/FinalExperiments/TES...neutralTESS
2798/dbfs/FileStore/wav-files/FinalExperiments/TES...surprisedTESS
2799/dbfs/FileStore/wav-files/FinalExperiments/TES...sadTESS
\n", "

2800 rows × 3 columns

\n", "
" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/TES... angry TESS\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/TES... disgust TESS\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/TES... fearful TESS\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "4 /dbfs/FileStore/wav-files/FinalExperiments/TES... neutral TESS\n", "... ... ... ...\n", "2795 /dbfs/FileStore/wav-files/FinalExperiments/TES... fearful TESS\n", "2796 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "2797 /dbfs/FileStore/wav-files/FinalExperiments/TES... neutral TESS\n", "2798 /dbfs/FileStore/wav-files/FinalExperiments/TES... surprised TESS\n", "2799 /dbfs/FileStore/wav-files/FinalExperiments/TES... sad TESS\n", "\n", "[2800 rows x 3 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_TESS" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "280669bb-a3d9-4e78-9a4a-f4d5ac2bd3bb", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Filenames False\n", "Emotions False\n", "Dataset False\n", "dtype: bool" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_TESS.isnull().any()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "de5b7de5-5eb2-4df1-94ed-ccd0e345f343", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "# Drop the rows labelled 'neutral'. The explicit .copy() makes the result an\n", "# independent frame, so adding columns to it later does not raise\n", "# SettingWithCopyWarning. The original row index is kept (no reset).\n", "df_TESS = df_TESS[df_TESS.Emotions != 'neutral'].copy()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid":
"da6aaf6d-2d7b-4b2b-9370-5b1f73f98cfc", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDataset
0/dbfs/FileStore/wav-files/FinalExperiments/TES...angryTESS
1/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESS
2/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESS
3/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
5/dbfs/FileStore/wav-files/FinalExperiments/TES...surprisedTESS
............
2794/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESS
2795/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESS
2796/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESS
2798/dbfs/FileStore/wav-files/FinalExperiments/TES...surprisedTESS
2799/dbfs/FileStore/wav-files/FinalExperiments/TES...sadTESS
\n", "

2400 rows × 3 columns

\n", "
" ], "text/plain": [ " Filenames Emotions Dataset\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/TES... angry TESS\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/TES... disgust TESS\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/TES... fearful TESS\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "5 /dbfs/FileStore/wav-files/FinalExperiments/TES... surprised TESS\n", "... ... ... ...\n", "2794 /dbfs/FileStore/wav-files/FinalExperiments/TES... disgust TESS\n", "2795 /dbfs/FileStore/wav-files/FinalExperiments/TES... fearful TESS\n", "2796 /dbfs/FileStore/wav-files/FinalExperiments/TES... happy TESS\n", "2798 /dbfs/FileStore/wav-files/FinalExperiments/TES... surprised TESS\n", "2799 /dbfs/FileStore/wav-files/FinalExperiments/TES... sad TESS\n", "\n", "[2400 rows x 3 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_TESS" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "bf44ca7a-acb2-4fbd-b4b7-9b258519158d", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stderr", "output_type": "stream", "text": [ "\r 0%| | 0/2400 [00:00:3: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n df_TESS['Transcription'] = df_TESS['Filenames'].progress_apply(lambda x: speechtoText(x))\n" ] } ], "source": [ "from tqdm import tqdm\n", "tqdm.pandas()\n", "# Transcribe every file with a progress bar. assign() returns a new frame, so the\n", "# column is not written into a filtered slice (the in-place form raised\n", "# SettingWithCopyWarning).\n", "df_TESS = df_TESS.assign(Transcription=df_TESS['Filenames'].progress_apply(speechtoText))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000,
"rowLimit": 10000 }, "inputWidgets": {}, "nuid": "beca2840-15b2-441f-8b3d-f0a0a6ec6ad9", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "## Defining a general purpose function\n", "def get_file_nameonly(link):\n", " '''\n", " General purpose function to get absolute filename from relative path\n", " Parameters: Filename with relative path\n", " Returns: Filename\n", " '''\n", " newPath = link.replace(os.sep, '/')\n", " filename = newPath.split('/')[::-1][0]\n", "\n", " return filename" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "32a1f382-0df6-4858-8dab-28a37e81029e", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stderr", "output_type": "stream", "text": [ "\r 0%| | 0/2400 [00:00:2: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n df_TESS['FilenameOnly'] = df_TESS['Filenames'].progress_apply(lambda x: get_file_nameonly(x))\n" ] } ], "source": [ "tqdm.pandas()\n", "df_TESS['FilenameOnly'] = df_TESS['Filenames'].progress_apply(lambda x: get_file_nameonly(x))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "00325445-88a4-4bc4-9b9b-66cfb43d29cd", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FilenamesEmotionsDatasetTranscriptionFilenameOnly
0/dbfs/FileStore/wav-files/FinalExperiments/TES...angryTESSsay the word back.OAF_back_angry.wav
1/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESSSay the word back.OAF_back_disgust.wav
2/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESSSay the word back.OAF_back_fear.wav
3/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESSSay the word back.OAF_back_happy.wav
5/dbfs/FileStore/wav-files/FinalExperiments/TES...surprisedTESSSay the word back.OAF_back_ps.wav
..................
2794/dbfs/FileStore/wav-files/FinalExperiments/TES...disgustTESSSay the word youth.YAF_youth_disgust.wav
2795/dbfs/FileStore/wav-files/FinalExperiments/TES...fearfulTESSSay the word youth.YAF_youth_fear.wav
2796/dbfs/FileStore/wav-files/FinalExperiments/TES...happyTESSSay the word youth.YAF_youth_happy.wav
2798/dbfs/FileStore/wav-files/FinalExperiments/TES...surprisedTESSSay the word youth.YAF_youth_ps.wav
2799/dbfs/FileStore/wav-files/FinalExperiments/TES...sadTESSSay the word youth.YAF_youth_sad.wav
\n", "

2400 rows × 5 columns

\n", "
" ], "text/plain": [ " Filenames ... FilenameOnly\n", "0 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... OAF_back_angry.wav\n", "1 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... OAF_back_disgust.wav\n", "2 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... OAF_back_fear.wav\n", "3 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... OAF_back_happy.wav\n", "5 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... OAF_back_ps.wav\n", "... ... ... ...\n", "2794 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... YAF_youth_disgust.wav\n", "2795 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... YAF_youth_fear.wav\n", "2796 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... YAF_youth_happy.wav\n", "2798 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... YAF_youth_ps.wav\n", "2799 /dbfs/FileStore/wav-files/FinalExperiments/TES... ... YAF_youth_sad.wav\n", "\n", "[2400 rows x 5 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_TESS" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "c1b18468-21be-433a-9321-3944d7f2b1c1", "showTitle": false, "title": "" } }, "outputs": [], "source": [ "df_TESS.to_csv('/dbfs/FileStore/wav-files/FinalExperiments/TESS_Baseline.csv',index=False)\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { "byteLimit": 2048000, "rowLimit": 10000 }, "inputWidgets": {}, "nuid": "81bcdcb7-a883-4811-95db-42f32a41c059", "showTitle": false, "title": "" } }, "outputs": [ { "output_type": "stream", "name": "stderr", "output_type": "stream", "text": [ "/databricks/python_shell/dbruntime/huggingface_patches/datasets.py:13: UserWarning: During large dataset downloads, there could be multiple progress bar widgets that can cause performance issues for your notebook or 
browser. To avoid these issues, use `datasets.utils.logging.disable_progress_bar()` to turn off the progress bars.\n warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "Index(['Filenames', 'Emotions', 'Dataset', 'Transcription', 'FilenameOnly'], dtype='object')\nDownloading and preparing dataset tess_benchmarking/clean to /dbfs/FileStore/wav-files/FinalExperiments/cached_emo/tess_benchmarking/clean-9b66cbcbd5068a74/0.0.0/54dd96fa57ef5df2ee9216541170c41066df10b2aa5537b02693986895738a8d...\n" ] }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7d984674c2d74032b7b39cf0a6547354", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0 examples [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "output_type": "stream", "name": "stdout", "output_type": "stream", "text": [ "files is /dbfs/FileStore/wav-files/FinalExperiments\n\naudio dir is /dbfs/FileStore/wav-files/FinalExperiments/TESS/\n\nDataset tess_benchmarking downloaded and prepared to /dbfs/FileStore/wav-files/FinalExperiments/cached_emo/tess_benchmarking/clean-9b66cbcbd5068a74/0.0.0/54dd96fa57ef5df2ee9216541170c41066df10b2aa5537b02693986895738a8d. Subsequent calls will reuse this data.\n" ] }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6a758a44a7704b7393685a87aee57107", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00