BrendaTellez committed on
Commit
be4d0c2
1 Parent(s): 0791f60

Upload 5 files

CNN_Architecture.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
CNN_support.py ADDED
@@ -0,0 +1,138 @@
+ import os
+ import librosa
+ import numpy as np
+ from scipy.io import wavfile
+ from sklearn.preprocessing import normalize
+
+ class SoundPreprocessing:
+     """
+     Parameters
+     ----------
+
+     sr (int): sampling rate
+     max_size (iterable): resulting shape of the tensor
+     n_fft (int): FFT window size
+     n_mfcc (int): number of MFCCs
+     hop_length (int): hop length between frames
+     """
+
+     def __init__(self, *, sr, max_size, n_fft, n_mfcc = 60, hop_length = 512):
+         self.sr = sr
+         self.n_fft = n_fft
+         self.n_mfcc = n_mfcc
+         self.max_size = max_size
+         self.hop_length = hop_length
+         self.shape_changed = False
+
+     def padding(self, array, xx, yy):
+         """
+         Parameters
+         ----------
+         array: numpy array
+         xx: desired height
+         yy: desired width
+
+         Returns: array zero-padded to shape (xx, yy)
+         """
+         h = array.shape[0]
+         w = array.shape[1]
+         a = max((xx - h) // 2, 0)
+         aa = max(0, xx - a - h)
+         b = max(0, (yy - w) // 2)
+         bb = max(yy - b - w, 0)
+
+         return np.pad(array, pad_width = ((a, aa), (b, bb)),
+                       mode = "constant")
+
+     def generate_features(self, y_cut, sr, max_size, n_fft, n_mfcc, hop_length):
+         """Stack spectral bandwidth/centroid/chroma, |STFT| and MFCCs into a 3-channel tensor."""
+         # Numbers n such that (n - 2) is divisible by 14
+         condition = np.arange(2, 1000)[np.where((np.arange(2, 1000) - 2) % 14 == 0)]
+
+         self.shape_changed = False
+
+         if max_size[0] not in condition:
+             # Get the closest number to 'max_size[0]' that respects 'condition'
+             new_max0 = sorted(condition, key = lambda v: abs(v - max_size[0]))[0]
+             self.shape_changed = True
+             max_size = (new_max0, max_size[1])
+
+         stft = self.padding(np.abs(librosa.stft(y = y_cut, n_fft = n_fft,
+                                                 hop_length = hop_length)),
+                             max_size[0], max_size[1])
+
+         if max_size[0] < stft.shape[0]:
+             new_max0 = sorted(condition[condition >= stft.shape[0]],
+                               key = lambda v: abs(v - stft.shape[0]))[0]
+             max_size = (new_max0, max_size[1])
+             self.shape_changed = True
+
+             stft = self.padding(np.abs(librosa.stft(y = y_cut, n_fft = n_fft,
+                                                     hop_length = hop_length)),
+                                 max_size[0], max_size[1])
+
+         MFCCs = self.padding(librosa.feature.mfcc(y = y_cut, n_fft = n_fft, sr = sr,
+                                                   hop_length = hop_length, n_mfcc = n_mfcc),
+                              max_size[0], max_size[1])
+
+         spec_centroid = librosa.feature.spectral_centroid(y = y_cut, sr = sr)
+         chroma_stft = librosa.feature.chroma_stft(y = y_cut, sr = sr)
+         spec_bw = librosa.feature.spectral_bandwidth(y = y_cut, sr = sr)
+
+         # Build the first channel, starting from the two 1-row features
+         image = np.array([self.padding(normalize(spec_bw), 1, max_size[1])]).reshape(1, max_size[1])
+         image = np.append(image, self.padding(normalize(spec_centroid), 1, max_size[1]), axis = 0)
+
+         # Repeat the padded spec_bw, spec_centroid and chroma_stft until they are STFT- and MFCC-sized
+         for i in range(int((max_size[0] - 2) / 14)):
+             image = np.append(image, self.padding(normalize(spec_bw), 1, max_size[1]), axis = 0)
+             image = np.append(image, self.padding(normalize(spec_centroid), 1, max_size[1]), axis = 0)
+             image = np.append(image, self.padding(normalize(chroma_stft), 12, max_size[1]), axis = 0)
+
+         image = np.dstack((image, np.abs(stft)))
+         image = np.dstack((image, MFCCs))
+
+         # Keep the (possibly adjusted) target shape for later inspection
+         self.max_size = max_size
+
+         return image
+
+     def get_features(self, df, filepath):
+         # Get data for the CNN
+         X = []
+         y = np.zeros(shape = (len(df), 1))
+
+         for i in df.index:
+
+             sr_i, aud = wavfile.read(os.path.join(filepath, df.loc[i, "filename"]))
+             aud = aud.astype(np.float16)
+
+             X += [self.generate_features(y_cut = aud, sr = sr_i,
+                                          n_fft = self.n_fft,
+                                          n_mfcc = self.n_mfcc,
+                                          max_size = self.max_size,
+                                          hop_length = self.hop_length)]
+
+             y[i] = df.loc[i, "target"]
+
+         if self.shape_changed:
+             print(f"New max_size is {self.max_size}")
+
+         X = np.array(X)
+
+         return X, y
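For reference, a minimal usage sketch of the class above (illustrative, not part of the commit): the metadata CSV and `./data/audio` folder follow the ESC-50 layout used in `Data_preparation.ipynb`, and `max_size` and `n_fft` are assumed values chosen so that the |STFT| height already satisfies the divisibility condition in `generate_features`.

```python
# Hypothetical usage of CNN_support.SoundPreprocessing; paths and parameters are assumptions
import pandas as pd
import CNN_support as cnns

meta = pd.read_csv("./data/meta/esc50.csv")

# n_fft = 254 gives an |STFT| with 254/2 + 1 = 128 rows, matching max_size[0] = 128
prep = cnns.SoundPreprocessing(sr = 44100, max_size = (128, 431),
                               n_fft = 254, n_mfcc = 60, hop_length = 512)

# X: (n_clips, height, width, 3) tensor of stacked spectral features, |STFT| and MFCCs
X, y = prep.get_features(meta, "./data/audio")
print(X.shape, y.shape)
```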
Data_preparation.ipynb ADDED
@@ -0,0 +1,652 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VNUnhmXWe9qz"
+ },
+ "source": [
+ "# Notebook for data preparation\n",
+ "\n",
+ "Academic year 2022-2023 - HUMAN DATA ANALYTICS\n",
+ "\n",
+ "Authors:\n",
+ "* Mattia Brocco\n",
+ "* Brenda Eloisa Tellez Juarez\n",
+ "\n",
+ "This notebook presents the pipeline for data import, preprocessing and storage (in `.parquet` format)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2023-02-12T22:43:39.436355Z",
+ "start_time": "2023-02-12T22:43:39.418449Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 915
+ },
+ "id": "pz7MotpCfCUR",
+ "outputId": "fc916ed3-03d2-41ee-87db-237d79979cf0"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount(\"/content/drive\")\n",
+ "\n",
+ "#%cd /content/drive/MyDrive/Environmental-sounds-UNIPD-2022"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "6YEmW9n_fOB8"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "import torch\n",
+ "import librosa\n",
+ "import matplotlib\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import tensorflow as tf\n",
+ "from librosa import display\n",
+ "from scipy.io import wavfile\n",
+ "from tensorflow import keras\n",
+ "import IPython.display as ipd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from sklearn.metrics import confusion_matrix\n",
+ "from sklearn.metrics import classification_report\n",
+ "\n",
+ "import evaluation\n",
+ "import CNN_support as cnns\n",
+ "from gng import GrowingNeuralGas\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "execution": {
+ "iopub.execute_input": "2023-01-14T19:51:27.903698Z",
+ "iopub.status.busy": "2023-01-14T19:51:27.903426Z",
+ "iopub.status.idle": "2023-01-14T19:51:27.930731Z",
+ "shell.execute_reply": "2023-01-14T19:51:27.929790Z",
+ "shell.execute_reply.started": "2023-01-14T19:51:27.903668Z"
+ },
+ "id": "ZjdASAl2emSc",
+ "outputId": "a209c1ff-299b-4e8d-c79a-911fc9fab8ca"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "             filename  fold  target        category  esc10  src_file take\n",
+ "0    1-100032-A-0.wav     1       0             dog   True    100032    A\n",
+ "1   1-100038-A-14.wav     1      14  chirping_birds  False    100038    A\n",
+ "2   1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A\n",
+ "3   1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B\n",
+ "4   1-101296-A-19.wav     1      19    thunderstorm  False    101296    A"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read the ESC-50 metadata csv file\n",
+ "data = pd.read_csv('./data/meta/esc50.csv')\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "EsFcOZlvqf-K"
+ },
+ "source": [
+ "### 2. Data import & preprocessing\n",
+ "For reproducibility, the whole pipeline is seeded with `np.random.seed()`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Q0aXZASmzZtM",
+ "outputId": "7556538e-41f2-4a0a-cb42-d1cca4d1d575"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.8/dist-packages/librosa/core/pitch.py:153: UserWarning: Trying to estimate tuning from empty frequency set.\n",
+ "  warnings.warn(\"Trying to estimate tuning from empty frequency set.\")\n",
+ "/usr/local/lib/python3.8/dist-packages/librosa/core/pitch.py:153: UserWarning: Trying to estimate tuning from empty frequency set.\n",
+ "  warnings.warn(\"Trying to estimate tuning from empty frequency set.\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "# DATA AUGMENTATION\n",
+ "\n",
+ "#np.random.seed(42)\n",
+ "#indexed_samples = np.random.choice(X.shape[0], size = 10000, replace = True)\n",
+ "np.random.seed(101)\n",
+ "randn_seeds = np.random.choice(len(data), size = len(data), replace = False)\n",
+ "\n",
+ "aug_iterations = 7\n",
+ "\n",
+ "new_X = []\n",
+ "#new_X2 = []\n",
+ "new_y = np.zeros(shape = (aug_iterations*len(randn_seeds), 1))\n",
+ "\n",
+ "input_length = 220500\n",
+ "row_count = 0\n",
+ "for i in data.index:\n",
+ "\n",
+ "    sample, sr_sample = librosa.load(\"./data/audio/{}\".format(data.loc[i, \"filename\"]),\n",
+ "                                     sr = 44100)\n",
+ "    # Min-max scaling to [0, 1]\n",
+ "    sample = (sample - sample.min()) / (sample.max() - sample.min())\n",
+ "\n",
+ "    if len(sample) > input_length:\n",
+ "        sample = sample[:input_length]\n",
+ "    else:\n",
+ "        sample = np.pad(sample, (0, max(0, input_length - len(sample))), \"constant\")\n",
+ "\n",
+ "    for n in range(aug_iterations):\n",
+ "\n",
+ "        if n == 0:\n",
+ "            # NOISE INJECTION\n",
+ "            np.random.seed(randn_seeds[i])\n",
+ "            noise = np.random.randn(len(sample))\n",
+ "            augmented_data = (sample + 0.005 * noise)\n",
+ "\n",
+ "        elif n == 1:\n",
+ "            # TIME SHIFT: right shift\n",
+ "            augmented_data = np.roll(sample, 22050)\n",
+ "\n",
+ "        elif n == 2:\n",
+ "            # PITCH SHIFT: shift up by 3 semitones\n",
+ "            augmented_data = librosa.effects.pitch_shift(y = sample, sr = sr_sample,\n",
+ "                                                         n_steps = 3)\n",
+ "        elif n == 3:\n",
+ "            # PITCH SHIFT: shift down by 3 semitones\n",
+ "            augmented_data = librosa.effects.pitch_shift(y = sample, sr = sr_sample,\n",
+ "                                                         n_steps = -3)\n",
+ "        elif n == 4:\n",
+ "            # SPEED SHIFT: faster (returns a shorter array, so pad with zeros)\n",
+ "            augmented_data = librosa.effects.time_stretch(y = sample, rate = 1.25)\n",
+ "            augmented_data = np.append(augmented_data,\n",
+ "                                       np.zeros(shape = len(sample) - len(augmented_data)))\n",
+ "        elif n == 5:\n",
+ "            # SPEED SHIFT: slower (returns a longer array, so truncate)\n",
+ "            augmented_data = librosa.effects.time_stretch(y = sample, rate = 0.8)\n",
+ "            augmented_data = augmented_data[:len(sample)]\n",
+ "\n",
+ "        else:\n",
+ "            # KEEP THE ORIGINAL SAMPLE\n",
+ "            augmented_data = sample\n",
+ "\n",
+ "        new_instance = librosa.feature.mfcc(y = augmented_data, sr = sr_sample,\n",
+ "                                            hop_length = 512, n_mfcc = 60)\n",
+ "\n",
+ "        \"\"\"\n",
+ "        For the CNN, the input is composed of three channels\n",
+ "        stacked together as follows (commented lines).\n",
+ "        \"\"\"\n",
+ "        #new_MFCC = librosa.feature.mfcc(y = augmented_data, sr = sr_sample,\n",
+ "        #                                hop_length = 512, n_mfcc = 60)\n",
+ "        #new_chromagram = librosa.feature.chroma_stft(y = augmented_data, sr = sr_sample,\n",
+ "        #                                             hop_length = 512, win_length = 1024,\n",
+ "        #                                             n_chroma = 60)\n",
+ "        #new_delta = librosa.feature.delta(new_MFCC)\n",
+ "\n",
+ "        #new_instance = np.dstack((new_MFCC, new_chromagram, new_delta))\n",
+ "\n",
+ "        new_X += [new_instance]\n",
+ "        #new_X2 += [new_instance2]\n",
+ "        new_y[row_count] = data.loc[i, \"target\"]\n",
+ "\n",
+ "        row_count += 1\n",
+ "\n",
+ "\n",
+ "new_X = np.array(new_X)\n",
+ "#new_X2 = np.array(new_X2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "kXgmb61EKq2_",
+ "outputId": "c4af2309-c793-4c6b-a083-864b01c71a16"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((14000, 60, 431, 3), (14000, 1))"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_X.shape, new_y.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "id": "paUvHcNHmVfH"
+ },
+ "outputs": [],
+ "source": [
+ "# Reduce float precision in order to decrease the size of the files\n",
+ "new_X = new_X.astype(np.float32)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2023-02-12T22:44:44.989798Z",
+ "start_time": "2023-02-12T22:44:44.984746Z"
+ },
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 811
+ },
+ "id": "CWxW80DewwYQ",
+ "outputId": "bebc360f-9f33-48f3-8106-62d0cc0c91ee"
+ },
+ "outputs": [],
+ "source": [
+ "def data_to_parquet(arr, name):\n",
+ "    \"\"\"\n",
+ "    Whether the array is meant for the CNN or the RNN,\n",
+ "    this function flattens all of its dimensions\n",
+ "    except the first one (the number of samples).\n",
+ "\n",
+ "    When required, the files are then re-imported\n",
+ "    via the 'pandas' library and properly reshaped.\n",
+ "    \"\"\"\n",
+ "    if len(arr.shape) > 2:\n",
+ "        arr2 = arr.reshape(arr.shape[0], -1)\n",
+ "        arr2 = pd.DataFrame(arr2)\n",
+ "    else:\n",
+ "        arr2 = pd.DataFrame(arr)\n",
+ "\n",
+ "    arr2.columns = [str(c) for c in arr2.columns]\n",
+ "    arr2.to_parquet(os.getcwd() + f\"/data/{name}.parquet\")\n",
+ "\n",
+ "\n",
+ "data_to_parquet(new_X, \"X_CNN_60x431x3_7times\")\n",
+ "data_to_parquet(new_y, \"y_CNN_7times\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lb16Lux3deLi"
+ },
+ "source": [
+ "```python\n",
+ "# Get data for RNN\n",
+ "X = []\n",
+ "y = np.zeros(shape = (len(data), 1))\n",
+ "\n",
+ "for i in data.index:\n",
+ "\n",
+ "    sample, sr_sample = librosa.load(\"./data/audio/{}\".format(data.loc[i, \"filename\"]),\n",
+ "                                     sr = 44100)\n",
+ "\n",
+ "    MFCC = librosa.feature.mfcc(y = sample, sr = sr_sample,\n",
+ "                                hop_length = 512, n_mfcc = 60)\n",
+ "\n",
+ "    #instance = MFCC.mean(axis = 0)\n",
+ "\n",
+ "    X += [MFCC]\n",
+ "\n",
+ "    y[i] = data.loc[i, \"target\"]\n",
+ "\n",
+ "X = np.array(X)\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "kNf0QXsLg8Yz"
+ },
+ "source": [
+ "### Adversarial attacks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-01-14T17:37:45.772463Z",
+ "iopub.status.busy": "2023-01-14T17:37:45.771814Z",
+ "iopub.status.idle": "2023-01-14T17:37:45.787426Z",
+ "shell.execute_reply": "2023-01-14T17:37:45.786380Z",
+ "shell.execute_reply.started": "2023-01-14T17:37:45.772366Z"
+ },
+ "id": "u8gNRa0xemS-"
+ },
+ "outputs": [],
+ "source": [
+ "# Create an adversarial example with the fast gradient sign method (FGSM)\n",
+ "def create_adversarial_example(x2, y_new, model_bidirectional):\n",
+ "    # Convert the label to a one-hot encoded vector\n",
+ "    y = tf.keras.utils.to_categorical(y_new, num_classes=50)\n",
+ "    # Compute the gradient of the loss with respect to the input\n",
+ "    with tf.GradientTape() as tape:\n",
+ "        tape.watch(x2)\n",
+ "        logits = model_bidirectional(x2)\n",
+ "        loss_value = tf.losses.categorical_crossentropy(y, logits)\n",
+ "    grads = tape.gradient(loss_value, x2)\n",
+ "    # Create the adversarial example by adding the sign of the gradient to the input\n",
+ "    epsilon = 0.01\n",
+ "    x_adv = x2 + epsilon * tf.sign(grads)\n",
+ "    x_adv = tf.clip_by_value(x_adv, 0, 1)\n",
+ "    return x_adv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-01-14T17:34:00.160306Z",
+ "iopub.status.busy": "2023-01-14T17:34:00.159720Z",
+ "iopub.status.idle": "2023-01-14T17:34:00.166335Z",
+ "shell.execute_reply": "2023-01-14T17:34:00.165267Z",
+ "shell.execute_reply.started": "2023-01-14T17:34:00.160266Z"
+ },
+ "id": "fXKsE1PzemS_"
+ },
+ "outputs": [],
+ "source": [
+ "# Older TF1-style draft of the same function, kept for reference (fully commented out)\n",
+ "#def create_adversarial_example(x2, y_new, model_bidirectional):\n",
+ "#    # convert the label to a one-hot encoded vector\n",
+ "#    y = tf.keras.utils.to_categorical(y_new, num_classes=20)\n",
+ "#    # compute the gradient of the loss with respect to the input\n",
+ "#    logits = model_bidirectional(x2)\n",
+ "#    loss = tf.losses.categorical_crossentropy(y_new, logits)\n",
+ "#    grads, = tf.gradients(loss, x2)\n",
+ "#    # create an adversarial example by adding the sign of the gradient to the input\n",
+ "#    epsilon = 0.01\n",
+ "#    x_adv = x2 + epsilon * tf.sign(grads)\n",
+ "#    x_adv = tf.clip_by_value(x_adv, 0, 1)\n",
+ "#    return x_adv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-01-14T20:08:50.574052Z",
+ "iopub.status.busy": "2023-01-14T20:08:50.573757Z",
+ "iopub.status.idle": "2023-01-14T20:09:00.767976Z",
+ "shell.execute_reply": "2023-01-14T20:09:00.766358Z",
+ "shell.execute_reply.started": "2023-01-14T20:08:50.574022Z"
+ },
+ "id": "Tl_qP5S6emS_"
+ },
+ "outputs": [],
+ "source": [
+ "# Create an adversarial example and test it with the model\n",
+ "x_adv = create_adversarial_example(x2, y_new, model_bidirectional)\n",
+ "y_pred_adv = model_bidirectional(x_adv).numpy().argmax()  # get the predicted label\n",
+ "acc = (y_pred_adv == y_new).mean()  # calculate the accuracy\n",
+ "print(f'Model accuracy on adversarial example: {acc:.2f}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-01-14T19:56:28.078039Z",
+ "iopub.status.busy": "2023-01-14T19:56:28.074638Z",
+ "iopub.status.idle": "2023-01-14T19:56:39.922249Z",
+ "shell.execute_reply": "2023-01-14T19:56:39.920914Z",
+ "shell.execute_reply.started": "2023-01-14T19:56:28.077987Z"
+ },
+ "id": "bFH3lL8UemS_"
+ },
+ "outputs": [],
+ "source": [
+ "# Test the adversarial example on a whole batch\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "x_adv = create_adversarial_example(x2, y_new, model_bidirectional)\n",
+ "logits_adv = model_bidirectional(x_adv)\n",
+ "y_pred_adv = np.argmax(logits_adv, axis=1)\n",
+ "accuracy = accuracy_score(y_new, y_pred_adv)\n",
+ "print('Accuracy on adversarial example:', accuracy)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "machine_shape": "hm",
+ "provenance": []
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+ }
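For completeness, a small sketch (illustrative, not in the commit) of the reload step mentioned in the `data_to_parquet` docstring: the file names match the ones written above, and the (60, 431, 3) target shape matches the recorded `new_X.shape` output.

```python
# Hypothetical reload of the flattened parquet files written by Data_preparation.ipynb
import numpy as np
import pandas as pd

X_flat = pd.read_parquet("./data/X_CNN_60x431x3_7times.parquet")
y_flat = pd.read_parquet("./data/y_CNN_7times.parquet")

# Undo the flattening done by data_to_parquet: (n, 60*431*3) -> (n, 60, 431, 3)
X = X_flat.to_numpy().reshape(-1, 60, 431, 3).astype(np.float32)
y = y_flat.to_numpy().ravel()

print(X.shape, y.shape)  # e.g. (14000, 60, 431, 3) (14000,)
```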
RNN_Architecture.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
evaluation.py ADDED
@@ -0,0 +1,99 @@
+ import seaborn as sns
+ import tensorflow as tf
+ import matplotlib.pyplot as plt
+
+ def plot_loss(history, axis = None):
+     """
+     Parameters
+     ----------
+
+     history : 'tf.keras.callbacks.History' object
+     axis : 'matplotlib.pyplot.axis' object
+     """
+     if axis is not None:
+         axis.plot(history.epoch, history.history["loss"],
+                   label = "Train loss", color = "#191970")
+         axis.plot(history.epoch, history.history["val_loss"],
+                   label = "Val loss", color = "#00CC33")
+         axis.set_title("Loss")
+         axis.legend()
+     else:
+         plt.plot(history.epoch, history.history["loss"],
+                  label = "Train loss", color = "#191970")
+         plt.plot(history.epoch, history.history["val_loss"],
+                  label = "Val loss", color = "#00CC33")
+         plt.title("Loss")
+         plt.legend()
+
+
+ def plot_accuracy(history, axis = None):
+     """
+     Parameters
+     ----------
+
+     history : 'tf.keras.callbacks.History' object
+     axis : 'matplotlib.pyplot.axis' object
+     """
+     if axis is not None:
+         axis.plot(history.epoch, history.history["accuracy"],
+                   label = "Train accuracy", color = "#191970")
+         axis.plot(history.epoch, history.history["val_accuracy"],
+                   label = "Val accuracy", color = "#00CC33")
+         axis.set_ylim(0, 1.1)
+         axis.set_title("Accuracy")
+         axis.legend()
+     else:
+         plt.plot(history.epoch, history.history["accuracy"],
+                  label = "Train accuracy", color = "#191970")
+         plt.plot(history.epoch, history.history["val_accuracy"],
+                  label = "Val accuracy", color = "#00CC33")
+         plt.title("Accuracy")
+         plt.ylim(0, 1.1)
+         plt.legend()
+
+
+ def keras_model_memory_usage_in_bytes(model, *, batch_size: int):
+     """
+     Return the estimated memory usage of a given Keras model in bytes.
+     This includes the model weights and layers, but excludes the dataset.
+
+     The model shapes are multiplied by the batch size, but the weights are not.
+
+     Parameters
+     ----------
+     model: A Keras model.
+     batch_size: The batch size you intend to run the model with. If you
+         have already specified the batch size in the model itself, then
+         pass `1` as the argument here.
+
+     Returns
+     -------
+     An estimate of the Keras model's memory usage in bytes.
+     """
+     default_dtype = tf.keras.backend.floatx()
+     shapes_mem_count = 0
+     internal_model_mem_count = 0
+     for layer in model.layers:
+         if isinstance(layer, tf.keras.Model):
+             internal_model_mem_count += keras_model_memory_usage_in_bytes(layer,
+                                                                           batch_size = batch_size)
+         single_layer_mem = tf.as_dtype(layer.dtype or default_dtype).size
+         out_shape = layer.output_shape
+         if isinstance(out_shape, list):
+             out_shape = out_shape[0]
+         for s in out_shape:
+             if s is None:
+                 continue
+             single_layer_mem *= s
+         shapes_mem_count += single_layer_mem
+
+     trainable_count = sum([tf.keras.backend.count_params(p)
+                            for p in model.trainable_weights])
+     non_trainable_count = sum([tf.keras.backend.count_params(p)
+                                for p in model.non_trainable_weights])
+
+     total_memory = (batch_size * shapes_mem_count + internal_model_mem_count
+                     + trainable_count + non_trainable_count)
+
+     return total_memory
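A minimal, hypothetical usage sketch of these helpers: the tiny model and the random data below are stand-ins (not part of the project) whose only purpose is to produce a `History` object with train/validation loss and accuracy.

```python
# Hypothetical usage of evaluation.py; the model and data are illustrative stand-ins
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import evaluation

# Tiny stand-in classifier over the (60, 431, 3) input shape used in the notebooks
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape = (60, 431, 3)),
    tf.keras.layers.Dense(50, activation = "softmax")
])
model.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy",
              metrics = ["accuracy"])

X_demo = np.random.rand(32, 60, 431, 3).astype(np.float32)
y_demo = np.random.randint(0, 50, size = (32, 1))
history = model.fit(X_demo, y_demo, validation_split = 0.25, epochs = 3, verbose = 0)

# Side-by-side loss and accuracy curves on user-supplied axes
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (10, 4))
evaluation.plot_loss(history, axis = ax1)
evaluation.plot_accuracy(history, axis = ax2)
plt.show()

print(evaluation.keras_model_memory_usage_in_bytes(model, batch_size = 32))
```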