{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "VNUnhmXWe9qz" }, "source": [ "# Notebook for data preparation\n", "\n", "A.A. 2022-2023 - HUMAN DATA ANALYTICS\n", "\n", "Authors:\n", "* Mattia Brocco\n", "* Brenda Eloisa Tellez Juarez\n", "\n", "In the following notebook the pipeline for data import, preprocessing and storage (using `.parquet` format) is presented." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2023-02-12T22:43:39.436355Z", "start_time": "2023-02-12T22:43:39.418449Z" }, "colab": { "base_uri": "https://localhost:8080/", "height": 915 }, "id": "pz7MotpCfCUR", "outputId": "fc916ed3-03d2-41ee-87db-237d79979cf0" }, "outputs": [], "source": [ "from google.colab import drive\n", "drive.mount(\"/content/drive\")\n", "\n", "#%cd /content/drive/MyDrive/Environmental-sounds-UNIPD-2022" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "6YEmW9n_fOB8" }, "outputs": [], "source": [ "import os\n", "import sys\n", "import torch\n", "import librosa\n", "import matplotlib\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import tensorflow as tf\n", "from librosa import display\n", "from scipy.io import wavfile\n", "from tensorflow import keras\n", "import IPython.display as ipd\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import classification_report\n", "\n", "import evaluation\n", "import CNN_support as cnns\n", "from gng import GrowingNeuralGas\n", "\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "execution": { "iopub.execute_input": "2023-01-14T19:51:27.903698Z", "iopub.status.busy": "2023-01-14T19:51:27.903426Z", "iopub.status.idle": "2023-01-14T19:51:27.930731Z", "shell.execute_reply": "2023-01-14T19:51:27.929790Z", "shell.execute_reply.started": "2023-01-14T19:51:27.903668Z" }, "id": "ZjdASAl2emSc", "outputId": "a209c1ff-299b-4e8d-c79a-911fc9fab8ca" }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
filenamefoldtargetcategoryesc10src_filetake
01-100032-A-0.wav10dogTrue100032A
11-100038-A-14.wav114chirping_birdsFalse100038A
21-100210-A-36.wav136vacuum_cleanerFalse100210A
31-100210-B-36.wav136vacuum_cleanerFalse100210B
41-101296-A-19.wav119thunderstormFalse101296A
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ], "text/plain": [ " filename fold target category esc10 src_file take\n", "0 1-100032-A-0.wav 1 0 dog True 100032 A\n", "1 1-100038-A-14.wav 1 14 chirping_birds False 100038 A\n", "2 1-100210-A-36.wav 1 36 vacuum_cleaner False 100210 A\n", "3 1-100210-B-36.wav 1 36 vacuum_cleaner False 100210 B\n", "4 1-101296-A-19.wav 1 19 thunderstorm False 101296 A" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#reading the csv file\n", "data = pd.read_csv('./data/meta/esc50.csv')\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": { "id": "EsFcOZlvqf-K" }, "source": [ "### 2. Data import & preprocessing\n", "With the aim of replicability, the whole pipeline is implemented with the use of `np.random.seed()`." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q0aXZASmzZtM", "outputId": "7556538e-41f2-4a0a-cb42-d1cca4d1d575" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/librosa/core/pitch.py:153: UserWarning: Trying to estimate tuning from empty frequency set.\n", " warnings.warn(\"Trying to estimate tuning from empty frequency set.\")\n", "/usr/local/lib/python3.8/dist-packages/librosa/core/pitch.py:153: UserWarning: Trying to estimate tuning from empty frequency set.\n", " warnings.warn(\"Trying to estimate tuning from empty frequency set.\")\n" ] } ], "source": [ "# DATA AUGMENTATION\n", "\n", "#np.random.seed(42)\n", "#indexed_samples = np.random.choice(X.shape[0], size = 10000, replace = True)\n", "np.random.seed(101)\n", "randn_seeds = np.random.choice(len(data), size = len(data), replace = False)\n", "\n", "aug_iterations = 7\n", "\n", "new_X = []\n", "#new_X2 = []\n", "new_y = np.zeros(shape = (aug_iterations*len(randn_seeds), 1))\n", "\n", "input_length = 220500\n", "row_count = 0\n", "for i in data.index:\n", "\n", " sample, sr_sample = librosa.load(\"./data/audio/{}\".format(data.loc[i, \"filename\"]),\n", " sr = 44100)\n", " # Min-max scaler [0, 1]\n", " sample = (sample - sample.min()) / (sample.max() - sample.min())\n", "\n", " if len(sample) > input_length:\n", " sample = sample[:input_length]\n", " else:\n", " sample = np.pad(sample, (0, max(0, input_length - len(sample))), \"constant\")\n", "\n", " for n in range(aug_iterations):\n", " \n", " if n == 0:\n", " # NOISE INJECTION\n", " np.random.seed(randn_seeds[i])\n", " noise = np.random.randn(len( sample ))\n", " augmented_data = (sample + 0.005 * noise)\n", "\n", " elif n == 1:\n", " # TIME SHIFT: right shift\n", " augmented_data = np.roll(sample, 22050)\n", "\n", " elif n == 2:\n", " # PITCH SHIFT: shift down by 3\n", " augmented_data = librosa.effects.pitch_shift(y = sample, sr = sr_sample,\n", " n_steps = 3)\n", " elif n == 3:\n", " # PITCH SHIFT: shift down by -3\n", " augmented_data = librosa.effects.pitch_shift(y = sample, sr = sr_sample,\n", " n_steps = -3)\n", " elif n == 4:\n", " # SPEED SHIFT: faster\n", " augmented_data = librosa.effects.time_stretch(y = sample, rate = 1.25)\n", " augmented_data = np.append(augmented_data,\n", " np.zeros(shape = len(sample) - len(augmented_data)))\n", " elif n == 5:\n", " # SPEED SHIFT: slower (returns longer array)\n", " augmented_data = librosa.effects.time_stretch(y = sample, rate = 0.8)\n", " augmented_data = augmented_data[:len(sample)]\n", "\n", " else:\n", " # KEEP NORMAL SAMPLE\n", " augmented_data = sample\n", "\n", " new_instance = librosa.feature.mfcc(y = augmented_data, sr = sr_sample,\n", " hop_length = 512, n_mfcc = 60)\n", " \n", " \"\"\"\n", " For the CNN, the input is composed of three channels\n", " stacked together as follows (commented lines).\n", " \"\"\"\n", " #new_MFCC = librosa.feature.mfcc(y = augmented_data, sr = sr_sample,\n", " # hop_length = 512, n_mfcc = 60)\n", " #new_chromagram = librosa.feature.chroma_stft(y = augmented_data, sr = sr_sample,\n", " # hop_length = 512, win_length = 1024,\n", " # n_chroma = 60)\n", " #new_delta = librosa.feature.delta(new_MFCC)\n", " \n", " #new_instance = np.dstack((new_MFCC, new_chromagram, new_delta))\n", "\n", " \n", " new_X += [new_instance]\n", " #new_X2 += [new_instance2]\n", " new_y[row_count] = data.loc[i, \"target\"]\n", " \n", " row_count += 1\n", " \n", " \n", "new_X = np.array(new_X)\n", "#new_X2 = np.array(new_X2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kXgmb61EKq2_", "outputId": "c4af2309-c793-4c6b-a083-864b01c71a16" }, "outputs": [ { "data": { "text/plain": [ "((14000, 60, 431, 3), (14000, 1))" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_X.shape, new_y.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "paUvHcNHmVfH" }, "outputs": [], "source": [ "# Reduce float precision in order to decrease the size of the files\n", "new_X = new_X.astype(np.float32)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2023-02-12T22:44:44.989798Z", "start_time": "2023-02-12T22:44:44.984746Z" }, "colab": { "base_uri": "https://localhost:8080/", "height": 811 }, "id": "CWxW80DewwYQ", "outputId": "bebc360f-9f33-48f3-8106-62d0cc0c91ee" }, "outputs": [], "source": [ "def data_to_parquet(arr, name):\n", " \"\"\"\n", " Whether it is for the CNN or the RNN,\n", " this function provides a flattening of all the \n", " dimensions of the array except the first\n", " (number of samples).\n", " \n", " When required, the files are then imported\n", " via the 'pandas' library and prperly reshaped.\n", " \"\"\"\n", " if len(arr.shape) > 2:\n", " arr2 = arr.reshape(arr.shape[0], -1)\n", " arr2 = pd.DataFrame(arr2)\n", " else:\n", " arr2 = pd.DataFrame(arr)\n", "\n", " arr2.columns = [str(c) for c in arr2.columns]\n", " arr2.to_parquet(os.getcwd() + f\"/data/{name}.parquet\")\n", " \n", "\n", "data_to_parquet(new_X, \"X_CNN_60x431x3_7times\")\n", "data_to_parquet(new_y, \"y_CNN_7times\")" ] }, { "cell_type": "markdown", "metadata": { "id": "lb16Lux3deLi" }, "source": [ "```python\n", "# Get data for RNN\n", "X = []\n", "y = np.zeros(shape = (len(data), 1))\n", "\n", "for i in data.index:\n", " \n", " sample, sr_sample = librosa.load(\"./data/audio/{}\".format(data.loc[i, \"filename\"]),\n", " sr = 44100)\n", " \n", " MFCC = librosa.feature.mfcc(y = sample, sr = sr_sample,\n", " hop_length = 512, n_mfcc = 60)\n", " \n", " #instance = MFCC.mean(axis = 0)\n", " \n", " X += [MFCC]\n", " \n", " y[i] = data.loc[i, \"target\"]\n", " \n", "X = np.array(X)\n", "```" ] }, { "cell_type": "markdown", "metadata": { "id": "kNf0QXsLg8Yz" }, "source": [ "### Adversarial attacks" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2023-01-14T17:37:45.772463Z", "iopub.status.busy": "2023-01-14T17:37:45.771814Z", "iopub.status.idle": "2023-01-14T17:37:45.787426Z", "shell.execute_reply": "2023-01-14T17:37:45.786380Z", "shell.execute_reply.started": "2023-01-14T17:37:45.772366Z" }, "id": "u8gNRa0xemS-" }, "outputs": [], "source": [ "# create an adversarial example\n", "def create_adversarial_example(x2, y_new, model_bidirectional):\n", " # convert the label to a one-hot encoded vector\n", " y = tf.keras.utils.to_categorical(y_new, num_classes=50)\n", "# compute the gradient of the loss with respect to the input\n", " with tf.GradientTape() as tape:\n", " tape.watch(x2)\n", " logits = model_bidirectional(x2)\n", " loss_value = tf.losses.categorical_crossentropy(y_new, logits)\n", " grads = tape.gradient(loss_value, x2)\n", "# create an adversarial example by adding the sign of the gradient to the input\n", " epsilon = 0.01\n", " x_adv = x2 + epsilon * tf.sign(grads)\n", " x_adv = tf.clip_by_value(x_adv, 0, 1)\n", " return x_adv" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2023-01-14T17:34:00.160306Z", "iopub.status.busy": "2023-01-14T17:34:00.159720Z", "iopub.status.idle": "2023-01-14T17:34:00.166335Z", "shell.execute_reply": "2023-01-14T17:34:00.165267Z", "shell.execute_reply.started": "2023-01-14T17:34:00.160266Z" }, "id": "fXKsE1PzemS_" }, "outputs": [], "source": [ "#def create_adversarial_example(x2, y_new, model_bidirectional):\n", " # convert the label to a one-hot encoded vector\n", " y = tf.keras.utils.to_categorical(y_new, num_classes=20)\n", " # compute the gradient of the loss with respect to the input\n", " logits = model_bidirectional(x2)\n", " loss = tf.losses.categorical_crossentropy(y_new, logits)\n", " grads, = tf.gradients(loss, x2)\n", " # create an adversarial example by adding the sign of the gradient to the input\n", " epsilon = 0.01\n", " x_adv = x2 + epsilon * tf.sign(grads)\n", " x_adv = tf.clip_by_value(x_adv, 0, 1)\n", " return x_adv" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2023-01-14T20:08:50.574052Z", "iopub.status.busy": "2023-01-14T20:08:50.573757Z", "iopub.status.idle": "2023-01-14T20:09:00.767976Z", "shell.execute_reply": "2023-01-14T20:09:00.766358Z", "shell.execute_reply.started": "2023-01-14T20:08:50.574022Z" }, "id": "Tl_qP5S6emS_" }, "outputs": [], "source": [ "# create an adversarial example and test it with the model\n", "x_adv = create_adversarial_example(x2, y_new, model_bidirectional)\n", "y_pred_adv = model_bidirectional(x_adv).argmax() # get the predicted label\n", "acc = (y_pred_adv == y_new).mean() # calculate the accuracy\n", "print(f'Model accuracy on adversarial example: {acc:.2f}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2023-01-14T19:56:28.078039Z", "iopub.status.busy": "2023-01-14T19:56:28.074638Z", "iopub.status.idle": "2023-01-14T19:56:39.922249Z", "shell.execute_reply": "2023-01-14T19:56:39.920914Z", "shell.execute_reply.started": "2023-01-14T19:56:28.077987Z" }, "id": "bFH3lL8UemS_" }, "outputs": [], "source": [ "# test the adversarial example\n", "x_adv = create_adversarial_example(x2, y_new, model_bidirectional)\n", "logits_adv = model_bidirectional(x_adv)\n", "y_pred_adv = np.argmax(logits_adv, axis=1)\n", "accuracy = accuracy_score(y_new, y_pred_adv)\n", "print('Accuracy on adversarial example:', accuracy)" ] } ], "metadata": { "colab": { "machine_shape": "hm", "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 1 }