Delete deepfake_audio_detection.ipynb
deepfake_audio_detection.ipynb  +0 -1624  DELETED
@@ -1,1624 +0,0 @@
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# ποΈ Deepfake Audio Detection System\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"**Pipeline Overview:**\n",
|
| 10 |
-
"```\n",
|
| 11 |
-
"Audio β Noise Removal β Feature Extraction (Log-Mel + TEO)\n",
|
| 12 |
-
" β ECAPA-TDNN Embeddings (192-dim) β XGBoost β REAL / FAKE\n",
|
| 13 |
-
"```\n",
|
| 14 |
-
"\n",
|
| 15 |
-
"**Architecture Highlights:**\n",
|
| 16 |
-
"- Spectral gating denoising\n",
|
| 17 |
-
"- 40-band log-mel spectrogram + Teager Energy Operator\n",
|
| 18 |
-
"- Simplified ECAPA-TDNN for speaker/spoof-aware embeddings\n",
|
| 19 |
-
"- XGBoost classifier on top of embeddings\n",
|
| 20 |
-
"\n",
|
| 21 |
-
"**Dataset:** Synthetic balanced dataset (real vs fake WAV files) \n",
|
| 22 |
-
"Compatible with ASVspoof / WaveFake / FakeAVCeleb folder structure.\n",
|
| 23 |
-
"\n",
|
| 24 |
-
"---"
|
| 25 |
-
]
|
| 26 |
-
},
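For reference, a minimal sketch of how the trained pieces compose for single-file inference, assuming the objects defined later in the notebook (`preprocess_audio`, `extract_features`, `FIXED_T`, `DEVICE`, `model`, `scaler`, `xgb_clf`) are in scope; this is a sketch, not one of the original cells.

```python
import numpy as np
import torch

def predict_file(path: str) -> dict:
    """Illustrative end-to-end inference helper; assumes Cells 5-11 have been run."""
    y = preprocess_audio(path)                       # load -> normalise -> denoise
    feat = extract_features(y)                       # (41, T): 40 log-mel bands + 1 TEO row
    # Pad/trim the time axis exactly as AudioDataset.__getitem__ does
    T = feat.shape[1]
    if T >= FIXED_T:
        feat = feat[:, :FIXED_T]
    else:
        feat = np.pad(feat, ((0, 0), (0, FIXED_T - T)), mode="constant")
    x = torch.tensor(feat, dtype=torch.float32).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        emb = model.embed(x).cpu().numpy()           # (1, 192) ECAPA-TDNN embedding
    emb = scaler.transform(emb)                      # same StandardScaler fitted in Cell 11
    prob_fake = float(xgb_clf.predict_proba(emb)[0, 1])
    return {"label": "FAKE" if prob_fake >= 0.5 else "REAL", "prob_fake": prob_fake}
```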
|
| 27 |
-
{
|
| 28 |
-
"cell_type": "markdown",
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"source": [
|
| 31 |
-
"## π¦ Cell 1 β Install Dependencies"
|
| 32 |
-
]
|
| 33 |
-
},
|
| 34 |
-
{
|
| 35 |
-
"cell_type": "code",
|
| 36 |
-
"execution_count": null,
|
| 37 |
-
"metadata": {},
|
| 38 |
-
"outputs": [],
|
| 39 |
-
"source": [
|
| 40 |
-
"# ββ Cell 1: Install Dependencies (Google Colab) ββββββββββββββββββββββββββββββ\n",
|
| 41 |
-
"# Colab pre-installs torch, numpy, etc. β we only upgrade what needs changing.\n",
|
| 42 |
-
"# Do NOT restart runtime manually; the code handles it automatically.\n",
|
| 43 |
-
"\n",
|
| 44 |
-
"import subprocess, sys, importlib, os\n",
|
| 45 |
-
"\n",
|
| 46 |
-
"def get_version(pkg):\n",
|
| 47 |
-
" try:\n",
|
| 48 |
-
" return importlib.metadata.version(pkg)\n",
|
| 49 |
-
" except:\n",
|
| 50 |
-
" return None\n",
|
| 51 |
-
"\n",
|
| 52 |
-
"# ββ Packages to install βββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 53 |
-
"# Colab already has torch ~2.3+, numpy ~1.26+, pandas, sklearn, matplotlib.\n",
|
| 54 |
-
"# We only pin the ones Colab doesn't ship or ships at wrong versions.\n",
|
| 55 |
-
"PACKAGES = [\n",
|
| 56 |
-
" \"librosa==0.10.1\",\n",
|
| 57 |
-
" \"soundfile>=0.12.1\",\n",
|
| 58 |
-
" \"xgboost==2.0.3\",\n",
|
| 59 |
-
" \"tqdm==4.66.1\",\n",
|
| 60 |
-
" \"seaborn>=0.12.0\",\n",
|
| 61 |
-
" # torch and torchaudio are pre-installed on Colab β skip to save time\n",
|
| 62 |
-
" # numpy, pandas, sklearn, matplotlib are also pre-installed\n",
|
| 63 |
-
"]\n",
|
| 64 |
-
"\n",
|
| 65 |
-
"print(\"π¦ Installing packages for Google Colab...\\n\")\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"try:\n",
|
| 68 |
-
" result = subprocess.run(\n",
|
| 69 |
-
" [sys.executable, \"-m\", \"pip\", \"install\", \"--quiet\"] + PACKAGES,\n",
|
| 70 |
-
" check=True,\n",
|
| 71 |
-
" capture_output=True,\n",
|
| 72 |
-
" text=True,\n",
|
| 73 |
-
" )\n",
|
| 74 |
-
" print(result.stdout or \"\")\n",
|
| 75 |
-
" if result.stderr:\n",
|
| 76 |
-
" print(\"[pip warnings]:\", result.stderr[:500])\n",
|
| 77 |
-
" print(\"β
Installation complete.\\n\")\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"except subprocess.CalledProcessError as e:\n",
|
| 80 |
-
" print(f\"β pip failed (exit code {e.returncode})\")\n",
|
| 81 |
-
" print(\"STDOUT:\", e.stdout[-2000:])\n",
|
| 82 |
-
" print(\"STDERR:\", e.stderr[-2000:])\n",
|
| 83 |
-
" raise\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"# ββ Version report ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 86 |
-
"import torch, torchaudio, librosa, numpy, pandas, sklearn, xgboost, tqdm\n",
|
| 87 |
-
"\n",
|
| 88 |
-
"print(\"π₯οΈ Environment report:\")\n",
|
| 89 |
-
"print(f\" Python : {sys.version.split()[0]}\")\n",
|
| 90 |
-
"print(f\" torch : {torch.__version__}\")\n",
|
| 91 |
-
"print(f\" torchaudio : {torchaudio.__version__}\")\n",
|
| 92 |
-
"print(f\" librosa : {librosa.__version__}\")\n",
|
| 93 |
-
"print(f\" numpy : {numpy.__version__}\")\n",
|
| 94 |
-
"print(f\" pandas : {pandas.__version__}\")\n",
|
| 95 |
-
"print(f\" sklearn : {sklearn.__version__}\")\n",
|
| 96 |
-
"print(f\" xgboost : {xgboost.__version__}\")\n",
|
| 97 |
-
"print(f\" tqdm : {tqdm.__version__}\")\n",
|
| 98 |
-
"print(f\"\\nπ₯οΈ GPU available : {torch.cuda.is_available()}\")\n",
|
| 99 |
-
"if torch.cuda.is_available():\n",
|
| 100 |
-
" print(f\" GPU name : {torch.cuda.get_device_name(0)}\")"
|
| 101 |
-
]
|
| 102 |
-
},
|
| 103 |
-
{
|
| 104 |
-
"cell_type": "markdown",
|
| 105 |
-
"metadata": {},
|
| 106 |
-
"source": [
|
| 107 |
-
"## π Cell 2 β All Imports (Single Setup Cell)"
|
| 108 |
-
]
|
| 109 |
-
},
|
| 110 |
-
{
|
| 111 |
-
"cell_type": "code",
|
| 112 |
-
"execution_count": null,
|
| 113 |
-
"id": "256a6f57",
|
| 114 |
-
"metadata": {},
|
| 115 |
-
"outputs": [],
|
| 116 |
-
"source": [
|
| 117 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 118 |
-
"# Cell 2+3 β All Imports + Global Configuration (Google Colab)\n",
|
| 119 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 120 |
-
"\n",
|
| 121 |
-
"# ββ Standard library ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 122 |
-
"import os\n",
|
| 123 |
-
"import random\n",
|
| 124 |
-
"import warnings\n",
|
| 125 |
-
"import time\n",
|
| 126 |
-
"from pathlib import Path\n",
|
| 127 |
-
"from typing import Tuple, List, Dict, Optional\n",
|
| 128 |
-
"\n",
|
| 129 |
-
"# ββ Numerical & data ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 130 |
-
"import numpy as np\n",
|
| 131 |
-
"import pandas as pd\n",
|
| 132 |
-
"\n",
|
| 133 |
-
"# ββ Audio processing ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 134 |
-
"import librosa\n",
|
| 135 |
-
"import librosa.display\n",
|
| 136 |
-
"import soundfile as sf\n",
|
| 137 |
-
"\n",
|
| 138 |
-
"# ββ Deep learning βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 139 |
-
"import torch\n",
|
| 140 |
-
"import torch.nn as nn\n",
|
| 141 |
-
"import torch.nn.functional as F\n",
|
| 142 |
-
"from torch.utils.data import Dataset, DataLoader\n",
|
| 143 |
-
"import torchaudio\n",
|
| 144 |
-
"\n",
|
| 145 |
-
"# ββ Machine learning ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 146 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 147 |
-
"from sklearn.preprocessing import StandardScaler\n",
|
| 148 |
-
"from sklearn.metrics import (\n",
|
| 149 |
-
" accuracy_score, f1_score, roc_auc_score,\n",
|
| 150 |
-
" confusion_matrix, roc_curve, ConfusionMatrixDisplay\n",
|
| 151 |
-
")\n",
|
| 152 |
-
"import xgboost as xgb\n",
|
| 153 |
-
"\n",
|
| 154 |
-
"# ββ Visualization βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 155 |
-
"import matplotlib.pyplot as plt\n",
|
| 156 |
-
"import matplotlib.gridspec as gridspec\n",
|
| 157 |
-
"import seaborn as sns\n",
|
| 158 |
-
"\n",
|
| 159 |
-
"# ββ Progress bar ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 160 |
-
"from tqdm import tqdm\n",
|
| 161 |
-
"\n",
|
| 162 |
-
"# ββ Suppress non-critical warnings ββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 163 |
-
"warnings.filterwarnings(\"ignore\")\n",
|
| 164 |
-
"\n",
|
| 165 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 166 |
-
"# Reproducibility β MUST come before anything that uses SEED\n",
|
| 167 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 168 |
-
"SEED = 42\n",
|
| 169 |
-
"random.seed(SEED)\n",
|
| 170 |
-
"np.random.seed(SEED)\n",
|
| 171 |
-
"torch.manual_seed(SEED)\n",
|
| 172 |
-
"if torch.cuda.is_available():\n",
|
| 173 |
-
" torch.cuda.manual_seed_all(SEED)\n",
|
| 174 |
-
"\n",
|
| 175 |
-
"# ββ Device β MUST come before XGB_PARAMS which references torch βββββββββββββ\n",
|
| 176 |
-
"DEVICE = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 177 |
-
"\n",
|
| 178 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 179 |
-
"# Audio signal parameters\n",
|
| 180 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 181 |
-
"SAMPLE_RATE = 16000\n",
|
| 182 |
-
"DURATION = 3.0\n",
|
| 183 |
-
"N_SAMPLES = int(SAMPLE_RATE * DURATION) # 48 000\n",
|
| 184 |
-
"\n",
|
| 185 |
-
"# ββ Log-mel parameters ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 186 |
-
"N_MELS = 40\n",
|
| 187 |
-
"N_FFT = int(0.025 * SAMPLE_RATE) # 400 (25 ms window)\n",
|
| 188 |
-
"HOP_LENGTH = int(0.010 * SAMPLE_RATE) # 160 (10 ms hop)\n",
|
| 189 |
-
"FMIN = 20\n",
|
| 190 |
-
"FMAX = 8000\n",
|
| 191 |
-
"\n",
|
| 192 |
-
"# ββ ECAPA-TDNN parameters βββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 193 |
-
"EMBEDDING_DIM = 192\n",
|
| 194 |
-
"CHANNELS = 512\n",
|
| 195 |
-
"ECAPA_EPOCHS = 15\n",
|
| 196 |
-
"ECAPA_BATCH = 32\n",
|
| 197 |
-
"ECAPA_LR = 1e-3\n",
|
| 198 |
-
"\n",
|
| 199 |
-
"# ββ Dataset parameters ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 200 |
-
"MAX_SAMPLES = 1000 # per class β 2 000 total\n",
|
| 201 |
-
"DATASET_ROOT = Path(\"dataset\")\n",
|
| 202 |
-
"\n",
|
| 203 |
-
"# ββ XGBoost parameters β SEED and DEVICE are now defined above βββββββββββββββ\n",
|
| 204 |
-
"XGB_PARAMS = dict(\n",
|
| 205 |
-
" objective = \"binary:logistic\",\n",
|
| 206 |
-
" max_depth = 6,\n",
|
| 207 |
-
" learning_rate = 0.1,\n",
|
| 208 |
-
" n_estimators = 200,\n",
|
| 209 |
-
" subsample = 0.8,\n",
|
| 210 |
-
" colsample_bytree = 0.8,\n",
|
| 211 |
-
" eval_metric = \"logloss\",\n",
|
| 212 |
-
" random_state = SEED, # β
defined 20 lines above\n",
|
| 213 |
-
" n_jobs = -1,\n",
|
| 214 |
-
" device = \"cuda\" if torch.cuda.is_available() else \"cpu\", # β
torch imported\n",
|
| 215 |
-
")\n",
|
| 216 |
-
"\n",
|
| 217 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 218 |
-
"# Environment report\n",
|
| 219 |
-
"# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 220 |
-
"print(\"β
Imports + config complete.\")\n",
|
| 221 |
-
"print(f\"π₯οΈ Device : {DEVICE}\")\n",
|
| 222 |
-
"print(f\"π’ PyTorch : {torch.__version__}\")\n",
|
| 223 |
-
"print(f\"π’ Torchaudio : {torchaudio.__version__}\")\n",
|
| 224 |
-
"print(f\"π’ Librosa : {librosa.__version__}\")\n",
|
| 225 |
-
"print(f\"π’ XGBoost : {xgb.__version__}\")\n",
|
| 226 |
-
"print(f\"π’ NumPy : {np.__version__}\")\n",
|
| 227 |
-
"print(f\"π’ Pandas : {pd.__version__}\")\n",
|
| 228 |
-
"print(f\"\\nβοΈ Sample rate : {SAMPLE_RATE} Hz\")\n",
|
| 229 |
-
"print(f\"βοΈ Clip duration : {DURATION} s ({N_SAMPLES} samples)\")\n",
|
| 230 |
-
"print(f\"βοΈ Mel bands : {N_MELS}\")\n",
|
| 231 |
-
"print(f\"βοΈ Embedding dim : {EMBEDDING_DIM}\")\n",
|
| 232 |
-
"print(f\"βοΈ Max per class : {MAX_SAMPLES}\")"
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "markdown",
|
| 237 |
-
"id": "d8c67257",
|
| 238 |
-
"metadata": {},
|
| 239 |
-
"source": [
|
| 240 |
-
"## βοΈ Cell 3 β Global Configuration"
|
| 241 |
-
]
|
| 242 |
-
},
|
| 243 |
-
{
|
| 244 |
-
"cell_type": "code",
|
| 245 |
-
"execution_count": null,
|
| 246 |
-
"id": "b518441d",
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"# βββ Audio signal parameters ββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 251 |
-
"SAMPLE_RATE = 16000 # Target sample rate in Hz\n",
|
| 252 |
-
"DURATION = 3.0 # Fixed clip duration in seconds\n",
|
| 253 |
-
"N_SAMPLES = int(SAMPLE_RATE * DURATION) # 48 000 samples per clip\n",
|
| 254 |
-
"\n",
|
| 255 |
-
"# βββ Log-mel spectrogram parameters βββββββββββββββββββββββββββββββββββββββ\n",
|
| 256 |
-
"N_MELS = 40 # Number of mel filterbanks\n",
|
| 257 |
-
"N_FFT = int(0.025 * SAMPLE_RATE) # 25 ms window β 400 samples\n",
|
| 258 |
-
"HOP_LENGTH = int(0.010 * SAMPLE_RATE) # 10 ms hop β 160 samples\n",
|
| 259 |
-
"FMIN = 20 # Min frequency for mel filters\n",
|
| 260 |
-
"FMAX = 8000 # Max frequency for mel filters\n",
|
| 261 |
-
"\n",
|
| 262 |
-
"# βββ ECAPA-TDNN model parameters ββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 263 |
-
"EMBEDDING_DIM = 192 # Output embedding size\n",
|
| 264 |
-
"CHANNELS = 512 # Internal channel width\n",
|
| 265 |
-
"ECAPA_EPOCHS = 15 # Training epochs for the neural model\n",
|
| 266 |
-
"ECAPA_BATCH = 32 # Batch size\n",
|
| 267 |
-
"ECAPA_LR = 1e-3 # Learning rate\n",
|
| 268 |
-
"\n",
|
| 269 |
-
"# βββ Dataset parameters βββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 270 |
-
"MAX_SAMPLES = 1000 # Samples PER CLASS (1000 real + 1000 fake = 2000 total)\n",
|
| 271 |
-
"DATASET_ROOT = Path(\"dataset\") # Root folder containing real/ and fake/\n",
|
| 272 |
-
"\n",
|
| 273 |
-
"# βββ XGBoost parameters βββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 274 |
-
"XGB_PARAMS = dict(\n",
|
| 275 |
-
" objective = \"binary:logistic\",\n",
|
| 276 |
-
" max_depth = 6,\n",
|
| 277 |
-
" learning_rate = 0.1,\n",
|
| 278 |
-
" n_estimators = 200,\n",
|
| 279 |
-
" subsample = 0.8,\n",
|
| 280 |
-
" colsample_bytree= 0.8,\n",
|
| 281 |
-
" use_label_encoder = False,\n",
|
| 282 |
-
" eval_metric = \"logloss\",\n",
|
| 283 |
-
" random_state = SEED,\n",
|
| 284 |
-
" n_jobs = -1,\n",
|
| 285 |
-
")\n",
|
| 286 |
-
"\n",
|
| 287 |
-
"print(\"β
Configuration loaded.\")\n",
|
| 288 |
-
"print(f\" Sample rate : {SAMPLE_RATE} Hz\")\n",
|
| 289 |
-
"print(f\" Clip duration : {DURATION} s ({N_SAMPLES} samples)\")\n",
|
| 290 |
-
"print(f\" Mel bands : {N_MELS}\")\n",
|
| 291 |
-
"print(f\" Embedding dim : {EMBEDDING_DIM}\")\n",
|
| 292 |
-
"print(f\" Max per class : {MAX_SAMPLES}\")"
|
| 293 |
-
]
|
| 294 |
-
},
|
| 295 |
-
{
|
| 296 |
-
"cell_type": "markdown",
|
| 297 |
-
"id": "f1cd5010",
|
| 298 |
-
"metadata": {},
|
| 299 |
-
"source": [
|
| 300 |
-
"## ποΈ Cell 4 β Download ASVspoof 2019 LA Dataset\n",
|
| 301 |
-
"\n",
|
| 302 |
-
"> **ASVspoof 2019 LA** is the official benchmark for logical-access spoofed/deepfake speech detection. \n",
|
| 303 |
-
"> It contains **bonafide** (real human speech) and **spoof** (TTS / voice-conversion generated) utterances. \n",
|
| 304 |
-
"> We download the training partition, parse the official protocol file, and copy files into `dataset/real/` and `dataset/fake/`."
|
| 305 |
-
]
|
| 306 |
-
},
|
| 307 |
-
{
|
| 308 |
-
"cell_type": "code",
|
| 309 |
-
"execution_count": null,
|
| 310 |
-
"id": "ae82ace4",
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"# ββ CELL 4: Download ASVspoof 2019 LA subset ββββββββββββββββββββββββββββββββ\n",
|
| 315 |
-
"# Official benchmark for spoofed/deepfake speech detection\n",
|
| 316 |
-
"# Free, no login needed via Zenodo\n",
|
| 317 |
-
"\n",
|
| 318 |
-
"!pip install -q zenodo_get\n",
|
| 319 |
-
"\n",
|
| 320 |
-
"import zipfile, shutil\n",
|
| 321 |
-
"from pathlib import Path\n",
|
| 322 |
-
"\n",
|
| 323 |
-
"# ββ Download LA (Logical Access) partition βββββββββββββββββββββββββββββββββ\n",
|
| 324 |
-
"# Contains TTS/VC deepfakes + bonafide speech\n",
|
| 325 |
-
"RAW_DIR = Path(\"asvspoof_raw\")\n",
|
| 326 |
-
"if not RAW_DIR.exists():\n",
|
| 327 |
-
" print(\"π₯ Downloading ASVspoof 2019 LA from Zenodo (this may take a few minutes)...\")\n",
|
| 328 |
-
" !zenodo_get 10.5281/zenodo.10509676 -o {RAW_DIR}\n",
|
| 329 |
-
"else:\n",
|
| 330 |
-
" print(f\"β
Raw data directory '{RAW_DIR}' already exists, skipping download.\")\n",
|
| 331 |
-
"\n",
|
| 332 |
-
"# ββ Extract the ZIP ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 333 |
-
"zip_path = RAW_DIR / \"LA.zip\"\n",
|
| 334 |
-
"extracted_marker = RAW_DIR / \"LA\"\n",
|
| 335 |
-
"\n",
|
| 336 |
-
"if zip_path.exists() and not extracted_marker.exists():\n",
|
| 337 |
-
" print(\"π¦ Extracting LA.zip...\")\n",
|
| 338 |
-
" with zipfile.ZipFile(str(zip_path), \"r\") as z:\n",
|
| 339 |
-
" z.extractall(str(RAW_DIR))\n",
|
| 340 |
-
" print(\"β
Extraction complete.\")\n",
|
| 341 |
-
"elif extracted_marker.exists():\n",
|
| 342 |
-
" print(\"β
Already extracted.\")\n",
|
| 343 |
-
"else:\n",
|
| 344 |
-
" print(\"β οΈ LA.zip not found β check the download step above.\")\n",
|
| 345 |
-
"\n",
|
| 346 |
-
"# ββ Create dataset/real and dataset/fake from official labels ββββββββββββββ\n",
|
| 347 |
-
"Path(\"dataset/real\").mkdir(parents=True, exist_ok=True)\n",
|
| 348 |
-
"Path(\"dataset/fake\").mkdir(parents=True, exist_ok=True)\n",
|
| 349 |
-
"\n",
|
| 350 |
-
"# Format of each protocol line:\n",
|
| 351 |
-
"# SPEAKER_ID FILENAME ENV ATTACK_TYPE LABEL\n",
|
| 352 |
-
"# LABEL is either \"bonafide\" (real) or \"spoof\" (fake)\n",
|
| 353 |
-
"label_file = RAW_DIR / \"LA\" / \"ASVspoof2019_LA_cm_protocols\" / \"ASVspoof2019.LA.cm.train.trn.txt\"\n",
|
| 354 |
-
"audio_dir = RAW_DIR / \"LA\" / \"ASVspoof2019_LA_train\" / \"flac\"\n",
|
| 355 |
-
"\n",
|
| 356 |
-
"if not label_file.exists():\n",
|
| 357 |
-
" raise FileNotFoundError(\n",
|
| 358 |
-
" f\"Protocol file not found at {label_file}. \"\n",
|
| 359 |
-
" f\"Check that the Zenodo download and extraction succeeded.\"\n",
|
| 360 |
-
" )\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"real_count = 0\n",
|
| 363 |
-
"fake_count = 0\n",
|
| 364 |
-
"MAX_PER_CLASS = 1000 # cap at 1000 each for Colab speed\n",
|
| 365 |
-
"\n",
|
| 366 |
-
"# Only copy if dataset dirs are empty (skip if already done)\n",
|
| 367 |
-
"existing_real = len(list(Path(\"dataset/real\").glob(\"*.flac\")))\n",
|
| 368 |
-
"existing_fake = len(list(Path(\"dataset/fake\").glob(\"*.flac\")))\n",
|
| 369 |
-
"\n",
|
| 370 |
-
"if existing_real >= MAX_PER_CLASS and existing_fake >= MAX_PER_CLASS:\n",
|
| 371 |
-
" real_count = existing_real\n",
|
| 372 |
-
" fake_count = existing_fake\n",
|
| 373 |
-
" print(f\"β
Dataset already prepared ({existing_real} real, {existing_fake} fake). Skipping copy.\")\n",
|
| 374 |
-
"else:\n",
|
| 375 |
-
" print(\"π Copying audio files into dataset/real/ and dataset/fake/...\")\n",
|
| 376 |
-
" with open(label_file) as f:\n",
|
| 377 |
-
" for line in f:\n",
|
| 378 |
-
" parts = line.strip().split()\n",
|
| 379 |
-
" utt_id = parts[1]\n",
|
| 380 |
-
" label = parts[4] # \"bonafide\" or \"spoof\"\n",
|
| 381 |
-
"\n",
|
| 382 |
-
" src = audio_dir / f\"{utt_id}.flac\"\n",
|
| 383 |
-
" if not src.exists():\n",
|
| 384 |
-
" continue\n",
|
| 385 |
-
"\n",
|
| 386 |
-
" if label == \"bonafide\" and real_count < MAX_PER_CLASS:\n",
|
| 387 |
-
" shutil.copy(str(src), f\"dataset/real/{utt_id}.flac\")\n",
|
| 388 |
-
" real_count += 1\n",
|
| 389 |
-
" elif label == \"spoof\" and fake_count < MAX_PER_CLASS:\n",
|
| 390 |
-
" shutil.copy(str(src), f\"dataset/fake/{utt_id}.flac\")\n",
|
| 391 |
-
" fake_count += 1\n",
|
| 392 |
-
"\n",
|
| 393 |
-
" if real_count >= MAX_PER_CLASS and fake_count >= MAX_PER_CLASS:\n",
|
| 394 |
-
" break\n",
|
| 395 |
-
"\n",
|
| 396 |
-
"print(f\"\\nβ
ASVspoof 2019 LA dataset ready.\")\n",
|
| 397 |
-
"print(f\" Real (bonafide) : {real_count}\")\n",
|
| 398 |
-
"print(f\" Fake (spoof) : {fake_count}\")\n",
|
| 399 |
-
"\n",
|
| 400 |
-
"\n",
|
| 401 |
-
"# ββ load_file_list β supports .wav AND .flac ββββββββββββββββββββββββββββββ\n",
|
| 402 |
-
"def load_file_list(\n",
|
| 403 |
-
" root: Path,\n",
|
| 404 |
-
" max_per_class: int = MAX_SAMPLES,\n",
|
| 405 |
-
") -> pd.DataFrame:\n",
|
| 406 |
-
" \"\"\"\n",
|
| 407 |
-
" Build a balanced DataFrame of audio file paths and labels.\n",
|
| 408 |
-
" Supports .wav, .flac, and .ogg files.\n",
|
| 409 |
-
"\n",
|
| 410 |
-
" Returns\n",
|
| 411 |
-
" -------\n",
|
| 412 |
-
" DataFrame with columns: [path, label] where label β {0=real, 1=fake}\n",
|
| 413 |
-
" \"\"\"\n",
|
| 414 |
-
" rows: List[Dict] = []\n",
|
| 415 |
-
"\n",
|
| 416 |
-
" for label_name, label_int in [(\"real\", 0), (\"fake\", 1)]:\n",
|
| 417 |
-
" folder = root / label_name\n",
|
| 418 |
-
" if not folder.exists():\n",
|
| 419 |
-
" raise FileNotFoundError(f\"Expected folder not found: {folder}\")\n",
|
| 420 |
-
"\n",
|
| 421 |
-
" # Collect all common audio formats\n",
|
| 422 |
-
" files = []\n",
|
| 423 |
-
" for ext in [\"*.wav\", \"*.flac\", \"*.ogg\"]:\n",
|
| 424 |
-
" files.extend(folder.glob(ext))\n",
|
| 425 |
-
" files = sorted(files)\n",
|
| 426 |
-
"\n",
|
| 427 |
-
" if len(files) == 0:\n",
|
| 428 |
-
" raise FileNotFoundError(\n",
|
| 429 |
-
" f\"No audio files (.wav/.flac/.ogg) found in {folder}\"\n",
|
| 430 |
-
" )\n",
|
| 431 |
-
"\n",
|
| 432 |
-
" # Shuffle to avoid ordering bias, then cap\n",
|
| 433 |
-
" random.shuffle(files)\n",
|
| 434 |
-
" files = files[:max_per_class]\n",
|
| 435 |
-
"\n",
|
| 436 |
-
" for fp in files:\n",
|
| 437 |
-
" rows.append({\"path\": str(fp), \"label\": label_int})\n",
|
| 438 |
-
"\n",
|
| 439 |
-
" df = pd.DataFrame(rows).sample(frac=1, random_state=SEED).reset_index(drop=True)\n",
|
| 440 |
-
" return df\n",
|
| 441 |
-
"\n",
|
| 442 |
-
"\n",
|
| 443 |
-
"# ββ Load the file list βββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 444 |
-
"df = load_file_list(DATASET_ROOT)\n",
|
| 445 |
-
"\n",
|
| 446 |
-
"print(f\"\\nπ Dataset summary:\")\n",
|
| 447 |
-
"print(df[\"label\"].value_counts().rename({0: \"real\", 1: \"fake\"}).to_string())\n",
|
| 448 |
-
"print(f\" Total files : {len(df)}\")\n",
|
| 449 |
-
"df.head()"
|
| 450 |
-
]
|
| 451 |
-
},
|
| 452 |
-
{
|
| 453 |
-
"cell_type": "markdown",
|
| 454 |
-
"metadata": {},
|
| 455 |
-
"source": [
|
| 456 |
-
"## π Cell 5 β Audio Preprocessing"
|
| 457 |
-
]
|
| 458 |
-
},
|
| 459 |
-
{
|
| 460 |
-
"cell_type": "code",
|
| 461 |
-
"execution_count": null,
|
| 462 |
-
"metadata": {},
|
| 463 |
-
"outputs": [],
|
| 464 |
-
"source": [
|
| 465 |
-
"def load_and_normalize(\n",
|
| 466 |
-
" path: str,\n",
|
| 467 |
-
" target_sr: int = SAMPLE_RATE,\n",
|
| 468 |
-
" target_len: int = N_SAMPLES,\n",
|
| 469 |
-
") -> np.ndarray:\n",
|
| 470 |
-
" \"\"\"\n",
|
| 471 |
-
" Load a WAV file, resample, pad/trim to a fixed length, and normalise.\n",
|
| 472 |
-
"\n",
|
| 473 |
-
" Parameters\n",
|
| 474 |
-
" ----------\n",
|
| 475 |
-
" path : path to WAV file\n",
|
| 476 |
-
" target_sr : desired sample rate (default 16 kHz)\n",
|
| 477 |
-
" target_len : desired number of samples (sr Γ duration)\n",
|
| 478 |
-
"\n",
|
| 479 |
-
" Returns\n",
|
| 480 |
-
" -------\n",
|
| 481 |
-
" y : float32 array of shape (target_len,), amplitude in [-1, 1]\n",
|
| 482 |
-
" \"\"\"\n",
|
| 483 |
-
" # librosa.load resamples and returns mono float32\n",
|
| 484 |
-
" y, _ = librosa.load(path, sr=target_sr, mono=True)\n",
|
| 485 |
-
"\n",
|
| 486 |
-
" # ββ Trim or zero-pad to exactly target_len samples ββββββββββββββββββββ\n",
|
| 487 |
-
" if len(y) >= target_len:\n",
|
| 488 |
-
" y = y[:target_len]\n",
|
| 489 |
-
" else:\n",
|
| 490 |
-
" pad = target_len - len(y)\n",
|
| 491 |
-
" y = np.pad(y, (0, pad), mode=\"constant\")\n",
|
| 492 |
-
"\n",
|
| 493 |
-
" # ββ Peak normalisation ββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 494 |
-
" peak = np.abs(y).max()\n",
|
| 495 |
-
" if peak > 1e-9:\n",
|
| 496 |
-
" y = y / peak\n",
|
| 497 |
-
"\n",
|
| 498 |
-
" return y.astype(np.float32)\n",
|
| 499 |
-
"\n",
|
| 500 |
-
"\n",
|
| 501 |
-
"def spectral_gate_denoise(\n",
|
| 502 |
-
" y: np.ndarray,\n",
|
| 503 |
-
" sr: int = SAMPLE_RATE,\n",
|
| 504 |
-
" noise_percentile: float = 15.0,\n",
|
| 505 |
-
" threshold_scale: float = 1.5,\n",
|
| 506 |
-
") -> np.ndarray:\n",
|
| 507 |
-
" \"\"\"\n",
|
| 508 |
-
" Simple spectral-gating denoiser.\n",
|
| 509 |
-
"\n",
|
| 510 |
-
" Algorithm\n",
|
| 511 |
-
" ---------\n",
|
| 512 |
-
" 1. Compute STFT of the signal.\n",
|
| 513 |
-
" 2. Estimate the noise floor from the lowest-magnitude frames\n",
|
| 514 |
-
" (using the bottom `noise_percentile`-th percentile of the\n",
|
| 515 |
-
" per-frequency mean magnitudes).\n",
|
| 516 |
-
" 3. Build a soft mask: bins above threshold_scale Γ noise_floor\n",
|
| 517 |
-
" are kept; bins below are attenuated.\n",
|
| 518 |
-
" 4. Apply the mask and reconstruct via inverse STFT.\n",
|
| 519 |
-
"\n",
|
| 520 |
-
" Parameters\n",
|
| 521 |
-
" ----------\n",
|
| 522 |
-
" y : input waveform (float32, mono)\n",
|
| 523 |
-
" sr : sample rate\n",
|
| 524 |
-
" noise_percentile : percentile used to estimate the noise floor\n",
|
| 525 |
-
" threshold_scale : multiplier on the noise floor threshold\n",
|
| 526 |
-
"\n",
|
| 527 |
-
" Returns\n",
|
| 528 |
-
" -------\n",
|
| 529 |
-
" Denoised waveform (float32), same length as input.\n",
|
| 530 |
-
" \"\"\"\n",
|
| 531 |
-
" n_fft = 512\n",
|
| 532 |
-
" hop = 128\n",
|
| 533 |
-
"\n",
|
| 534 |
-
" # Forward STFT: shape (n_fft//2+1, n_frames)\n",
|
| 535 |
-
" stft = librosa.stft(y, n_fft=n_fft, hop_length=hop)\n",
|
| 536 |
-
" magnitude, phase = np.abs(stft), np.angle(stft)\n",
|
| 537 |
-
"\n",
|
| 538 |
-
" # Estimate noise profile (per-frequency mean of lowest frames)\n",
|
| 539 |
-
" noise_profile = np.percentile(magnitude, noise_percentile, axis=1, keepdims=True)\n",
|
| 540 |
-
"\n",
|
| 541 |
-
" # Compute soft mask (sigmoid-like gate)\n",
|
| 542 |
-
" threshold = threshold_scale * noise_profile\n",
|
| 543 |
-
" mask = np.where(magnitude >= threshold, 1.0, magnitude / (threshold + 1e-9))\n",
|
| 544 |
-
"\n",
|
| 545 |
-
" # Apply mask and reconstruct\n",
|
| 546 |
-
" denoised_stft = mask * magnitude * np.exp(1j * phase)\n",
|
| 547 |
-
" y_denoised = librosa.istft(denoised_stft, hop_length=hop, length=len(y))\n",
|
| 548 |
-
"\n",
|
| 549 |
-
" return y_denoised.astype(np.float32)\n",
|
| 550 |
-
"\n",
|
| 551 |
-
"\n",
|
| 552 |
-
"def preprocess_audio(path: str) -> np.ndarray:\n",
|
| 553 |
-
" \"\"\"Full preprocessing pipeline: load β normalise β denoise.\"\"\"\n",
|
| 554 |
-
" y = load_and_normalize(path)\n",
|
| 555 |
-
" y = spectral_gate_denoise(y)\n",
|
| 556 |
-
" return y\n",
|
| 557 |
-
"\n",
|
| 558 |
-
"\n",
|
| 559 |
-
"# ββ Quick sanity check ββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 560 |
-
"sample_path = df[\"path\"].iloc[0]\n",
|
| 561 |
-
"sample_wave = preprocess_audio(sample_path)\n",
|
| 562 |
-
"\n",
|
| 563 |
-
"print(f\"β
Preprocessing OK.\")\n",
|
| 564 |
-
"print(f\" Waveform shape : {sample_wave.shape}\")\n",
|
| 565 |
-
"print(f\" Duration : {len(sample_wave) / SAMPLE_RATE:.2f} s\")\n",
|
| 566 |
-
"print(f\" Peak amplitude : {np.abs(sample_wave).max():.4f}\")\n",
|
| 567 |
-
"\n",
|
| 568 |
-
"# Plot preprocessed waveform\n",
|
| 569 |
-
"fig, ax = plt.subplots(figsize=(10, 2))\n",
|
| 570 |
-
"librosa.display.waveshow(sample_wave, sr=SAMPLE_RATE, ax=ax, color=\"steelblue\")\n",
|
| 571 |
-
"ax.set_title(f\"Preprocessed waveform β label={df['label'].iloc[0]} (0=real, 1=fake)\")\n",
|
| 572 |
-
"ax.set_xlabel(\"Time (s)\")\n",
|
| 573 |
-
"plt.tight_layout()\n",
|
| 574 |
-
"plt.show()"
|
| 575 |
-
]
|
| 576 |
-
},
|
| 577 |
-
{
|
| 578 |
-
"cell_type": "markdown",
|
| 579 |
-
"metadata": {},
|
| 580 |
-
"source": [
|
| 581 |
-
"## π¬ Cell 6 β Feature Extraction (Log-Mel + Teager Energy Operator)"
|
| 582 |
-
]
|
| 583 |
-
},
|
| 584 |
-
{
|
| 585 |
-
"cell_type": "code",
|
| 586 |
-
"execution_count": null,
|
| 587 |
-
"metadata": {},
|
| 588 |
-
"outputs": [],
|
| 589 |
-
"source": [
|
| 590 |
-
"def compute_log_mel(\n",
|
| 591 |
-
" y: np.ndarray,\n",
|
| 592 |
-
" sr: int = SAMPLE_RATE,\n",
|
| 593 |
-
" n_mels: int = N_MELS,\n",
|
| 594 |
-
" n_fft: int = N_FFT,\n",
|
| 595 |
-
" hop_length: int = HOP_LENGTH,\n",
|
| 596 |
-
" fmin: float = FMIN,\n",
|
| 597 |
-
" fmax: float = FMAX,\n",
|
| 598 |
-
") -> np.ndarray:\n",
|
| 599 |
-
" \"\"\"\n",
|
| 600 |
-
" Compute log-mel spectrogram.\n",
|
| 601 |
-
"\n",
|
| 602 |
-
" Returns\n",
|
| 603 |
-
" -------\n",
|
| 604 |
-
" log_mel : shape (n_mels, T) β float32\n",
|
| 605 |
-
" \"\"\"\n",
|
| 606 |
-
" mel_spec = librosa.feature.melspectrogram(\n",
|
| 607 |
-
" y = y,\n",
|
| 608 |
-
" sr = sr,\n",
|
| 609 |
-
" n_mels = n_mels,\n",
|
| 610 |
-
" n_fft = n_fft,\n",
|
| 611 |
-
" hop_length = hop_length,\n",
|
| 612 |
-
" fmin = fmin,\n",
|
| 613 |
-
" fmax = fmax,\n",
|
| 614 |
-
" ) # shape: (n_mels, T) β power spectrogram\n",
|
| 615 |
-
"\n",
|
| 616 |
-
" # Convert to log scale (decibels), clamp floor at -80 dB\n",
|
| 617 |
-
" log_mel = librosa.power_to_db(mel_spec, ref=np.max)\n",
|
| 618 |
-
" return log_mel.astype(np.float32)\n",
|
| 619 |
-
"\n",
|
| 620 |
-
"\n",
|
| 621 |
-
"def compute_teager_energy(\n",
|
| 622 |
-
" y: np.ndarray,\n",
|
| 623 |
-
" sr: int = SAMPLE_RATE,\n",
|
| 624 |
-
" hop_length: int = HOP_LENGTH,\n",
|
| 625 |
-
" n_fft: int = N_FFT,\n",
|
| 626 |
-
") -> np.ndarray:\n",
|
| 627 |
-
" \"\"\"\n",
|
| 628 |
-
" Compute frame-level Teager Energy Operator (TEO).\n",
|
| 629 |
-
"\n",
|
| 630 |
-
" The discrete TEO is defined as:\n",
|
| 631 |
-
" Ξ¨[x(n)] = x(n)^2 β x(nβ1) Β· x(n+1)\n",
|
| 632 |
-
"\n",
|
| 633 |
-
" This captures instantaneous energy and is sensitive to\n",
|
| 634 |
-
" unnatural modulation artefacts introduced by vocoders.\n",
|
| 635 |
-
"\n",
|
| 636 |
-
" Returns\n",
|
| 637 |
-
" -------\n",
|
| 638 |
-
" teo_frames : shape (1, T) β frame-level mean TEO β float32\n",
|
| 639 |
-
" \"\"\"\n",
|
| 640 |
-
" # Compute per-sample TEO (boundary samples use clipped indexing)\n",
|
| 641 |
-
" y_pad = np.pad(y, 1, mode=\"edge\") # length N+2\n",
|
| 642 |
-
" teo_raw = y_pad[1:-1]**2 - y_pad[:-2] * y_pad[2:] # length N\n",
|
| 643 |
-
" teo_raw = np.abs(teo_raw) # take absolute value\n",
|
| 644 |
-
"\n",
|
| 645 |
-
" # Frame the TEO signal to match the mel spectrogram time axis\n",
|
| 646 |
-
" # Using librosa.util.frame for consistent framing\n",
|
| 647 |
-
" frames = librosa.util.frame(\n",
|
| 648 |
-
" teo_raw,\n",
|
| 649 |
-
" frame_length = n_fft,\n",
|
| 650 |
-
" hop_length = hop_length,\n",
|
| 651 |
-
" ) # shape: (n_fft, T)\n",
|
| 652 |
-
"\n",
|
| 653 |
-
" # Collapse to a single row per frame: mean TEO energy\n",
|
| 654 |
-
" teo_frames = frames.mean(axis=0, keepdims=True) # shape: (1, T)\n",
|
| 655 |
-
" return np.log1p(teo_frames).astype(np.float32) # log-compress\n",
|
| 656 |
-
"\n",
|
| 657 |
-
"\n",
|
| 658 |
-
"def extract_features(y: np.ndarray) -> np.ndarray:\n",
|
| 659 |
-
" \"\"\"\n",
|
| 660 |
-
" Combined feature extraction: log-mel + TEO.\n",
|
| 661 |
-
"\n",
|
| 662 |
-
" Steps\n",
|
| 663 |
-
" -----\n",
|
| 664 |
-
" 1. Compute 40-band log-mel spectrogram β shape (40, T)\n",
|
| 665 |
-
" 2. Compute frame-level TEO β shape (1, T)\n",
|
| 666 |
-
" 3. Concatenate along feature axis β shape (41, T)\n",
|
| 667 |
-
" 4. Align T across both via min-trimming.\n",
|
| 668 |
-
"\n",
|
| 669 |
-
" Returns\n",
|
| 670 |
-
" -------\n",
|
| 671 |
-
" feature_matrix : np.ndarray, shape (41, T) β float32\n",
|
| 672 |
-
" \"\"\"\n",
|
| 673 |
-
" log_mel = compute_log_mel(y) # (40, T_mel)\n",
|
| 674 |
-
" teo = compute_teager_energy(y) # (1, T_teo)\n",
|
| 675 |
-
"\n",
|
| 676 |
-
" # Align time dimensions (may differ by 1-2 frames due to boundary effects)\n",
|
| 677 |
-
" T = min(log_mel.shape[1], teo.shape[1])\n",
|
| 678 |
-
" log_mel = log_mel[:, :T]\n",
|
| 679 |
-
" teo = teo[:, :T]\n",
|
| 680 |
-
"\n",
|
| 681 |
-
" return np.concatenate([log_mel, teo], axis=0) # (41, T)\n",
|
| 682 |
-
"\n",
|
| 683 |
-
"\n",
|
| 684 |
-
"# ββ Verify feature extraction on the sample ββββββββββββββββββββββββββββββββ\n",
|
| 685 |
-
"feat = extract_features(sample_wave)\n",
|
| 686 |
-
"print(f\"β
Feature matrix shape: {feat.shape} (features Γ time_frames)\")\n",
|
| 687 |
-
"\n",
|
| 688 |
-
"# Visualise features\n",
|
| 689 |
-
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
| 690 |
-
"\n",
|
| 691 |
-
"# Log-mel panel\n",
|
| 692 |
-
"img = librosa.display.specshow(\n",
|
| 693 |
-
" feat[:40],\n",
|
| 694 |
-
" sr=SAMPLE_RATE,\n",
|
| 695 |
-
" hop_length=HOP_LENGTH,\n",
|
| 696 |
-
" x_axis=\"time\",\n",
|
| 697 |
-
" y_axis=\"mel\",\n",
|
| 698 |
-
" ax=axes[0],\n",
|
| 699 |
-
" cmap=\"magma\",\n",
|
| 700 |
-
")\n",
|
| 701 |
-
"axes[0].set_title(\"40-band Log-Mel Spectrogram\")\n",
|
| 702 |
-
"fig.colorbar(img, ax=axes[0], format=\"%+2.0f dB\")\n",
|
| 703 |
-
"\n",
|
| 704 |
-
"# TEO panel\n",
|
| 705 |
-
"axes[1].plot(feat[40], color=\"darkorange\", lw=0.8)\n",
|
| 706 |
-
"axes[1].set_title(\"Teager Energy Operator (frame-level)\")\n",
|
| 707 |
-
"axes[1].set_xlabel(\"Frame index\")\n",
|
| 708 |
-
"axes[1].set_ylabel(\"log(1 + TEO)\")\n",
|
| 709 |
-
"axes[1].grid(True, alpha=0.3)\n",
|
| 710 |
-
"\n",
|
| 711 |
-
"plt.tight_layout()\n",
|
| 712 |
-
"plt.show()"
|
| 713 |
-
]
|
| 714 |
-
},
|
| 715 |
-
{
|
| 716 |
-
"cell_type": "markdown",
|
| 717 |
-
"metadata": {},
|
| 718 |
-
"source": [
|
| 719 |
-
"## π§ Cell 7 β ECAPA-TDNN Architecture"
|
| 720 |
-
]
|
| 721 |
-
},
|
| 722 |
-
{
|
| 723 |
-
"cell_type": "code",
|
| 724 |
-
"execution_count": null,
|
| 725 |
-
"metadata": {},
|
| 726 |
-
"outputs": [],
|
| 727 |
-
"source": [
|
| 728 |
-
"class SEBlock(nn.Module):\n",
|
| 729 |
-
" \"\"\"\n",
|
| 730 |
-
" Squeeze-and-Excitation (SE) channel attention block.\n",
|
| 731 |
-
"\n",
|
| 732 |
-
" Adaptively re-weights each channel by learning global statistics.\n",
|
| 733 |
-
" Introduced in 'Squeeze-and-Excitation Networks' (Hu et al., 2018).\n",
|
| 734 |
-
" \"\"\"\n",
|
| 735 |
-
"\n",
|
| 736 |
-
" def __init__(self, channels: int, bottleneck: int = 128):\n",
|
| 737 |
-
" super().__init__()\n",
|
| 738 |
-
" self.squeeze = nn.AdaptiveAvgPool1d(1) # global average pool\n",
|
| 739 |
-
" self.excite = nn.Sequential(\n",
|
| 740 |
-
" nn.Linear(channels, bottleneck),\n",
|
| 741 |
-
" nn.ReLU(inplace=True),\n",
|
| 742 |
-
" nn.Linear(bottleneck, channels),\n",
|
| 743 |
-
" nn.Sigmoid(),\n",
|
| 744 |
-
" )\n",
|
| 745 |
-
"\n",
|
| 746 |
-
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 747 |
-
" # x: (B, C, T)\n",
|
| 748 |
-
" s = self.squeeze(x).squeeze(-1) # (B, C)\n",
|
| 749 |
-
" e = self.excite(s).unsqueeze(-1) # (B, C, 1)\n",
|
| 750 |
-
" return x * e # channel-wise scaling\n",
|
| 751 |
-
"\n",
|
| 752 |
-
"\n",
|
| 753 |
-
"class TDNNBlock(nn.Module):\n",
|
| 754 |
-
" \"\"\"\n",
|
| 755 |
-
" Res2Net-style TDNN block with dilated 1-D convolution + SE attention.\n",
|
| 756 |
-
"\n",
|
| 757 |
-
" Each TDNN block:\n",
|
| 758 |
-
" 1. Projects input to the same channel width.\n",
|
| 759 |
-
" 2. Applies a dilated depthwise-style 1D conv (captures long-range context).\n",
|
| 760 |
-
" 3. Applies channel attention via SE block.\n",
|
| 761 |
-
" 4. Adds residual connection.\n",
|
| 762 |
-
" \"\"\"\n",
|
| 763 |
-
"\n",
|
| 764 |
-
" def __init__(\n",
|
| 765 |
-
" self,\n",
|
| 766 |
-
" in_channels: int,\n",
|
| 767 |
-
" out_channels: int,\n",
|
| 768 |
-
" kernel_size: int = 3,\n",
|
| 769 |
-
" dilation: int = 1,\n",
|
| 770 |
-
" ):\n",
|
| 771 |
-
" super().__init__()\n",
|
| 772 |
-
" self.conv = nn.Conv1d(\n",
|
| 773 |
-
" in_channels,\n",
|
| 774 |
-
" out_channels,\n",
|
| 775 |
-
" kernel_size = kernel_size,\n",
|
| 776 |
-
" dilation = dilation,\n",
|
| 777 |
-
" padding = (kernel_size - 1) * dilation // 2, # same padding\n",
|
| 778 |
-
" )\n",
|
| 779 |
-
" self.bn = nn.BatchNorm1d(out_channels)\n",
|
| 780 |
-
" self.act = nn.ReLU(inplace=True)\n",
|
| 781 |
-
" self.se = SEBlock(out_channels)\n",
|
| 782 |
-
"\n",
|
| 783 |
-
" # Residual projection if channel dims differ\n",
|
| 784 |
-
" self.res_proj = (\n",
|
| 785 |
-
" nn.Conv1d(in_channels, out_channels, kernel_size=1)\n",
|
| 786 |
-
" if in_channels != out_channels\n",
|
| 787 |
-
" else nn.Identity()\n",
|
| 788 |
-
" )\n",
|
| 789 |
-
"\n",
|
| 790 |
-
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 791 |
-
" residual = self.res_proj(x)\n",
|
| 792 |
-
" out = self.act(self.bn(self.conv(x)))\n",
|
| 793 |
-
" out = self.se(out)\n",
|
| 794 |
-
" return out + residual\n",
|
| 795 |
-
"\n",
|
| 796 |
-
"\n",
|
| 797 |
-
"class AttentiveStatPooling(nn.Module):\n",
|
| 798 |
-
" \"\"\"\n",
|
| 799 |
-
" Attentive statistics pooling (temporal aggregation).\n",
|
| 800 |
-
"\n",
|
| 801 |
-
" Learns a soft alignment over time frames and computes\n",
|
| 802 |
-
" the weighted mean and standard deviation, producing a\n",
|
| 803 |
-
" fixed-length utterance-level representation.\n",
|
| 804 |
-
" \"\"\"\n",
|
| 805 |
-
"\n",
|
| 806 |
-
" def __init__(self, in_channels: int, attention_hidden: int = 128):\n",
|
| 807 |
-
" super().__init__()\n",
|
| 808 |
-
" self.attention = nn.Sequential(\n",
|
| 809 |
-
" nn.Conv1d(in_channels, attention_hidden, kernel_size=1),\n",
|
| 810 |
-
" nn.Tanh(),\n",
|
| 811 |
-
" nn.Conv1d(attention_hidden, in_channels, kernel_size=1),\n",
|
| 812 |
-
" nn.Softmax(dim=-1), # softmax over the time axis\n",
|
| 813 |
-
" )\n",
|
| 814 |
-
"\n",
|
| 815 |
-
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 816 |
-
" # x: (B, C, T)\n",
|
| 817 |
-
" w = self.attention(x) # (B, C, T) β attention weights\n",
|
| 818 |
-
" mean = (w * x).sum(dim=-1) # (B, C) β weighted mean\n",
|
| 819 |
-
" var = (w * (x - mean.unsqueeze(-1))**2).sum(dim=-1) # (B, C)\n",
|
| 820 |
-
" std = torch.sqrt(var + 1e-8) # (B, C)\n",
|
| 821 |
-
" return torch.cat([mean, std], dim=1) # (B, 2C)\n",
|
| 822 |
-
"\n",
|
| 823 |
-
"\n",
|
| 824 |
-
"class ECAPATDNN(nn.Module):\n",
|
| 825 |
-
" \"\"\"\n",
|
| 826 |
-
" Simplified ECAPA-TDNN speaker/spoof embedding model.\n",
|
| 827 |
-
"\n",
|
| 828 |
-
" Input : feature matrix of shape (B, n_features, T)\n",
|
| 829 |
-
" where n_features = 41 (40 log-mel + 1 TEO)\n",
|
| 830 |
-
" Output : (B, 2) logits for binary classification\n",
|
| 831 |
-
" Embeddings can be extracted from the penultimate FC layer.\n",
|
| 832 |
-
"\n",
|
| 833 |
-
" Architecture\n",
|
| 834 |
-
" ------------\n",
|
| 835 |
-
" Input conv β TDNN Γ 3 (dilations 1, 2, 3)\n",
|
| 836 |
-
" β concatenation of multi-scale features\n",
|
| 837 |
-
" β 1Γ1 aggregation conv\n",
|
| 838 |
-
" β attentive statistics pooling\n",
|
| 839 |
-
" β FC β BN β ReLU (embedding layer, 192-dim)\n",
|
| 840 |
-
" β linear classifier (2 classes)\n",
|
| 841 |
-
" \"\"\"\n",
|
| 842 |
-
"\n",
|
| 843 |
-
" def __init__(\n",
|
| 844 |
-
" self,\n",
|
| 845 |
-
" in_channels: int = 41,\n",
|
| 846 |
-
" channels: int = CHANNELS,\n",
|
| 847 |
-
" emb_dim: int = EMBEDDING_DIM,\n",
|
| 848 |
-
" ):\n",
|
| 849 |
-
" super().__init__()\n",
|
| 850 |
-
"\n",
|
| 851 |
-
" # ββ Entry convolution βββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 852 |
-
" self.input_conv = nn.Sequential(\n",
|
| 853 |
-
" nn.Conv1d(in_channels, channels, kernel_size=5, padding=2),\n",
|
| 854 |
-
" nn.BatchNorm1d(channels),\n",
|
| 855 |
-
" nn.ReLU(inplace=True),\n",
|
| 856 |
-
" )\n",
|
| 857 |
-
"\n",
|
| 858 |
-
" # ββ Multi-scale TDNN blocks βββββββββββββββββββββββββββββββββββββ\n",
|
| 859 |
-
" # Three blocks with increasing dilation to model different\n",
|
| 860 |
-
" # temporal receptive fields simultaneously.\n",
|
| 861 |
-
" self.tdnn1 = TDNNBlock(channels, channels, kernel_size=3, dilation=1)\n",
|
| 862 |
-
" self.tdnn2 = TDNNBlock(channels, channels, kernel_size=3, dilation=2)\n",
|
| 863 |
-
" self.tdnn3 = TDNNBlock(channels, channels, kernel_size=3, dilation=3)\n",
|
| 864 |
-
"\n",
|
| 865 |
-
" # ββ Multi-scale aggregation βββββββββββββββββββββββββββββββββββββ\n",
|
| 866 |
-
" # Concatenate outputs from all three TDNN blocks β 3Γchannels,\n",
|
| 867 |
-
" # then compress back to `channels` with a 1Γ1 conv.\n",
|
| 868 |
-
" self.agg_conv = nn.Sequential(\n",
|
| 869 |
-
" nn.Conv1d(channels * 3, channels, kernel_size=1),\n",
|
| 870 |
-
" nn.BatchNorm1d(channels),\n",
|
| 871 |
-
" nn.ReLU(inplace=True),\n",
|
| 872 |
-
" )\n",
|
| 873 |
-
"\n",
|
| 874 |
-
" # ββ Temporal pooling ββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 875 |
-
" self.pool = AttentiveStatPooling(channels)\n",
|
| 876 |
-
" # After pooling: mean + std concatenated β 2 Γ channels\n",
|
| 877 |
-
"\n",
|
| 878 |
-
" # ββ Embedding FC ββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 879 |
-
" self.emb_fc = nn.Sequential(\n",
|
| 880 |
-
" nn.Linear(channels * 2, emb_dim),\n",
|
| 881 |
-
" nn.BatchNorm1d(emb_dim),\n",
|
| 882 |
-
" nn.ReLU(inplace=True),\n",
|
| 883 |
-
" )\n",
|
| 884 |
-
"\n",
|
| 885 |
-
" # ββ Binary classifier βββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 886 |
-
" self.classifier = nn.Linear(emb_dim, 2)\n",
|
| 887 |
-
"\n",
|
| 888 |
-
" self._init_weights()\n",
|
| 889 |
-
"\n",
|
| 890 |
-
" def _init_weights(self):\n",
|
| 891 |
-
" \"\"\"Xavier initialisation for all Conv1d and Linear layers.\"\"\"\n",
|
| 892 |
-
" for m in self.modules():\n",
|
| 893 |
-
" if isinstance(m, (nn.Conv1d, nn.Linear)):\n",
|
| 894 |
-
" nn.init.xavier_uniform_(m.weight)\n",
|
| 895 |
-
" if m.bias is not None:\n",
|
| 896 |
-
" nn.init.zeros_(m.bias)\n",
|
| 897 |
-
"\n",
|
| 898 |
-
" def embed(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 899 |
-
" \"\"\"\n",
|
| 900 |
-
" Extract 192-dim embedding (used post-training for XGBoost input).\n",
|
| 901 |
-
"\n",
|
| 902 |
-
" Parameters\n",
|
| 903 |
-
" ----------\n",
|
| 904 |
-
" x : (B, in_channels, T)\n",
|
| 905 |
-
"\n",
|
| 906 |
-
" Returns\n",
|
| 907 |
-
" -------\n",
|
| 908 |
-
" emb : (B, emb_dim)\n",
|
| 909 |
-
" \"\"\"\n",
|
| 910 |
-
" x = self.input_conv(x)\n",
|
| 911 |
-
" t1 = self.tdnn1(x)\n",
|
| 912 |
-
" t2 = self.tdnn2(x)\n",
|
| 913 |
-
" t3 = self.tdnn3(x)\n",
|
| 914 |
-
" x = self.agg_conv(torch.cat([t1, t2, t3], dim=1))\n",
|
| 915 |
-
" x = self.pool(x)\n",
|
| 916 |
-
" return self.emb_fc(x)\n",
|
| 917 |
-
"\n",
|
| 918 |
-
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
|
| 919 |
-
" \"\"\"Full forward pass returning classification logits.\"\"\"\n",
|
| 920 |
-
" return self.classifier(self.embed(x))\n",
|
| 921 |
-
"\n",
|
| 922 |
-
"\n",
|
| 923 |
-
"# ββ Instantiate and profile the model ββββββββββββββββββββββββββββββββββββ\n",
|
| 924 |
-
"model = ECAPATDNN().to(DEVICE)\n",
|
| 925 |
-
"\n",
|
| 926 |
-
"# Count trainable parameters\n",
|
| 927 |
-
"n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
|
| 928 |
-
"print(f\"β
ECAPA-TDNN instantiated.\")\n",
|
| 929 |
-
"print(f\" Trainable parameters : {n_params:,}\")\n",
|
| 930 |
-
"\n",
|
| 931 |
-
"# Sanity-check a forward pass\n",
|
| 932 |
-
"T_test = feat.shape[1]\n",
|
| 933 |
-
"dummy = torch.randn(4, 41, T_test).to(DEVICE)\n",
|
| 934 |
-
"logits = model(dummy)\n",
|
| 935 |
-
"emb = model.embed(dummy)\n",
|
| 936 |
-
"print(f\" Logit shape : {logits.shape} (expected [4, 2])\")\n",
|
| 937 |
-
"print(f\" Embedding shape : {emb.shape} (expected [4, {EMBEDDING_DIM}])\")"
|
| 938 |
-
]
|
| 939 |
-
},
|
| 940 |
-
{
|
| 941 |
-
"cell_type": "markdown",
|
| 942 |
-
"metadata": {},
|
| 943 |
-
"source": [
|
| 944 |
-
"## π¦ Cell 8 β PyTorch Dataset & DataLoader"
|
| 945 |
-
]
|
| 946 |
-
},
|
| 947 |
-
{
|
| 948 |
-
"cell_type": "code",
|
| 949 |
-
"execution_count": null,
|
| 950 |
-
"metadata": {},
|
| 951 |
-
"outputs": [],
|
| 952 |
-
"source": [
|
| 953 |
-
"class AudioDataset(Dataset):\n",
|
| 954 |
-
" \"\"\"\n",
|
| 955 |
-
" PyTorch Dataset for audio deepfake detection.\n",
|
| 956 |
-
"\n",
|
| 957 |
-
" Each __getitem__ call:\n",
|
| 958 |
-
" 1. Loads and preprocesses the WAV file (load β normalise β denoise).\n",
|
| 959 |
-
" 2. Extracts the feature matrix (log-mel + TEO).\n",
|
| 960 |
-
" 3. Returns (feature_tensor, label).\n",
|
| 961 |
-
"\n",
|
| 962 |
-
" Parameters\n",
|
| 963 |
-
" ----------\n",
|
| 964 |
-
" df : DataFrame with columns [path, label]\n",
|
| 965 |
-
" fixed_T : fixed number of time frames (pad/trim feature matrix)\n",
|
| 966 |
-
" \"\"\"\n",
|
| 967 |
-
"\n",
|
| 968 |
-
" def __init__(self, df: pd.DataFrame, fixed_T: Optional[int] = None):\n",
|
| 969 |
-
" self.paths = df[\"path\"].tolist()\n",
|
| 970 |
-
" self.labels = df[\"label\"].tolist()\n",
|
| 971 |
-
" self.fixed_T = fixed_T\n",
|
| 972 |
-
"\n",
|
| 973 |
-
" def __len__(self) -> int:\n",
|
| 974 |
-
" return len(self.paths)\n",
|
| 975 |
-
"\n",
|
| 976 |
-
" def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:\n",
|
| 977 |
-
" y = preprocess_audio(self.paths[idx])\n",
|
| 978 |
-
" feat = extract_features(y) # (41, T)\n",
|
| 979 |
-
"\n",
|
| 980 |
-
" # Align time dimension across all samples in the batch\n",
|
| 981 |
-
" if self.fixed_T is not None:\n",
|
| 982 |
-
" T = feat.shape[1]\n",
|
| 983 |
-
" if T >= self.fixed_T:\n",
|
| 984 |
-
" feat = feat[:, :self.fixed_T]\n",
|
| 985 |
-
" else:\n",
|
| 986 |
-
" feat = np.pad(feat, ((0, 0), (0, self.fixed_T - T)), mode=\"constant\")\n",
|
| 987 |
-
"\n",
|
| 988 |
-
" x = torch.tensor(feat, dtype=torch.float32) # (41, T)\n",
|
| 989 |
-
" y = torch.tensor(self.labels[idx], dtype=torch.long) # scalar\n",
|
| 990 |
-
" return x, y\n",
|
| 991 |
-
"\n",
|
| 992 |
-
"\n",
|
| 993 |
-
"# ββ Determine fixed T from the first sample βββββββββββββββββββββββββββββ\n",
|
| 994 |
-
"sample_feat = extract_features(preprocess_audio(df[\"path\"].iloc[0]))\n",
|
| 995 |
-
"FIXED_T = sample_feat.shape[1]\n",
|
| 996 |
-
"print(f\"β
Fixed time frames per sample: {FIXED_T}\")\n",
|
| 997 |
-
"\n",
|
| 998 |
-
"# ββ Train / validation split (80 / 20) ββββββββββββββββββββββββββββββββββ\n",
|
| 999 |
-
"df_train, df_val = train_test_split(\n",
|
| 1000 |
-
" df,\n",
|
| 1001 |
-
" test_size = 0.20,\n",
|
| 1002 |
-
" stratify = df[\"label\"],\n",
|
| 1003 |
-
" random_state = SEED,\n",
|
| 1004 |
-
")\n",
|
| 1005 |
-
"\n",
|
| 1006 |
-
"print(f\" Train samples : {len(df_train)}\")\n",
|
| 1007 |
-
"print(f\" Val samples : {len(df_val)}\")\n",
|
| 1008 |
-
"\n",
|
| 1009 |
-
"# ββ Build datasets and loaders ββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1010 |
-
"train_ds = AudioDataset(df_train, fixed_T=FIXED_T)\n",
|
| 1011 |
-
"val_ds = AudioDataset(df_val, fixed_T=FIXED_T)\n",
|
| 1012 |
-
"\n",
|
| 1013 |
-
"train_loader = DataLoader(\n",
|
| 1014 |
-
" train_ds,\n",
|
| 1015 |
-
" batch_size = ECAPA_BATCH,\n",
|
| 1016 |
-
" shuffle = True,\n",
|
| 1017 |
-
" num_workers = 0, # 0 avoids multiprocessing issues in Kaggle notebooks\n",
|
| 1018 |
-
" pin_memory = DEVICE.type == \"cuda\",\n",
|
| 1019 |
-
")\n",
|
| 1020 |
-
"val_loader = DataLoader(\n",
|
| 1021 |
-
" val_ds,\n",
|
| 1022 |
-
" batch_size = ECAPA_BATCH,\n",
|
| 1023 |
-
" shuffle = False,\n",
|
| 1024 |
-
" num_workers = 0,\n",
|
| 1025 |
-
" pin_memory = DEVICE.type == \"cuda\",\n",
|
| 1026 |
-
")\n",
|
| 1027 |
-
"\n",
|
| 1028 |
-
"print(f\"\\n Train batches : {len(train_loader)}\")\n",
|
| 1029 |
-
"print(f\" Val batches : {len(val_loader)}\")"
|
| 1030 |
-
]
|
| 1031 |
-
},
|
| 1032 |
-
{
|
| 1033 |
-
"cell_type": "markdown",
|
| 1034 |
-
"metadata": {},
|
| 1035 |
-
"source": [
|
| 1036 |
-
"## ποΈ Cell 9 β Train ECAPA-TDNN"
|
| 1037 |
-
]
|
| 1038 |
-
},
|
| 1039 |
-
{
|
| 1040 |
-
"cell_type": "code",
|
| 1041 |
-
"execution_count": null,
|
| 1042 |
-
"metadata": {},
|
| 1043 |
-
"outputs": [],
|
| 1044 |
-
"source": [
|
| 1045 |
-
"def train_one_epoch(\n",
|
| 1046 |
-
" model: nn.Module,\n",
|
| 1047 |
-
" loader: DataLoader,\n",
|
| 1048 |
-
" optimizer: torch.optim.Optimizer,\n",
|
| 1049 |
-
" criterion: nn.Module,\n",
|
| 1050 |
-
") -> float:\n",
|
| 1051 |
-
" \"\"\"\n",
|
| 1052 |
-
" Run one training epoch.\n",
|
| 1053 |
-
"\n",
|
| 1054 |
-
" Returns\n",
|
| 1055 |
-
" -------\n",
|
| 1056 |
-
" avg_loss : mean cross-entropy loss over all batches\n",
|
| 1057 |
-
" \"\"\"\n",
|
| 1058 |
-
" model.train()\n",
|
| 1059 |
-
" total_loss = 0.0\n",
|
| 1060 |
-
"\n",
|
| 1061 |
-
" for x, y in loader:\n",
|
| 1062 |
-
" x, y = x.to(DEVICE), y.to(DEVICE)\n",
|
| 1063 |
-
"\n",
|
| 1064 |
-
" optimizer.zero_grad()\n",
|
| 1065 |
-
" logits = model(x) # (B, 2)\n",
|
| 1066 |
-
" loss = criterion(logits, y)\n",
|
| 1067 |
-
" loss.backward()\n",
|
| 1068 |
-
" optimizer.step()\n",
|
| 1069 |
-
"\n",
|
| 1070 |
-
" total_loss += loss.item() * len(y)\n",
|
| 1071 |
-
"\n",
|
| 1072 |
-
" return total_loss / len(loader.dataset)\n",
|
| 1073 |
-
"\n",
|
| 1074 |
-
"\n",
|
| 1075 |
-
"@torch.no_grad()\n",
|
| 1076 |
-
"def evaluate(\n",
|
| 1077 |
-
" model: nn.Module,\n",
|
| 1078 |
-
" loader: DataLoader,\n",
|
| 1079 |
-
" criterion: nn.Module,\n",
|
| 1080 |
-
") -> Tuple[float, float]:\n",
|
| 1081 |
-
" \"\"\"\n",
|
| 1082 |
-
" Evaluate model on a DataLoader.\n",
|
| 1083 |
-
"\n",
|
| 1084 |
-
" Returns\n",
|
| 1085 |
-
" -------\n",
|
| 1086 |
-
" avg_loss : float\n",
|
| 1087 |
-
" accuracy : float (fraction correct)\n",
|
| 1088 |
-
" \"\"\"\n",
|
| 1089 |
-
" model.eval()\n",
|
| 1090 |
-
" total_loss = 0.0\n",
|
| 1091 |
-
" correct = 0\n",
|
| 1092 |
-
"\n",
|
| 1093 |
-
" for x, y in loader:\n",
|
| 1094 |
-
" x, y = x.to(DEVICE), y.to(DEVICE)\n",
|
| 1095 |
-
" logits = model(x)\n",
|
| 1096 |
-
" loss = criterion(logits, y)\n",
|
| 1097 |
-
"\n",
|
| 1098 |
-
" total_loss += loss.item() * len(y)\n",
|
| 1099 |
-
" preds = logits.argmax(dim=1)\n",
|
| 1100 |
-
" correct += (preds == y).sum().item()\n",
|
| 1101 |
-
"\n",
|
| 1102 |
-
" avg_loss = total_loss / len(loader.dataset)\n",
|
| 1103 |
-
" accuracy = correct / len(loader.dataset)\n",
|
| 1104 |
-
" return avg_loss, accuracy\n",
|
| 1105 |
-
"\n",
|
| 1106 |
-
"\n",
|
| 1107 |
-
"# ββ Optimiser, scheduler, loss βββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1108 |
-
"optimizer = torch.optim.AdamW(\n",
|
| 1109 |
-
" model.parameters(),\n",
|
| 1110 |
-
" lr = ECAPA_LR,\n",
|
| 1111 |
-
" weight_decay = 1e-4,\n",
|
| 1112 |
-
")\n",
|
| 1113 |
-
"scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
|
| 1114 |
-
" optimizer, T_max=ECAPA_EPOCHS, eta_min=1e-5\n",
|
| 1115 |
-
")\n",
|
| 1116 |
-
"criterion = nn.CrossEntropyLoss() # binary CE via 2-class softmax\n",
|
| 1117 |
-
"\n",
|
| 1118 |
-
"# ββ Training loop ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1119 |
-
"history = {\"train_loss\": [], \"val_loss\": [], \"val_acc\": []}\n",
|
| 1120 |
-
"\n",
|
| 1121 |
-
"best_val_loss = float(\"inf\")\n",
|
| 1122 |
-
"best_weights = None\n",
|
| 1123 |
-
"\n",
|
| 1124 |
-
"print(f\"π Training ECAPA-TDNN for {ECAPA_EPOCHS} epochs on {DEVICE}...\\n\")\n",
|
| 1125 |
-
"start_time = time.time()\n",
|
| 1126 |
-
"\n",
|
| 1127 |
-
"for epoch in range(1, ECAPA_EPOCHS + 1):\n",
|
| 1128 |
-
" t_loss = train_one_epoch(model, train_loader, optimizer, criterion)\n",
|
| 1129 |
-
" v_loss, v_acc = evaluate(model, val_loader, criterion)\n",
|
| 1130 |
-
" scheduler.step()\n",
|
| 1131 |
-
"\n",
|
| 1132 |
-
" history[\"train_loss\"].append(t_loss)\n",
|
| 1133 |
-
" history[\"val_loss\"].append(v_loss)\n",
|
| 1134 |
-
" history[\"val_acc\"].append(v_acc)\n",
|
| 1135 |
-
"\n",
|
| 1136 |
-
" # Save best checkpoint (by validation loss)\n",
|
| 1137 |
-
" if v_loss < best_val_loss:\n",
|
| 1138 |
-
" best_val_loss = v_loss\n",
|
| 1139 |
-
" best_weights = {k: v.cpu().clone() for k, v in model.state_dict().items()}\n",
|
| 1140 |
-
"\n",
|
| 1141 |
-
" print(\n",
|
| 1142 |
-
" f\" Epoch {epoch:03d}/{ECAPA_EPOCHS:03d} \"\n",
|
| 1143 |
-
" f\"train_loss={t_loss:.4f} \"\n",
|
| 1144 |
-
" f\"val_loss={v_loss:.4f} \"\n",
|
| 1145 |
-
" f\"val_acc={v_acc*100:.2f}%\"\n",
|
| 1146 |
-
" )\n",
|
| 1147 |
-
"\n",
|
| 1148 |
-
"elapsed = time.time() - start_time\n",
|
| 1149 |
-
"print(f\"\\nβ
Training complete in {elapsed:.1f}s. Best val loss: {best_val_loss:.4f}\")\n",
|
| 1150 |
-
"\n",
|
| 1151 |
-
"# Restore best weights\n",
|
| 1152 |
-
"model.load_state_dict(best_weights)\n",
|
| 1153 |
-
"\n",
|
| 1154 |
-
"# ββ Plot training curves βββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1155 |
-
"fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 4))\n",
|
| 1156 |
-
"\n",
|
| 1157 |
-
"ax1.plot(history[\"train_loss\"], label=\"Train\", color=\"steelblue\")\n",
|
| 1158 |
-
"ax1.plot(history[\"val_loss\"], label=\"Val\", color=\"tomato\")\n",
|
| 1159 |
-
"ax1.set_title(\"Cross-Entropy Loss\")\n",
|
| 1160 |
-
"ax1.set_xlabel(\"Epoch\")\n",
|
| 1161 |
-
"ax1.set_ylabel(\"Loss\")\n",
|
| 1162 |
-
"ax1.legend()\n",
|
| 1163 |
-
"ax1.grid(True, alpha=0.3)\n",
|
| 1164 |
-
"\n",
|
| 1165 |
-
"ax2.plot(np.array(history[\"val_acc\"]) * 100, color=\"seagreen\", label=\"Val Accuracy\")\n",
|
| 1166 |
-
"ax2.set_title(\"Validation Accuracy\")\n",
|
| 1167 |
-
"ax2.set_xlabel(\"Epoch\")\n",
|
| 1168 |
-
"ax2.set_ylabel(\"Accuracy (%)\")\n",
|
| 1169 |
-
"ax2.legend()\n",
|
| 1170 |
-
"ax2.grid(True, alpha=0.3)\n",
|
| 1171 |
-
"\n",
|
| 1172 |
-
"plt.suptitle(\"ECAPA-TDNN Training Curves\", fontsize=13, fontweight=\"bold\")\n",
|
| 1173 |
-
"plt.tight_layout()\n",
|
| 1174 |
-
"plt.show()"
|
| 1175 |
-
]
|
| 1176 |
-
},
|
| 1177 |
-
{
|
| 1178 |
-
"cell_type": "markdown",
|
| 1179 |
-
"metadata": {},
|
| 1180 |
-
"source": [
|
| 1181 |
-
"## π’ Cell 10 β Extract 192-dim Embeddings"
|
| 1182 |
-
]
|
| 1183 |
-
},
|
| 1184 |
-
{
|
| 1185 |
-
"cell_type": "code",
|
| 1186 |
-
"execution_count": null,
|
| 1187 |
-
"metadata": {},
|
| 1188 |
-
"outputs": [],
|
| 1189 |
-
"source": [
|
| 1190 |
-
"@torch.no_grad()\n",
|
| 1191 |
-
"def extract_embeddings(\n",
|
| 1192 |
-
" model: nn.Module,\n",
|
| 1193 |
-
" loader: DataLoader,\n",
|
| 1194 |
-
") -> Tuple[np.ndarray, np.ndarray]:\n",
|
| 1195 |
-
" \"\"\"\n",
|
| 1196 |
-
" Pass all samples through the trained ECAPA-TDNN to obtain\n",
|
| 1197 |
-
" 192-dimensional embeddings.\n",
|
| 1198 |
-
"\n",
|
| 1199 |
-
" Returns\n",
|
| 1200 |
-
" -------\n",
|
| 1201 |
-
" embeddings : np.ndarray, shape (N, 192)\n",
|
| 1202 |
-
" labels : np.ndarray, shape (N,)\n",
|
| 1203 |
-
" \"\"\"\n",
|
| 1204 |
-
" model.eval()\n",
|
| 1205 |
-
" all_embs = []\n",
|
| 1206 |
-
" all_labels = []\n",
|
| 1207 |
-
"\n",
|
| 1208 |
-
" for x, y in tqdm(loader, desc=\"Extracting embeddings\", leave=False):\n",
|
| 1209 |
-
" x = x.to(DEVICE)\n",
|
| 1210 |
-
" emb = model.embed(x) # (B, 192)\n",
|
| 1211 |
-
" all_embs.append(emb.cpu().numpy())\n",
|
| 1212 |
-
" all_labels.append(y.numpy())\n",
|
| 1213 |
-
"\n",
|
| 1214 |
-
" embeddings = np.vstack(all_embs) # (N, 192)\n",
|
| 1215 |
-
" labels = np.concatenate(all_labels) # (N,)\n",
|
| 1216 |
-
" return embeddings, labels\n",
|
| 1217 |
-
"\n",
|
| 1218 |
-
"\n",
|
| 1219 |
-
"# Build a single DataLoader covering the full dataset (no shuffling)\n",
|
| 1220 |
-
"# We will split embeddings later into train/test for XGBoost\n",
|
| 1221 |
-
"full_ds = AudioDataset(df, fixed_T=FIXED_T)\n",
|
| 1222 |
-
"full_loader = DataLoader(\n",
|
| 1223 |
-
" full_ds,\n",
|
| 1224 |
-
" batch_size = ECAPA_BATCH,\n",
|
| 1225 |
-
" shuffle = False,\n",
|
| 1226 |
-
" num_workers = 0,\n",
|
| 1227 |
-
")\n",
|
| 1228 |
-
"\n",
|
| 1229 |
-
"print(\"π Extracting embeddings for all samples...\")\n",
|
| 1230 |
-
"embeddings, labels = extract_embeddings(model, full_loader)\n",
|
| 1231 |
-
"\n",
|
| 1232 |
-
"print(f\"β
Embedding matrix shape : {embeddings.shape}\")\n",
|
| 1233 |
-
"print(f\" Label array shape : {labels.shape}\")\n",
|
| 1234 |
-
"print(f\" Class balance β real : {(labels==0).sum()}\")\n",
|
| 1235 |
-
"print(f\" Class balance β fake : {(labels==1).sum()}\")\n",
|
| 1236 |
-
"\n",
|
| 1237 |
-
"# ββ t-SNE visualisation of embeddings ββββββββββββββββββββββββββββββββββββ\n",
|
| 1238 |
-
"from sklearn.manifold import TSNE\n",
|
| 1239 |
-
"\n",
|
| 1240 |
-
"print(\"\\nπ Running t-SNE (may take ~30 s)...\")\n",
|
| 1241 |
-
"tsne = TSNE(n_components=2, random_state=SEED, perplexity=30, n_iter=500)\n",
|
| 1242 |
-
"emb_2d = tsne.fit_transform(embeddings)\n",
|
| 1243 |
-
"\n",
|
| 1244 |
-
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
| 1245 |
-
"colours = [\"steelblue\", \"tomato\"]\n",
|
| 1246 |
-
"for c, label_name in enumerate([\"Real\", \"Fake\"]):\n",
|
| 1247 |
-
" mask = labels == c\n",
|
| 1248 |
-
" ax.scatter(\n",
|
| 1249 |
-
" emb_2d[mask, 0], emb_2d[mask, 1],\n",
|
| 1250 |
-
" c=colours[c], label=label_name, alpha=0.55, s=18,\n",
|
| 1251 |
-
" )\n",
|
| 1252 |
-
"ax.set_title(\"t-SNE of 192-dim ECAPA-TDNN Embeddings\")\n",
|
| 1253 |
-
"ax.set_xlabel(\"t-SNE dim 1\")\n",
|
| 1254 |
-
"ax.set_ylabel(\"t-SNE dim 2\")\n",
|
| 1255 |
-
"ax.legend()\n",
|
| 1256 |
-
"ax.grid(True, alpha=0.3)\n",
|
| 1257 |
-
"plt.tight_layout()\n",
|
| 1258 |
-
"plt.show()"
|
| 1259 |
-
]
|
| 1260 |
-
},
|
| 1261 |
-
{
|
| 1262 |
-
"cell_type": "markdown",
|
| 1263 |
-
"metadata": {},
|
| 1264 |
-
"source": [
|
| 1265 |
-
"## π² Cell 11 β XGBoost Classifier"
|
| 1266 |
-
]
|
| 1267 |
-
},
|
| 1268 |
-
{
|
| 1269 |
-
"cell_type": "code",
|
| 1270 |
-
"execution_count": null,
|
| 1271 |
-
"metadata": {},
|
| 1272 |
-
"outputs": [],
|
| 1273 |
-
"source": [
|
| 1274 |
-
"# ββ Train / test split on embeddings βββββββββββββββββββββββββββββββββββββ\n",
|
| 1275 |
-
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
| 1276 |
-
" embeddings,\n",
|
| 1277 |
-
" labels,\n",
|
| 1278 |
-
" test_size = 0.20,\n",
|
| 1279 |
-
" stratify = labels,\n",
|
| 1280 |
-
" random_state = SEED,\n",
|
| 1281 |
-
")\n",
|
| 1282 |
-
"\n",
|
| 1283 |
-
"# ββ Standardise embeddings (mean=0, std=1) ββββββββββββββββββββββββββββββββ\n",
|
| 1284 |
-
"# XGBoost is tree-based (scale-invariant), so standardisation does not change\n",
|
| 1285 |
-
"# its splits; we fit the scaler here so inference applies the identical transform.\n",
|
| 1286 |
-
"scaler = StandardScaler()\n",
|
| 1287 |
-
"X_train = scaler.fit_transform(X_train)\n",
|
| 1288 |
-
"X_test = scaler.transform(X_test)\n",
|
| 1289 |
-
"\n",
|
| 1290 |
-
"print(f\" X_train shape : {X_train.shape}\")\n",
|
| 1291 |
-
"print(f\" X_test shape : {X_test.shape}\")\n",
|
| 1292 |
-
"\n",
|
| 1293 |
-
"# ββ Train XGBoost βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1294 |
-
"xgb_clf = xgb.XGBClassifier(**XGB_PARAMS)\n",
|
| 1295 |
-
"\n",
|
| 1296 |
-
"print(\"\\nπ Training XGBoost...\")\n",
|
| 1297 |
-
"xgb_clf.fit(\n",
|
| 1298 |
-
" X_train, y_train,\n",
|
| 1299 |
-
" eval_set = [(X_test, y_test)],\n",
|
| 1300 |
-
" verbose = 50, # print every 50 rounds\n",
|
| 1301 |
-
")\n",
|
| 1302 |
-
"\n",
|
| 1303 |
-
"print(\"\\n✅ XGBoost training complete.\")"
|
| 1304 |
-
]
|
| 1305 |
-
},
|
| 1306 |
-
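A single stratified split gives only one estimate of performance; on a small balanced dataset it can be worth cross-validating the classifier head as a sanity check. A hedged sketch, not in the notebook, reusing `embeddings`, `labels`, `SEED` and `XGB_PARAMS` from earlier cells (it assumes `XGB_PARAMS` does not require an eval_set, e.g. no early stopping):

```
# 5-fold stratified cross-validation of the XGBoost head on the raw embeddings.
# Trees are scale-invariant, so the embeddings are used unscaled here.
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, cross_val_score

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
scores = cross_val_score(
    xgb.XGBClassifier(**XGB_PARAMS),
    embeddings, labels,
    cv=cv, scoring="roc_auc",
)
print(f"CV ROC-AUC: {scores.mean():.4f} +/- {scores.std():.4f}")
```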
{
|
| 1307 |
-
"cell_type": "markdown",
|
| 1308 |
-
"metadata": {},
|
| 1309 |
-
"source": [
|
| 1310 |
-
"## π Cell 12 β Evaluation Metrics"
|
| 1311 |
-
]
|
| 1312 |
-
},
|
| 1313 |
-
{
|
| 1314 |
-
"cell_type": "code",
|
| 1315 |
-
"execution_count": null,
|
| 1316 |
-
"metadata": {},
|
| 1317 |
-
"outputs": [],
|
| 1318 |
-
"source": [
|
| 1319 |
-
"# ββ Predictions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1320 |
-
"y_pred = xgb_clf.predict(X_test)\n",
|
| 1321 |
-
"y_prob = xgb_clf.predict_proba(X_test)[:, 1] # probability of FAKE\n",
|
| 1322 |
-
"\n",
|
| 1323 |
-
"# ββ Core metrics ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1324 |
-
"acc = accuracy_score(y_test, y_pred)\n",
|
| 1325 |
-
"f1 = f1_score(y_test, y_pred)\n",
|
| 1326 |
-
"roc_auc = roc_auc_score(y_test, y_prob)\n",
|
| 1327 |
-
"cm = confusion_matrix(y_test, y_pred)\n",
|
| 1328 |
-
"\n",
|
| 1329 |
-
"print(\"βββββββββββββββββββββββββββββββββββββ\")\n",
|
| 1330 |
-
"print(\"π Evaluation Results\")\n",
|
| 1331 |
-
"print(\"βββββββββββββββββββββββββββββββββββββ\")\n",
|
| 1332 |
-
"print(f\" Accuracy : {acc*100:.2f}%\")\n",
|
| 1333 |
-
"print(f\" F1 Score : {f1:.4f}\")\n",
|
| 1334 |
-
"print(f\" ROC-AUC : {roc_auc:.4f}\")\n",
|
| 1335 |
-
"print(\"βββββββββββββββββββββββββββββββββββββ\")\n",
|
| 1336 |
-
"\n",
|
| 1337 |
-
"# ββ Figure layout: confusion matrix + ROC + feature importance ββββββββββββ\n",
|
| 1338 |
-
"fig = plt.figure(figsize=(17, 5))\n",
|
| 1339 |
-
"gs = gridspec.GridSpec(1, 3, figure=fig)\n",
|
| 1340 |
-
"\n",
|
| 1341 |
-
"# --- Panel 1: Confusion Matrix -------------------------------------------\n",
|
| 1342 |
-
"ax1 = fig.add_subplot(gs[0])\n",
|
| 1343 |
-
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[\"Real\", \"Fake\"])\n",
|
| 1344 |
-
"disp.plot(ax=ax1, colorbar=False, cmap=\"Blues\")\n",
|
| 1345 |
-
"ax1.set_title(\"Confusion Matrix\", fontweight=\"bold\")\n",
|
| 1346 |
-
"\n",
|
| 1347 |
-
"# --- Panel 2: ROC Curve --------------------------------------------------\n",
|
| 1348 |
-
"ax2 = fig.add_subplot(gs[1])\n",
|
| 1349 |
-
"fpr, tpr, _ = roc_curve(y_test, y_prob)\n",
|
| 1350 |
-
"ax2.plot(fpr, tpr, color=\"tomato\", lw=2, label=f\"AUC = {roc_auc:.3f}\")\n",
|
| 1351 |
-
"ax2.plot([0, 1], [0, 1], \"k--\", lw=1, alpha=0.5)\n",
|
| 1352 |
-
"ax2.set_title(\"ROC Curve\", fontweight=\"bold\")\n",
|
| 1353 |
-
"ax2.set_xlabel(\"False Positive Rate\")\n",
|
| 1354 |
-
"ax2.set_ylabel(\"True Positive Rate\")\n",
|
| 1355 |
-
"ax2.legend(loc=\"lower right\")\n",
|
| 1356 |
-
"ax2.grid(True, alpha=0.3)\n",
|
| 1357 |
-
"\n",
|
| 1358 |
-
"# --- Panel 3: Top-20 XGBoost Feature Importances -------------------------\n",
|
| 1359 |
-
"ax3 = fig.add_subplot(gs[2])\n",
|
| 1360 |
-
"importances = xgb_clf.feature_importances_ # shape: (192,)\n",
|
| 1361 |
-
"top20_idx = np.argsort(importances)[::-1][:20] # top-20 by importance\n",
|
| 1362 |
-
"top20_imp = importances[top20_idx]\n",
|
| 1363 |
-
"\n",
|
| 1364 |
-
"colors = plt.cm.viridis(np.linspace(0.2, 0.85, 20))\n",
|
| 1365 |
-
"ax3.barh(\n",
|
| 1366 |
-
" [f\"dim {i}\" for i in top20_idx],\n",
|
| 1367 |
-
" top20_imp,\n",
|
| 1368 |
-
" color=colors,\n",
|
| 1369 |
-
")\n",
|
| 1370 |
-
"ax3.invert_yaxis()\n",
|
| 1371 |
-
"ax3.set_title(\"Top-20 XGBoost Feature Importances\", fontweight=\"bold\")\n",
|
| 1372 |
-
"ax3.set_xlabel(\"Importance (gain)\")\n",
|
| 1373 |
-
"ax3.grid(True, axis=\"x\", alpha=0.3)\n",
|
| 1374 |
-
"\n",
|
| 1375 |
-
"plt.suptitle(\n",
|
| 1376 |
-
" f\"Deepfake Audio Detection β Acc={acc*100:.1f}% F1={f1:.3f} AUC={roc_auc:.3f}\",\n",
|
| 1377 |
-
" fontsize=13,\n",
|
| 1378 |
-
" fontweight=\"bold\",\n",
|
| 1379 |
-
")\n",
|
| 1380 |
-
"plt.tight_layout()\n",
|
| 1381 |
-
"plt.show()"
|
| 1382 |
-
]
|
| 1383 |
-
},
|
| 1384 |
-
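Spoof-detection benchmarks such as ASVspoof usually report the equal error rate (EER) rather than accuracy, so it is worth computing it alongside the metrics above. A short sketch, not part of the notebook, that reuses the `fpr` and `tpr` arrays from the ROC panel:

```
# Equal error rate: the operating point where the false-positive rate and the
# false-negative rate are (approximately) equal.
import numpy as np

fnr = 1.0 - tpr
eer_idx = np.nanargmin(np.abs(fnr - fpr))
eer = (fpr[eer_idx] + fnr[eer_idx]) / 2.0
print(f"EER: {eer * 100:.2f}%")
```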
{
|
| 1385 |
-
"cell_type": "markdown",
|
| 1386 |
-
"metadata": {},
|
| 1387 |
-
"source": [
|
| 1388 |
-
"## π Cell 13 β Inference Function"
|
| 1389 |
-
]
|
| 1390 |
-
},
|
| 1391 |
-
{
|
| 1392 |
-
"cell_type": "code",
|
| 1393 |
-
"execution_count": null,
|
| 1394 |
-
"metadata": {},
|
| 1395 |
-
"outputs": [],
|
| 1396 |
-
"source": [
|
| 1397 |
-
"@torch.no_grad()\n",
|
| 1398 |
-
"def detect_deepfake(\n",
|
| 1399 |
-
" audio_path: str,\n",
|
| 1400 |
-
" ecapa_model: nn.Module = model,\n",
|
| 1401 |
-
" xgb_model: xgb.XGBClassifier = xgb_clf,\n",
|
| 1402 |
-
" feat_scaler: StandardScaler = scaler,\n",
|
| 1403 |
-
" fixed_T: int = FIXED_T,\n",
|
| 1404 |
-
" device: torch.device = DEVICE,\n",
|
| 1405 |
-
") -> Dict[str, object]:\n",
|
| 1406 |
-
" \"\"\"\n",
|
| 1407 |
-
" End-to-end deepfake audio detection for a single WAV file.\n",
|
| 1408 |
-
"\n",
|
| 1409 |
-
" Pipeline\n",
|
| 1410 |
-
" --------\n",
|
| 1411 |
-
" WAV β preprocess β log-mel+TEO features β ECAPA-TDNN embedding\n",
|
| 1412 |
-
" β StandardScaler β XGBoost β REAL / FAKE\n",
|
| 1413 |
-
"\n",
|
| 1414 |
-
" Parameters\n",
|
| 1415 |
-
" ----------\n",
|
| 1416 |
-
" audio_path : path to input WAV file\n",
|
| 1417 |
-
" ecapa_model : trained ECAPA-TDNN (default: module-level `model`)\n",
|
| 1418 |
-
" xgb_model : trained XGBoost (default: module-level `xgb_clf`)\n",
|
| 1419 |
-
" feat_scaler : fitted StandardScaler (default: module-level `scaler`)\n",
|
| 1420 |
-
" fixed_T : fixed frame count used during training\n",
|
| 1421 |
-
" device : torch device\n",
|
| 1422 |
-
"\n",
|
| 1423 |
-
" Returns\n",
|
| 1424 |
-
" -------\n",
|
| 1425 |
-
" dict with keys:\n",
|
| 1426 |
-
" label : 'REAL' or 'FAKE'\n",
|
| 1427 |
-
" confidence : float in [0, 1] β probability of the predicted class\n",
|
| 1428 |
-
" fake_prob : float in [0, 1] β raw probability of being FAKE\n",
|
| 1429 |
-
" \"\"\"\n",
|
| 1430 |
-
" # ββ Step 1: Preprocess βββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1431 |
-
" y = preprocess_audio(audio_path)\n",
|
| 1432 |
-
"\n",
|
| 1433 |
-
" # ββ Step 2: Feature extraction βββββββββββββββββββββββββββββββββββββββ\n",
|
| 1434 |
-
" feat = extract_features(y) # (41, T_raw)\n",
|
| 1435 |
-
"\n",
|
| 1436 |
-
" # Align to fixed_T (pad or trim)\n",
|
| 1437 |
-
" T = feat.shape[1]\n",
|
| 1438 |
-
" if T >= fixed_T:\n",
|
| 1439 |
-
" feat = feat[:, :fixed_T]\n",
|
| 1440 |
-
" else:\n",
|
| 1441 |
-
" feat = np.pad(feat, ((0, 0), (0, fixed_T - T)), mode=\"constant\")\n",
|
| 1442 |
-
"\n",
|
| 1443 |
-
" # ββ Step 3: ECAPA-TDNN embedding βββββββββββββββββββββββββββββββββββββ\n",
|
| 1444 |
-
" x_tensor = torch.tensor(feat, dtype=torch.float32).unsqueeze(0).to(device)\n",
|
| 1445 |
-
" ecapa_model.eval()\n",
|
| 1446 |
-
" emb = ecapa_model.embed(x_tensor).cpu().numpy() # (1, 192)\n",
|
| 1447 |
-
"\n",
|
| 1448 |
-
" # ββ Step 4: Normalise embedding ββββββββββββββββββββββββββββββββββββββ\n",
|
| 1449 |
-
" emb_scaled = feat_scaler.transform(emb) # (1, 192)\n",
|
| 1450 |
-
"\n",
|
| 1451 |
-
" # ββ Step 5: XGBoost prediction βββββββββββββββββββββββββββββββββββββββ\n",
|
| 1452 |
-
" pred_class = int(xgb_model.predict(emb_scaled)[0])\n",
|
| 1453 |
-
" probs = xgb_model.predict_proba(emb_scaled)[0] # [p_real, p_fake]\n",
|
| 1454 |
-
" fake_prob = float(probs[1])\n",
|
| 1455 |
-
" confidence = float(probs[pred_class])\n",
|
| 1456 |
-
"\n",
|
| 1457 |
-
" label = \"FAKE\" if pred_class == 1 else \"REAL\"\n",
|
| 1458 |
-
"\n",
|
| 1459 |
-
" return {\n",
|
| 1460 |
-
" \"label\": label,\n",
|
| 1461 |
-
" \"confidence\": round(confidence, 4),\n",
|
| 1462 |
-
" \"fake_prob\": round(fake_prob, 4),\n",
|
| 1463 |
-
" }\n",
|
| 1464 |
-
"\n",
|
| 1465 |
-
"\n",
|
| 1466 |
-
"# ββ Demo inference on a few test samples ββββββββββββββββββββββββββββββββββ\n",
|
| 1467 |
-
"print(\"π Running detect_deepfake() on 6 random samples:\\n\")\n",
|
| 1468 |
-
"print(f\"{'File':<50} {'True':>6} {'Predicted':>10} {'Confidence':>12} {'Fake Prob':>10}\")\n",
|
| 1469 |
-
"print(\"-\" * 95)\n",
|
| 1470 |
-
"\n",
|
| 1471 |
-
"for _, row in df.sample(6, random_state=SEED).iterrows():\n",
|
| 1472 |
-
" result = detect_deepfake(row[\"path\"])\n",
|
| 1473 |
-
" true_lbl = \"REAL\" if row[\"label\"] == 0 else \"FAKE\"\n",
|
| 1474 |
-
" match_sym = \"✅\" if result[\"label\"] == true_lbl else \"❌\"\n",
|
| 1475 |
-
" fname = Path(row[\"path\"]).name\n",
|
| 1476 |
-
"\n",
|
| 1477 |
-
" print(\n",
|
| 1478 |
-
" f\"{fname:<50} \"\n",
|
| 1479 |
-
" f\"{true_lbl:>6} \"\n",
|
| 1480 |
-
" f\"{result['label']:>9} {match_sym} \"\n",
|
| 1481 |
-
" f\"{result['confidence']:>10.4f} \"\n",
|
| 1482 |
-
" f\"{result['fake_prob']:>10.4f}\"\n",
|
| 1483 |
-
" )"
|
| 1484 |
-
]
|
| 1485 |
-
},
|
| 1486 |
-
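For scoring more than a handful of files, `detect_deepfake` can simply be looped over a directory and the results collected into a DataFrame. A minimal sketch, not part of the notebook; `incoming_audio` is a hypothetical folder of WAV files:

```
# Batch inference: score every WAV file in a folder with detect_deepfake().
from pathlib import Path
import pandas as pd

audio_dir = Path("incoming_audio")            # hypothetical input folder
rows = []
for wav in sorted(audio_dir.glob("*.wav")):
    res = detect_deepfake(str(wav))           # {"label", "confidence", "fake_prob"}
    rows.append({"file": wav.name, **res})

results_df = pd.DataFrame(rows)
results_df.to_csv("deepfake_predictions.csv", index=False)
print(results_df.head())
```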
{
|
| 1487 |
-
"cell_type": "markdown",
|
| 1488 |
-
"metadata": {},
|
| 1489 |
-
"source": [
|
| 1490 |
-
"## πΎ Cell 14 β Save / Load Artefacts"
|
| 1491 |
-
]
|
| 1492 |
-
},
|
| 1493 |
-
{
|
| 1494 |
-
"cell_type": "code",
|
| 1495 |
-
"execution_count": null,
|
| 1496 |
-
"metadata": {},
|
| 1497 |
-
"outputs": [],
|
| 1498 |
-
"source": [
|
| 1499 |
-
"import pickle\n",
|
| 1500 |
-
"from pathlib import Path\n",
|
| 1501 |
-
"\n",
|
| 1502 |
-
"SAVE_DIR = Path(\"saved_models\")\n",
|
| 1503 |
-
"SAVE_DIR.mkdir(exist_ok=True)\n",
|
| 1504 |
-
"\n",
|
| 1505 |
-
"# ββ Save ECAPA-TDNN weights βββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1506 |
-
"torch.save(model.state_dict(), SAVE_DIR / \"ecapa_tdnn.pt\")\n",
|
| 1507 |
-
"print(\"✅ ECAPA-TDNN weights saved.\")\n",
|
| 1508 |
-
"\n",
|
| 1509 |
-
"# ββ Save XGBoost model ββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1510 |
-
"xgb_clf.save_model(str(SAVE_DIR / \"xgboost.json\"))\n",
|
| 1511 |
-
"print(\"✅ XGBoost model saved.\")\n",
|
| 1512 |
-
"\n",
|
| 1513 |
-
"# ββ Save StandardScaler βββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1514 |
-
"with open(SAVE_DIR / \"scaler.pkl\", \"wb\") as f:\n",
|
| 1515 |
-
" pickle.dump(scaler, f)\n",
|
| 1516 |
-
"print(\"✅ StandardScaler saved.\")\n",
|
| 1517 |
-
"\n",
|
| 1518 |
-
"# ββ Save FIXED_T (needed for exact inference alignment) βββββββββββββββββββ\n",
|
| 1519 |
-
"with open(SAVE_DIR / \"config.pkl\", \"wb\") as f:\n",
|
| 1520 |
-
" pickle.dump({\"fixed_T\": FIXED_T, \"embedding_dim\": EMBEDDING_DIM}, f)\n",
|
| 1521 |
-
"print(\"✅ Config saved.\")\n",
|
| 1522 |
-
"\n",
|
| 1523 |
-
"print(f\"\\nAll artefacts saved to '{SAVE_DIR.resolve()}'\")"
|
| 1524 |
-
]
|
| 1525 |
-
},
|
| 1526 |
-
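The cell above only covers saving; restoring the artefacts in a fresh session is the mirror image. A hedged sketch, not part of the notebook: the ECAPA-TDNN has to be re-instantiated with the same architecture before its weights are loaded, and the `ECAPATDNN()` call below is a placeholder for whatever class and constructor arguments were defined earlier.

```
# Reload everything detect_deepfake() needs in a new session.
import pickle
import torch
import xgboost as xgb
from pathlib import Path

SAVE_DIR = Path("saved_models")

with open(SAVE_DIR / "config.pkl", "rb") as f:
    cfg = pickle.load(f)                      # {"fixed_T": ..., "embedding_dim": ...}

model = ECAPATDNN()                           # hypothetical: same class/args as in training
model.load_state_dict(torch.load(SAVE_DIR / "ecapa_tdnn.pt", map_location="cpu"))
model.eval()

xgb_clf = xgb.XGBClassifier()
xgb_clf.load_model(str(SAVE_DIR / "xgboost.json"))

with open(SAVE_DIR / "scaler.pkl", "rb") as f:
    scaler = pickle.load(f)
```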
{
|
| 1527 |
-
"cell_type": "markdown",
|
| 1528 |
-
"metadata": {},
|
| 1529 |
-
"source": [
|
| 1530 |
-
"## π Cell 15 β Results Summary Dashboard"
|
| 1531 |
-
]
|
| 1532 |
-
},
|
| 1533 |
-
{
|
| 1534 |
-
"cell_type": "code",
|
| 1535 |
-
"execution_count": null,
|
| 1536 |
-
"metadata": {},
|
| 1537 |
-
"outputs": [],
|
| 1538 |
-
"source": [
|
| 1539 |
-
"# ββ Final consolidated summary βββββββββββββββββββββββββββββββββββββββββββββ\n",
|
| 1540 |
-
"print(\"=\"*60)\n",
|
| 1541 |
-
"print(\" DEEPFAKE AUDIO DETECTION β FINAL RESULTS\")\n",
|
| 1542 |
-
"print(\"=\"*60)\n",
|
| 1543 |
-
"\n",
|
| 1544 |
-
"# Pipeline parameters\n",
|
| 1545 |
-
"print(\"\\nπ Pipeline configuration:\")\n",
|
| 1546 |
-
"print(f\" Sample rate : {SAMPLE_RATE} Hz\")\n",
|
| 1547 |
-
"print(f\" Clip duration : {DURATION} s\")\n",
|
| 1548 |
-
"print(f\" Features : {N_MELS} log-mel + 1 TEO = 41 channels\")\n",
|
| 1549 |
-
"print(f\" ECAPA-TDNN params : {n_params:,}\")\n",
|
| 1550 |
-
"print(f\" Embedding dim : {EMBEDDING_DIM}\")\n",
|
| 1551 |
-
"print(f\" XGBoost estimators : {XGB_PARAMS['n_estimators']}\")\n",
|
| 1552 |
-
"\n",
|
| 1553 |
-
"# Dataset stats\n",
|
| 1554 |
-
"print(\"\\nπ Dataset:\")\n",
|
| 1555 |
-
"vc = pd.Series(labels).value_counts()\n",
|
| 1556 |
-
"print(f\" Real samples : {vc.get(0, 0)}\")\n",
|
| 1557 |
-
"print(f\" Fake samples : {vc.get(1, 0)}\")\n",
|
| 1558 |
-
"print(f\" Test set size : {len(y_test)}\")\n",
|
| 1559 |
-
"\n",
|
| 1560 |
-
"# Performance\n",
|
| 1561 |
-
"print(\"\\nπ Test-set performance:\")\n",
|
| 1562 |
-
"print(f\" Accuracy : {acc*100:.2f}%\")\n",
|
| 1563 |
-
"print(f\" F1 Score : {f1:.4f}\")\n",
|
| 1564 |
-
"print(f\" ROC-AUC : {roc_auc:.4f}\")\n",
|
| 1565 |
-
"\n",
|
| 1566 |
-
"tn, fp, fn, tp = cm.ravel()\n",
|
| 1567 |
-
"print(f\"\\n Confusion matrix:\")\n",
|
| 1568 |
-
"print(f\" TP={tp} FP={fp}\")\n",
|
| 1569 |
-
"print(f\" FN={fn} TN={tn}\")\n",
|
| 1570 |
-
"\n",
|
| 1571 |
-
"precision = tp / (tp + fp + 1e-9)\n",
|
| 1572 |
-
"recall = tp / (tp + fn + 1e-9)\n",
|
| 1573 |
-
"print(f\"\\n Precision (fake) : {precision:.4f}\")\n",
|
| 1574 |
-
"print(f\" Recall (fake) : {recall:.4f}\")\n",
|
| 1575 |
-
"\n",
|
| 1576 |
-
"print(\"\\n\" + \"=\"*60)\n",
|
| 1577 |
-
"print(\" detect_deepfake(audio_path) β {label, confidence, fake_prob}\")\n",
|
| 1578 |
-
"print(\"=\"*60)"
|
| 1579 |
-
]
|
| 1580 |
-
},
|
| 1581 |
-
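The manual precision/recall arithmetic above can be cross-checked against scikit-learn's built-in report. A one-cell sketch, not in the notebook:

```
# Per-class precision / recall / F1 straight from scikit-learn, for comparison.
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred, target_names=["Real", "Fake"], digits=4))
```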
{
|
| 1582 |
-
"cell_type": "markdown",
|
| 1583 |
-
"metadata": {},
|
| 1584 |
-
"source": [
|
| 1585 |
-
"---\n",
|
| 1586 |
-
"\n",
|
| 1587 |
-
"## π Notes & Extension Ideas\n",
|
| 1588 |
-
"\n",
|
| 1589 |
-
"| Area | What to try |\n",
|
| 1590 |
-
"|---|---|\n",
|
| 1591 |
-
"| **Data** | Replace synthetic data with ASVspoof2019 LA / WaveFake (see links below) |\n",
|
| 1592 |
-
"| **Features** | Add MFCC delta/delta-delta, CQT, or group delay features |\n",
|
| 1593 |
-
"| **Denoising** | Replace spectral gating with RNNoise or DeepFilterNet |\n",
|
| 1594 |
-
"| **Model** | Use the full Res2Net-based ECAPA-TDNN (SpeechBrain implementation) |\n",
|
| 1595 |
-
"| **Classifier** | Compare with LightGBM, SVM, or a shallow MLP |\n",
|
| 1596 |
-
"| **Augmentation** | Add RIR simulation, speed perturbation, codec compression |\n",
|
| 1597 |
-
"| **Deployment** | Wrap \`detect_deepfake\` in a FastAPI endpoint (see the sketch after this cell) |\n",
|
| 1598 |
-
"\n",
|
| 1599 |
-
"### Recommended Datasets\n",
|
| 1600 |
-
"- **ASVspoof 2019 LA**: https://www.asvspoof.org/\n",
|
| 1601 |
-
"- **WaveFake**: https://github.com/RUB-SysSec/WaveFake\n",
|
| 1602 |
-
"- **FakeAVCeleb**: https://github.com/DASH-Lab/FakeAVCeleb\n",
|
| 1603 |
-
"\n",
|
| 1604 |
-
"### Key References\n",
|
| 1605 |
-
"- *ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification* β Desplanques et al., 2020\n",
|
| 1606 |
-
"- *WaveFake: A Data Set to Facilitate Audio Deepfake Detection* β Frank & SchΓΆnherr, 2021\n",
|
| 1607 |
-
"- *ASVspoof 2019: A Large-Scale Public Database* β Wang et al., 2020"
|
| 1608 |
-
]
|
| 1609 |
-
}
|
| 1610 |
-
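The deployment row in the table above suggests wrapping `detect_deepfake` in a FastAPI endpoint. A minimal sketch of what that could look like, not part of the notebook; the route name and temp-file handling are illustrative assumptions (file uploads also require the `python-multipart` package):

```
# Minimal FastAPI wrapper around detect_deepfake(). Run with: uvicorn app:app
import shutil
import tempfile

from fastapi import FastAPI, File, UploadFile

app = FastAPI(title="Deepfake Audio Detection")

@app.post("/detect")
async def detect(file: UploadFile = File(...)):
    # Persist the upload to a temporary WAV so the preprocessing step can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        shutil.copyfileobj(file.file, tmp)
        tmp_path = tmp.name
    return detect_deepfake(tmp_path)          # {"label", "confidence", "fake_prob"}
```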
],
|
| 1611 |
-
"metadata": {
|
| 1612 |
-
"kernelspec": {
|
| 1613 |
-
"display_name": "Python 3",
|
| 1614 |
-
"language": "python",
|
| 1615 |
-
"name": "python3"
|
| 1616 |
-
},
|
| 1617 |
-
"language_info": {
|
| 1618 |
-
"name": "python",
|
| 1619 |
-
"version": "3.10.0"
|
| 1620 |
-
}
|
| 1621 |
-
},
|
| 1622 |
-
"nbformat": 4,
|
| 1623 |
-
"nbformat_minor": 5
|
| 1624 |
-
}
|