{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import librosa\n", "import io\n", "import soundfile as sf\n", "from moviepy.editor import VideoFileClip\n", "from tqdm import tqdm\n", "import pickle as pk\n", "import os\n", "import tensorflow as tf\n", "# from tensorflow.keras.saving import register_keras_serializable\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "from tensorflow.keras import layers, models\n", "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# real_audio_dir = (\n", "# r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\REAL\"\n", "# )\n", "# fake_audio_dir = (\n", "# r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\FAKE\"\n", "# )" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# with open(\n", "# r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\real_files.pkl\",\n", "# \"rb\",\n", "# ) as f:\n", "# real_files = pk.load(f)\n", "\n", "# with open(\n", "# r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\fake_files.pkl\",\n", "# \"rb\",\n", "# ) as f:\n", "# fake_files = pk.load(f)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# len(real_files), len(fake_files)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# real_files = real_files[:2000]\n", "# fake_files = fake_files[:2000]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# fake_files = fake_files[: len(real_files)]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# len(real_files), len(fake_files)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# def extract_features(file_path):\n", "# try:\n", "# # Load the video file\n", "# video_clip = VideoFileClip(file_path)\n", "# audio = video_clip.audio\n", "# fps = audio.fps\n", "# audio_samples = np.array(\n", "# list(audio.iter_frames(fps=fps, dtype=\"float32\"))\n", "# ).flatten()\n", "# buffer = io.BytesIO()\n", "# sf.write(buffer, audio_samples, fps, format=\"wav\")\n", "# buffer.seek(0)\n", "# x, sr = librosa.load(buffer, sr=None)\n", "# mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=20)\n", "\n", "# return mfccs\n", "\n", "# except Exception as e:\n", "# print(f\"Error encountered while parsing file: {file_path}, {e}\")\n", "# return None\n", "\n", "\n", "# def load_data(real_dir, fake_dir):\n", "# labels = []\n", "# features = []\n", "\n", "# # Load real audios\n", "# for file_name in real_files:\n", "# file_path = os.path.join(real_dir, file_name)\n", "# mfccs = extract_features(file_path)\n", "# if mfccs is not None:\n", "# features.append(mfccs)\n", "# labels.append(0) # 0 for REAL\n", "\n", "# # Load fake audios\n", "# for file_name in fake_files:\n", "# file_path = os.path.join(fake_dir, file_name)\n", "# mfccs = extract_features(file_path)\n", "# if mfccs is not None:\n", "# features.append(mfccs)\n", "# labels.append(1) # 1 for FAKE\n", "\n", "# return np.array(features), np.array(labels)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# def extract_frame_features(file_path, frame_duration=1.0):\n", "# try:\n", "# video_clip = VideoFileClip(file_path)\n", "# audio = video_clip.audio\n", "# fps = audio.fps\n", "# audio_samples = np.array(\n", "# list(audio.iter_frames(fps=fps, dtype=\"float32\"))\n", "# ).flatten()\n", "# buffer = io.BytesIO()\n", "# sf.write(buffer, audio_samples, fps, format=\"wav\")\n", "# buffer.seek(0)\n", "# x, sr = librosa.load(buffer, sr=None)\n", "\n", "# # Split audio into frames of 'frame_duration' seconds\n", "# frame_length = int(frame_duration * sr)\n", "# frames = [\n", "# librosa.feature.mfcc(y=x[i : i + frame_length], sr=sr, n_mfcc=20)\n", "# for i in range(0, len(x), frame_length)\n", "# if i + frame_length <= len(x)\n", "# ]\n", "\n", "# return frames # Returns list of MFCCs for each frame\n", "\n", "# except Exception as e:\n", "# print(f\"Error processing file {file_path}: {e}\")\n", "# return None" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def extract_frame_features(file_path, frame_duration=1.0):\n", " video_clip = VideoFileClip(file_path)\n", " audio = video_clip.audio\n", " fps = audio.fps\n", " audio_samples = np.array(\n", " list(audio.iter_frames(fps=fps, dtype=\"float32\"))\n", " ).flatten()\n", " buffer = io.BytesIO()\n", " sf.write(buffer, audio_samples, fps, format=\"wav\")\n", " buffer.seek(0)\n", " x, sr = librosa.load(buffer, sr=None)\n", "\n", " # Split audio into frames of 'frame_duration' seconds\n", " frame_length = int(frame_duration * sr)\n", " frames = []\n", " timestamps = []\n", "\n", " for i in range(0, len(x), frame_length):\n", " if i + frame_length <= len(x):\n", " # Extract MFCCs for each frame and store the timestamp\n", " frame_mfcc = librosa.feature.mfcc(y=x[i: i + frame_length], sr=sr, n_mfcc=20)\n", " frames.append(frame_mfcc)\n", " timestamp = i / sr # Convert index to seconds\n", " timestamps.append(timestamp)\n", "\n", " return frames, timestamps" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# def load_data(real_dir, fake_dir, real_files, fake_files):\n", "# labels, features = [], []\n", "\n", "# # Load real audio frames with progress bar\n", "# print(\"Loading real audio files:\")\n", "# for file_name in tqdm(real_files, desc=\"Processing Real Files\"):\n", "# file_path = os.path.join(real_dir, file_name)\n", "# frame_features, timestamps = extract_frame_features(file_path)\n", "# if frame_features:\n", "# features.extend(frame_features)\n", "# labels.extend([0] * len(frame_features)) # Label 0 for REAL\n", "\n", "# # Load fake audio frames with progress bar\n", "# print(\"Loading fake audio files:\")\n", "# for file_name in tqdm(fake_files, desc=\"Processing Fake Files\"):\n", "# file_path = os.path.join(fake_dir, file_name)\n", "# frame_features = extract_frame_features(file_path)\n", "# if frame_features:\n", "# features.extend(frame_features)\n", "# labels.extend([1] * len(frame_features)) # Label 1 for FAKE\n", "\n", "# # Convert to numpy arrays\n", "# features = np.array(features)\n", "# labels = np.array(labels)\n", "\n", "# # Shuffle the data\n", "# indices = np.arange(len(features))\n", "# np.random.shuffle(indices)\n", "# features = features[indices]\n", "# labels = labels[indices]\n", "\n", "# return features, labels" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# X, y = load_data(real_audio_dir, fake_audio_dir, real_files, fake_files)\n", "# X = X[..., np.newaxis]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# with open(\"X_for_dl_2000.pkl\", \"wb\") as f:\n", "# pk.dump(X, f)\n", "# with open(\"y_for_dl_2000.pkl\", \"wb\") as f:\n", "# pk.dump(y, f)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "with open(\"X_for_dl_2000.pkl\", \"rb\") as f:\n", " X = pk.load(f)\n", "with open(\"y_for_dl_2000.pkl\", \"rb\") as f:\n", " y = pk.load(f)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=30\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TCN" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# model = models.Sequential(\n", "# [\n", "# layers.Conv1D(\n", "# 64,\n", "# kernel_size=3,\n", "# dilation_rate=1,\n", "# padding=\"causal\",\n", "# activation=\"relu\",\n", "# input_shape=(X.shape[1], X.shape[2]),\n", "# ),\n", "# layers.Conv1D(\n", "# 128, kernel_size=3, dilation_rate=2, padding=\"causal\", activation=\"relu\"\n", "# ),\n", "# layers.Conv1D(\n", "# 256, kernel_size=3, dilation_rate=4, padding=\"causal\", activation=\"relu\"\n", "# ),\n", "# layers.GlobalAveragePooling1D(),\n", "# layers.Dropout(0.5),\n", "# layers.Dense(64, activation=\"relu\"),\n", "# layers.Dense(2, activation=\"softmax\"),\n", "# ]\n", "# )" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras import models, layers\n", "\n", "model = models.Sequential(\n", " [\n", " layers.Conv1D(\n", " 64,\n", " kernel_size=3,\n", " dilation_rate=1,\n", " padding=\"causal\",\n", " activation=\"relu\",\n", " input_shape=(X.shape[1], X.shape[2]),\n", " ),\n", " layers.BatchNormalization(),\n", " layers.Conv1D(\n", " 128, kernel_size=3, dilation_rate=2, padding=\"causal\", activation=\"relu\"\n", " ),\n", " layers.BatchNormalization(),\n", " layers.Conv1D(\n", " 256, kernel_size=3, dilation_rate=4, padding=\"causal\", activation=\"relu\"\n", " ),\n", " layers.BatchNormalization(),\n", " layers.GlobalAveragePooling1D(),\n", " layers.Dropout(0.5),\n", " layers.Dense(128, activation=\"relu\"),\n", " layers.Dropout(0.3),\n", " layers.Dense(2, activation=\"softmax\"),\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "model.compile(\n", " optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"]\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "checkpoint = ModelCheckpoint(\n", " \"model/best_model.keras\", monitor=\"val_loss\", save_best_only=True\n", ")\n", "early_stopping = EarlyStopping(monitor=\"val_loss\", patience=3)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ conv1d (Conv1D)                 │ (None, 20, 64)         │        16,768 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization             │ (None, 20, 64)         │           256 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv1d_1 (Conv1D)               │ (None, 20, 128)        │        24,704 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_1           │ (None, 20, 128)        │           512 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv1d_2 (Conv1D)               │ (None, 20, 256)        │        98,560 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_2           │ (None, 20, 256)        │         1,024 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ global_average_pooling1d        │ (None, 256)            │             0 │\n",
       "│ (GlobalAveragePooling1D)        │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout (Dropout)               │ (None, 256)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense (Dense)                   │ (None, 128)            │        32,896 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_1 (Dropout)             │ (None, 128)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_1 (Dense)                 │ (None, 2)              │           258 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ conv1d (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m16,768\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m256\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv1d_1 (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m24,704\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m512\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv1d_2 (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m98,560\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m20\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ global_average_pooling1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mGlobalAveragePooling1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m32,896\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) │ \u001b[38;5;34m258\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 174,978 (683.51 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m174,978\u001b[0m (683.51 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 174,082 (680.01 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m174,082\u001b[0m (680.01 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 896 (3.50 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m896\u001b[0m (3.50 KB)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 4ms/step - accuracy: 0.6385 - loss: 0.6350 - val_accuracy: 0.6710 - val_loss: 0.6011\n", "Epoch 2/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 4ms/step - accuracy: 0.6680 - loss: 0.6062 - val_accuracy: 0.6838 - val_loss: 0.5800\n", "Epoch 3/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m16s\u001b[0m 4ms/step - accuracy: 0.6856 - loss: 0.5882 - val_accuracy: 0.7069 - val_loss: 0.5591\n", "Epoch 4/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.6969 - loss: 0.5731 - val_accuracy: 0.7187 - val_loss: 0.5497\n", "Epoch 5/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7038 - loss: 0.5649 - val_accuracy: 0.7303 - val_loss: 0.5353\n", "Epoch 6/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7127 - loss: 0.5569 - val_accuracy: 0.7343 - val_loss: 0.5330\n", "Epoch 7/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7198 - loss: 0.5478 - val_accuracy: 0.7102 - val_loss: 0.5598\n", "Epoch 8/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7239 - loss: 0.5452 - val_accuracy: 0.7404 - val_loss: 0.5247\n", "Epoch 9/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7271 - loss: 0.5389 - val_accuracy: 0.7310 - val_loss: 0.5310\n", "Epoch 10/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7301 - loss: 0.5323 - val_accuracy: 0.7369 - val_loss: 0.5335\n", "Epoch 11/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7358 - loss: 0.5272 - val_accuracy: 0.7529 - val_loss: 0.5058\n", "Epoch 12/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7363 - loss: 0.5263 - val_accuracy: 0.7451 - val_loss: 0.5065\n", "Epoch 13/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7379 - loss: 0.5212 - val_accuracy: 0.7451 - val_loss: 0.5055\n", "Epoch 14/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7454 - loss: 0.5105 - val_accuracy: 0.7447 - val_loss: 0.5048\n", "Epoch 15/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7478 - loss: 0.5100 - val_accuracy: 0.7554 - val_loss: 0.4946\n", "Epoch 16/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7442 - loss: 0.5087 - val_accuracy: 0.7533 - val_loss: 0.5004\n", "Epoch 17/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7513 - loss: 0.5005 - val_accuracy: 0.7469 - val_loss: 0.5045\n", "Epoch 18/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7507 - loss: 0.4992 - val_accuracy: 0.7519 - val_loss: 0.4980\n", "Epoch 19/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7528 - loss: 0.4976 - val_accuracy: 0.7553 - val_loss: 0.4930\n", "Epoch 20/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7582 - loss: 0.4947 - val_accuracy: 0.7637 - val_loss: 0.4833\n", "Epoch 21/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7561 - loss: 0.4986 - val_accuracy: 0.7668 - val_loss: 0.4831\n", "Epoch 22/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7593 - loss: 0.4891 - val_accuracy: 0.7671 - val_loss: 0.4819\n", "Epoch 23/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7578 - loss: 0.4900 - val_accuracy: 0.7671 - val_loss: 0.4808\n", "Epoch 24/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7628 - loss: 0.4851 - val_accuracy: 0.7586 - val_loss: 0.5014\n", "Epoch 25/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 4ms/step - accuracy: 0.7609 - loss: 0.4850 - val_accuracy: 0.7563 - val_loss: 0.4884\n", "Epoch 26/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7647 - loss: 0.4826 - val_accuracy: 0.7679 - val_loss: 0.4788\n", "Epoch 27/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7623 - loss: 0.4848 - val_accuracy: 0.7476 - val_loss: 0.5020\n", "Epoch 28/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7665 - loss: 0.4792 - val_accuracy: 0.7659 - val_loss: 0.4835\n", "Epoch 29/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7658 - loss: 0.4796 - val_accuracy: 0.7688 - val_loss: 0.4923\n", "Epoch 30/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7688 - loss: 0.4759 - val_accuracy: 0.7709 - val_loss: 0.4781\n", "Epoch 31/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7702 - loss: 0.4755 - val_accuracy: 0.7553 - val_loss: 0.4968\n", "Epoch 32/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7703 - loss: 0.4728 - val_accuracy: 0.7692 - val_loss: 0.4744\n", "Epoch 33/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7703 - loss: 0.4716 - val_accuracy: 0.7613 - val_loss: 0.4869\n", "Epoch 34/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7728 - loss: 0.4707 - val_accuracy: 0.7648 - val_loss: 0.4952\n", "Epoch 35/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7699 - loss: 0.4720 - val_accuracy: 0.7648 - val_loss: 0.4968\n", "Epoch 36/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7727 - loss: 0.4688 - val_accuracy: 0.7643 - val_loss: 0.5095\n", "Epoch 37/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7730 - loss: 0.4670 - val_accuracy: 0.7674 - val_loss: 0.4827\n", "Epoch 38/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7749 - loss: 0.4659 - val_accuracy: 0.7728 - val_loss: 0.4697\n", "Epoch 39/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m16s\u001b[0m 4ms/step - accuracy: 0.7772 - loss: 0.4618 - val_accuracy: 0.7753 - val_loss: 0.4774\n", "Epoch 40/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m16s\u001b[0m 4ms/step - accuracy: 0.7795 - loss: 0.4587 - val_accuracy: 0.7663 - val_loss: 0.4824\n", "Epoch 41/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 5ms/step - accuracy: 0.7765 - loss: 0.4638 - val_accuracy: 0.7561 - val_loss: 0.4910\n", "Epoch 42/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 4ms/step - accuracy: 0.7768 - loss: 0.4616 - val_accuracy: 0.7749 - val_loss: 0.4737\n", "Epoch 43/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 5ms/step - accuracy: 0.7800 - loss: 0.4554 - val_accuracy: 0.7698 - val_loss: 0.4747\n", "Epoch 44/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 5ms/step - accuracy: 0.7816 - loss: 0.4528 - val_accuracy: 0.7476 - val_loss: 0.4988\n", "Epoch 45/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 5ms/step - accuracy: 0.7819 - loss: 0.4553 - val_accuracy: 0.7630 - val_loss: 0.4820\n", "Epoch 46/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 5ms/step - accuracy: 0.7780 - loss: 0.4587 - val_accuracy: 0.7554 - val_loss: 0.4887\n", "Epoch 47/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 5ms/step - accuracy: 0.7832 - loss: 0.4555 - val_accuracy: 0.7773 - val_loss: 0.4709\n", "Epoch 48/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 5ms/step - accuracy: 0.7831 - loss: 0.4511 - val_accuracy: 0.7667 - val_loss: 0.4760\n", "Epoch 49/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 4ms/step - accuracy: 0.7831 - loss: 0.4513 - val_accuracy: 0.7731 - val_loss: 0.4812\n", "Epoch 50/50\n", "\u001b[1m3998/3998\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 4ms/step - accuracy: 0.7837 - loss: 0.4550 - val_accuracy: 0.7775 - val_loss: 0.4859\n" ] } ], "source": [ "history = model.fit(\n", " X_train,\n", " y_train,\n", " epochs=50,\n", " batch_size=16,\n", " validation_data=(X_test, y_test),\n", " callbacks=[checkpoint],\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = tf.keras.models.load_model(\"model/TCN.keras\")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m500/500\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 2ms/step\n", " precision recall f1-score support\n", "\n", " REAL 0.89 0.62 0.73 7920\n", " FAKE 0.71 0.92 0.80 8072\n", "\n", " accuracy 0.77 15992\n", " macro avg 0.80 0.77 0.77 15992\n", "weighted avg 0.80 0.77 0.77 15992\n", "\n", "[[4910 3010]\n", " [ 623 7449]]\n" ] } ], "source": [ "# model = tf.keras.models.load_model(\"model/best_model.keras\")\n", "y_pred = model.predict(X_test)\n", "y_pred_labels = np.argmax(y_pred, axis=1)\n", "\n", "# Print classification report\n", "print(classification_report(y_test, y_pred_labels, target_names=[\"REAL\", \"FAKE\"]))\n", "print(confusion_matrix(y_test, y_pred_labels))" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 169ms/step\n", "Found 8 deepfake frames:\n", "Frame 1 at 0.00s: FAKE\n", "Frame 2 at 1.00s: FAKE\n", "Frame 3 at 2.00s: FAKE\n", "Frame 4 at 3.00s: FAKE\n", "Frame 5 at 4.00s: FAKE\n", "Frame 8 at 7.00s: FAKE\n", "Frame 11 at 10.00s: FAKE\n", "Frame 15 at 14.00s: FAKE\n" ] } ], "source": [ "def test_on_video(file_path, frame_duration=1.0):\n", " # Load the trained model\n", " model = tf.keras.models.load_model(\"model/TCN.keras\")\n", "\n", " # Extract features and timestamps for each frame in the new video\n", " frames, timestamps = extract_frame_features(file_path, frame_duration)\n", "\n", " if frames is None or timestamps is None:\n", " print(\"No frames extracted.\")\n", " return\n", "\n", " # Reshape frames for model input\n", " frames = np.array(frames)[..., np.newaxis]\n", "\n", " # Predict on each frame\n", " predictions = model.predict(frames)\n", " pred_labels = np.argmax(predictions, axis=1)\n", "\n", " # Store deepfake frames, their timestamps, and frame indices\n", " deepfake_frames = []\n", " deepfake_timestamps = []\n", " deepfake_indices = []\n", "\n", " # Identify deepfake frames\n", " for i, label in enumerate(pred_labels):\n", " if label == 1: # If the label is FAKE\n", " deepfake_frames.append(frames[i])\n", " deepfake_timestamps.append(timestamps[i])\n", " deepfake_indices.append(i)\n", "\n", " if not deepfake_frames:\n", " print(\"No deepfake frames detected in the video.\")\n", " return\n", "\n", " # Analyze deepfake frames\n", " print(f\"Found {len(deepfake_frames)} deepfake frames:\")\n", " for i, (timestamp, index) in enumerate(zip(deepfake_timestamps, deepfake_indices)):\n", " print(f\"Frame {index + 1} at {timestamp:.2f}s: FAKE\")\n", "\n", "\n", "# Example usage\n", "test_video_path = r\"REAL\\ajqslcypsw.mp4\" # Replace with your test video path\n", "test_on_video(test_video_path)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# def test_on_video(file_path, frame_duration=1.0):\n", "# # Load the trained model\n", "# model = tf.keras.models.load_model(\"model/best_model.keras\")\n", "\n", "# # Extract features for each frame in the new video\n", "# frames = extract_frame_features(file_path, frame_duration)\n", "\n", "# if frames is None:\n", "# print(\"No frames extracted.\")\n", "# return\n", "\n", "# # Reshape frames for model input\n", "# frames = np.array(frames)[..., np.newaxis]\n", "\n", "# # Predict on each frame\n", "# predictions = model.predict(frames)\n", "# pred_labels = np.argmax(predictions, axis=1)\n", "\n", "# # Output results for each frame\n", "# for i, label in enumerate(pred_labels):\n", "# status = \"REAL\" if label == 0 else \"FAKE\"\n", "# print(f\"Frame {i+1}: {status}\")\n", "\n", "\n", "# # Example usage\n", "# test_video_path = r\"REAL\\bddjdhzfze.mp4\" # Replace with your test video path\n", "# test_on_video(test_video_path)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# @register_keras_serializable()\n", "# class AudioModel(tf.keras.Model):\n", "# def __init__(self, input_shape):\n", "# super(AudioModel, self).__init__()\n", "# self.input_shape = input_shape # Store the input shape\n", "# # Define the model layers\n", "# self.conv1 = layers.Conv2D(\n", "# 32, kernel_size=(3, 3), activation=\"relu\", input_shape=input_shape\n", "# )\n", "# self.conv2 = layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\")\n", "# self.pool = layers.MaxPooling2D(pool_size=(2, 2))\n", "# self.dropout1 = layers.Dropout(0.25)\n", "\n", "# self.reshape = layers.Reshape((64, -1))\n", "# self.gru = layers.Bidirectional(layers.GRU(128, return_sequences=False))\n", "\n", "# self.dense1 = layers.Dense(128, activation=\"relu\")\n", "# self.dropout2 = layers.Dropout(0.5)\n", "# self.dense2 = layers.Dense(2, activation=\"softmax\")\n", "\n", "# def call(self, inputs):\n", "# # Forward pass through the layers\n", "# x = self.conv1(inputs)\n", "# x = self.conv2(x)\n", "# x = self.pool(x)\n", "# x = self.dropout1(x)\n", "\n", "# x = self.reshape(x)\n", "# x = self.gru(x)\n", "\n", "# x = self.dense1(x)\n", "# x = self.dropout2(x)\n", "# return self.dense2(x)\n", "\n", "# def get_config(self):\n", "# config = super(AudioModel, self).get_config()\n", "# config.update(\n", "# {\"input_shape\": self.input_shape} # Include input shape in config\n", "# )\n", "# return config\n", "\n", "# @classmethod\n", "# def from_config(cls, config):\n", "# # Create a model instance from the config\n", "# input_shape = config.pop(\"input_shape\") # Extract input_shape from config\n", "# return cls(input_shape) # Create an instance of the model\n", "\n", "\n", "# # Function to create and compile the model\n", "# def create_model(input_shape):\n", "# model = AudioModel(input_shape)\n", "# model.compile(\n", "# optimizer=\"adam\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"]\n", "# )\n", "# return model\n", "\n", "\n", "# # Example usage\n", "# input_shape = (\n", "# 64,\n", "# 40,\n", "# 1,\n", "# ) # Adjust based on your data (e.g., (n_mfccs, time_steps, channels))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# model = create_model(input_shape)\n", "# model.summary()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# checkpoint = ModelCheckpoint(r\"models/dl_model.keras\", monitor=\"val_loss\", save_best_only=True, verbose=1)\n", "# early_stopping = EarlyStopping(monitor=\"val_loss\", patience=5, verbose=1)\n", "\n", "# history = model.fit(\n", "# X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test), callbacks=[checkpoint, early_stopping]\n", "# )" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# model.save(r\"models/dl_model.keras\", overwrite=True)\n", "# print(\"Model saved successfully.\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# # Ensure to import keras properly\n", "# import tensorflow as tf\n", "# from tensorflow import keras\n", "\n", "\n", "# # Function to load the model\n", "# def load_model(model_path):\n", "# try:\n", "# # Load the model from the specified path\n", "# model = keras.models.load_model(model_path)\n", "# print(\"Model loaded successfully.\")\n", "# return model\n", "# except Exception as e:\n", "# print(f\"Error loading model: {e}\")\n", "# return None" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# model_path = r\"models/dl_model.keras\"\n", "\n", "# # Load the model\n", "# loaded_model = load_model(model_path)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }