# %%
# Standard library
import io
import os
import pickle as pk

# Third-party (torch is imported before cupy, as in the original cell)
import torch
import cupy as cp
from moviepy.editor import VideoFileClip
import pandas as pd
import librosa
import scipy.stats
import soundfile as sf
from tqdm import tqdm

# Set device to GPU if available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# %% [markdown]
# ## Statistical Features
#
# A first easy step is to compute the mean, standard deviation, minimum,
# maximum, median and quartiles of the frequencies of each signal. This can be
# done with NumPy-style array reductions (this notebook uses CuPy, plus SciPy
# for skewness, kurtosis and mode) and it always brings value to our feature
# extraction.
# %%
def describe_freq(freqs):
    """Return summary statistics of a frequency signal.

    The input is flattened first, so every statistic is taken over all values.

    Parameters
    ----------
    freqs : array-like
        Values to summarise (anything ``cp.array`` accepts).

    Returns
    -------
    list
        ``[mean, std, max, min, median, skew, kurtosis, q1, q3, mode, iqr]``.
        CuPy results are copied back to the host with ``.get()``; skew,
        kurtosis and mode come from SciPy.
    """
    freqs = cp.array(freqs).ravel()  # Convert to CuPy array for GPU computation
    # SciPy works on host memory: copy back once instead of once per statistic.
    host_freqs = cp.asnumpy(freqs)

    mean = cp.mean(freqs)
    std = cp.std(freqs)
    maxv = cp.amax(freqs)
    minv = cp.amin(freqs)
    median = cp.median(freqs)
    q1 = cp.quantile(freqs, 0.25)
    q3 = cp.quantile(freqs, 0.75)
    iqr = cp.subtract(q3, q1)

    skew = scipy.stats.skew(host_freqs)  # Skew not directly supported in CuPy
    kurt = scipy.stats.kurtosis(host_freqs)  # Kurtosis not directly supported in CuPy
    # SciPy >= 1.11 returns a scalar mode for 1-D input, older versions return
    # a length-1 array; ravel()[0] yields the value in both cases (the old
    # ``[0][0]`` indexing raises IndexError on the scalar form).
    mode = scipy.stats.mode(host_freqs)[0].ravel()[0]

    return [mean.get(), std.get(), maxv.get(), minv.get(), median.get(),
            skew, kurt, q1.get(), q3.get(), mode, iqr.get()]


# %%
# Number of MFCC coefficients; shared by get_features() and the column names
# built in load_data() so the two cannot drift apart.
N_MFCC = 20


def get_features(x, sr, n_mfcc=N_MFCC):
    """Compute averaged librosa features for one audio signal.

    Parameters
    ----------
    x : array-like or torch.Tensor
        Audio samples (assumed to be a mono, 1-D signal -- TODO confirm for
        callers other than extract_features).
    sr : int
        Sampling rate of ``x``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 20).

    Returns
    -------
    list of float
        ``[rmse, zcr, tempo, spectral_centroid, spectral_bandwidth,
        spectral_contrast, spectral_flatness, spectral_rolloff]`` followed by
        the ``n_mfcc`` per-coefficient MFCC means. Apart from tempo, each
        scalar is the mean of the whole librosa feature matrix.
    """
    # librosa computes on the CPU, so keep a single host copy of the signal
    # rather than moving it to the GPU and back for every feature.
    signal = torch.as_tensor(x).detach().cpu().numpy()

    # librosa.beat.tempo is deprecated since librosa 0.10 in favour of
    # librosa.feature.tempo; use the new name when it exists.
    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo

    rmse = librosa.feature.rms(y=signal)[0].mean()
    zcr = librosa.feature.zero_crossing_rate(y=signal)[0].mean()
    tempo = tempo_fn(y=signal, sr=sr)[0]
    spec_cen = librosa.feature.spectral_centroid(y=signal, sr=sr).mean()
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=sr).mean()
    spectral_contrast = librosa.feature.spectral_contrast(y=signal, sr=sr).mean()
    spectral_flatness = librosa.feature.spectral_flatness(y=signal).mean()
    spectral_rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sr).mean()
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).mean(axis=1)

    scalars = [rmse, zcr, tempo, spec_cen, spectral_bandwidth,
               spectral_contrast, spectral_flatness, spectral_rolloff]
    return [float(value) for value in scalars] + mfcc.tolist()


# %%
def _read_audio(file_path):
    """Decode the audio track of a video file into ``(samples, sample_rate)``."""
    video_clip = VideoFileClip(file_path)
    try:
        audio = video_clip.audio
        if audio is None:
            raise ValueError("video has no audio track")
        fps = audio.fps
        # Each frame holds one sample per channel. Keep the (frames, channels)
        # layout: flattening it would interleave the channels into a single
        # stream and distort multi-channel audio. librosa.load downmixes the
        # channels to mono when it reads the buffer back.
        frames = list(audio.iter_frames(fps=fps, dtype="float32"))
        buffer = io.BytesIO()
        sf.write(buffer, frames, fps, format="wav")
        buffer.seek(0)
        return librosa.load(buffer, sr=None)
    finally:
        # Release the reader even when decoding fails.
        video_clip.close()


def extract_features(file_path):
    """Extract the audio feature vector of one video file.

    Parameters
    ----------
    file_path : str
        Path of the video file to read.

    Returns
    -------
    list of float or None
        The output of ``get_features`` for the file's audio track, or ``None``
        when the file cannot be processed (the error is printed, not raised,
        so one bad file does not abort a long batch).
    """
    try:
        x, sr = _read_audio(file_path)
        return get_features(x, sr)
    except Exception as e:
        print(f"Error encountered while parsing file: {file_path}, {e}")
        return None


# %%
def load_data(real_dir, fake_dir, real_files, fake_files):
    """Build the labelled feature table for the real and fake files.

    Parameters
    ----------
    real_dir, fake_dir : str
        Directories containing the real and the fake files.
    real_files, fake_files : sequence of str
        File names, relative to ``real_dir`` / ``fake_dir``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed file: the feature columns followed
        by ``label`` (0 for REAL, 1 for FAKE). Files for which
        ``extract_features`` returns ``None`` are skipped.
    """
    columns = ["rmse", "zcr", "tempo", "spectral_centroid", "spectral_bandwidth",
               "spectral_contrast", "spectral_flatness", "spectral_rolloff"] + \
              [f"mfcc{i}" for i in range(1, N_MFCC + 1)] + ["label"]

    # (directory, file names, label) -- real files first, then fake ones.
    sources = (
        (real_dir, real_files, 0),  # Label: 0 for REAL
        (fake_dir, fake_files, 1),  # Label: 1 for FAKE
    )
    total_files = len(real_files) + len(fake_files)

    data = []
    # The context manager closes the progress bar even if a file raises.
    with tqdm(total=total_files, desc="Processing files", unit="file") as pbar:
        for directory, file_names, label in sources:
            for file_name in file_names:
                features = extract_features(os.path.join(directory, file_name))
                if features is not None:
                    data.append(features + [label])
                pbar.update(1)

    return pd.DataFrame(data, columns=columns)


# %%
# Root of the dataset; everything below is resolved relative to it.
DATASET_DIR = r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET"
real_audio_dir = os.path.join(DATASET_DIR, "REAL")
fake_audio_dir = os.path.join(DATASET_DIR, "FAKE")


# %%
def _load_file_list(file_name):
    """Load a pickled list of file names stored in DATASET_DIR."""
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # load pickles that were produced by this project.
    with open(os.path.join(DATASET_DIR, file_name), "rb") as f:
        return pk.load(f)


real_files = _load_file_list("real_files.pkl")
fake_files = _load_file_list("fake_files.pkl")

# %%
len(real_files), len(fake_files)

# %%
# Balance the classes: keep only as many fake files as there are real ones.
fake_files = fake_files[:len(real_files)]

# %%
len(real_files), len(fake_files)

# %%
# Files taken from each class. The recorded run averaged about 5 s per file,
# so only a subset is processed here.
N_FILES_PER_CLASS = 2000

df = load_data(
    real_audio_dir,
    fake_audio_dir,
    real_files[:N_FILES_PER_CLASS],
    fake_files[:N_FILES_PER_CLASS],
)
\n", " | rmse | \n", "zcr | \n", "tempo | \n", "spectral_centroid | \n", "spectral_bandwidth | \n", "spectral_contrast | \n", "spectral_flatness | \n", "spectral_rolloff | \n", "mfcc1 | \n", "mfcc2 | \n", "... | \n", "mfcc12 | \n", "mfcc13 | \n", "mfcc14 | \n", "mfcc15 | \n", "mfcc16 | \n", "mfcc17 | \n", "mfcc18 | \n", "mfcc19 | \n", "mfcc20 | \n", "label | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | \n", "0.004624 | \n", "0.025053 | \n", "129.199219 | \n", "2725.983254 | \n", "5010.822943 | \n", "14.822473 | \n", "0.002854 | \n", "4820.494920 | \n", "-534.778259 | \n", "154.150742 | \n", "... | \n", "8.461435 | \n", "-5.363853 | \n", "1.651735 | \n", "1.570598 | \n", "-6.969818 | \n", "-1.332273 | \n", "-7.264575 | \n", "-2.166896 | \n", "-5.390424 | \n", "1 | \n", "
6 | \n", "0.012205 | \n", "0.040296 | \n", "123.046875 | \n", "3647.104615 | \n", "5343.519738 | \n", "16.671819 | \n", "0.007903 | \n", "8357.563553 | \n", "-421.535065 | \n", "121.641014 | \n", "... | \n", "16.492485 | \n", "-15.264863 | \n", "5.351438 | \n", "-6.834963 | \n", "-6.844149 | \n", "2.524184 | \n", "-9.907133 | \n", "2.443203 | \n", "-3.203485 | \n", "1 | \n", "
7 | \n", "0.000486 | \n", "0.065730 | \n", "123.046875 | \n", "4911.118560 | \n", "5816.154610 | \n", "13.167884 | \n", "0.020470 | \n", "12992.775671 | \n", "-651.358948 | \n", "105.408440 | \n", "... | \n", "22.212151 | \n", "-8.999311 | \n", "9.159810 | \n", "-1.134552 | \n", "0.878308 | \n", "-4.592861 | \n", "6.159277 | \n", "-8.804791 | \n", "4.221607 | \n", "1 | \n", "
8 | \n", "0.010587 | \n", "0.044573 | \n", "126.048018 | \n", "3769.014655 | \n", "5425.975753 | \n", "16.238748 | \n", "0.008020 | \n", "8702.531203 | \n", "-423.674591 | \n", "125.309708 | \n", "... | \n", "17.190102 | \n", "-19.386557 | \n", "2.690195 | \n", "-8.972520 | \n", "-8.547749 | \n", "3.633717 | \n", "-7.594123 | \n", "5.063034 | \n", "-3.646331 | \n", "1 | \n", "
9 | \n", "0.001556 | \n", "0.048985 | \n", "126.048018 | \n", "3916.497123 | \n", "5451.384648 | \n", "14.959555 | \n", "0.011601 | \n", "8986.764496 | \n", "-614.185364 | \n", "123.651947 | \n", "... | \n", "16.776917 | \n", "-9.418891 | \n", "1.858516 | \n", "-3.961122 | \n", "-3.926236 | \n", "-5.990383 | \n", "3.210501 | \n", "-8.581244 | \n", "4.236759 | \n", "1 | \n", "
5 rows × 29 columns
\n", "