{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "H3eVgsMsJVRY", "outputId": "30e4d553-6ce2-4b44-8217-21d0f1875d8b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n" ] } ], "source": [ "import torch\n", "import cupy as cp\n", "from moviepy.editor import VideoFileClip\n", "import pandas as pd\n", "import librosa\n", "import scipy.stats\n", "import soundfile as sf\n", "import io\n", "import os\n", "from tqdm import tqdm\n", "import pickle as pk\n", "\n", "# Set device to GPU if available\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "print(device)" ] }, { "cell_type": "markdown", "metadata": { "id": "3A9iF-QXJVRZ" }, "source": [ "Statistical Features \n", "A first easy step is to compute the mean, standard deviation, minimum, maximum, median and quartiles of the frequencies of each signal. This can be done using Numpy and it always brings value to our feature extraction." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "ibnbShbMJVRa" }, "outputs": [], "source": [ "def describe_freq(freqs):\n", " freqs = cp.array(freqs) # Convert to CuPy array for GPU computation\n", " mean = cp.mean(freqs)\n", " std = cp.std(freqs)\n", " maxv = cp.amax(freqs)\n", " minv = cp.amin(freqs)\n", " median = cp.median(freqs)\n", " skew = scipy.stats.skew(cp.asnumpy(freqs)) # Skew not directly supported in CuPy\n", " kurt = scipy.stats.kurtosis(cp.asnumpy(freqs)) # Kurtosis not directly supported in CuPy\n", " q1 = cp.quantile(freqs, 0.25)\n", " q3 = cp.quantile(freqs, 0.75)\n", " mode = scipy.stats.mode(cp.asnumpy(freqs))[0][0] # Mode not directly supported in CuPy\n", " iqr = cp.subtract(q3, q1)\n", "\n", " return [mean.get(), std.get(), maxv.get(), minv.get(), median.get(), skew, kurt, q1.get(), q3.get(), mode, iqr.get()]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "nNifSVyDJVRa" }, "outputs": [], "source": [ "def get_features(x, sr):\n", " x = torch.tensor(x, device=device) # Send to GPU\n", " rmse = torch.mean(torch.tensor(librosa.feature.rms(y=x.cpu().numpy())[0], device=device))\n", " zcr = torch.mean(torch.tensor(librosa.feature.zero_crossing_rate(x.cpu().numpy())[0], device=device))\n", " tempo = torch.tensor(librosa.beat.tempo(y=x.cpu().numpy(), sr=sr)[0], device=device)\n", " mfcc = torch.mean(torch.tensor(librosa.feature.mfcc(y=x.cpu().numpy(), sr=sr), device=device), axis=1)\n", " spec_cen = torch.mean(torch.tensor(librosa.feature.spectral_centroid(y=x.cpu().numpy(), sr=sr), device=device))\n", " spectral_bandwidth = torch.mean(torch.tensor(librosa.feature.spectral_bandwidth(y=x.cpu().numpy(), sr=sr), device=device))\n", " spectral_contrast = torch.mean(torch.tensor(librosa.feature.spectral_contrast(y=x.cpu().numpy(), sr=sr), device=device))\n", " spectral_flatness = torch.mean(torch.tensor(librosa.feature.spectral_flatness(y=x.cpu().numpy()), device=device))\n", " spectral_rolloff = torch.mean(torch.tensor(librosa.feature.spectral_rolloff(y=x.cpu().numpy(), sr=sr), device=device))\n", "\n", " features = [rmse, zcr, tempo, spec_cen, spectral_bandwidth, spectral_contrast, spectral_flatness, spectral_rolloff]\n", " features = [f.item() for f in features] + [mfcc[i].item() for i in range(mfcc.size(0))] # Convert to list\n", " return features" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "6p0CSM2I_qGY" }, "outputs": [], "source": [ "def extract_features(file_path):\n", " try:\n", " # Load video file\n", " video_clip = VideoFileClip(file_path)\n", " audio = video_clip.audio\n", " fps = audio.fps\n", " audio_samples = cp.array(list(audio.iter_frames(fps=fps, dtype=\"float32\"))).flatten()\n", " buffer = io.BytesIO()\n", " sf.write(buffer, cp.asnumpy(audio_samples), fps, format=\"wav\")\n", " buffer.seek(0)\n", " x, sr = librosa.load(buffer, sr=None)\n", " video_clip.close()\n", " features = get_features(x, sr)\n", " return features\n", "\n", " except Exception as e:\n", " print(f\"Error encountered while parsing file: {file_path}, {e}\")\n", " return None" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "fiFT26TK_tA_" }, "outputs": [], "source": [ "def load_data(real_dir, fake_dir, real_files, fake_files):\n", " data = []\n", " columns = [\"rmse\", \"zcr\", \"tempo\", \"spectral_centroid\", \"spectral_bandwidth\",\n", " \"spectral_contrast\", \"spectral_flatness\", \"spectral_rolloff\"] + \\\n", " [f\"mfcc{i}\" for i in range(1, 21)] + [\"label\"]\n", "\n", " # Set up progress bar\n", " total_files = len(real_files) + len(fake_files)\n", " pbar = tqdm(total=total_files, desc=\"Processing files\", unit=\"file\")\n", "\n", " # Process real audio files\n", " for file_name in real_files:\n", " file_path = os.path.join(real_dir, file_name)\n", " features = extract_features(file_path)\n", " if features is not None:\n", " features.append(0) # Label: 0 for REAL\n", " data.append(features)\n", " pbar.update(1)\n", "\n", " # Process fake audio files\n", " for file_name in fake_files:\n", " file_path = os.path.join(fake_dir, file_name)\n", " features = extract_features(file_path)\n", " if features is not None:\n", " features.append(1) # Label: 1 for FAKE\n", " data.append(features)\n", " pbar.update(1)\n", "\n", " pbar.close()\n", " df = pd.DataFrame(data, columns=columns)\n", " return df\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "nL9J7Vp9JVRa" }, "outputs": [], "source": [ "real_audio_dir = r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\REAL\"\n", "fake_audio_dir = r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\FAKE\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "gfwu2Ct2E5aQ" }, "outputs": [], "source": [ "with open(\n", " r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\real_files.pkl\",\n", " \"rb\",\n", ") as f:\n", " real_files = pk.load(f)\n", "\n", "with open(\n", " r\"H:\\.shortcut-targets-by-id\\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\\SIH2024_DATASET\\fake_files.pkl\",\n", " \"rb\",\n", ") as f:\n", " fake_files = pk.load(f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(19154, 99992)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(real_files), len(fake_files)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fake_files = fake_files[:len(real_files)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(19154, 19154)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(real_files), len(fake_files)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BUS-nOHOJVRb", "outputId": "8eee5356-bbdb-4941-a6a8-023a552db603" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Processing files: 17%|█▋ | 671/4000 [1:19:37<4:35:56, 4.97s/file] " ] } ], "source": [ "df = load_data(real_audio_dir, fake_audio_dir, real_files[:2000], fake_files[:2000])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3tLFhSuVJVRc" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rmsezcrtempospectral_centroidspectral_bandwidthspectral_contrastspectral_flatnessspectral_rolloffmfcc1mfcc2...mfcc12mfcc13mfcc14mfcc15mfcc16mfcc17mfcc18mfcc19mfcc20label
50.0046240.025053129.1992192725.9832545010.82294314.8224730.0028544820.494920-534.778259154.150742...8.461435-5.3638531.6517351.570598-6.969818-1.332273-7.264575-2.166896-5.3904241
60.0122050.040296123.0468753647.1046155343.51973816.6718190.0079038357.563553-421.535065121.641014...16.492485-15.2648635.351438-6.834963-6.8441492.524184-9.9071332.443203-3.2034851
70.0004860.065730123.0468754911.1185605816.15461013.1678840.02047012992.775671-651.358948105.408440...22.212151-8.9993119.159810-1.1345520.878308-4.5928616.159277-8.8047914.2216071
80.0105870.044573126.0480183769.0146555425.97575316.2387480.0080208702.531203-423.674591125.309708...17.190102-19.3865572.690195-8.972520-8.5477493.633717-7.5941235.063034-3.6463311
90.0015560.048985126.0480183916.4971235451.38464814.9595550.0116018986.764496-614.185364123.651947...16.776917-9.4188911.858516-3.961122-3.926236-5.9903833.210501-8.5812444.2367591
\n", "

5 rows × 29 columns

\n", "
" ], "text/plain": [ " rmse zcr tempo spectral_centroid spectral_bandwidth \\\n", "5 0.004624 0.025053 129.199219 2725.983254 5010.822943 \n", "6 0.012205 0.040296 123.046875 3647.104615 5343.519738 \n", "7 0.000486 0.065730 123.046875 4911.118560 5816.154610 \n", "8 0.010587 0.044573 126.048018 3769.014655 5425.975753 \n", "9 0.001556 0.048985 126.048018 3916.497123 5451.384648 \n", "\n", " spectral_contrast spectral_flatness spectral_rolloff mfcc1 \\\n", "5 14.822473 0.002854 4820.494920 -534.778259 \n", "6 16.671819 0.007903 8357.563553 -421.535065 \n", "7 13.167884 0.020470 12992.775671 -651.358948 \n", "8 16.238748 0.008020 8702.531203 -423.674591 \n", "9 14.959555 0.011601 8986.764496 -614.185364 \n", "\n", " mfcc2 ... mfcc12 mfcc13 mfcc14 mfcc15 mfcc16 \\\n", "5 154.150742 ... 8.461435 -5.363853 1.651735 1.570598 -6.969818 \n", "6 121.641014 ... 16.492485 -15.264863 5.351438 -6.834963 -6.844149 \n", "7 105.408440 ... 22.212151 -8.999311 9.159810 -1.134552 0.878308 \n", "8 125.309708 ... 17.190102 -19.386557 2.690195 -8.972520 -8.547749 \n", "9 123.651947 ... 16.776917 -9.418891 1.858516 -3.961122 -3.926236 \n", "\n", " mfcc17 mfcc18 mfcc19 mfcc20 label \n", "5 -1.332273 -7.264575 -2.166896 -5.390424 1 \n", "6 2.524184 -9.907133 2.443203 -3.203485 1 \n", "7 -4.592861 6.159277 -8.804791 4.221607 1 \n", "8 3.633717 -7.594123 5.063034 -3.646331 1 \n", "9 -5.990383 3.210501 -8.581244 4.236759 1 \n", "\n", "[5 rows x 29 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zMej7SKRJVRc" }, "outputs": [], "source": [ "# for file in file_names:\n", "\n", "# clean_file = file.split(\"/\")[-1]\n", "# video_clip = VideoFileClip(file)\n", "# audio = video_clip.audio\n", "# fps = audio.fps\n", "# audio_samples = np.array(list(audio.iter_frames(fps=fps, dtype=\"float32\"))).flatten()\n", "# buffer = io.BytesIO()\n", "# sf.write(buffer, audio_samples, fps, format='wav')\n", "# buffer.seek(0)\n", "# x, sr = librosa.load(buffer, sr=None)\n", "# label = json.load(open(\"train_sample_videos/metadata.json\"))[clean_file]['label']\n", "# new_row = pd.DataFrame([[clean_file] + get_features(x, sr) + [label]], columns=column_ames)\n", "# df = pd.concat([df, new_row], ignore_index=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "BxacOcTrJVRc" }, "outputs": [ { "ename": "OSError", "evalue": "Cannot save file into a non-existent directory: '\\content\\drive\\MyDrive\\SIH2024_DATASET'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[14], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/content/drive/MyDrive/SIH2024_DATASET/full_features.csv\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m 328\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 329\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m 330\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m 331\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 332\u001b[0m )\n\u001b[1;32m--> 333\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\core\\generic.py:3964\u001b[0m, in \u001b[0;36mNDFrame.to_csv\u001b[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[0;32m 3953\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, ABCDataFrame) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_frame()\n\u001b[0;32m 3955\u001b[0m formatter \u001b[38;5;241m=\u001b[39m DataFrameFormatter(\n\u001b[0;32m 3956\u001b[0m frame\u001b[38;5;241m=\u001b[39mdf,\n\u001b[0;32m 3957\u001b[0m header\u001b[38;5;241m=\u001b[39mheader,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 3961\u001b[0m decimal\u001b[38;5;241m=\u001b[39mdecimal,\n\u001b[0;32m 3962\u001b[0m )\n\u001b[1;32m-> 3964\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameRenderer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mformatter\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_csv\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 3965\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_buf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3966\u001b[0m \u001b[43m \u001b[49m\u001b[43mlineterminator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlineterminator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3967\u001b[0m \u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msep\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3968\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3969\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3970\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompression\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3971\u001b[0m \u001b[43m \u001b[49m\u001b[43mquoting\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquoting\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3972\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3973\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_label\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_label\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3974\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3975\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3976\u001b[0m \u001b[43m \u001b[49m\u001b[43mquotechar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquotechar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3977\u001b[0m \u001b[43m \u001b[49m\u001b[43mdate_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdate_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3978\u001b[0m \u001b[43m \u001b[49m\u001b[43mdoublequote\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdoublequote\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3979\u001b[0m \u001b[43m \u001b[49m\u001b[43mescapechar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mescapechar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3980\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3981\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\io\\formats\\format.py:1014\u001b[0m, in \u001b[0;36mDataFrameRenderer.to_csv\u001b[1;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[0;32m 993\u001b[0m created_buffer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 995\u001b[0m csv_formatter \u001b[38;5;241m=\u001b[39m CSVFormatter(\n\u001b[0;32m 996\u001b[0m path_or_buf\u001b[38;5;241m=\u001b[39mpath_or_buf,\n\u001b[0;32m 997\u001b[0m lineterminator\u001b[38;5;241m=\u001b[39mlineterminator,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1012\u001b[0m formatter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfmt,\n\u001b[0;32m 1013\u001b[0m )\n\u001b[1;32m-> 1014\u001b[0m \u001b[43mcsv_formatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m created_buffer:\n\u001b[0;32m 1017\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path_or_buf, StringIO)\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\io\\formats\\csvs.py:251\u001b[0m, in \u001b[0;36mCSVFormatter.save\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 248\u001b[0m \u001b[38;5;124;03mCreate the writer & save.\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[38;5;66;03m# apply compression and byte/text conversion\u001b[39;00m\n\u001b[1;32m--> 251\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 254\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 255\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 256\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompression\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 257\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 258\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m handles:\n\u001b[0;32m 259\u001b[0m \u001b[38;5;66;03m# Note: self.encoding is irrelevant here\u001b[39;00m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwriter \u001b[38;5;241m=\u001b[39m csvlib\u001b[38;5;241m.\u001b[39mwriter(\n\u001b[0;32m 261\u001b[0m handles\u001b[38;5;241m.\u001b[39mhandle,\n\u001b[0;32m 262\u001b[0m lineterminator\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlineterminator,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 267\u001b[0m quotechar\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquotechar,\n\u001b[0;32m 268\u001b[0m )\n\u001b[0;32m 270\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save()\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\io\\common.py:749\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 747\u001b[0m \u001b[38;5;66;03m# Only for write methods\u001b[39;00m\n\u001b[0;32m 748\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode \u001b[38;5;129;01mand\u001b[39;00m is_path:\n\u001b[1;32m--> 749\u001b[0m \u001b[43mcheck_parent_directory\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 751\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression:\n\u001b[0;32m 752\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzstd\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 753\u001b[0m \u001b[38;5;66;03m# compression libraries do not like an explicit text-mode\u001b[39;00m\n", "File \u001b[1;32md:\\Python\\Lib\\site-packages\\pandas\\io\\common.py:616\u001b[0m, in \u001b[0;36mcheck_parent_directory\u001b[1;34m(path)\u001b[0m\n\u001b[0;32m 614\u001b[0m parent \u001b[38;5;241m=\u001b[39m Path(path)\u001b[38;5;241m.\u001b[39mparent\n\u001b[0;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m parent\u001b[38;5;241m.\u001b[39mis_dir():\n\u001b[1;32m--> 616\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124mrf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot save file into a non-existent directory: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[1;31mOSError\u001b[0m: Cannot save file into a non-existent directory: '\\content\\drive\\MyDrive\\SIH2024_DATASET'" ] } ], "source": [ "df.to_csv( \"full_features.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3PTTLrLhJVRc" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 0 }