In [4]:
from moviepy.editor import VideoFileClip
import numpy as np
import pandas as pd
import scipy.stats
import pandas as pd
import glob
import json
import librosa
import soundfile as sf
import io
import os
from tqdm import tqdm
import pickle as pk

Statistical Features  
A simple first step is to compute summary statistics of the frequencies of each signal: mean, standard deviation, minimum, maximum, median and quartiles. All of these can be computed with NumPy, and they provide a cheap, useful baseline for feature extraction.

In [2]:
# NOTE(review): this whole cell is disabled (every statement is commented out) and
# `x` is not defined by any earlier cell visible in this notebook.
# Caution before re-enabling: np.fft.fftfreq(x.size) depends only on the *length*
# of `x`, not on its samples, so the statistics below would not describe the
# signal's spectrum -- presumably FFT magnitudes (np.fft.fft / rfft) were
# intended; confirm.
# freqs = np.fft.fftfreq(x.size)

# Summary statistics of `freqs`, returned as a flat list in the order
# [mean, std, max, min, median, skew, kurtosis, q1, q3, mode, iqr].
# def describe_freq(freqs):
#     mean = np.mean(freqs)
#     std = np.std(freqs)
#     maxv = np.amax(freqs)
#     minv = np.amin(freqs)
#     median = np.median(freqs)
#     skew = scipy.stats.skew(freqs)
#     kurt = scipy.stats.kurtosis(freqs)
#     q1 = np.quantile(freqs, 0.25)
#     q3 = np.quantile(freqs, 0.75)
#     mode = scipy.stats.mode(freqs)[0][0]
#     iqr = scipy.stats.iqr(freqs)

#     return [mean, std, maxv, minv, median, skew, kurt, q1, q3, mode, iqr]

In [3]:
# NOTE(review): disabled earlier draft of get_features. A live definition that
# computes the same features in the same order appears in a later cell of this
# notebook; this commented copy is not executed.
# def get_features(x, sr):
#     rmse = np.mean(librosa.feature.rms(y=x)[0])
#     zcr = np.mean(librosa.feature.zero_crossing_rate(x)[0])
#     tempo = librosa.beat.tempo(y=x, sr=sr)[0]
#     mfcc = list(np.mean(librosa.feature.mfcc(y=x, sr=sr), axis=1))
#     spec_cen = np.mean(librosa.feature.spectral_centroid(y=x, sr=sr))
#     spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=x, sr=sr))
#     spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=x, sr=sr))
#     spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=x))
#     spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=x, sr=sr))
#     features = [rmse, zcr, tempo, spec_cen, spectral_bandwidth, spectral_contrast, spectral_flatness, spectral_rolloff]
#     return features + mfcc

In [2]:
# Root folder of the SIH2024 dataset. The default is the original mounted-drive
# location; set the SIH2024_DATASET_DIR environment variable to run the notebook
# against a copy stored elsewhere without editing this cell.
DATASET_DIR = os.environ.get(
    "SIH2024_DATASET_DIR",
    r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET",
)

# Class sub-folders: FAKE holds the manipulated videos, REAL the genuine ones.
fake_audio_dir = os.path.join(DATASET_DIR, "FAKE")
real_audio_dir = os.path.join(DATASET_DIR, "REAL")

In [3]:
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and therefore the row order of the resulting feature table) is the same
# on every run.
real_files = sorted(os.listdir(real_audio_dir))
fake_files = sorted(os.listdir(fake_audio_dir))

In [5]:
# Persist both directory listings as pickle files in the dataset folder.
_listing_dumps = (
    (
        r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET\real_files.pkl",
        real_files,
    ),
    (
        r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET\fake_files.pkl",
        fake_files,
    ),
)

for _pkl_path, _file_names in _listing_dumps:
    with open(_pkl_path, "wb") as f:
        pk.dump(_file_names, f)

In [None]:
# Restore the two directory listings from their pickle files.
# pickle.load can execute arbitrary code embedded in the file, so only point
# these paths at listings written by this notebook.
_real_listing_pkl = r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET\real_files.pkl"
_fake_listing_pkl = r"H:\.shortcut-targets-by-id\1jH_pc6mMj0Iu8wLS1r0vggMWpVElJvOU\SIH2024_DATASET\fake_files.pkl"

with open(_real_listing_pkl, "rb") as f:
    real_files = pk.load(f)

with open(_fake_listing_pkl, "rb") as f:
    fake_files = pk.load(f)

In [8]:
# Combined number of REAL and FAKE entries.
total_files = sum(len(listing) for listing in (real_files, fake_files))

In [9]:
def get_features(x, sr):
    """Extract a fixed-order list of summary audio features from a signal.

    Each frame-wise librosa feature is collapsed to a single number with
    ``np.mean``; the MFCC matrix is averaged along axis 1, giving one mean per
    coefficient.

    Parameters
    ----------
    x : array-like
        Audio time series handed to librosa as ``y``.
    sr : number
        Sampling rate of ``x``.

    Returns
    -------
    list
        ``[rmse, zcr, tempo, spectral_centroid, spectral_bandwidth,
        spectral_contrast, spectral_flatness, spectral_rolloff]`` followed by
        the per-coefficient MFCC means (20 values with librosa's default
        ``n_mfcc``, matching the ``mfcc1``..``mfcc20`` columns built in
        ``load_data``).
    """
    rmse = np.mean(librosa.feature.rms(y=x)[0])
    zcr = np.mean(librosa.feature.zero_crossing_rate(x)[0])

    # librosa 0.10 moved the tempo estimator to librosa.feature.tempo and
    # deprecated librosa.beat.tempo; prefer the new location and fall back to
    # the old one so the function works on both older and newer releases.
    estimate_tempo = getattr(librosa.feature, "tempo", None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo
    tempo = estimate_tempo(y=x, sr=sr)[0]

    mfcc = list(np.mean(librosa.feature.mfcc(y=x, sr=sr), axis=1))
    spec_cen = np.mean(librosa.feature.spectral_centroid(y=x, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=x, sr=sr))
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=x, sr=sr))
    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=x))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=x, sr=sr))
    features = [
        rmse,
        zcr,
        tempo,
        spec_cen,
        spectral_bandwidth,
        spectral_contrast,
        spectral_flatness,
        spectral_rolloff,
    ]
    return features + mfcc


def extract_features(file_path):
    """Extract audio features from the soundtrack of a video file.

    The clip's audio is decoded at its native rate (``audio.fps``), downmixed
    to mono, and passed to ``get_features``.

    Parameters
    ----------
    file_path : str
        Path of the video file to open with ``VideoFileClip``.

    Returns
    -------
    list or None
        The feature list produced by ``get_features``, or ``None`` when the
        file cannot be processed (unreadable file, no audio track, empty audio,
        or any error raised during feature extraction). The error is printed,
        not raised, so one bad file does not abort a batch run.
    """
    try:
        video_clip = VideoFileClip(file_path)
        try:
            audio = video_clip.audio
            if audio is None:
                raise ValueError("video has no audio track")
            fps = audio.fps
            # One row per sample; multi-channel audio has one column per channel.
            frames = np.array(list(audio.iter_frames(fps=fps, dtype="float32")))
        finally:
            # Always release the reader, even when decoding fails; otherwise a
            # long batch run accumulates open clips.
            video_clip.close()

        if frames.size == 0:
            raise ValueError("audio track is empty")

        # Average the channels into a mono signal. Flattening a
        # (n_samples, n_channels) array would interleave the channels and
        # produce a signal twice as long that is still labelled with the
        # original sample rate.
        x = frames if frames.ndim == 1 else frames.mean(axis=1)

        # The samples are already float32 at the native rate, so they are used
        # directly instead of being round-tripped through an in-memory WAV.
        return get_features(x, fps)

    except Exception as e:
        print(f"Error encountered while parsing file: {file_path}, {e}")
        return None


def load_data(real_dir, fake_dir, real_files, fake_files):
    """Extract features for every REAL and FAKE file and build a labelled table.

    Parameters
    ----------
    real_dir, fake_dir : str
        Directories containing the REAL and FAKE files.
    real_files, fake_files : sequence of str
        File names (relative to the matching directory) to process.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed file: 8 summary features,
        ``mfcc1``..``mfcc20``, and ``label`` (0 for REAL, 1 for FAKE). REAL rows
        come first, then FAKE rows. Files for which ``extract_features`` returns
        ``None`` are skipped.
    """
    columns = (
        [
            "rmse",
            "zcr",
            "tempo",
            "spectral_centroid",
            "spectral_bandwidth",
            "spectral_contrast",
            "spectral_flatness",
            "spectral_rolloff",
        ]
        + [f"mfcc{i}" for i in range(1, 21)]
        + ["label"]
    )

    # (directory, file names, label) for each class, in processing order.
    sources = (
        (real_dir, real_files, 0),  # 0 for REAL
        (fake_dir, fake_files, 1),  # 1 for FAKE
    )

    total_files = len(real_files) + len(fake_files)
    data = []

    # The context manager closes the progress bar even if the run is interrupted
    # or a file raises an unexpected error.
    with tqdm(total=total_files, desc="Processing files", unit="file") as pbar:
        for directory, file_names, label in sources:
            for file_name in file_names:
                features = extract_features(os.path.join(directory, file_name))
                if features is not None:
                    # Build a new row instead of mutating the list owned by
                    # extract_features.
                    data.append(list(features) + [label])
                pbar.update(1)
                pbar.set_postfix({"Current file": file_name[:20]})

    return pd.DataFrame(data, columns=columns)

In [10]:
# Empty feature table with the final column layout: the eight summary features,
# then mfcc1..mfcc20, then the class label.
_summary_columns = [
    "rmse",
    "zcr",
    "tempo",
    "spectral_centroid",
    "spectral_bandwidth",
    "spectral_contrast",
    "spectral_flatness",
    "spectral_rolloff",
]
_mfcc_columns = [f"mfcc{idx}" for idx in range(1, 21)]

df = pd.DataFrame(columns=[*_summary_columns, *_mfcc_columns, "label"])

In [11]:
# Run feature extraction over both classes and collect the labelled table.
df = load_data(
    real_dir=real_audio_dir,
    fake_dir=fake_audio_dir,
    real_files=real_files,
    fake_files=fake_files,
)

Processing files:   0%|          | 15/119148 [01:07<176:25:02,  5.33s/file, Current file=ehgdzhkdvo.mp4]

KeyboardInterrupt: 

In [None]:
# Show the last five rows of the feature table.
df.tail(5)

In [None]:
# NOTE(review): disabled earlier per-file extraction loop; nothing here runs.
# It refers to `file_names` and `column_ames`, neither of which is defined in
# the visible notebook (`column_ames` looks like a typo for a column-name list
# -- confirm before reviving). It also re-reads metadata.json for every file
# and grows `df` with pd.concat one row at a time.
# for file in file_names:

#     clean_file = file.split("/")[-1]
#     video_clip = VideoFileClip(file)
#     audio = video_clip.audio
#     fps = audio.fps
#     audio_samples = np.array(list(audio.iter_frames(fps=fps, dtype="float32"))).flatten()
#     buffer = io.BytesIO()
#     sf.write(buffer, audio_samples, fps, format='wav')
#     buffer.seek(0)
#     x, sr = librosa.load(buffer, sr=None)
#     label = json.load(open("train_sample_videos/metadata.json"))[clean_file]['label']
#     new_row = pd.DataFrame([[clean_file] + get_features(x, sr) + [label]], columns=column_ames)
#     df = pd.concat([df, new_row], ignore_index=True)

In [None]:
# Write the feature table to CSV without the index column.
# NOTE(review): this is a Colab/Google Drive style path, whereas the input
# directories earlier in the notebook are Windows `H:\` paths -- confirm the
# notebook is run in an environment where this destination exists.
_features_csv_path = "/content/drive/MyDrive/SIH2024_DATASET/full_features.csv"
df.to_csv(_features_csv_path, index=False)