diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..effd2ab8aab1079562cde4c78a8a6f4ca8e2d1a5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +libs/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8745c0c37a11c6bd3abf4a579dfae7742e9fd979 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,54 @@ +ARG PYTORCH="1.10.0" +ARG CUDA="11.3" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +# Setting noninteractive build, setting up tzdata and configuring timezones +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=Europe/Berlin +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 nano mc glances vim git \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install cython +RUN conda install cython -y && conda clean --all + +# Installing APEX +RUN pip install -U pip +RUN git clone https://github.com/NVIDIA/apex +RUN sed -i 's/check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)/pass/g' apex/setup.py +RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex +RUN apt-get update -y +RUN apt-get install build-essential cmake -y +RUN apt-get install libopenblas-dev liblapack-dev -y +RUN apt-get install libx11-dev libgtk-3-dev -y +RUN pip install dlib +RUN pip install facenet-pytorch +RUN pip install albumentations==1.0.0 timm==0.4.12 pytorch_toolbelt tensorboardx +RUN pip install cython jupyter jupyterlab ipykernel matplotlib tqdm pandas + +# download pretraned Imagenet models +RUN apt install wget +RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth -P /root/.cache/torch/hub/checkpoints/ +RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth -P /root/.cache/torch/hub/checkpoints/ + +# Setting the working directory +WORKDIR /workspace + +# Copying the required codebase +COPY . /workspace + +RUN chmod 777 preprocess_data.sh +RUN chmod 777 train.sh +RUN chmod 777 predict_submission.sh + +ENV PYTHONPATH=. + +CMD ["/bin/bash"] + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..69832490ceba1c969d07507be2bd891cf63f9f1a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Selim Seferbekov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 56d885279bb69d544e233a14f55211d46a4ebe0e..fb8ef5b7f6de3fd950217311420c06d444e87313 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,171 @@
----
-title: DeepFakeClassifier
-emoji: ⚡
-colorFrom: gray
-colorTo: red
-sdk: gradio
-sdk_version: 3.15.0
-app_file: app.py
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## DeepFake Detection (DFDC) Solution by @selimsef
+
+## Challenge details:
+
+[Kaggle Challenge Page](https://www.kaggle.com/c/deepfake-detection-challenge)
+
+
+### Fake detection articles
+- [The Deepfake Detection Challenge (DFDC) Preview Dataset](https://arxiv.org/abs/1910.08854)
+- [Deep Fake Image Detection Based on Pairwise Learning](https://www.mdpi.com/2076-3417/10/1/370)
+- [DeeperForensics-1.0: A Large-Scale Dataset for Real-World Face Forgery Detection](https://arxiv.org/abs/2001.03024)
+- [DeepFakes and Beyond: A Survey of Face Manipulation and Fake Detection](https://arxiv.org/abs/2001.00179)
+- [Real or Fake? Spoofing State-Of-The-Art Face Synthesis Detection Systems](https://arxiv.org/abs/1911.05351)
+- [CNN-generated images are surprisingly easy to spot... for now](https://arxiv.org/abs/1912.11035)
+- [FakeSpotter: A Simple yet Robust Baseline for Spotting AI-Synthesized Fake Faces](https://arxiv.org/abs/1909.06122)
+- [FakeLocator: Robust Localization of GAN-Based Face Manipulations via Semantic Segmentation Networks with Bells and Whistles](https://arxiv.org/abs/2001.09598)
+- [Media Forensics and DeepFakes: an overview](https://arxiv.org/abs/2001.06564)
+- [Face X-ray for More General Face Forgery Detection](https://arxiv.org/abs/1912.13458)
+
+## Solution description
+In general, the solution is based on a frame-by-frame classification approach. More complex approaches did not work as well on the public leaderboard.
+
+#### Face-Detector
+The MTCNN detector was chosen due to kernel time limits. The more precise and robust S3FD detector would be preferable, but open-source PyTorch implementations do not have a license.
+
+The input size for the face detector was calculated for each video depending on its resolution:
+
+- 2x scale for videos whose wider side is less than 300 pixels
+- no rescaling for videos with a wider side between 300 and 1000 pixels
+- 0.5x scale for videos with a wider side > 1000 pixels
+- 0.33x scale for videos with a wider side > 1900 pixels
+
+### Input size
+Once I discovered that EfficientNets significantly outperform other encoders, I used only them in my solution.
+As I started with B4, I decided to use the "native" size for that network (380x380).
+Due to memory constraints I did not increase the input size even for the B7 encoder.
+
+### Margin
+When I generated crops for training I added 30% of the face crop size on each side and used only this setting during the competition.
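+A minimal sketch of how such a margin could be applied to a detected box before cropping (the helper below is illustrative only, not the actual preprocessing code; see the real implementation referenced next):
+```python
+def crop_with_margin(frame, xmin, ymin, xmax, ymax, margin=0.3):
+    # Expand the detected face box by `margin` of its size on each side,
+    # clipping at the image border, then crop the padded region.
+    box_h, box_w = ymax - ymin, xmax - xmin
+    pad_h, pad_w = int(box_h * margin), int(box_w * margin)
+    return frame[max(ymin - pad_h, 0):ymax + pad_h,
+                 max(xmin - pad_w, 0):xmax + pad_w]
+```
+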
+See [extract_crops.py](preprocessing/extract_crops.py) for the details.
+
+### Encoders
+The winning encoder is a current state-of-the-art model (EfficientNet B7) pretrained on ImageNet with noisy student: [Self-training with Noisy Student improves ImageNet classification](https://arxiv.org/abs/1911.04252)
+
+### Averaging predictions
+I used 32 frames for each video.
+For each model's outputs, instead of simple averaging I used the following heuristic, which worked quite well on the public leaderboard (0.25 -> 0.22 for a solo B5).
+```python
+import numpy as np
+
+def confident_strategy(pred, t=0.8):
+    pred = np.array(pred)
+    sz = len(pred)
+    fakes = np.count_nonzero(pred > t)
+    # 11 frames are detected as fakes with high probability
+    if fakes > sz // 2.5 and fakes > 11:
+        return np.mean(pred[pred > t])
+    elif np.count_nonzero(pred < 0.2) > 0.9 * sz:
+        return np.mean(pred[pred < 0.2])
+    else:
+        return np.mean(pred)
+```
+
+### Augmentations
+
+I used heavy augmentations by default.
+The [Albumentations](https://github.com/albumentations-team/albumentations) library supports most of the augmentations out of the box; I only needed to add the IsotropicResize augmentation.
+```python
+import cv2
+from albumentations import (Compose, ImageCompression, GaussNoise, GaussianBlur, HorizontalFlip,
+                            OneOf, PadIfNeeded, RandomBrightnessContrast, FancyPCA,
+                            HueSaturationValue, ToGray, ShiftScaleRotate)
+# IsotropicResize is the custom augmentation defined in this repo
+
+def create_train_transforms(size=300):
+    return Compose([
+        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
+        GaussNoise(p=0.1),
+        GaussianBlur(blur_limit=3, p=0.05),
+        HorizontalFlip(),
+        OneOf([
+            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
+            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
+            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
+        ], p=1),
+        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
+        OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7),
+        ToGray(p=0.2),
+        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5),
+    ]
+    )
+```
+In addition to these augmentations I wanted to achieve better generalization with:
+- Cutout-like augmentations (dropping artefacts and parts of the face)
+- Dropping out parts of the image, inspired by [GridMask](https://arxiv.org/abs/2001.04086) and the [Severstal Winning Solution](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114254)
+
+![augmentations](images/augmentations.jpg "Dropout augmentations")
+
+## Building docker image
+All libraries and the environment are already configured in the Dockerfile. It requires the Docker engine (https://docs.docker.com/engine/install/ubuntu/) and nvidia-docker (https://github.com/NVIDIA/nvidia-docker) on your system.
+
+To build a docker image run `docker build -t df .`
+
+## Running docker
+`docker run --runtime=nvidia --ipc=host --rm --volume :/dataset -it df`
+
+## Data preparation
+
+Once the DFDC dataset is downloaded, all the scripts expect `dfdc_train_xxx` folders under the data root directory.
+
+Preprocessing is done in a single script, **`preprocess_data.sh`**, which requires the dataset directory as its first argument.
+It will execute the steps below:
+
+##### 1. Find face bboxes
+To extract face bboxes I used the facenet-pytorch library, essentially only its MTCNN detector.
+`python preprocessing/detect_original_faces.py --root-dir DATA_ROOT`
+This script will detect faces in the real videos and store them as JSON files in the DATA_ROOT/bboxes directory.
+
+##### 2. Extract crops from videos
+To extract image crops I used the bboxes saved before. Bounding boxes from the original videos are reused for the corresponding fake videos as well.
+`python preprocessing/extract_crops.py --root-dir DATA_ROOT --crops-dir crops`
+This script will extract face crops from the videos and save them in the DATA_ROOT/crops directory.
+
+##### 3. Generate landmarks
+From the saved crops it is quite fast to run MTCNN again and extract landmarks.
+`python preprocessing/generate_landmarks.py --root-dir DATA_ROOT`
+This script will extract landmarks and save them in the DATA_ROOT/landmarks directory.
+
+##### 4. Generate diff SSIM masks
+`python preprocessing/generate_diffs.py --root-dir DATA_ROOT`
+This script will extract SSIM difference masks between real and fake images and save them in the DATA_ROOT/diffs directory.
+
+##### 5. Generate folds
+`python preprocessing/generate_folds.py --root-dir DATA_ROOT --out folds.csv`
+By default it will use 16 splits so that folders 0-2 form a holdout set. A smaller set of only 400 videos can also be used for validation.
+
+
+## Training
+
+Training of 5 B7 models with different seeds is done in the **`train.sh`** script.
+
+During training, checkpoints are saved after every epoch.
+
+## Hardware requirements
+The models were mostly trained on a devbox configuration with 4x Titan V, thanks to Nvidia and the DSB2018 competition (https://www.kaggle.com/c/data-science-bowl-2018/) where I got these GPUs.
+
+Overall, training requires 4 GPUs with 12 GB+ memory.
+The batch size needs to be adjusted for standard 1080 Ti or 2080 Ti graphics cards.
+
+As I computed the fake loss and real loss separately inside each batch, results might be better with a larger batch size, for example on V100 GPUs.
+Even though SyncBN is used, a larger batch on each GPU will lead to less noise, as the DFDC dataset has some fakes where the face detector failed and the face crops are not really fakes.
+
+## Plotting losses to select checkpoints
+
+`python plot_loss.py --log-file logs/`
+
+![loss plot](images/loss_plot.png "Weighted loss")
+
+## Inference
+
+
+The Kaggle kernel is reproduced with the `predict_folder.py` script.
+
+
+## Pretrained models
+The `download_weights.sh` script will download the trained models to the `weights/` folder. They should be downloaded before building the docker image.
+
+Ensemble inference is already preconfigured in the `predict_submission.sh` bash script. It expects a directory with videos as the first argument and an output csv file as the second argument.
+ +For example `./predict_submission.sh /mnt/datasets/deepfake/test_videos submission.csv` + + + + + diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..af878780061b785d49b9cbe56a5d4df6fd86da00 --- /dev/null +++ b/app.py @@ -0,0 +1,62 @@ +import argparse +import os +import re +import time +import cv2 + +import torch +import pandas as pd +from kernel_utils import VideoReader, FaceExtractor, confident_strategy, predict_on_video_set +from training.zoo.classifiers import DeepFakeClassifier +import gradio as gr + + + +def deepfakeclassifier(potential_test_video, option): + if option == 'Original': + weights_dir = "./weights" + models_dir = ["Original_DeepFakeClassifier_tf_efficientnet_b7_ns"] + else: + weights_dir = "./weights" + models_dir = ["Custom_classifier_DeepFakeClassifier_tf_efficientnet_b7_ns"] + + parts = potential_test_video.split("\\") + test_videos = [parts[-1]] + parts[0] += "\\" + test_dir = parts[:-1] + test_dir = os.path.join(*test_dir) + + + models = [] + model_paths = [os.path.join(weights_dir, model) for model in models_dir] + for path in model_paths: + model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns").to('cuda') + print("loading state dict {}".format(path)) + checkpoint = torch.load(path, map_location="cpu") + state_dict = checkpoint.get("state_dict", checkpoint) + model.load_state_dict({re.sub("^module.", "", k): v for k, v in state_dict.items()}, strict=True) + model.eval() + del checkpoint + models.append(model.half()) + + frames_per_video = 32 + video_reader = VideoReader() + video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video) + face_extractor = FaceExtractor(video_read_fn) + input_size = 380 + strategy = confident_strategy + stime = time.time() + + print("Predicting {} videos".format(len(test_videos))) + predictions = predict_on_video_set(face_extractor=face_extractor, input_size=input_size, models=models, + strategy=strategy, frames_per_video=frames_per_video, videos=test_videos, + num_workers=6, test_dir=test_dir) + + print("Elapsed:", time.time() - stime) + return "This video is FAKE with {} probability!".format(predictions[0]) + +demo = gr.Interface(fn=deepfakeclassifier, inputs=[gr.Video(), + gr.Radio(["Original", "Custom"])] ,outputs="text", description="Original option uses the trained weights of the winning idea. Custom is my trained \ + network. 
Original optional performs better as it uses much more data for training!") + +demo.launch() \ No newline at end of file diff --git a/configs/b5.json b/configs/b5.json new file mode 100644 index 0000000000000000000000000000000000000000..f32e6d00f853ed34a05ccac376a6e1b74e85132f --- /dev/null +++ b/configs/b5.json @@ -0,0 +1,28 @@ +{ + "network": "DeepFakeClassifier", + "encoder": "tf_efficientnet_b5_ns", + "batches_per_epoch": 2500, + "size": 380, + "fp16": true, + "optimizer": { + "batch_size": 20, + "type": "SGD", + "momentum": 0.9, + "weight_decay": 1e-4, + "learning_rate": 0.01, + "nesterov": true, + "schedule": { + "type": "poly", + "mode": "step", + "epochs": 30, + "params": {"max_iter": 75100} + } + }, + "normalize": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "losses": { + "BinaryCrossentropy": 1 + } +} \ No newline at end of file diff --git a/configs/b7.json b/configs/b7.json new file mode 100644 index 0000000000000000000000000000000000000000..1f26eefcf6c11e4a4894ffb8ff1014cec4f47449 --- /dev/null +++ b/configs/b7.json @@ -0,0 +1,29 @@ +{ + "network": "DeepFakeClassifier", + "encoder": "tf_efficientnet_b7_ns", + "batches_per_epoch": 10000, + "size": 380, + "fp16": true, + "optimizer": { + "batch_size": 2, + "type": "SGD", + "momentum": 0.9, + "weight_decay": 1e-4, + "learning_rate": 0.01, + "nesterov": true, + "schedule": { + "type": "poly", + "mode": "step", + "epochs": 15, + "params": {"max_iter": 100500} + } + }, + "normalize": { + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "losses": { + "BinaryCrossentropy": 1 + } +} + diff --git a/download_weights.sh b/download_weights.sh new file mode 100644 index 0000000000000000000000000000000000000000..57ef2d2c9311def0132c7cbced9d759014609bd8 --- /dev/null +++ b/download_weights.sh @@ -0,0 +1,9 @@ +tag=0.0.1 + +wget -O weights/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36 +wget -O weights/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19 +wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29 +wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31 +wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37 +wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40 +wget -O weights/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23 \ No newline at end of file diff --git a/images/augmentations.jpg b/images/augmentations.jpg new file mode 100644 index 0000000000000000000000000000000000000000..357c580a6796c840ff6a8417389f252816128ebe Binary files /dev/null and b/images/augmentations.jpg differ diff --git 
a/images/loss_plot.png b/images/loss_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..1f5c7dda8ab8c22d549086aa0611ff47845b4b7a Binary files /dev/null and b/images/loss_plot.png differ diff --git a/kernel_utils.py b/kernel_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b954622bc9f832394630566098d00e978178154f --- /dev/null +++ b/kernel_utils.py @@ -0,0 +1,361 @@ +import os + +import cv2 +import numpy as np +import torch +from PIL import Image +from albumentations.augmentations.functional import image_compression +from facenet_pytorch.models.mtcnn import MTCNN +from concurrent.futures import ThreadPoolExecutor + +from torchvision.transforms import Normalize + +mean = [0.485, 0.456, 0.406] +std = [0.229, 0.224, 0.225] +normalize_transform = Normalize(mean, std) + + +class VideoReader: + """Helper class for reading one or more frames from a video file.""" + + def __init__(self, verbose=True, insets=(0, 0)): + """Creates a new VideoReader. + + Arguments: + verbose: whether to print warnings and error messages + insets: amount to inset the image by, as a percentage of + (width, height). This lets you "zoom in" to an image + to remove unimportant content around the borders. + Useful for face detection, which may not work if the + faces are too small. + """ + self.verbose = verbose + self.insets = insets + + def read_frames(self, path, num_frames, jitter=0, seed=None): + """Reads frames that are always evenly spaced throughout the video. + + Arguments: + path: the video file + num_frames: how many frames to read, -1 means the entire video + (warning: this will take up a lot of memory!) + jitter: if not 0, adds small random offsets to the frame indices; + this is useful so we don't always land on even or odd frames + seed: random seed for jittering; if you set this to a fixed value, + you probably want to set it only on the first video + """ + assert num_frames > 0 + + capture = cv2.VideoCapture(path) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + if frame_count <= 0: return None + + frame_idxs = np.linspace(0, frame_count - 1, num_frames, endpoint=True, dtype=np.int) + if jitter > 0: + np.random.seed(seed) + jitter_offsets = np.random.randint(-jitter, jitter, len(frame_idxs)) + frame_idxs = np.clip(frame_idxs + jitter_offsets, 0, frame_count - 1) + + result = self._read_frames_at_indices(path, capture, frame_idxs) + capture.release() + return result + + def read_random_frames(self, path, num_frames, seed=None): + """Picks the frame indices at random. + + Arguments: + path: the video file + num_frames: how many frames to read, -1 means the entire video + (warning: this will take up a lot of memory!) + """ + assert num_frames > 0 + np.random.seed(seed) + + capture = cv2.VideoCapture(path) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + if frame_count <= 0: return None + + frame_idxs = sorted(np.random.choice(np.arange(0, frame_count), num_frames)) + result = self._read_frames_at_indices(path, capture, frame_idxs) + + capture.release() + return result + + def read_frames_at_indices(self, path, frame_idxs): + """Reads frames from a video and puts them into a NumPy array. + + Arguments: + path: the video file + frame_idxs: a list of frame indices. Important: should be + sorted from low-to-high! If an index appears multiple + times, the frame is still read only once. 
+ + Returns: + - a NumPy array of shape (num_frames, height, width, 3) + - a list of the frame indices that were read + + Reading stops if loading a frame fails, in which case the first + dimension returned may actually be less than num_frames. + + Returns None if an exception is thrown for any reason, or if no + frames were read. + """ + assert len(frame_idxs) > 0 + capture = cv2.VideoCapture(path) + result = self._read_frames_at_indices(path, capture, frame_idxs) + capture.release() + return result + + def _read_frames_at_indices(self, path, capture, frame_idxs): + try: + frames = [] + idxs_read = [] + for frame_idx in range(frame_idxs[0], frame_idxs[-1] + 1): + # Get the next frame, but don't decode if we're not using it. + ret = capture.grab() + if not ret: + if self.verbose: + print("Error grabbing frame %d from movie %s" % (frame_idx, path)) + break + + # Need to look at this frame? + current = len(idxs_read) + if frame_idx == frame_idxs[current]: + ret, frame = capture.retrieve() + if not ret or frame is None: + if self.verbose: + print("Error retrieving frame %d from movie %s" % (frame_idx, path)) + break + + frame = self._postprocess_frame(frame) + frames.append(frame) + idxs_read.append(frame_idx) + + if len(frames) > 0: + return np.stack(frames), idxs_read + if self.verbose: + print("No frames read from movie %s" % path) + return None + except: + if self.verbose: + print("Exception while reading movie %s" % path) + return None + + def read_middle_frame(self, path): + """Reads the frame from the middle of the video.""" + capture = cv2.VideoCapture(path) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + result = self._read_frame_at_index(path, capture, frame_count // 2) + capture.release() + return result + + def read_frame_at_index(self, path, frame_idx): + """Reads a single frame from a video. + + If you just want to read a single frame from the video, this is more + efficient than scanning through the video to find the frame. However, + for reading multiple frames it's not efficient. + + My guess is that a "streaming" approach is more efficient than a + "random access" approach because, unless you happen to grab a keyframe, + the decoder still needs to read all the previous frames in order to + reconstruct the one you're asking for. + + Returns a NumPy array of shape (1, H, W, 3) and the index of the frame, + or None if reading failed. 
+ """ + capture = cv2.VideoCapture(path) + result = self._read_frame_at_index(path, capture, frame_idx) + capture.release() + return result + + def _read_frame_at_index(self, path, capture, frame_idx): + capture.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) + ret, frame = capture.read() + if not ret or frame is None: + if self.verbose: + print("Error retrieving frame %d from movie %s" % (frame_idx, path)) + return None + else: + frame = self._postprocess_frame(frame) + return np.expand_dims(frame, axis=0), [frame_idx] + + def _postprocess_frame(self, frame): + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + if self.insets[0] > 0: + W = frame.shape[1] + p = int(W * self.insets[0]) + frame = frame[:, p:-p, :] + + if self.insets[1] > 0: + H = frame.shape[1] + q = int(H * self.insets[1]) + frame = frame[q:-q, :, :] + + return frame + + +class FaceExtractor: + def __init__(self, video_read_fn): + self.video_read_fn = video_read_fn + self.detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device='cuda') + + def process_videos(self, input_dir, filenames, video_idxs): + videos_read = [] + frames_read = [] + frames = [] + results = [] + for video_idx in video_idxs: + # Read the full-size frames from this video. + filename = filenames[video_idx] + video_path = os.path.join(input_dir, filename) + result = self.video_read_fn(video_path) + # Error? Then skip this video. + if result is None: continue + + videos_read.append(video_idx) + + # Keep track of the original frames (need them later). + my_frames, my_idxs = result + + frames.append(my_frames) + frames_read.append(my_idxs) + for i, frame in enumerate(my_frames): + h, w = frame.shape[:2] + img = Image.fromarray(frame.astype(np.uint8)) + img = img.resize(size=[s // 2 for s in img.size]) + + batch_boxes, probs = self.detector.detect(img, landmarks=False) + + faces = [] + scores = [] + if batch_boxes is None: + continue + for bbox, score in zip(batch_boxes, probs): + if bbox is not None: + xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox] + w = xmax - xmin + h = ymax - ymin + p_h = h // 3 + p_w = w // 3 + crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w] + faces.append(crop) + scores.append(score) + + frame_dict = {"video_idx": video_idx, + "frame_idx": my_idxs[i], + "frame_w": w, + "frame_h": h, + "faces": faces, + "scores": scores} + results.append(frame_dict) + + return results + + def process_video(self, video_path): + """Convenience method for doing face extraction on a single video.""" + input_dir = os.path.dirname(video_path) + filenames = [os.path.basename(video_path)] + return self.process_videos(input_dir, filenames, [0]) + + + +def confident_strategy(pred, t=0.8): + pred = np.array(pred) + sz = len(pred) + fakes = np.count_nonzero(pred > t) + # 11 frames are detected as fakes with high probability + if fakes > sz // 2.5 and fakes > 11: + return np.mean(pred[pred > t]) + elif np.count_nonzero(pred < 0.2) > 0.9 * sz: + return np.mean(pred[pred < 0.2]) + else: + return np.mean(pred) + +strategy = confident_strategy + + +def put_to_center(img, input_size): + img = img[:input_size, :input_size] + image = np.zeros((input_size, input_size, 3), dtype=np.uint8) + start_w = (input_size - img.shape[1]) // 2 + start_h = (input_size - img.shape[0]) // 2 + image[start_h:start_h + img.shape[0], start_w: start_w + img.shape[1], :] = img + return image + + +def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC): + h, w = img.shape[:2] + if max(w, h) == size: + return img + 
if w > h: + scale = size / w + h = h * scale + w = size + else: + scale = size / h + w = w * scale + h = size + interpolation = interpolation_up if scale > 1 else interpolation_down + resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation) + return resized + + +def predict_on_video(face_extractor, video_path, batch_size, input_size, models, strategy=np.mean, + apply_compression=False): + batch_size *= 4 + try: + faces = face_extractor.process_video(video_path) + if len(faces) > 0: + x = np.zeros((batch_size, input_size, input_size, 3), dtype=np.uint8) + n = 0 + for frame_data in faces: + for face in frame_data["faces"]: + resized_face = isotropically_resize_image(face, input_size) + resized_face = put_to_center(resized_face, input_size) + if apply_compression: + resized_face = image_compression(resized_face, quality=90, image_type=".jpg") + if n + 1 < batch_size: + x[n] = resized_face + n += 1 + else: + pass + if n > 0: + x = torch.tensor(x, device="cuda").float() + # Preprocess the images. + x = x.permute((0, 3, 1, 2)) + # x = x.to('cpu') + for i in range(len(x)): + x[i] = normalize_transform(x[i] / 255.) + # Make a prediction, then take the average. + with torch.no_grad(): + preds = [] + for model in models: + # with torch.cuda.amp.autocast(): + y_pred = model(x[:n].float()) + y_pred = torch.sigmoid(y_pred.squeeze()) + bpred = y_pred[:n].cpu().numpy() + preds.append(strategy(bpred)) + return np.mean(preds) + except Exception as e: + print("Prediction error on video %s: %s" % (video_path, str(e))) + + return 0.5 + + +def predict_on_video_set(face_extractor, videos, input_size, num_workers, test_dir, frames_per_video, models, + strategy=np.mean, + apply_compression=False): + def process_file(i): + filename = videos[i] + y_pred = predict_on_video(face_extractor=face_extractor, video_path=os.path.join(test_dir, filename), + input_size=input_size, + batch_size=frames_per_video, + models=models, strategy=strategy, apply_compression=apply_compression) + return y_pred + + with ThreadPoolExecutor(max_workers=num_workers) as ex: + predictions = ex.map(process_file, range(len(videos))) + return list(predictions) + diff --git a/libs/shape_predictor_68_face_landmarks.dat b/libs/shape_predictor_68_face_landmarks.dat new file mode 100644 index 0000000000000000000000000000000000000000..1e5da4f9a556bec8582e6c55b89b3e6bfdd60021 --- /dev/null +++ b/libs/shape_predictor_68_face_landmarks.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f +size 99693937 diff --git a/logs/.gitkeep b/logs/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671630947.Green.15284.0 b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671630947.Green.15284.0 new file mode 100644 index 0000000000000000000000000000000000000000..c8a099ac48f615612d4528b5ad8bde31d4c9c123 --- /dev/null +++ b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671630947.Green.15284.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e450ee9880145e3f8ec94a5f7c344e8a24b4c57184d033fa8622a56987a778ba +size 40 diff --git a/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671631032.Green.22668.0 b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671631032.Green.22668.0 new file mode 100644 index 
0000000000000000000000000000000000000000..521615249931b9cc334d589f55ef8e9a2daa7411 --- /dev/null +++ b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671631032.Green.22668.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b497c6e7dd9bd7d92ab941a7cb3d4778c07b1c91c5cc24d8174a1176b0f25d10 +size 1154 diff --git a/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671715958.Green.34292.0 b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671715958.Green.34292.0 new file mode 100644 index 0000000000000000000000000000000000000000..bb644a41a9ffb10f841ab587f72bcbbc2ac60d82 --- /dev/null +++ b/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671715958.Green.34292.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ce902041c80cb054cd04d4c98d9eeecaa51ef6957f8c4dd62a3eba482c1b46 +size 40 diff --git a/plot_loss.py b/plot_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b388c8559e32ab0d03c785449a08bbb27ff0af --- /dev/null +++ b/plot_loss.py @@ -0,0 +1,45 @@ +import argparse + +import numpy as np + +from matplotlib import pyplot as plt + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Plot losses from log") + parser.add_argument("--log-file", help="path to log file", required=True) + parser.add_argument("--fake-weight", help="weight for fake loss", default=1.4, type=float) + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + + with open(args.log_file, "r") as f: + lines = f.readlines() + real_losses = [] + fake_losses = [] + for line in lines: + line = line.strip() + if line.startswith("fake_loss"): + fake_losses.append(float(line.split(" ")[-1])) + elif line.startswith("real_loss"): + real_losses.append(float(line.split(" ")[-1])) + real_losses = np.array(real_losses) + fake_losses = np.array(fake_losses) + loss = (fake_losses * args.fake_weight + real_losses)/2 + plt.title("Weighted loss ({}*fake_loss + real_loss)/2)".format(args.fake_weight)) + best_loss_idx = np.argsort(loss)[:5] + # ignore early epochs loss is quite noisy and there could be spikes + best_loss_idx = best_loss_idx[best_loss_idx > 16] + plt.scatter(best_loss_idx, loss[best_loss_idx], c="red") + for idx in best_loss_idx: + plt.annotate(str(idx), (idx, loss[idx])) + plt.plot(loss) + plt.show() + + +if __name__ == '__main__': + main() diff --git a/predict_folder.py b/predict_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..84ca0a63b930d18a44d91b9383cd0fc8fc87ba08 --- /dev/null +++ b/predict_folder.py @@ -0,0 +1,47 @@ +import argparse +import os +import re +import time + +import torch +import pandas as pd +from kernel_utils import VideoReader, FaceExtractor, confident_strategy, predict_on_video_set +from training.zoo.classifiers import DeepFakeClassifier + +if __name__ == '__main__': + parser = argparse.ArgumentParser("Predict test videos") + arg = parser.add_argument + arg('--weights-dir', type=str, default="weights", help="path to directory with checkpoints") + arg('--models', nargs='+', required=True, help="checkpoint files") + arg('--test-dir', type=str, required=True, help="path to directory with videos") + arg('--output', type=str, required=False, help="path to output csv", default="submission.csv") + args = parser.parse_args() + + models = [] + model_paths = [os.path.join(args.weights_dir, model) for model in args.models] + for path in model_paths: + model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns").to("cuda") + 
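+        # Checkpoints may store keys with a "module." prefix (from DataParallel training); it is stripped below before loading.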
print("loading state dict {}".format(path)) + checkpoint = torch.load(path, map_location="cpu") + state_dict = checkpoint.get("state_dict", checkpoint) + model.load_state_dict({re.sub("^module.", "", k): v for k, v in state_dict.items()}, strict=True) + model.eval() + del checkpoint + models.append(model.half()) + + frames_per_video = 32 + video_reader = VideoReader() + video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video) + face_extractor = FaceExtractor(video_read_fn) + input_size = 380 + strategy = confident_strategy + stime = time.time() + + test_videos = sorted([x for x in os.listdir(args.test_dir) if x[-4:] == ".mp4"]) + print("Predicting {} videos".format(len(test_videos))) + predictions = predict_on_video_set(face_extractor=face_extractor, input_size=input_size, models=models, + strategy=strategy, frames_per_video=frames_per_video, videos=test_videos, + num_workers=6, test_dir=args.test_dir) + submission_df = pd.DataFrame({"filename": test_videos, "label": predictions}) + submission_df.to_csv(args.output, index=False) + print("Elapsed:", time.time() - stime) diff --git a/predict_submission.sh b/predict_submission.sh new file mode 100644 index 0000000000000000000000000000000000000000..13fd6d98aa77c0c9bd653f39f61ac864aa628a86 --- /dev/null +++ b/predict_submission.sh @@ -0,0 +1,13 @@ +TEST_DIR=$1 +CSV=$2 + +python predict_folder.py \ + --test-dir $TEST_DIR \ + --output $CSV \ + --models final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36 \ + final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19 \ + final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29 \ + final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31 \ + final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37 \ + final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40 \ + final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23 \ No newline at end of file diff --git a/predictions_1.json b/predictions_1.json new file mode 100644 index 0000000000000000000000000000000000000000..562b8cba48b32acb3946f3a8cf7fb6109d040f10 --- /dev/null +++ b/predictions_1.json @@ -0,0 +1 @@ +{"probs": {"avmjormvsx": [[0.98095703125], [0.984375], [0.97900390625], [0.97021484375], [0.98291015625], [0.98193359375], [0.97998046875], [0.97998046875], [0.9833984375], [0.984375], [0.97802734375], [0.95947265625], [0.97802734375], [0.98046875], [0.974609375]], "curpwogllm": [[0.97998046875], [0.93115234375], [0.9736328125], [0.9521484375], [0.93017578125], [0.97119140625], [0.9716796875], [0.9736328125], [0.97998046875], [0.97705078125], [0.93017578125], [0.95751953125], [0.97509765625], [0.95458984375], [0.98291015625]], "aorjvbyxhw": [[0.9814453125], [0.98388671875], [0.98388671875], [0.9853515625], [0.9833984375], [0.98388671875], [0.98046875], [0.98486328125], [0.98046875], [0.9794921875], [0.97021484375], [0.982421875], [0.98193359375], [0.984375], [0.982421875]], "cprhtltsjp": [[0.9814453125], [0.984375], [0.984375], [0.9775390625], [0.9833984375], [0.97900390625], [0.9833984375], [0.984375], [0.9775390625], [0.9775390625], [0.9833984375], [0.9833984375], [0.978515625], [0.98291015625], [0.9853515625]], "avnqydkqjj": [[0.9814453125], [0.98583984375], [0.98583984375], [0.9794921875], [0.9794921875], [0.9814453125], [0.97119140625], [0.96240234375], [0.97900390625], [0.98583984375], [0.98046875], [0.98193359375], [0.9814453125], [0.98486328125], [0.98291015625]], "bulkxhhknf": [[0.9794921875], [0.9765625], [0.97802734375], [0.9716796875], [0.98193359375], [0.9833984375], [0.98095703125], [0.970703125], 
[0.98095703125], [0.9794921875], [0.9833984375], [0.97900390625], [0.9814453125], [0.98193359375], [0.982421875]], "ekcrtigpab": [[0.98876953125], [0.94384765625], [0.9853515625], [0.9775390625], [0.9794921875], [0.97900390625], [0.96142578125], [0.98291015625], [0.98486328125], [0.98388671875], [0.97998046875], [0.7119140625], [0.98583984375], [0.984375], [0.9794921875]], "dhkwmjxwrn": [[0.9794921875], [0.98486328125], [0.638671875], [0.98193359375], [0.97216796875], [0.98486328125], [0.9833984375], [0.9814453125], [0.98046875], [0.982421875], [0.97998046875], [0.98095703125], [0.966796875], [0.9814453125], [0.98486328125], [0.97802734375], [0.97900390625], [0.9208984375], [0.98193359375], [0.97119140625], [0.98291015625], [0.9755859375], [0.9716796875], [0.97509765625], [0.91650390625], [0.98046875]], "apatcsqejh": [[0.97412109375], [0.9794921875], [0.955078125], [0.982421875], [0.98046875], [0.982421875], [0.98193359375], [0.982421875], [0.9814453125], [0.98193359375], [0.98388671875], [0.9375], [0.982421875], [0.9755859375], [0.98095703125], [0.98388671875], [0.97216796875], [0.9697265625], [0.9814453125], [0.98291015625], [0.98388671875], [0.97509765625], [0.98291015625], [0.9814453125], [0.98193359375], [0.982421875], [0.97802734375], [0.982421875], [0.9814453125], [0.98193359375]], "dzyuwjkjui": [[0.978515625], [0.96826171875], [0.9814453125], [0.978515625], [0.98046875], [0.97607421875], [0.97998046875], [0.98291015625], [0.9775390625], [0.97900390625], [0.9775390625], [0.982421875], [0.982421875], [0.98046875], [0.98046875]], "emaalmsonj": [[0.98095703125], [0.9599609375], [0.98046875], [0.98291015625], [0.97802734375], [0.9775390625], [0.97998046875], [0.9794921875], [0.97998046875], [0.9775390625], [0.97705078125], [0.9794921875], [0.97998046875], [0.9736328125], [0.98046875]], "bgwmmujlmc": [[0.98291015625], [0.982421875], [0.9736328125], [0.97998046875], [0.984375], [0.9814453125], [0.92822265625], [0.98388671875], [0.97314453125], [0.978515625], [0.98681640625], [0.98486328125], [0.98388671875], [0.98486328125], [0.97314453125]], "eckvhdusax": [[0.98388671875], [0.98388671875], [0.98046875], [0.96142578125], [0.9794921875], [0.98291015625], [0.98291015625], [0.9833984375], [0.98193359375], [0.98388671875], [0.98486328125], [0.984375], [0.98486328125], [0.98583984375], [0.982421875]], "ajwpjhrbcv": [[0.9765625], [0.96875], [0.9765625], [0.9599609375], [0.9755859375], [0.97607421875], [0.9658203125], [0.95703125], [0.958984375], [0.97705078125], [0.93505859375], [0.943359375], [0.96142578125], [0.73583984375], [0.95556640625]], "efwfxwwlbw": [[0.97998046875], [0.9609375], [0.96875], [0.97900390625], [0.98486328125], [0.9794921875], [0.94580078125], [0.9853515625], [0.966796875], [0.982421875], [0.93896484375], [0.8740234375], [0.98046875], [0.9462890625], [0.970703125]], "emfbhytfhc": [[0.984375], [0.9833984375], [0.982421875], [0.9853515625], [0.9833984375], [0.982421875], [0.98193359375], [0.98193359375], [0.9833984375], [0.98291015625], [0.98486328125], [0.982421875], [0.98291015625], [0.98388671875], [0.98193359375]], "bnjcdrfuov": [[0.984375], [0.98388671875], [0.98193359375], [0.98583984375], [0.984375], [0.98193359375], [0.98486328125], [0.98583984375], [0.984375], [0.98583984375], [0.98583984375], [0.9833984375], [0.986328125], [0.9833984375], [0.98486328125]], "drcyabprvt": [[0.98583984375], [0.98388671875], [0.9873046875], [0.98779296875], [0.986328125], [0.98486328125], [0.98388671875], [0.9853515625], [0.9853515625], [0.9853515625], [0.9853515625], [0.98388671875], 
[0.9853515625], [0.98583984375], [0.98486328125]], "cdphtzqrvp": [[0.984375], [0.98486328125], [0.982421875], [0.984375], [0.984375], [0.98291015625], [0.9833984375], [0.98193359375], [0.98291015625], [0.9833984375], [0.98583984375], [0.9833984375], [0.98388671875], [0.986328125], [0.9833984375]], "ellavthztb": [[0.982421875], [0.98193359375], [0.98681640625], [0.9833984375], [0.98486328125], [0.98193359375], [0.9853515625], [0.98291015625], [0.9853515625], [0.982421875], [0.986328125], [0.984375], [0.984375], [0.98486328125], [0.98291015625]], "dbtbbhakdv": [[0.9765625], [0.98193359375], [0.96630859375], [0.98388671875], [0.98193359375], [0.978515625], [0.96728515625], [0.978515625], [0.97607421875], [0.9755859375], [0.98095703125], [0.982421875], [0.98193359375], [0.9765625], [0.97802734375]], "btohlidmru": [[0.98583984375], [0.984375], [0.98486328125], [0.984375], [0.98388671875], [0.986328125], [0.98583984375], [0.98681640625], [0.98681640625], [0.98779296875], [0.9873046875], [0.98486328125], [0.98681640625], [0.98681640625], [0.98583984375]], "bzythlfnhq": [[0.98193359375], [0.978515625], [0.97021484375], [0.97900390625], [0.97998046875], [0.96142578125], [0.95849609375], [0.97705078125], [0.96240234375], [0.98193359375], [0.96435546875], [0.982421875], [0.978515625], [0.97607421875], [0.93359375]], "etmcruaihe": [[0.96435546875], [0.96435546875], [0.97998046875], [0.98193359375], [0.9814453125], [0.98095703125], [0.95703125], [0.962890625], [0.98291015625], [0.97119140625], [0.97509765625], [0.982421875], [0.982421875], [0.984375], [0.9814453125]], "bsqgziaylx": [[0.98095703125], [0.982421875], [0.98291015625], [0.97998046875], [0.982421875], [0.97607421875], [0.982421875], [0.92431640625], [0.98388671875], [0.98095703125], [0.9814453125], [0.982421875], [0.98193359375], [0.982421875], [0.9814453125], [0.97900390625], [0.98291015625], [0.982421875], [0.98046875], [0.8251953125], [0.986328125], [0.9755859375], [0.9755859375], [0.98193359375], [0.98095703125], [0.98193359375], [0.98291015625], [0.9814453125], [0.9814453125], [0.9794921875]], "bpxckdzddv": [[0.9658203125], [0.95556640625], [0.9853515625], [0.82080078125], [0.9833984375], [0.98681640625], [0.98193359375], [0.94091796875], [0.97265625], [0.98193359375], [0.974609375], [0.97998046875], [0.9794921875], [0.708984375], [0.96923828125]], "dakiztgtnw": [[0.98291015625], [0.9794921875], [0.984375], [0.9716796875], [0.98291015625], [0.984375], [0.97802734375], [0.984375], [0.98291015625], [0.984375], [0.98388671875], [0.97998046875], [0.98193359375], [0.97802734375], [0.98388671875]], "dgxrqjdomn": [[0.9658203125], [0.9482421875], [0.982421875], [0.98291015625], [0.98486328125], [0.970703125], [0.97412109375], [0.98291015625], [0.96142578125], [0.98681640625], [0.98095703125], [0.98291015625], [0.97705078125], [0.97705078125], [0.98486328125]], "aytzyidmgs": [[0.98388671875], [0.98046875], [0.9814453125], [0.98046875], [0.974609375], [0.9814453125], [0.97412109375], [0.98095703125], [0.98046875], [0.98193359375], [0.98095703125], [0.9814453125], [0.97705078125], [0.98095703125], [0.9794921875]], "eepezmygaq": [[0.98291015625], [0.98388671875], [0.98046875], [0.984375], [0.986328125], [0.98583984375], [0.98486328125], [0.98388671875], [0.98046875], [0.98681640625], [0.98681640625], [0.984375], [0.98583984375], [0.9833984375], [0.98583984375]], "bofqajtwve": [[0.98193359375], [0.96240234375], [0.98486328125], [0.98291015625], [0.986328125], [0.982421875], [0.982421875], [0.9814453125], [0.98291015625], [0.9814453125], 
[0.982421875], [0.98193359375], [0.984375], [0.984375], [0.982421875]], "abarnvbtwb": [[0.984375], [0.98388671875], [0.97998046875], [0.984375], [0.96484375], [0.98046875], [0.984375], [0.9853515625], [0.98388671875], [0.9677734375], [0.984375], [0.98583984375], [0.96142578125], [0.98583984375], [0.98388671875]], "btmsngnqhv": [[0.98486328125], [0.98291015625], [0.98486328125], [0.984375], [0.9853515625], [0.9814453125], [0.98486328125], [0.986328125], [0.98486328125], [0.98486328125], [0.98388671875], [0.984375], [0.984375], [0.98291015625], [0.98388671875]], "ehfiekigla": [[0.984375], [0.97998046875], [0.97607421875], [0.9833984375], [0.98095703125], [0.98291015625], [0.982421875], [0.982421875], [0.982421875], [0.984375], [0.982421875], [0.9775390625], [0.98388671875], [0.98193359375], [0.98193359375]], "awnwkrqibf": [[0.98388671875], [0.9814453125], [0.982421875], [0.98388671875], [0.9833984375], [0.98193359375], [0.98291015625], [0.984375], [0.98193359375], [0.9833984375], [0.98388671875], [0.98486328125], [0.9833984375], [0.9853515625], [0.98583984375]], "eqvuznuwsa": [[0.9814453125], [0.9853515625], [0.84912109375], [0.98193359375], [0.98681640625], [0.9814453125], [0.98486328125], [0.98095703125], [0.984375], [0.9814453125], [0.9853515625], [0.98291015625], [0.98193359375], [0.9853515625], [0.98681640625], [0.97900390625]], "cizlkenljw": [[0.9853515625], [0.97998046875], [0.982421875], [0.98388671875], [0.92626953125], [0.98486328125], [0.97998046875], [0.9736328125], [0.97900390625], [0.97216796875], [0.98095703125], [0.986328125], [0.982421875], [0.9853515625], [0.984375]], "ahbweevwpv": [[0.93603515625], [0.89892578125], [0.98486328125], [0.96728515625], [0.98388671875], [0.98095703125], [0.97412109375], [0.95068359375], [0.75732421875], [0.85107421875], [0.94921875], [0.377197265625], [0.97216796875], [0.90771484375], [0.98193359375]], "ehccixxzoe": [[0.978515625], [0.9833984375], [0.9765625], [0.9853515625], [0.98193359375], [0.9814453125], [0.9755859375], [0.9755859375], [0.98193359375], [0.984375], [0.98486328125], [0.98046875], [0.9814453125], [0.984375], [0.97900390625], [0.98388671875], [0.9814453125], [0.98046875], [0.9814453125], [0.97607421875], [0.9765625], [0.951171875], [0.9658203125], [0.98486328125], [0.9609375], [0.8681640625]], "duycddgtrl": [[0.96875], [0.97412109375], [0.939453125], [0.97607421875], [0.88623046875], [0.97509765625], [0.98486328125], [0.95654296875], [0.7041015625], [0.96923828125], [0.96435546875], [0.974609375], [0.96240234375], [0.9736328125], [0.97705078125]], "eqjscdagiv": [[0.9833984375], [0.984375], [0.98291015625], [0.9775390625], [0.974609375], [0.98095703125], [0.97705078125], [0.9814453125], [0.9775390625], [0.98291015625], [0.96142578125], [0.9814453125], [0.951171875], [0.97412109375], [0.98193359375]], "bwipwzzxxu": [[0.9580078125], [0.97900390625], [0.97802734375], [0.98193359375], [0.93115234375], [0.97119140625], [0.97509765625], [0.97900390625], [0.97021484375], [0.974609375], [0.98193359375], [0.9775390625], [0.9736328125], [0.90478515625], [0.91943359375]], "asmpfjfzif": [[0.98046875], [0.97705078125], [0.982421875], [0.98291015625], [0.98193359375], [0.96875], [0.974609375], [0.97998046875], [0.98095703125], [0.98095703125], [0.97998046875], [0.97900390625], [0.98095703125], [0.9814453125], [0.97021484375]], "ekhacizpah": [[0.98779296875], [0.97802734375], [0.9794921875], [0.97998046875], [0.9736328125], [0.962890625], [0.9501953125], [0.97119140625], [0.98388671875], [0.97900390625], [0.978515625], [0.9775390625], 
[0.96630859375], [0.95556640625], [0.97705078125]], "btjlfpzbdu": [[0.986328125], [0.98681640625], [0.984375], [0.9853515625], [0.9873046875], [0.98779296875], [0.98583984375], [0.98681640625], [0.98681640625], [0.986328125], [0.98486328125], [0.98291015625], [0.984375], [0.98779296875], [0.9853515625]], "brwrlczjvi": [[0.9833984375], [0.95654296875], [0.97900390625], [0.982421875], [0.98095703125], [0.9794921875], [0.97705078125], [0.9677734375], [0.9814453125], [0.98193359375], [0.98095703125], [0.98046875], [0.9833984375], [0.9580078125], [0.98193359375], [0.98291015625], [0.984375], [0.97021484375], [0.9775390625], [0.94384765625], [0.98291015625], [0.94384765625], [0.982421875], [0.9814453125], [0.97998046875], [0.9814453125], [0.984375], [0.974609375], [0.9794921875], [0.97314453125]], "bxzakyopjf": [[0.9833984375], [0.9833984375], [0.9814453125], [0.984375], [0.98193359375], [0.97998046875], [0.9833984375], [0.9853515625], [0.98046875], [0.984375], [0.98388671875], [0.98046875], [0.98486328125], [0.98388671875], [0.9833984375]], "cppdvdejkc": [[0.94091796875], [0.9560546875], [0.92529296875], [0.96728515625], [0.98388671875], [0.98193359375], [0.95458984375], [0.9736328125], [0.953125], [0.96826171875], [0.96826171875], [0.98291015625], [0.982421875], [0.9716796875], [0.9794921875]], "edyncaijwx": [[0.98291015625], [0.98046875], [0.98193359375], [0.927734375], [0.97119140625], [0.97998046875], [0.9755859375], [0.97998046875], [0.98095703125], [0.96533203125], [0.97412109375], [0.98193359375], [0.984375], [0.982421875], [0.97021484375], [0.97998046875], [0.982421875], [0.9765625], [0.982421875], [0.97998046875], [0.9794921875], [0.98046875], [0.98095703125], [0.98095703125], [0.98193359375], [0.98193359375], [0.984375], [0.98388671875], [0.98388671875], [0.9794921875]], "bffwsjxghk": [[0.98046875], [0.98046875], [0.98291015625], [0.9814453125], [0.98388671875], [0.98193359375], [0.98291015625], [0.982421875], [0.9833984375], [0.9814453125], [0.98046875], [0.97998046875], [0.9814453125], [0.97998046875], [0.9794921875]], "cxrfacemmq": [[0.96337890625], [0.97802734375], [0.982421875], [0.98583984375], [0.98046875], [0.982421875], [0.98095703125], [0.98291015625], [0.9765625], [0.9814453125], [0.9814453125], [0.9775390625], [0.97509765625], [0.978515625], [0.97802734375]], "dbzpcjntve": [[0.98828125], [0.986328125], [0.98828125], [0.98583984375], [0.98779296875], [0.9873046875], [0.98779296875], [0.98681640625], [0.98876953125], [0.98583984375], [0.98486328125], [0.98486328125], [0.984375], [0.98974609375], [0.98388671875]], "agqphdxmwt": [[0.9814453125], [0.9814453125], [0.98291015625], [0.98046875], [0.97802734375], [0.98193359375], [0.9814453125], [0.9765625], [0.98046875], [0.982421875], [0.98193359375], [0.982421875], [0.982421875], [0.98046875], [0.98046875]], "dtocdfbwca": [[0.98583984375], [0.982421875], [0.98583984375], [0.9853515625], [0.982421875], [0.98583984375], [0.9853515625], [0.98291015625], [0.98486328125], [0.986328125], [0.9794921875], [0.98583984375], [0.98583984375], [0.98486328125], [0.98388671875]], "bdnaqemxmr": [[0.982421875], [0.98291015625], [0.97998046875], [0.984375], [0.98486328125], [0.98388671875], [0.98095703125], [0.9814453125], [0.98046875], [0.9775390625], [0.98583984375], [0.9814453125], [0.97705078125], [0.98779296875], [0.98193359375]], "crezycjqyk": [[0.97900390625], [0.98193359375], [0.98291015625], [0.98486328125], [0.9833984375], [0.984375], [0.98388671875], [0.9833984375], [0.98388671875], [0.98388671875], [0.98388671875], [0.98388671875], 
[0.9833984375], [0.98291015625], [0.9833984375]], "ebchwmwayp": [[0.9794921875], [0.95166015625], [0.97998046875], [0.98583984375], [0.97119140625], [0.98095703125], [0.98291015625], [0.97998046875], [0.98095703125], [0.9833984375], [0.96240234375], [0.96875], [0.9794921875], [0.982421875], [0.96826171875], [0.97802734375], [0.984375], [0.98193359375], [0.98388671875], [0.984375], [0.9697265625], [0.966796875], [0.98193359375], [0.98095703125], [0.9814453125], [0.96337890625]], "caifxvsozs": [[0.98046875], [0.97998046875], [0.98291015625], [0.9775390625], [0.97900390625], [0.98291015625], [0.9794921875], [0.986328125], [0.98095703125], [0.96435546875], [0.98193359375], [0.97607421875], [0.98095703125], [0.96923828125], [0.98486328125], [0.98046875], [0.9775390625], [0.982421875], [0.98193359375], [0.9833984375], [0.984375], [0.98193359375], [0.97900390625], [0.978515625], [0.9853515625], [0.97509765625], [0.9755859375], [0.978515625], [0.98291015625], [0.98193359375]], "blzydqdfem": [[0.9853515625], [0.984375], [0.98388671875], [0.98486328125], [0.9716796875], [0.984375], [0.9833984375], [0.98291015625], [0.98291015625], [0.978515625], [0.98486328125], [0.984375], [0.9755859375], [0.98486328125], [0.982421875]], "bbhtdfuqxq": [[0.98193359375], [0.9794921875], [0.9853515625], [0.98388671875], [0.98681640625], [0.9833984375], [0.98388671875], [0.98388671875], [0.97998046875], [0.9833984375], [0.9853515625], [0.9853515625], [0.984375], [0.982421875], [0.9873046875]], "dlpoieqvfb": [[0.966796875], [0.98046875], [0.98046875], [0.9794921875], [0.97998046875], [0.98193359375], [0.9755859375], [0.9736328125], [0.982421875], [0.98095703125], [0.982421875], [0.97900390625], [0.97119140625], [0.978515625], [0.986328125]], "cknyxaqouy": [[0.982421875], [0.98193359375], [0.98388671875], [0.98291015625], [0.9833984375], [0.9833984375], [0.98291015625], [0.98291015625], [0.98193359375], [0.98291015625], [0.9833984375], [0.9833984375], [0.982421875], [0.982421875], [0.98095703125]], "egghxjjmfg": [[0.98291015625], [0.9833984375], [0.9814453125], [0.98388671875], [0.9814453125], [0.9873046875], [0.97998046875], [0.9853515625], [0.98388671875], [0.97900390625], [0.98193359375], [0.9853515625], [0.9765625], [0.97998046875], [0.98291015625]], "cksanfsjhc": [[0.98681640625], [0.986328125], [0.98486328125], [0.9853515625], [0.98486328125], [0.98779296875], [0.984375], [0.98486328125], [0.9853515625], [0.986328125], [0.9873046875], [0.98583984375], [0.986328125], [0.986328125], [0.986328125]], "etohcvnzbj": [[0.96484375], [0.970703125], [0.8544921875], [0.9482421875], [0.87158203125], [0.62939453125], [0.97802734375], [0.97314453125], [0.9609375], [0.970703125], [0.966796875], [0.9560546875], [0.79638671875], [0.92919921875], [0.71240234375]], "bqqpbzjgup": [[0.9814453125], [0.98388671875], [0.98388671875], [0.98291015625], [0.98291015625], [0.9814453125], [0.98193359375], [0.98291015625], [0.98583984375], [0.98193359375], [0.98193359375], [0.982421875], [0.9833984375], [0.98193359375], [0.982421875], [0.9833984375]], "czfunozvwp": [[0.98681640625], [0.98388671875], [0.982421875], [0.98291015625], [0.9833984375], [0.98486328125], [0.96826171875], [0.982421875], [0.9853515625], [0.98486328125], [0.98095703125], [0.98681640625], [0.98291015625], [0.97705078125], [0.98486328125], [0.98291015625], [0.98388671875], [0.98193359375], [0.98291015625], [0.98486328125], [0.98193359375], [0.98486328125], [0.98291015625], [0.984375], [0.98291015625], [0.97900390625], [0.9833984375], [0.986328125], [0.982421875], 
[0.9833984375]], "dsdoseflas": [[0.97998046875], [0.98046875], [0.98193359375], [0.98095703125], [0.9814453125], [0.95751953125], [0.97705078125], [0.966796875], [0.97998046875], [0.9794921875], [0.97900390625], [0.9833984375], [0.9814453125], [0.98095703125], [0.98095703125]], "bvzjkezkms": [[0.9873046875], [0.98486328125], [0.98583984375], [0.9814453125], [0.9853515625], [0.98193359375], [0.97998046875], [0.9873046875], [0.98046875], [0.9736328125], [0.9833984375], [0.98583984375], [0.98486328125], [0.9892578125], [0.986328125]], "dbnygxtwek": [[0.97705078125], [0.98095703125], [0.96875], [0.9677734375], [0.96875], [0.94921875], [0.9794921875], [0.9765625], [0.97998046875], [0.9482421875], [0.94140625], [0.9794921875], [0.96923828125], [0.9814453125], [0.97900390625]], "dnyvfblxpm": [[0.97705078125], [0.98583984375], [0.984375], [0.98486328125], [0.98388671875], [0.982421875], [0.9833984375], [0.98388671875], [0.98583984375], [0.9853515625], [0.984375], [0.9833984375], [0.98095703125], [0.9853515625], [0.986328125]], "coadfnerlk": [[0.97265625], [0.98291015625], [0.982421875], [0.9814453125], [0.96337890625], [0.98291015625], [0.98291015625], [0.98291015625], [0.98291015625], [0.982421875], [0.98095703125], [0.9853515625], [0.9443359375], [0.98291015625], [0.98388671875]], "afoovlsmtx": [[0.97412109375], [0.98193359375], [0.93896484375], [0.97509765625], [0.9814453125], [0.96826171875], [0.9677734375], [0.97802734375], [0.9794921875], [0.98193359375], [0.97509765625], [0.97607421875], [0.9521484375], [0.982421875], [0.9794921875]], "dhevettufk": [[0.984375], [0.98388671875], [0.98095703125], [0.98193359375], [0.98095703125], [0.98193359375], [0.98388671875], [0.98486328125], [0.978515625], [0.98291015625], [0.98193359375], [0.98193359375], [0.97607421875], [0.98193359375], [0.9814453125]], "chtapglbcj": [[0.98291015625], [0.982421875], [0.98291015625], [0.98486328125], [0.98291015625], [0.9853515625], [0.9775390625], [0.9833984375], [0.97314453125], [0.9755859375], [0.97998046875], [0.98388671875], [0.97900390625], [0.98095703125], [0.97705078125]], "cpjxareypw": [[0.98193359375], [0.9814453125], [0.98193359375], [0.9853515625], [0.9853515625], [0.9658203125], [0.984375], [0.98193359375], [0.98193359375], [0.97265625], [0.98291015625], [0.97998046875], [0.98486328125], [0.97509765625], [0.9853515625]], "bourlmzsio": [[0.98095703125], [0.98388671875], [0.98486328125], [0.9814453125], [0.97998046875], [0.98486328125], [0.98291015625], [0.98291015625], [0.98486328125], [0.98291015625], [0.98291015625], [0.97900390625], [0.97900390625], [0.97998046875], [0.98291015625]], "cmbzllswnl": [[0.98388671875], [0.986328125], [0.9775390625], [0.85595703125], [0.984375], [0.984375], [0.9833984375], [0.9833984375], [0.98291015625], [0.96923828125], [0.98583984375], [0.9755859375], [0.9853515625], [0.9853515625], [0.9814453125], [0.9833984375]], "azsmewqghg": [[0.982421875], [0.9794921875], [0.98291015625], [0.9833984375], [0.982421875], [0.98486328125], [0.98193359375], [0.98095703125], [0.98095703125], [0.98095703125], [0.9833984375], [0.98095703125], [0.9814453125], [0.98193359375], [0.9814453125], [0.98291015625]], "drtbksnpol": [[0.98095703125], [0.97998046875], [0.97998046875], [0.97998046875], [0.9814453125], [0.974609375], [0.9794921875], [0.9814453125], [0.94921875], [0.97998046875], [0.97900390625], [0.98046875], [0.978515625], [0.98046875], [0.9775390625]], "ehtdtkmmli": [[0.98681640625], [0.98486328125], [0.98193359375], [0.984375], [0.984375], [0.9833984375], [0.97998046875], [0.982421875], 
[0.98388671875], [0.9853515625], [0.98486328125], [0.984375], [0.98291015625], [0.982421875], [0.9765625]], "atkdltyyen": [[0.9833984375], [0.978515625], [0.97607421875], [0.984375], [0.98095703125], [0.9794921875], [0.98193359375], [0.97802734375], [0.98291015625], [0.98388671875], [0.982421875], [0.98291015625], [0.98291015625], [0.98291015625], [0.9833984375]], "dzvyfiarrq": [[0.9833984375], [0.98291015625], [0.982421875], [0.9853515625], [0.98388671875], [0.9873046875], [0.984375], [0.97607421875], [0.9814453125], [0.98486328125], [0.9814453125], [0.984375], [0.98388671875], [0.9814453125], [0.984375]], "dkuayagnmc": [[0.9794921875], [0.97607421875], [0.978515625], [0.97802734375], [0.97412109375], [0.98193359375], [0.9794921875], [0.9619140625], [0.98095703125], [0.98095703125], [0.9814453125], [0.97607421875], [0.970703125], [0.9765625], [0.982421875]], "adylbeequz": [[0.94189453125], [0.9716796875], [0.94921875], [0.978515625], [0.970703125], [0.96923828125], [0.97900390625], [0.97607421875], [0.96875], [0.98046875], [0.873046875], [0.97705078125], [0.98291015625], [0.966796875], [0.97412109375]], "djxdyjopjd": [[0.97412109375], [0.98388671875], [0.98291015625], [0.97998046875], [0.982421875], [0.9775390625], [0.96142578125], [0.98291015625], [0.9736328125], [0.9833984375], [0.98046875], [0.98291015625], [0.98095703125], [0.9794921875], [0.97412109375], [0.97998046875]], "bejhvclboh": [[0.96875], [0.96875], [0.95263671875], [0.9814453125], [0.986328125], [0.9677734375], [0.9833984375], [0.9765625], [0.97265625], [0.94384765625], [0.98046875], [0.97802734375], [0.9775390625], [0.98388671875], [0.96630859375], [0.98193359375]], "dfbpceeaox": [[0.9853515625], [0.98388671875], [0.984375], [0.9814453125], [0.9833984375], [0.98291015625], [0.9873046875], [0.98291015625], [0.98291015625], [0.982421875], [0.98046875], [0.98095703125], [0.9833984375], [0.98291015625], [0.984375]], "ehdkmxgtxh": [[0.98046875], [0.98486328125], [0.984375], [0.9765625], [0.97509765625], [0.978515625], [0.98291015625], [0.92041015625], [0.98046875], [0.9677734375], [0.984375], [0.98193359375], [0.984375], [0.98388671875], [0.95361328125], [0.9775390625]], "cyxlcuyznd": [[0.97265625], [0.98095703125], [0.97119140625], [0.98193359375], [0.984375], [0.984375], [0.9775390625], [0.9794921875], [0.9697265625], [0.986328125], [0.97607421875], [0.9794921875], [0.9814453125], [0.97314453125], [0.9853515625]], "acqfdwsrhi": [[0.95556640625], [0.982421875], [0.98095703125], [0.97802734375], [0.97119140625], [0.98291015625], [0.98388671875], [0.9521484375], [0.97998046875], [0.986328125], [0.97265625], [0.98193359375], [0.95166015625], [0.9736328125], [0.9833984375]], "alaijyygdv": [[0.986328125], [0.982421875], [0.9775390625], [0.98388671875], [0.974609375], [0.98095703125], [0.9833984375], [0.97998046875], [0.9814453125], [0.9853515625], [0.9814453125], [0.984375], [0.97900390625], [0.98583984375], [0.9794921875]], "ccfoszqabv": [[0.97705078125], [0.97900390625], [0.982421875], [0.9755859375], [0.98388671875], [0.96142578125], [0.9814453125], [0.96533203125], [0.9619140625], [0.9345703125], [0.8916015625], [0.96435546875], [0.9765625], [0.98291015625], [0.95654296875]]}, "targets": {"avmjormvsx": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "curpwogllm": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "aorjvbyxhw": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], 
[1.0], [1.0], [1.0]], "cprhtltsjp": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "avnqydkqjj": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bulkxhhknf": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "ekcrtigpab": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dhkwmjxwrn": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "apatcsqejh": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dzyuwjkjui": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "emaalmsonj": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bgwmmujlmc": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "eckvhdusax": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "ajwpjhrbcv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "efwfxwwlbw": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "emfbhytfhc": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bnjcdrfuov": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "drcyabprvt": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cdphtzqrvp": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ellavthztb": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dbtbbhakdv": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "btohlidmru": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bzythlfnhq": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "etmcruaihe": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bsqgziaylx": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bpxckdzddv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dakiztgtnw": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dgxrqjdomn": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "aytzyidmgs": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "eepezmygaq": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], 
[1.0], [1.0], [1.0]], "bofqajtwve": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "abarnvbtwb": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "btmsngnqhv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ehfiekigla": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "awnwkrqibf": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "eqvuznuwsa": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "cizlkenljw": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "ahbweevwpv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ehccixxzoe": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "duycddgtrl": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "eqjscdagiv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bwipwzzxxu": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "asmpfjfzif": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ekhacizpah": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "btjlfpzbdu": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "brwrlczjvi": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "bxzakyopjf": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cppdvdejkc": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "edyncaijwx": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "bffwsjxghk": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cxrfacemmq": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dbzpcjntve": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "agqphdxmwt": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dtocdfbwca": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bdnaqemxmr": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "crezycjqyk": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "ebchwmwayp": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], 
[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "caifxvsozs": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "blzydqdfem": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bbhtdfuqxq": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dlpoieqvfb": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cknyxaqouy": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "egghxjjmfg": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cksanfsjhc": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "etohcvnzbj": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bqqpbzjgup": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "czfunozvwp": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dsdoseflas": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "bvzjkezkms": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dbnygxtwek": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dnyvfblxpm": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "coadfnerlk": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "afoovlsmtx": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dhevettufk": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "chtapglbcj": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "cpjxareypw": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "bourlmzsio": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "cmbzllswnl": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "azsmewqghg": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "drtbksnpol": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ehtdtkmmli": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "atkdltyyen": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dzvyfiarrq": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "dkuayagnmc": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], 
[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "adylbeequz": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "djxdyjopjd": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "bejhvclboh": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "dfbpceeaox": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ehdkmxgtxh": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "cyxlcuyznd": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], "acqfdwsrhi": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "alaijyygdv": [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], "ccfoszqabv": [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]}} \ No newline at end of file diff --git a/preprocess_data.sh b/preprocess_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..ad5606170193c5834024d3fa91cec9c2b204ccbe --- /dev/null +++ b/preprocess_data.sh @@ -0,0 +1,15 @@ +DATA_ROOT=$1 +echo "Extracting bounding boxes from original videos" +PYTHONPATH=. python preprocessing/detect_original_faces.py --root-dir $DATA_ROOT + +echo "Extracting crops as pngs" +PYTHONPATH=. python preprocessing/extract_crops.py --root-dir $DATA_ROOT --crops-dir crops + +echo "Extracting landmarks" +PYTHONPATH=. python preprocessing/generate_landmarks.py --root-dir $DATA_ROOT + +echo "Extracting SSIM masks" +PYTHONPATH=. python preprocessing/generate_diffs.py --root-dir $DATA_ROOT + +echo "Generate folds" +PYTHONPATH=. 
python preprocessing/generate_folds.py --root-dir $DATA_ROOT --out folds.csv
\ No newline at end of file
diff --git a/preprocessing/__init__.py b/preprocessing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a21a707f7e4361a8b93f99ed12ed80e785bcf2ab
--- /dev/null
+++ b/preprocessing/__init__.py
@@ -0,0 +1 @@
+from .face_detector import *
\ No newline at end of file
diff --git a/preprocessing/__pycache__/face_detector.cpython-39.pyc b/preprocessing/__pycache__/face_detector.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..950a9b423b9d8dadf974ee33a141c45859ce71f3
Binary files /dev/null and b/preprocessing/__pycache__/face_detector.cpython-39.pyc differ
diff --git a/preprocessing/__pycache__/utils.cpython-39.pyc b/preprocessing/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..32e4c880267d55ecd4a2407325137990ad80c495
Binary files /dev/null and b/preprocessing/__pycache__/utils.cpython-39.pyc differ
diff --git a/preprocessing/compress_videos.py b/preprocessing/compress_videos.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bc1bc367bcf356405e9bae25913765b8b6c9891
--- /dev/null
+++ b/preprocessing/compress_videos.py
@@ -0,0 +1,45 @@
+import argparse
+import os
+import random
+import subprocess
+
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["NUMEXPR_NUM_THREADS"] = "1"
+os.environ["OMP_NUM_THREADS"] = "1"
+from functools import partial
+from glob import glob
+from multiprocessing.pool import Pool
+from os import cpu_count
+
+import cv2
+
+cv2.ocl.setUseOpenCL(False)
+cv2.setNumThreads(0)
+from tqdm import tqdm
+
+
+def compress_video(video, root_dir):
+    parent_dir = video.split("/")[-2]
+    out_dir = os.path.join(root_dir, "compressed", parent_dir)
+    os.makedirs(out_dir, exist_ok=True)
+    video_name = video.split("/")[-1]
+    out_path = os.path.join(out_dir, video_name)
+    lvl = random.choice([23, 28, 32])
+    command = "ffmpeg -i {} -c:v libx264 -crf {} -threads 1 {}".format(video, lvl, out_path)
+    try:
+        subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
+    except Exception as e:
+        print("Could not process video", str(e))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Compresses videos with libx264 at random CRF levels")
+    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")
+
+    args = parser.parse_args()
+    videos = [video_path for video_path in glob(os.path.join(args.root_dir, "*/*.mp4"))]
+    with Pool(processes=cpu_count() - 2) as p:
+        with tqdm(total=len(videos)) as pbar:
+            for v in p.imap_unordered(partial(compress_video, root_dir=args.root_dir), videos):
+                pbar.update()
diff --git a/preprocessing/detect_original_faces.py b/preprocessing/detect_original_faces.py
new file mode 100644
index 0000000000000000000000000000000000000000..b16904ef6909f0e230bd2047659f3e79e4a23f44
--- /dev/null
+++ b/preprocessing/detect_original_faces.py
@@ -0,0 +1,63 @@
+import argparse
+import json
+import os
+from os import cpu_count
+from typing import Type
+import dill
+from torch.utils.data.dataloader import DataLoader
+from tqdm import tqdm
+
+# import face_detector, VideoDataset
+import face_detector
+from face_detector import VideoDataset
+from face_detector import VideoFaceDetector
+from utils import get_original_video_paths
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Process original videos with the face detector")
+    parser.add_argument("--root-dir", help="root directory")
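+    # FacenetDetector (MTCNN from facenet-pytorch) is the only concrete detector implemented in
+    # face_detector.py, hence the single entry in the choices list below.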
+ parser.add_argument("--detector-type", help="type of the detector", default="FacenetDetector", + choices=["FacenetDetector"]) + + args = parser.parse_args() + return args + +def collate_fn(batch) -> tuple: + return tuple(zip(*batch)) + +def process_videos(videos, root_dir, detector_cls: Type[VideoFaceDetector]): + detector = face_detector.__dict__[detector_cls](device="cuda:0") + dataset = VideoDataset(videos) + # loader = DataLoader(dataset, shuffle=False, num_workers=cpu_count() - 2, batch_size=1, collate_fn=lambda x: x) + # loader = DataLoader(dataset, shuffle=False, num_workers=1, batch_size=1, collate_fn=lambda x: x) + # loader = DataLoader(dataset, shuffle=False, num_workers=1, batch_size=1) + loader = DataLoader(dataset, shuffle=False, num_workers=4, batch_size=1, collate_fn=collate_fn) + for item in tqdm(loader): + result = {} + # video, indices, frames = item[0] + video, indices, frames = item + video = video[0] + indices = indices[0] + frames = frames[0] + batches = [frames[i:i + detector._batch_size] for i in range(0, len(frames), detector._batch_size)] + for j, frames in enumerate(batches): + result.update({int(j * detector._batch_size) + i : b for i, b in zip(indices, detector._detect_faces(frames))}) + id = os.path.splitext(os.path.basename(video))[0] + out_dir = os.path.join(root_dir, "boxes") + os.makedirs(out_dir, exist_ok=True) + with open(os.path.join(out_dir, "{}.json".format(id)), "w") as f: + json.dump(result, f) + + + + +def main(): + args = parse_args() + originals = get_original_video_paths(args.root_dir) + process_videos(originals, args.root_dir, args.detector_type) + z = 2 + +if __name__ == "__main__": + main() diff --git a/preprocessing/extract_crops.py b/preprocessing/extract_crops.py new file mode 100644 index 0000000000000000000000000000000000000000..f248a34931efef4b333654fc9c91bd0d352a85f3 --- /dev/null +++ b/preprocessing/extract_crops.py @@ -0,0 +1,88 @@ +import argparse +import json +import os +from os import cpu_count +from pathlib import Path + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" +from functools import partial +from glob import glob +from multiprocessing.pool import Pool + +import cv2 + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) +from tqdm import tqdm + + +def extract_video(param, root_dir, crops_dir): + video, bboxes_path = param + with open(bboxes_path, "r") as bbox_f: + bboxes_dict = json.load(bbox_f) + + capture = cv2.VideoCapture(video) + frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + + for i in range(frames_num): + capture.grab() + if i % 10 != 0: + continue + success, frame = capture.retrieve() + if not success or str(i) not in bboxes_dict: + continue + id = os.path.splitext(os.path.basename(video))[0] + crops = [] + bboxes = bboxes_dict[str(i)] + if bboxes is None: + continue + for bbox in bboxes: + xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox] + w = xmax - xmin + h = ymax - ymin + p_h = h // 3 + p_w = w // 3 + crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w] + h, w = crop.shape[:2] + crops.append(crop) + img_dir = os.path.join(root_dir, crops_dir, id) + os.makedirs(img_dir, exist_ok=True) + for j, crop in enumerate(crops): + cv2.imwrite(os.path.join(img_dir, "{}_{}.png".format(i, j)), crop) + + +def get_video_paths(root_dir): + paths = [] + for json_path in glob(os.path.join(root_dir, "*/metadata.json")): + dir = Path(json_path).parent + with open(json_path, "r") as f: + metadata = json.load(f) + for k, v in 
metadata.items(): + original = v.get("original", None) + if not original: + original = k + bboxes_path = os.path.join(root_dir, "boxes", original[:-4] + ".json") + if not os.path.exists(bboxes_path): + continue + paths.append((os.path.join(dir, k), bboxes_path)) + + return paths + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Extracts crops from video") + parser.add_argument("--root-dir", help="root directory") + parser.add_argument("--crops-dir", help="crops directory") + + args = parser.parse_args() + os.makedirs(os.path.join(args.root_dir, args.crops_dir), exist_ok=True) + params = get_video_paths(args.root_dir) + with Pool(processes=cpu_count()) as p: + with tqdm(total=len(params)) as pbar: + for v in p.imap_unordered(partial(extract_video, root_dir=args.root_dir, crops_dir=args.crops_dir), params): + pbar.update() + z = 2 + diff --git a/preprocessing/extract_images.py b/preprocessing/extract_images.py new file mode 100644 index 0000000000000000000000000000000000000000..52857c294e7a84a56357b0b69f0f02dbc8f52dd2 --- /dev/null +++ b/preprocessing/extract_images.py @@ -0,0 +1,42 @@ +import argparse +import os +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" +from functools import partial +from glob import glob +from multiprocessing.pool import Pool +from os import cpu_count + +import cv2 +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) +from tqdm import tqdm + + +def extract_video(video, root_dir): + capture = cv2.VideoCapture(video) + frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + + for i in range(frames_num): + capture.grab() + success, frame = capture.retrieve() + if not success: + continue + id = os.path.splitext(os.path.basename(video))[0] + cv2.imwrite(os.path.join(root_dir, "jpegs", "{}_{}.jpg".format(id, i)), frame, [cv2.IMWRITE_JPEG_QUALITY, 100]) + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Extracts jpegs from video") + parser.add_argument("--root-dir", help="root directory") + + args = parser.parse_args() + os.makedirs(os.path.join(args.root_dir, "jpegs"), exist_ok=True) + videos = [video_path for video_path in glob(os.path.join(args.root_dir, "*/*.mp4"))] + with Pool(processes=cpu_count() - 2) as p: + with tqdm(total=len(videos)) as pbar: + for v in p.imap_unordered(partial(extract_video, root_dir=args.root_dir), videos): + pbar.update() diff --git a/preprocessing/face_detector.py b/preprocessing/face_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..070b51a686ac2ce4edc676f2d7f546bd2f7941dd --- /dev/null +++ b/preprocessing/face_detector.py @@ -0,0 +1,72 @@ +import os +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" + +from abc import ABC, abstractmethod +from collections import OrderedDict +from typing import List + +import cv2 +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) + +from PIL import Image +from facenet_pytorch.models.mtcnn import MTCNN +from torch.utils.data import Dataset + + +class VideoFaceDetector(ABC): + + def __init__(self, **kwargs) -> None: + super().__init__() + + @property + @abstractmethod + def _batch_size(self) -> int: + pass + + @abstractmethod + def _detect_faces(self, frames) -> List: + pass + + +class FacenetDetector(VideoFaceDetector): + + def __init__(self, device="cuda:0") -> None: + super().__init__() + self.detector = MTCNN(margin=0,thresholds=[0.85, 0.95, 0.95], device=device) + + def 
_detect_faces(self, frames) -> List: + batch_boxes, *_ = self.detector.detect(frames, landmarks=False) + return [b.tolist() if b is not None else None for b in batch_boxes] + + @property + def _batch_size(self): + return 32 + + +class VideoDataset(Dataset): + + def __init__(self, videos) -> None: + super().__init__() + self.videos = videos + + def __getitem__(self, index: int): + video = self.videos[index] + capture = cv2.VideoCapture(video) + frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + frames = OrderedDict() + for i in range(frames_num): + capture.grab() + success, frame = capture.retrieve() + if not success: + continue + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = Image.fromarray(frame) + frame = frame.resize(size=[s // 2 for s in frame.size]) + frames[i] = frame + return video, list(frames.keys()), list(frames.values()) + + def __len__(self) -> int: + return len(self.videos) diff --git a/preprocessing/face_encodings.py b/preprocessing/face_encodings.py new file mode 100644 index 0000000000000000000000000000000000000000..39ea545d04e4f2e31d57ab29a3006598c61765e9 --- /dev/null +++ b/preprocessing/face_encodings.py @@ -0,0 +1,55 @@ +import argparse +import os +from functools import partial +from multiprocessing.pool import Pool + +from tqdm import tqdm + +from preprocessing.utils import get_original_video_paths + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" + +import random + +import face_recognition +import numpy as np + + +def write_face_encodings(video, root_dir): + video_id, *_ = os.path.splitext(video) + crops_dir = os.path.join(root_dir, "crops", video_id) + if not os.path.exists(crops_dir): + return + crop_files = [f for f in os.listdir(crops_dir) if f.endswith("jpg")] + if crop_files: + crop_files = random.sample(crop_files, min(10, len(crop_files))) + encodings = [] + for crop_file in crop_files: + img = face_recognition.load_image_file(os.path.join(crops_dir, crop_file)) + encoding = face_recognition.face_encodings(img, num_jitters=10) + if encoding: + encodings.append(encoding[0]) + np.save(os.path.join(crops_dir, "encodings"), encodings) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Extract 10 crops encodings for each video") + parser.add_argument("--root-dir", help="root directory", default="/home/selim/datasets/deepfake") + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + originals = get_original_video_paths(args.root_dir, basename=True) + with Pool(processes=os.cpu_count() - 4) as p: + with tqdm(total=len(originals)) as pbar: + for v in p.imap_unordered(partial(write_face_encodings, root_dir=args.root_dir), originals): + pbar.update() + + +if __name__ == '__main__': + main() diff --git a/preprocessing/generate_diffs.py b/preprocessing/generate_diffs.py new file mode 100644 index 0000000000000000000000000000000000000000..a57ab90a18501c2ecc0b6452f7c3e9910f62e19d --- /dev/null +++ b/preprocessing/generate_diffs.py @@ -0,0 +1,75 @@ +import argparse +import os + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" +# from skimage.measure import compare_ssim +from skimage.metrics import structural_similarity as ssim + +from functools import partial +from multiprocessing.pool import Pool + +from tqdm import tqdm + +from utils import get_original_with_fakes + +import cv2 + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) + +import numpy as np + +cache = {} + + +def 
save_diffs(pair, root_dir): + ori_id, fake_id = pair + ori_dir = os.path.join(root_dir, "crops", ori_id) + fake_dir = os.path.join(root_dir, "crops", fake_id) + diff_dir = os.path.join(root_dir, "diffs", fake_id) + os.makedirs(diff_dir, exist_ok=True) + for frame in range(320): + if frame % 10 != 0: + continue + for actor in range(2): + image_id = "{}_{}.png".format(frame, actor) + diff_image_id = "{}_{}_diff.png".format(frame, actor) + ori_path = os.path.join(ori_dir, image_id) + fake_path = os.path.join(fake_dir, image_id) + diff_path = os.path.join(diff_dir, diff_image_id) + if os.path.exists(ori_path) and os.path.exists(fake_path): + img1 = cv2.imread(ori_path, cv2.IMREAD_COLOR) + img2 = cv2.imread(fake_path, cv2.IMREAD_COLOR) + try: + d, a = ssim(img1, img2, multichannel=True, full=True) + a = 1 - a + diff = (a * 255).astype(np.uint8) + diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY) + cv2.imwrite(diff_path, diff) + except: + pass + +def parse_args(): + parser = argparse.ArgumentParser( + description="Extract image diffs") + parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake") + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + pairs = get_original_with_fakes(args.root_dir) + os.makedirs(os.path.join(args.root_dir, "diffs"), exist_ok=True) + with Pool(processes=os.cpu_count() - 2) as p: + with tqdm(total=len(pairs)) as pbar: + func = partial(save_diffs, root_dir=args.root_dir) + for v in p.imap_unordered(func, pairs): + pbar.update() + + +if __name__ == '__main__': + main() + z=2 diff --git a/preprocessing/generate_folds.py b/preprocessing/generate_folds.py new file mode 100644 index 0000000000000000000000000000000000000000..d7e6c430465927c2f13631dbd81a246b110b7a4c --- /dev/null +++ b/preprocessing/generate_folds.py @@ -0,0 +1,117 @@ +import argparse +import json +import os +import random +from functools import partial +from multiprocessing.pool import Pool +from pathlib import Path + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" +import pandas as pd + +from tqdm import tqdm + +from utils import get_original_with_fakes + +import cv2 + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) + + +def get_paths(vid, label, root_dir): + ori_vid, fake_vid = vid + ori_dir = os.path.join(root_dir, "crops", ori_vid) + fake_dir = os.path.join(root_dir, "crops", fake_vid) + data = [] + for frame in range(320): + if frame % 10 != 0: + continue + for actor in range(2): + image_id = "{}_{}.png".format(frame, actor) + ori_img_path = os.path.join(ori_dir, image_id) + fake_img_path = os.path.join(fake_dir, image_id) + img_path = ori_img_path if label == 0 else fake_img_path + try: + # img = cv2.imread(img_path)[..., ::-1] + if os.path.exists(img_path): + data.append([img_path, label, ori_vid]) + except: + pass + return data + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Generate Folds") + parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake") + parser.add_argument("--out", type=str, default="folds02.csv", help="CSV file to save") + parser.add_argument("--seed", type=int, default=777, help="Seed to split, default 777") + parser.add_argument("--n_splits", type=int, default=2, help="Num folds, default 10") + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + ori_fakes = get_original_with_fakes(args.root_dir) + # sz = 50 // args.n_splits + sz = 2 // args.n_splits + folds 
= [] + for fold in range(args.n_splits): + # folds.append(list(range(sz * fold, sz * fold + sz if fold < args.n_splits - 1 else 50))) + folds.append(list(range(sz * fold, sz * fold + sz if fold < args.n_splits - 1 else 2))) + print(folds) + video_fold = {} + for d in os.listdir(args.root_dir): + if "dfdc" in d: + part = int(d.split("_")[-1]) + for f in os.listdir(os.path.join(args.root_dir, d)): + if "metadata.json" in f: + with open(os.path.join(args.root_dir, d, "metadata.json")) as metadata_json: + metadata = json.load(metadata_json) + + for k, v in metadata.items(): + fold = None + for i, fold_dirs in enumerate(folds): + if part in fold_dirs: + fold = i + break + assert fold is not None + video_id = k[:-4] + video_fold[video_id] = fold + for fold in range(len(folds)): + holdoutset = {k for k, v in video_fold.items() if v == fold} + trainset = {k for k, v in video_fold.items() if v != fold} + assert holdoutset.isdisjoint(trainset), "Folds have leaks" + data = [] + ori_ori = set([(ori, ori) for ori, fake in ori_fakes]) + with Pool(processes=os.cpu_count()) as p: + with tqdm(total=len(ori_ori)) as pbar: + func = partial(get_paths, label=0, root_dir=args.root_dir) + for v in p.imap_unordered(func, ori_ori): + pbar.update() + data.extend(v) + with tqdm(total=len(ori_fakes)) as pbar: + func = partial(get_paths, label=1, root_dir=args.root_dir) + for v in p.imap_unordered(func, ori_fakes): + pbar.update() + data.extend(v) + fold_data = [] + for img_path, label, ori_vid in data: + path = Path(img_path) + video = path.parent.name + file = path.name + assert video_fold[video] == video_fold[ori_vid], "original video and fake have leak {} {}".format(ori_vid, + video) + fold_data.append([video, file, label, ori_vid, int(file.split("_")[0]), video_fold[video]]) + random.shuffle(fold_data) + pd.DataFrame(fold_data, columns=["video", "file", "label", "original", "frame", "fold"]).to_csv(args.out, index=False) + + +if __name__ == '__main__': + main() + z = 2 \ No newline at end of file diff --git a/preprocessing/generate_landmarks.py b/preprocessing/generate_landmarks.py new file mode 100644 index 0000000000000000000000000000000000000000..7b29f5d82a67fd790f8516f65c73adb22acad24b --- /dev/null +++ b/preprocessing/generate_landmarks.py @@ -0,0 +1,76 @@ +import argparse +import os +from functools import partial +from multiprocessing.pool import Pool + + + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" + +from tqdm import tqdm + + +import cv2 + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) +from utils import get_original_video_paths + +from PIL import Image +from facenet_pytorch.models.mtcnn import MTCNN +import numpy as np + +detector = MTCNN(margin=0, thresholds=[0.65, 0.75, 0.75], device="cpu") + + +def save_landmarks(ori_id, root_dir): + ori_id = ori_id[:-4] + ori_dir = os.path.join(root_dir, "crops", ori_id) + landmark_dir = os.path.join(root_dir, "landmarks", ori_id) + os.makedirs(landmark_dir, exist_ok=True) + for frame in range(320): + if frame % 10 != 0: + continue + for actor in range(2): + image_id = "{}_{}.png".format(frame, actor) + landmarks_id = "{}_{}".format(frame, actor) + ori_path = os.path.join(ori_dir, image_id) + landmark_path = os.path.join(landmark_dir, landmarks_id) + + if os.path.exists(ori_path): + try: + image_ori = cv2.imread(ori_path, cv2.IMREAD_COLOR)[...,::-1] + frame_img = Image.fromarray(image_ori) + batch_boxes, conf, landmarks = detector.detect(frame_img, landmarks=True) + if landmarks is not None: 
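+                            # detect(..., landmarks=True) returns 5 facial keypoints per detected face;
+                            # keep only the first face, rounded to integer pixel coordinates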
+ landmarks = np.around(landmarks[0]).astype(np.int16) + np.save(landmark_path, landmarks) + except Exception as e: + print(e) + pass + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Extract image landmarks") + parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake") + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + ids = get_original_video_paths(args.root_dir, basename=True) + os.makedirs(os.path.join(args.root_dir, "landmarks"), exist_ok=True) + with Pool(processes=os.cpu_count()-8) as p: + with tqdm(total=len(ids)) as pbar: + func = partial(save_landmarks, root_dir=args.root_dir) + for v in p.imap_unordered(func, ids): + pbar.update() + + +if __name__ == '__main__': + main() + Z = 2 diff --git a/preprocessing/utils.py b/preprocessing/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e54ae3aac7516f7c11bdc1a3fbd75696c66cf20a --- /dev/null +++ b/preprocessing/utils.py @@ -0,0 +1,51 @@ +import json +import os +from glob import glob +from pathlib import Path + + +def get_original_video_paths(root_dir, basename=False): + originals = set() + originals_v = set() + for json_path in glob(os.path.join(root_dir, "*/metadata.json")): + dir = Path(json_path).parent + with open(json_path, "r") as f: + metadata = json.load(f) + for k, v in metadata.items(): + original = v.get("original", None) + if v["label"] == "REAL": + original = k + originals_v.add(original) + originals.add(os.path.join(dir, original)) + originals = list(originals) + originals_v = list(originals_v) + print(len(originals)) + return originals_v if basename else originals + + +def get_original_with_fakes(root_dir): + pairs = [] + for json_path in glob(os.path.join(root_dir, "*/metadata.json")): + with open(json_path, "r") as f: + metadata = json.load(f) + for k, v in metadata.items(): + original = v.get("original", None) + if v["label"] == "FAKE": + pairs.append((original[:-4], k[:-4] )) + + return pairs + + +def get_originals_and_fakes(root_dir): + originals = [] + fakes = [] + for json_path in glob(os.path.join(root_dir, "*/metadata.json")): + with open(json_path, "r") as f: + metadata = json.load(f) + for k, v in metadata.items(): + if v["label"] == "FAKE": + fakes.append(k[:-4]) + else: + originals.append(k[:-4]) + + return originals, fakes diff --git a/train.sh b/train.sh new file mode 100644 index 0000000000000000000000000000000000000000..e339ddb8bdc4045c60d0258eaa59f91b814b0d2f --- /dev/null +++ b/train.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +ROOT_DIR=$1 +NUM_GPUS=$2 +python -u -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port 9901 training/pipelines/train_classifier.py \ + --distributed --config configs/b7.json --freeze-epochs 0 --test_every 1 --opt-level O1 --label-smoothing 0.01 --folds-csv folds.csv --fold 0 --seed 111 --data-dir $ROOT_DIR --prefix b7_111_ > logs/b7_111 + +python -u -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port 9901 training/pipelines/train_classifier.py \ + --distributed --config configs/b7.json --freeze-epochs 0 --test_every 1 --opt-level O1 --label-smoothing 0.01 --folds-csv folds.csv --fold 0 --seed 555 --data-dir $ROOT_DIR --prefix b7_555_ > logs/b7_555 + +python -u -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port 9901 training/pipelines/train_classifier.py \ + --distributed --config configs/b7.json --freeze-epochs 0 --test_every 1 --opt-level O1 --label-smoothing 0.01 --folds-csv folds.csv --fold 0 --seed 777 --data-dir 
$ROOT_DIR --prefix b7_777_ > logs/b7_777 + +python -u -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port 9901 training/pipelines/train_classifier.py \ + --distributed --config configs/b7.json --freeze-epochs 0 --test_every 1 --opt-level O1 --label-smoothing 0.01 --folds-csv folds.csv --fold 0 --seed 888 --data-dir $ROOT_DIR --prefix b7_888_ > logs/b7_888 + +python -u -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port 9901 training/pipelines/train_classifier.py \ + --distributed --config configs/b7.json --freeze-epochs 0 --test_every 1 --opt-level O1 --label-smoothing 0.01 --folds-csv folds.csv --fold 0 --seed 999 --data-dir $ROOT_DIR --prefix b7_999_ > logs/b7_999 diff --git a/training/__init__.py b/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/__pycache__/__init__.cpython-39.pyc b/training/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8edb5d40a9d44afd0f703fcf560631fd1ae500ff Binary files /dev/null and b/training/__pycache__/__init__.cpython-39.pyc differ diff --git a/training/__pycache__/losses.cpython-39.pyc b/training/__pycache__/losses.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7323d8def02df88fb3f2cba525117fec0a10ed90 Binary files /dev/null and b/training/__pycache__/losses.cpython-39.pyc differ diff --git a/training/datasets/__init__.py b/training/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/datasets/__pycache__/__init__.cpython-39.pyc b/training/datasets/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20dd3d33092f6fe6ec4eeb3ea496950220dfb1a2 Binary files /dev/null and b/training/datasets/__pycache__/__init__.cpython-39.pyc differ diff --git a/training/datasets/__pycache__/classifier_dataset.cpython-39.pyc b/training/datasets/__pycache__/classifier_dataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d35fd82c387b75a280a314dd6d215e3df8c1afdd Binary files /dev/null and b/training/datasets/__pycache__/classifier_dataset.cpython-39.pyc differ diff --git a/training/datasets/__pycache__/validation_set.cpython-39.pyc b/training/datasets/__pycache__/validation_set.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..430641d0edc3a14df5554e9f30e1a6382fd84d3e Binary files /dev/null and b/training/datasets/__pycache__/validation_set.cpython-39.pyc differ diff --git a/training/datasets/classifier_dataset.py b/training/datasets/classifier_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..a5cd898761d26f04c63bc3c62f3990d25297017b --- /dev/null +++ b/training/datasets/classifier_dataset.py @@ -0,0 +1,379 @@ +import math +import os +import random +import sys +import traceback + +import cv2 +import numpy as np +import pandas as pd +import skimage.draw +from albumentations import ImageCompression, OneOf, GaussianBlur, Blur +from albumentations.augmentations.functional import image_compression +from albumentations.augmentations.geometric.functional import rot90 +from albumentations.pytorch.functional import img_to_tensor +from scipy.ndimage import binary_erosion, binary_dilation +from skimage import measure +from torch.utils.data import Dataset +import dlib + +from training.datasets.validation_set import 
PUBLIC_SET + + +def prepare_bit_masks(mask): + h, w = mask.shape + mid_w = w // 2 + mid_h = w // 2 + masks = [] + ones = np.ones_like(mask) + ones[:mid_h] = 0 + masks.append(ones) + ones = np.ones_like(mask) + ones[mid_h:] = 0 + masks.append(ones) + ones = np.ones_like(mask) + ones[:, :mid_w] = 0 + masks.append(ones) + ones = np.ones_like(mask) + ones[:, mid_w:] = 0 + masks.append(ones) + ones = np.ones_like(mask) + ones[:mid_h, :mid_w] = 0 + ones[mid_h:, mid_w:] = 0 + masks.append(ones) + ones = np.ones_like(mask) + ones[:mid_h, mid_w:] = 0 + ones[mid_h:, :mid_w] = 0 + masks.append(ones) + return masks + +sys.path.insert(1, 'D:\\University And Papers\\VESSL\\dfdc_deepfake_challenge') +detector = dlib.get_frontal_face_detector() +predictor = dlib.shape_predictor('D:\\University And Papers\\VESSL\\dfdc_deepfake_challenge/libs/shape_predictor_68_face_landmarks.dat') + + +def blackout_convex_hull(img): + try: + rect = detector(img)[0] + sp = predictor(img, rect) + landmarks = np.array([[p.x, p.y] for p in sp.parts()]) + outline = landmarks[[*range(17), *range(26, 16, -1)]] + Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0]) + cropped_img = np.zeros(img.shape[:2], dtype=np.uint8) + cropped_img[Y, X] = 1 + # if random.random() > 0.5: + # img[cropped_img == 0] = 0 + # #leave only face + # return img + + y, x = measure.centroid(cropped_img) + y = int(y) + x = int(x) + first = random.random() > 0.5 + if random.random() > 0.5: + if first: + cropped_img[:y, :] = 0 + else: + cropped_img[y:, :] = 0 + else: + if first: + cropped_img[:, :x] = 0 + else: + cropped_img[:, x:] = 0 + + img[cropped_img > 0] = 0 + except Exception as e: + pass + + +def dist(p1, p2): + return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) + + +def remove_eyes(image, landmarks): + image = image.copy() + (x1, y1), (x2, y2) = landmarks[:2] + mask = np.zeros_like(image[..., 0]) + line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2) + w = dist((x1, y1), (x2, y2)) + dilation = int(w // 4) + line = binary_dilation(line, iterations=dilation) + image[line, :] = 0 + return image + + +def remove_nose(image, landmarks): + image = image.copy() + (x1, y1), (x2, y2) = landmarks[:2] + x3, y3 = landmarks[2] + mask = np.zeros_like(image[..., 0]) + x4 = int((x1 + x2) / 2) + y4 = int((y1 + y2) / 2) + line = cv2.line(mask, (x3, y3), (x4, y4), color=(1), thickness=2) + w = dist((x1, y1), (x2, y2)) + dilation = int(w // 4) + line = binary_dilation(line, iterations=dilation) + image[line, :] = 0 + return image + + +def remove_mouth(image, landmarks): + image = image.copy() + (x1, y1), (x2, y2) = landmarks[-2:] + mask = np.zeros_like(image[..., 0]) + line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2) + w = dist((x1, y1), (x2, y2)) + dilation = int(w // 3) + line = binary_dilation(line, iterations=dilation) + image[line, :] = 0 + return image + + +def remove_landmark(image, landmarks): + if random.random() > 0.5: + image = remove_eyes(image, landmarks) + elif random.random() > 0.5: + image = remove_mouth(image, landmarks) + elif random.random() > 0.5: + image = remove_nose(image, landmarks) + return image + + +def change_padding(image, part=5): + h, w = image.shape[:2] + # original padding was done with 1/3 from each side, too much + pad_h = int(((3 / 5) * h) / part) + pad_w = int(((3 / 5) * w) / part) + image = image[h // 5 - pad_h:-h // 5 + pad_h, w // 5 - pad_w:-w // 5 + pad_w] + return image + + +def blackout_random(image, mask, label): + binary_mask = mask > 0.4 * 255 + h, w = binary_mask.shape[:2] + + 
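+    # Up to 50 attempts: randomly zero out the top/bottom or left/right half of the crop around a
+    # random pivot near the centre; a cutout is accepted only if enough of the image survives for
+    # real faces (label < 0.5) or enough of the SSIM diff mask stays visible, so fakes keep
+    # manipulated pixels in view.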
tries = 50 + current_try = 1 + while current_try < tries: + first = random.random() < 0.5 + if random.random() < 0.5: + pivot = random.randint(h // 2 - h // 5, h // 2 + h // 5) + bitmap_msk = np.ones_like(binary_mask) + if first: + bitmap_msk[:pivot, :] = 0 + else: + bitmap_msk[pivot:, :] = 0 + else: + pivot = random.randint(w // 2 - w // 5, w // 2 + w // 5) + bitmap_msk = np.ones_like(binary_mask) + if first: + bitmap_msk[:, :pivot] = 0 + else: + bitmap_msk[:, pivot:] = 0 + + if label < 0.5 and np.count_nonzero(image * np.expand_dims(bitmap_msk, axis=-1)) / 3 > (h * w) / 5 \ + or np.count_nonzero(binary_mask * bitmap_msk) > 40: + mask *= bitmap_msk + image *= np.expand_dims(bitmap_msk, axis=-1) + break + current_try += 1 + return image + + +def blend_original(img): + img = img.copy() + h, w = img.shape[:2] + rect = detector(img) + if len(rect) == 0: + return img + else: + rect = rect[0] + sp = predictor(img, rect) + landmarks = np.array([[p.x, p.y] for p in sp.parts()]) + outline = landmarks[[*range(17), *range(26, 16, -1)]] + Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0]) + raw_mask = np.zeros(img.shape[:2], dtype=np.uint8) + raw_mask[Y, X] = 1 + face = img * np.expand_dims(raw_mask, -1) + + # add warping + h1 = random.randint(h - h // 2, h + h // 2) + w1 = random.randint(w - w // 2, w + w // 2) + while abs(h1 - h) < h // 3 and abs(w1 - w) < w // 3: + h1 = random.randint(h - h // 2, h + h // 2) + w1 = random.randint(w - w // 2, w + w // 2) + face = cv2.resize(face, (w1, h1), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC])) + face = cv2.resize(face, (w, h), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC])) + + raw_mask = binary_erosion(raw_mask, iterations=random.randint(4, 10)) + img[raw_mask, :] = face[raw_mask, :] + if random.random() < 0.2: + img = OneOf([GaussianBlur(), Blur()], p=0.5)(image=img)["image"] + # image compression + if random.random() < 0.5: + img = ImageCompression(quality_lower=40, quality_upper=95)(image=img)["image"] + return img + + +class DeepFakeClassifierDataset(Dataset): + + def __init__(self, + data_path="/mnt/sota/datasets/deepfake", + fold=0, + label_smoothing=0.01, + padding_part=3, + hardcore=True, + crops_dir="crops", + folds_csv="folds.csv", + normalize={"mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225]}, + rotation=False, + mode="train", + reduce_val=True, + oversample_real=True, + transforms=None + ): + super().__init__() + self.data_root = data_path + self.fold = fold + self.folds_csv = folds_csv + self.mode = mode + self.rotation = rotation + self.padding_part = padding_part + self.hardcore = hardcore + self.crops_dir = crops_dir + self.label_smoothing = label_smoothing + self.normalize = normalize + self.transforms = transforms + self.df = pd.read_csv(os.path.join(data_path, folds_csv)) + self.oversample_real = oversample_real + self.reduce_val = reduce_val + + def __getitem__(self, index: int): + + while True: + video, img_file, label, ori_video, frame, fold = self.data[index] + try: + if self.mode == "train": + label = np.clip(label, self.label_smoothing, 1 - self.label_smoothing) + img_path = os.path.join(self.data_root, self.crops_dir, video, img_file) + image = cv2.imread(img_path, cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + mask = np.zeros(image.shape[:2], dtype=np.uint8) + diff_path = os.path.join(self.data_root, "diffs", video, img_file[:-4] + "_diff.png") + try: + msk = cv2.imread(diff_path, cv2.IMREAD_GRAYSCALE) + if msk is not 
None: + mask = msk + except: + print("not found mask", diff_path) + pass + if self.mode == "train" and self.hardcore and not self.rotation: + landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy") + if os.path.exists(landmark_path) and random.random() < 0.7: + landmarks = np.load(landmark_path) + image = remove_landmark(image, landmarks) + elif random.random() < 0.2: + blackout_convex_hull(image) + elif random.random() < 0.1: + binary_mask = mask > 0.4 * 255 + masks = prepare_bit_masks((binary_mask * 1).astype(np.uint8)) + tries = 6 + current_try = 1 + while current_try < tries: + bitmap_msk = random.choice(masks) + if label < 0.5 or np.count_nonzero(mask * bitmap_msk) > 20: + mask *= bitmap_msk + image *= np.expand_dims(bitmap_msk, axis=-1) + break + current_try += 1 + if self.mode == "train" and self.padding_part > 3: + image = change_padding(image, self.padding_part) + valid_label = np.count_nonzero(mask[mask > 20]) > 32 or label < 0.5 + valid_label = 1 if valid_label else 0 + rotation = 0 + if self.transforms: + data = self.transforms(image=image, mask=mask) + image = data["image"] + mask = data["mask"] + if self.mode == "train" and self.hardcore and self.rotation: + # landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy") + dropout = 0.8 if label > 0.5 else 0.6 + if self.rotation: + dropout *= 0.7 + elif random.random() < dropout: + blackout_random(image, mask, label) + + # + # os.makedirs("../images", exist_ok=True) + # cv2.imwrite(os.path.join("../images", video+ "_" + str(1 if label > 0.5 else 0) + "_"+img_file), image[...,::-1]) + + if self.mode == "train" and self.rotation: + rotation = random.randint(0, 3) + image = rot90(image, rotation) + + image = img_to_tensor(image, self.normalize) + return {"image": image, "labels": np.array((label,)), "img_name": os.path.join(video, img_file), + "valid": valid_label, "rotations": rotation} + except Exception as e: + traceback.print_exc(file=sys.stdout) + print("Broken image", os.path.join(self.data_root, self.crops_dir, video, img_file)) + index = random.randint(0, len(self.data) - 1) + + def random_blackout_landmark(self, image, mask, landmarks): + x, y = random.choice(landmarks) + first = random.random() > 0.5 + # crop half face either vertically or horizontally + if random.random() > 0.5: + # width + if first: + image[:, :x] = 0 + mask[:, :x] = 0 + else: + image[:, x:] = 0 + mask[:, x:] = 0 + else: + # height + if first: + image[:y, :] = 0 + mask[:y, :] = 0 + else: + image[y:, :] = 0 + mask[y:, :] = 0 + + def reset(self, epoch, seed): + self.data = self._prepare_data(epoch, seed) + + def __len__(self) -> int: + return len(self.data) + + def _prepare_data(self, epoch, seed): + df = self.df + if self.mode == "train": + rows = df[df["fold"] != self.fold] + else: + rows = df[df["fold"] == self.fold] + seed = (epoch + 1) * seed + if self.oversample_real: + rows = self._oversample(rows, seed) + if self.mode == "val" and self.reduce_val: + # every 2nd frame, to speed up validation + rows = rows[rows["frame"] % 20 == 0] + # another option is to use public validation set + #rows = rows[rows["video"].isin(PUBLIC_SET)] + + print( + "real {} fakes {} mode {}".format(len(rows[rows["label"] == 0]), len(rows[rows["label"] == 1]), self.mode)) + data = rows.values + + np.random.seed(seed) + np.random.shuffle(data) + return data + + def _oversample(self, rows: pd.DataFrame, seed): + real = rows[rows["label"] == 0] + fakes = rows[rows["label"] == 1] + num_real = 
real["video"].count() + if self.mode == "train": + fakes = fakes.sample(n=num_real, replace=False, random_state=seed) + return pd.concat([real, fakes]) diff --git a/training/datasets/validation_set.py b/training/datasets/validation_set.py new file mode 100644 index 0000000000000000000000000000000000000000..fa28f0889acb86d37fc4f0b8d9a9373ab0cc6ba4 --- /dev/null +++ b/training/datasets/validation_set.py @@ -0,0 +1,60 @@ + + +PUBLIC_SET = {'tjuihawuqm', 'prwsfljdjo', 'scrbqgpvzz', 'ziipxxchai', 'uubgqnvfdl', 'wclvkepakb', 'xjvxtuakyd', + 'qlvsqdroqo', 'bcbqxhziqz', 'yzuestxcbq', 'hxwtsaydal', 'kqlvggiqee', 'vtunvalyji', 'mohiqoogpb', + 'siebfpwuhu', 'cekwtyxdoo', 'hszwwswewp', 'orekjthsef', 'huvlwkxoxm', 'fmhiujydwo', 'lhvjzhjxdp', + 'ibxfxggtqh', 'bofrwgeyjo', 'rmufsuogzn', 'zbgssotnjm', 'dpevefkefv', 'sufvvwmbha', 'ncoeewrdlo', + 'qhsehzgxqj', 'yxadevzohx', 'aomqqjipcp', 'pcyswtgick', 'wfzjxzhdkj', 'rcjfxxhcal', 'lnjkpdviqb', + 'xmkwsnuzyq', 'ouaowjmigq', 'bkuzquigyt', 'vwxednhlwz', 'mszblrdprw', 'blnmxntbey', 'gccnvdoknm', + 'mkzaekkvej', 'hclsparpth', 'eryjktdexi', 'hfsvqabzfq', 'acazlolrpz', 'yoyhmxtrys', 'rerpivllud', + 'elackxuccp', 'zgbhzkditd', 'vjljdfopjg', 'famlupsgqm', 'nymodlmxni', 'qcbkztamqc', 'qclpbcbgeq', + 'lpkgabskbw', 'mnowxangqx', 'czfqlbcfpa', 'qyyhuvqmyf', 'toinozytsp', 'ztyvglkcsf', 'nplviymzlg', + 'opvqdabdap', 'uxuvkrjhws', 'mxahsihabr', 'cqxxumarvp', 'ptbfnkajyi', 'njzshtfmcw', 'dcqodpzomd', + 'ajiyrjfyzp', 'ywauoonmlr', 'gochxzemmq', 'lpgxwdgnio', 'hnfwagcxdf', 'gfcycflhbo', 'gunamloolc', + 'yhjlnisfel', 'srfefmyjvt', 'evysmtpnrf', 'aktnlyqpah', 'gpsxfxrjrr', 'zfobicuigx', 'mnzabbkpmt', + 'rfjuhbnlro', 'zuwwbbusgl', 'csnkohqxdv', 'bzvzpwrabw', 'yietrwuncf', 'wynotylpnm', 'ekboxwrwuv', + 'rcecrgeotc', 'rklawjhbpv', 'ilqwcbprqa', 'jsysgmycsx', 'sqixhnilfm', 'wnlubukrki', 'nikynwcvuh', + 'sjkfxrlxxs', 'btdxnajogv', 'wjhpisoeaj', 'dyjklprkoc', 'qlqhjcshpk', 'jyfvaequfg', 'dozjwhnedd', + 'owaogcehvc', 'oyqgwjdwaj', 'vvfszaosiv', 'kmcdjxmnoa', 'jiswxuqzyz', 'ddtbarpcgo', 'wqysrieiqu', + 'xcruhaccxc', 'honxqdilvv', 'nxgzmgzkfv', 'cxsvvnxpyz', 'demuhxssgl', 'hzoiotcykp', 'fwykevubzy', + 'tejfudfgpq', 'kvmpmhdxly', 'oojxonbgow', 'vurjckblge', 'oysopgovhu', 'khpipxnsvx', 'pqthmvwonf', + 'fddmkqjwsh', 'pcoxcmtroa', 'cnxccbjlct', 'ggzjfrirjh', 'jquevmhdvc', 'ecumyiowzs', 'esmqxszybs', + 'mllzkpgatp', 'ryxaqpfubf', 'hbufmvbium', 'vdtsbqidjb', 'sjwywglgym', 'qxyrtwozyw', 'upmgtackuf', + 'ucthmsajay', 'zgjosltkie', 'snlyjbnpgw', 'nswtvttxre', 'iznnzjvaxc', 'jhczqfefgw', 'htzbnroagi', + 'pdswwyyntw', 'uvrzaczrbx', 'vbcgoyxsvn', 'hzssdinxec', 'novarhxpbj', 'vizerpsvbz', 'jawgcggquk', + 'iorbtaarte', 'yarpxfqejd', 'vhbbwdflyh', 'rrrfjhugvb', 'fneqiqpqvs', 'jytrvwlewz', 'bfjsthfhbd', + 'rxdoimqble', 'ekelfsnqof', 'uqvxjfpwdo', 'cjkctqqakb', 'tynfsthodx', 'yllztsrwjw', 'bktkwbcawi', + 'wcqvzujamg', 'bcvheslzrq', 'aqrsylrzgi', 'sktpeppbkc', 'mkmgcxaztt', 'etdliwticv', 'hqzwudvhih', + 'swsaoktwgi', 'temjefwaas', 'papagllumt', 'xrtvqhdibb', 'oelqpetgwj', 'ggdpclfcgk', 'imdmhwkkni', + 'lebzjtusnr', 'xhtppuyqdr', 'nxzgekegsp', 'waucvvmtkq', 'rnfcjxynfa', 'adohdulfwb', 'tjywwgftmv', + 'fjrueenjyp', 'oaguiggjyv', 'ytopzxrswu', 'yxvmusxvcz', 'rukyxomwcx', 'qdqdsaiitt', 'mxlipjhmqk', + 'voawxrmqyl', 'kezwvsxxzj', 'oocincvedt', 'qooxnxqqjb', 'mwwploizlj', 'yaxgpxhavq', 'uhakqelqri', + 'bvpeerislp', 'bkcyglmfci', 'jyoxdvxpza', 'gkutjglghz', 'knxltsvzyu', 'ybbrkacebd', 'apvzjkvnwn', + 'ahjnxtiamx', 'hsbljbsgxr', 'fnxgqcvlsd', 'xphdfgmfmz', 'scbdenmaed', 'ywxpquomgt', 'yljecirelf', + 'wcvsqnplsk', 
'vmxfwxgdei', 'icbsahlivv', 'yhylappzid', 'irqzdokcws', 'petmyhjclt', 'rmlzgerevr', + 'qarqtkvgby', 'nkhzxomani', 'viteugozpv', 'qhkzlnzruj', 'eisofhptvk', 'gqnaxievjx', 'heiyoojifp', + 'zcxcmneefk', 'wvgviwnwob', 'gcdtglsoqj', 'yqhouqakbx', 'fopjiyxiqd', 'hierggamuo', 'ypbtpunjvm', + 'sjinmmbipg', 'kmqkiihrmj', 'wmoqzxddkb', 'lnhkjhyhvw', 'wixbuuzygv', 'fsdrwikhge', 'sfsayjgzrh', + 'pqdeutauqc', 'frqfsucgao', 'pdufsewrec', 'bfdopzvxbi', 'shnsajrsow', 'rvvpazsffd', 'pxcfrszlgi', + 'itfsvvmslp', 'ayipraspbn', 'prhmixykhr', 'doniqevxeg', 'dvtpwatuja', 'jiavqbrkyk', 'ipkpxvwroe', + 'syxobtuucp', 'syuxttuyhm', 'nwvsbmyndn', 'eqslzbqfea', 'ytddugrwph', 'vokrpfjpeb', 'bdshuoldwx', + 'fmvvmcbdrw', 'bnuwxhfahw', 'gbnzicjyhz', 'txnmkabufs', 'gfdjzwnpyp', 'hweshqpfwe', 'dxgnpnowgk', + 'xugmhbetrw', 'rktrpsdlci', 'nthpnwylxo', 'ihglzxzroo', 'ocgdbrgmtq', 'ruhtnngrqv', 'xljemofssi', + 'zxacihctqp', 'ghnpsltzyn', 'lbigytrrtr', 'ndikguxzek', 'mdfndlljvt', 'lyoslorecs', 'oefukgnvel', + 'zmxeiipnqb', 'cosghhimnd', 'alrtntfxtd', 'eywdmustbb', 'ooafcxxfrs', 'fqgypsunzr', 'hevcclcklc', + 'uhrqlmlclw', 'ipvwtgdlre', 'wcssbghcpc', 'didzujjhtg', 'fjxovgmwnm', 'dmmvuaikkv', 'hitfycdavv', + 'zyufpqvpyu', 'coujjnypba', 'temeqbmzxu', 'apedduehoy', 'iksxzpqxzi', 'kwfdyqofzw', 'aassnaulhq', + 'eyguqfmgzh', 'yiykshcbaz', 'sngjsueuhs', 'okgelildpc', 'ztyuiqrhdk', 'tvhjcfnqtg', 'gfgcwxkbjd', + 'lbfqksftuo', 'kowiwvrjht', 'dkuqbduxev', 'mwnibuujwz', 'sodvtfqbpf', 'hsbwhlolsn', 'qsjiypnjwi', + 'blszgmxkvu', 'ystdtnetgj', 'rfwxcinshk', 'vnlzxqwthl', 'ljouzjaqqe', 'gahgyuwzbu', 'xxzefxwyku', + 'xitgdpzbxv', 'sylnrepacf', 'igpvrfjdzc', 'nxnmkytwze', 'psesikjaxx', 'dvwpvqdflx', 'bjyaxvggle', + 'dpmgoiwhuf', 'wadvzjhwtw', 'kcjvhgvhpt', 'eppyqpgewp', 'tyjpjpglgx', 'cekarydqba', 'dvkdfhrpph', + 'cnpanmywno', 'ljauauuyka', 'hicjuubiau', 'cqhwesrciw', 'dnmowthjcj', 'lujvyveojc', 'wndursivcx', + 'espkiocpxq', 'jsbpkpxwew', 'dsnxgrfdmd', 'hyjqolupxn', 'xdezcezszc', 'axfhbpkdlc', 'qqnlrngaft', + 'coqwgzpbhx', 'ncmpqwmnzb', 'sznkemeqro', 'omphqltjdd', 'uoccaiathd', 'jzmzdispyo', 'pxjkzvqomp', + 'udxqbhgvvx', 'dzkyxbbqkr', 'dtozwcapoa', 'qswlzfgcgj', 'tgawasvbbr', 'lmdyicksrv', 'fzvpbrzssi', + 'dxfdovivlw', 'zzmgnglanj', 'vssmlqoiti', 'vajkicalux', 'ekvwecwltj', 'ylxwcwhjjd', 'keioymnobc', + 'usqqvxcjmg', 'phjvutxpoi', 'nycmyuzpml', 'bwdmzwhdnw', 'fxuxxtryjn', 'orixbcfvdz', 'hefisnapds', + 'fpevfidstw', 'halvwiltfs', 'dzojiwfvba', 'ojsxxkalat', 'esjdyghhog', 'ptbnewtvon', 'hcanfkwivl', + 'yronlutbgm', 'llplvmcvbl', 'yxirnfyijn', 'nwvloufjty', 'rtpbawlmxr', 'aayfryxljh', 'zfrrixsimm', + 'txmnoyiyte'} diff --git a/training/losses.py b/training/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..a9bbb41dc6128e12fbf3989734b9b6d4c08a8977 --- /dev/null +++ b/training/losses.py @@ -0,0 +1,28 @@ +from typing import Any + +from pytorch_toolbelt.losses import BinaryFocalLoss +from torch import nn +from torch.nn.modules.loss import BCEWithLogitsLoss + + +class WeightedLosses(nn.Module): + def __init__(self, losses, weights): + super().__init__() + self.losses = losses + self.weights = weights + + def forward(self, *input: Any, **kwargs: Any): + cum_loss = 0 + for loss, w in zip(self.losses, self.weights): + cum_loss += w * loss.forward(*input, **kwargs) + return cum_loss + + +class BinaryCrossentropy(BCEWithLogitsLoss): + pass + + +class FocalLoss(BinaryFocalLoss): + def __init__(self, alpha=None, gamma=3, ignore_index=None, reduction="mean", normalized=False, + reduced_threshold=None): + super().__init__(alpha, gamma, 
ignore_index, reduction, normalized, reduced_threshold) \ No newline at end of file diff --git a/training/pipelines/__init__.py b/training/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/pipelines/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671716506.Green.33248.0 b/training/pipelines/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671716506.Green.33248.0 new file mode 100644 index 0000000000000000000000000000000000000000..6b5aedecdc4446c1c15b0ee9e8124d626c5412da --- /dev/null +++ b/training/pipelines/logs/classifier_tf_efficientnet_b7_ns_1/events.out.tfevents.1671716506.Green.33248.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08877d01d980696dce4abdd58b57e37f18910c0ad8aa2cfe140a6baad2a1c35 +size 1020 diff --git a/training/pipelines/train_classifier.py b/training/pipelines/train_classifier.py new file mode 100644 index 0000000000000000000000000000000000000000..515782ef80035dd5528de4e7b6d95cb2935ee109 --- /dev/null +++ b/training/pipelines/train_classifier.py @@ -0,0 +1,365 @@ +import argparse +import json +import os +import sys +from collections import defaultdict + +from sklearn.metrics import log_loss +from torch import topk +sys.path.insert(1, 'D:\\University And Papers\\VESSL\\dfdc_deepfake_challenge') +sys.path.insert(2, 'D:\\University And Papers\\VESSL\\dfdc_deepfake_challenge\\apex\\apex') +from training import losses +from training.datasets.classifier_dataset import DeepFakeClassifierDataset +from training.losses import WeightedLosses +from training.tools.config import load_config +from training.tools.utils import create_optimizer, AverageMeter +from training.transforms.albu import IsotropicResize +from training.zoo import classifiers + +os.environ["MKL_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["OMP_NUM_THREADS"] = "1" + +import cv2 + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) +import numpy as np +from albumentations import Compose, RandomBrightnessContrast, \ + HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \ + ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur + +from apex.parallel import DistributedDataParallel, convert_syncbn_model +# from tensorboardX import SummaryWriter +from torch.utils.tensorboard import SummaryWriter + +from apex import amp + +import torch +from torch.backends import cudnn +from torch.nn import DataParallel +from torch.utils.data import DataLoader +from tqdm import tqdm +import torch.distributed as dist + +torch.backends.cudnn.benchmark = True + + +def create_train_transforms(size=300): + return Compose([ + ImageCompression(quality_lower=60, quality_upper=100, p=0.5), + GaussNoise(p=0.1), + GaussianBlur(blur_limit=3, p=0.05), + HorizontalFlip(), + OneOf([ + IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC), + IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR), + IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR), + ], p=1), + PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT), + OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7), + ToGray(p=0.2), + ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5), + ] + ) + + +def create_val_transforms(size=300): + return 
Compose([ + IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC), + PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT), + ]) + + +def main(): + parser = argparse.ArgumentParser("PyTorch Xview Pipeline") + arg = parser.add_argument + arg('--config', metavar='CONFIG_FILE', help='path to configuration file') + arg('--workers', type=int, default=6, help='number of cpu threads to use') + arg('--gpu', type=str, default='0', help='List of GPUs for parallel training, e.g. 0,1,2,3') + arg('--output-dir', type=str, default='C:/Users/Green/Desktop/VESSL/dfdc_deepfake_challenge/data_root/weights/') + arg('--resume', type=str, default='') + arg('--fold', type=int, default=1) + arg('--prefix', type=str, default='classifier_') + arg('--data-dir', type=str, default="C:/Users/Green/Desktop/VESSL/dfdc_deepfake_challenge/data_root") + arg('--folds-csv', type=str, default='folds02.csv') + arg('--crops-dir', type=str, default='crops') + arg('--label-smoothing', type=float, default=0.01) + arg('--logdir', type=str, default='logs') + arg('--zero-score', action='store_true', default=False) + arg('--from-zero', action='store_true', default=False) + arg('--distributed', action='store_true', default=False) + arg('--freeze-epochs', type=int, default=0) + arg("--local_rank", default=0, type=int) + arg("--seed", default=777, type=int) + arg("--padding-part", default=3, type=int) + arg("--opt-level", default='O1', type=str) + arg("--test_every", type=int, default=1) + arg("--no-oversample", action="store_true") + arg("--no-hardcore", action="store_true") + arg("--only-changed-frames", action="store_true") + + args = parser.parse_args() + os.makedirs(args.output_dir, exist_ok=True) + if args.distributed: + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group(backend='nccl', init_method='env://') + else: + os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' + os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu + + cudnn.benchmark = True + + conf = load_config(args.config) + model = classifiers.__dict__[conf['network']](encoder=conf['encoder']) + + model = model.cuda() + if args.distributed: + model = convert_syncbn_model(model) + ohem = conf.get("ohem_samples", None) + reduction = "mean" + if ohem: + reduction = "none" + loss_fn = [] + weights = [] + for loss_name, weight in conf["losses"].items(): + loss_fn.append(losses.__dict__[loss_name](reduction=reduction).cuda()) + weights.append(weight) + loss = WeightedLosses(loss_fn, weights) + loss_functions = {"classifier_loss": loss} + optimizer, scheduler = create_optimizer(conf['optimizer'], model) + bce_best = 100 + start_epoch = 0 + batch_size = conf['optimizer']['batch_size'] + + data_train = DeepFakeClassifierDataset(mode="train", + oversample_real=not args.no_oversample, + fold=args.fold, + padding_part=args.padding_part, + hardcore=not args.no_hardcore, + crops_dir=args.crops_dir, + data_path=args.data_dir, + label_smoothing=args.label_smoothing, + folds_csv=args.folds_csv, + transforms=create_train_transforms(conf["size"]), + normalize=conf.get("normalize", None)) + data_val = DeepFakeClassifierDataset(mode="val", + fold=args.fold, + padding_part=args.padding_part, + crops_dir=args.crops_dir, + data_path=args.data_dir, + folds_csv=args.folds_csv, + transforms=create_val_transforms(conf["size"]), + normalize=conf.get("normalize", None)) + val_data_loader = DataLoader(data_val, batch_size=batch_size * 2, num_workers=args.workers, shuffle=False, + pin_memory=False) + 
os.makedirs(args.logdir, exist_ok=True) + summary_writer = SummaryWriter(args.logdir + '/' + conf.get("prefix", args.prefix) + conf['encoder'] + "_" + str(args.fold)) + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume, map_location='cpu') + state_dict = checkpoint['state_dict'] + state_dict = {k[7:]: w for k, w in state_dict.items()} + model.load_state_dict(state_dict, strict=False) + if not args.from_zero: + start_epoch = checkpoint['epoch'] + if not args.zero_score: + bce_best = checkpoint.get('bce_best', 0) + print("=> loaded checkpoint '{}' (epoch {}, bce_best {})" + .format(args.resume, checkpoint['epoch'], checkpoint['bce_best'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + if args.from_zero: + start_epoch = 0 + current_epoch = start_epoch + + if conf['fp16']: + model, optimizer = amp.initialize(model, optimizer, + opt_level=args.opt_level, + loss_scale='dynamic') + + snapshot_name = "{}{}_{}_{}".format(conf.get("prefix", args.prefix), conf['network'], conf['encoder'], args.fold) + + if args.distributed: + model = DistributedDataParallel(model, delay_allreduce=True) + else: + model = DataParallel(model).cuda() + data_val.reset(1, args.seed) + max_epochs = conf['optimizer']['schedule']['epochs'] + for epoch in range(start_epoch, max_epochs): + data_train.reset(epoch, args.seed) + train_sampler = None + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(data_train) + train_sampler.set_epoch(epoch) + if epoch < args.freeze_epochs: + print("Freezing encoder!!!") + model.module.encoder.eval() + for p in model.module.encoder.parameters(): + p.requires_grad = False + else: + model.module.encoder.train() + for p in model.module.encoder.parameters(): + p.requires_grad = True + + train_data_loader = DataLoader(data_train, batch_size=batch_size, num_workers=args.workers, + shuffle=train_sampler is None, sampler=train_sampler, pin_memory=False, + drop_last=True) + + train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf, + args.local_rank, args.only_changed_frames) + model = model.eval() + + if args.local_rank == 0: + torch.save({ + 'epoch': current_epoch + 1, + 'state_dict': model.state_dict(), + 'bce_best': bce_best, + }, args.output_dir + '/' + snapshot_name + "_last") + torch.save({ + 'epoch': current_epoch + 1, + 'state_dict': model.state_dict(), + 'bce_best': bce_best, + }, args.output_dir + snapshot_name + "_{}".format(current_epoch)) + if (epoch + 1) % args.test_every == 0: + bce_best = evaluate_val(args, val_data_loader, bce_best, model, + snapshot_name=snapshot_name, + current_epoch=current_epoch, + summary_writer=summary_writer) + current_epoch += 1 + + +def evaluate_val(args, data_val, bce_best, model, snapshot_name, current_epoch, summary_writer): + print("Test phase") + model = model.eval() + + bce, probs, targets = validate(model, data_loader=data_val) + if args.local_rank == 0: + summary_writer.add_scalar('val/bce', float(bce), global_step=current_epoch) + if bce < bce_best: + print("Epoch {} improved from {} to {}".format(current_epoch, bce_best, bce)) + if args.output_dir is not None: + torch.save({ + 'epoch': current_epoch + 1, + 'state_dict': model.state_dict(), + 'bce_best': bce, + }, args.output_dir + snapshot_name + "_best_dice") + bce_best = bce + with open("predictions_{}.json".format(args.fold), "w") as f: + json.dump({"probs": probs, "targets": targets}, 
f) + torch.save({ + 'epoch': current_epoch + 1, + 'state_dict': model.state_dict(), + 'bce_best': bce_best, + }, args.output_dir + snapshot_name + "_last") + print("Epoch: {} bce: {}, bce_best: {}".format(current_epoch, bce, bce_best)) + return bce_best + + +def validate(net, data_loader, prefix=""): + probs = defaultdict(list) + targets = defaultdict(list) + + with torch.no_grad(): + for sample in tqdm(data_loader): + imgs = sample["image"].cuda() + img_names = sample["img_name"] + labels = sample["labels"].cuda().float() + out = net(imgs) + labels = labels.cpu().numpy() + preds = torch.sigmoid(out).cpu().numpy() + for i in range(out.shape[0]): + video, img_id = img_names[i].split("\\") + probs[video].append(preds[i].tolist()) + targets[video].append(labels[i].tolist()) + data_x = [] + data_y = [] + for vid, score in probs.items(): + score = np.array(score) + lbl = targets[vid] + + score = np.mean(score) + lbl = np.mean(lbl) + data_x.append(score) + data_y.append(lbl) + y = np.array(data_y) + x = np.array(data_x) + fake_idx = y > 0.1 + real_idx = y < 0.1 + fake_loss = log_loss(y[fake_idx], x[fake_idx], labels=[0, 1]) + real_loss = log_loss(y[real_idx], x[real_idx], labels=[0, 1]) + print("{}fake_loss".format(prefix), fake_loss) + print("{}real_loss".format(prefix), real_loss) + + return (fake_loss + real_loss) / 2, probs, targets + + +def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf, + local_rank, only_valid): + losses = AverageMeter() + fake_losses = AverageMeter() + real_losses = AverageMeter() + max_iters = conf["batches_per_epoch"] + print("training epoch {}".format(current_epoch)) + model.train() + pbar = tqdm(enumerate(train_data_loader), total=max_iters, desc="Epoch {}".format(current_epoch), ncols=0) + if conf["optimizer"]["schedule"]["mode"] == "epoch": + scheduler.step(current_epoch) + for i, sample in pbar: + imgs = sample["image"].cuda() + labels = sample["labels"].cuda().float() + out_labels = model(imgs) + if only_valid: + valid_idx = sample["valid"].cuda().float() > 0 + out_labels = out_labels[valid_idx] + labels = labels[valid_idx] + if labels.size(0) == 0: + continue + + fake_loss = 0 + real_loss = 0 + fake_idx = labels > 0.5 + real_idx = labels <= 0.5 + + ohem = conf.get("ohem_samples", None) + if torch.sum(fake_idx * 1) > 0: + fake_loss = loss_functions["classifier_loss"](out_labels[fake_idx], labels[fake_idx]) + if torch.sum(real_idx * 1) > 0: + real_loss = loss_functions["classifier_loss"](out_labels[real_idx], labels[real_idx]) + if ohem: + fake_loss = topk(fake_loss, k=min(ohem, fake_loss.size(0)), sorted=False)[0].mean() + real_loss = topk(real_loss, k=min(ohem, real_loss.size(0)), sorted=False)[0].mean() + + loss = (fake_loss + real_loss) / 2 + losses.update(loss.item(), imgs.size(0)) + fake_losses.update(0 if fake_loss == 0 else fake_loss.item(), imgs.size(0)) + real_losses.update(0 if real_loss == 0 else real_loss.item(), imgs.size(0)) + + optimizer.zero_grad() + pbar.set_postfix({"lr": float(scheduler.get_lr()[-1]), "epoch": current_epoch, "loss": losses.avg, + "fake_loss": fake_losses.avg, "real_loss": real_losses.avg}) + + if conf['fp16']: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1) + optimizer.step() + torch.cuda.synchronize() + if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"): + scheduler.step(i + current_epoch * max_iters) + if i == max_iters - 1: + 
break
+ pbar.close()
+ if local_rank == 0:
+ for idx, param_group in enumerate(optimizer.param_groups):
+ lr = param_group['lr']
+ summary_writer.add_scalar('group{}/lr'.format(idx), float(lr), global_step=current_epoch)
+ summary_writer.add_scalar('train/loss', float(losses.avg), global_step=current_epoch)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/training/tools/__init__.py b/training/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/tools/__pycache__/__init__.cpython-39.pyc b/training/tools/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ce669cc2fb281adad2e33f5576f143e4c879675 Binary files /dev/null and b/training/tools/__pycache__/__init__.cpython-39.pyc differ diff --git a/training/tools/__pycache__/config.cpython-39.pyc b/training/tools/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59d2cab7db3ba618a81ee36b7a3ff0e84508bd87 Binary files /dev/null and b/training/tools/__pycache__/config.cpython-39.pyc differ diff --git a/training/tools/__pycache__/schedulers.cpython-39.pyc b/training/tools/__pycache__/schedulers.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..161cc1eb4f27221a4389f1e016298075c5cc1009 Binary files /dev/null and b/training/tools/__pycache__/schedulers.cpython-39.pyc differ diff --git a/training/tools/__pycache__/utils.cpython-39.pyc b/training/tools/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07e879e89f3d9fcfd99ed51842342630c911df4a Binary files /dev/null and b/training/tools/__pycache__/utils.cpython-39.pyc differ diff --git a/training/tools/config.py b/training/tools/config.py new file mode 100644 index 0000000000000000000000000000000000000000..51145222df6fd7026dfbc435785b610fc08c041f --- /dev/null +++ b/training/tools/config.py @@ -0,0 +1,43 @@
+import json
+
+DEFAULTS = {
+ "network": "dpn",
+ "encoder": "dpn92",
+ "model_params": {},
+ "optimizer": {
+ "batch_size": 32,
+ "type": "SGD", # supported: SGD, FusedSGD, Adam, FusedAdam, AdamW, RmsProp
+ "momentum": 0.9,
+ "weight_decay": 0,
+ "clip": 1.,
+ "learning_rate": 0.1,
+ "classifier_lr": -1,
+ "nesterov": True,
+ "schedule": {
+ "type": "constant", # supported: constant, step, clr, multistep, exponential, linear, poly
+ "mode": "epoch", # supported: epoch, step
+ "epochs": 10,
+ "params": {}
+ }
+ },
+ "normalize": {
+ "mean": [0.485, 0.456, 0.406],
+ "std": [0.229, 0.224, 0.225]
+ }
+}
+
+
+def _merge(src, dst):
+ for k, v in src.items():
+ if k in dst:
+ if isinstance(v, dict):
+ _merge(src[k], dst[k])
+ else:
+ dst[k] = v
+
+
+def load_config(config_file, defaults=DEFAULTS):
+ with open(config_file, "r") as fd:
+ config = json.load(fd)
+ _merge(defaults, config)
+ return config
diff --git a/training/tools/schedulers.py b/training/tools/schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..e41f1a6fd8a913d382c2a5f99c23d7946c5cd22a --- /dev/null +++ b/training/tools/schedulers.py @@ -0,0 +1,46 @@
+from bisect import bisect_right
+
+from torch.optim.lr_scheduler import _LRScheduler
+
+
+class LRStepScheduler(_LRScheduler):
+ def __init__(self, optimizer, steps, last_epoch=-1):
+ self.lr_steps = steps
+ super().__init__(optimizer, last_epoch)
+
+ def get_lr(self):
+ pos = max(bisect_right([x for x, y in self.lr_steps], self.last_epoch) - 1, 0)
+ return [self.lr_steps[pos][1] if self.lr_steps[pos][0] <=
self.last_epoch else base_lr for base_lr in self.base_lrs] + + +class PolyLR(_LRScheduler): + """Sets the learning rate of each parameter group according to poly learning rate policy + """ + def __init__(self, optimizer, max_iter=90000, power=0.9, last_epoch=-1): + self.max_iter = max_iter + self.power = power + super(PolyLR, self).__init__(optimizer, last_epoch) + + def get_lr(self): + self.last_epoch = (self.last_epoch + 1) % self.max_iter + return [base_lr * ((1 - float(self.last_epoch) / self.max_iter) ** (self.power)) for base_lr in self.base_lrs] + +class ExponentialLRScheduler(_LRScheduler): + """Decays the learning rate of each parameter group by gamma every epoch. + When last_epoch=-1, sets initial lr as lr. + + Args: + optimizer (Optimizer): Wrapped optimizer. + gamma (float): Multiplicative factor of learning rate decay. + last_epoch (int): The index of last epoch. Default: -1. + """ + + def __init__(self, optimizer, gamma, last_epoch=-1): + self.gamma = gamma + super(ExponentialLRScheduler, self).__init__(optimizer, last_epoch) + + def get_lr(self): + if self.last_epoch <= 0: + return self.base_lrs + return [base_lr * self.gamma**self.last_epoch for base_lr in self.base_lrs] + diff --git a/training/tools/utils.py b/training/tools/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..dbfce4fb0657064af82fe7b270b26a98734a41aa --- /dev/null +++ b/training/tools/utils.py @@ -0,0 +1,123 @@ +import cv2 +import sys +sys.path.insert(2, 'D:\\University And Papers\\VESSL\\dfdc_deepfake_challenge\\apex\\apex') +from apex.optimizers import FusedAdam, FusedSGD +from timm.optim import AdamW +from torch import optim +from torch.optim import lr_scheduler +from torch.optim.rmsprop import RMSprop +from torch.optim.adamw import AdamW +from torch.optim.lr_scheduler import MultiStepLR, CyclicLR + +from training.tools.schedulers import ExponentialLRScheduler, PolyLR, LRStepScheduler + +cv2.ocl.setUseOpenCL(False) +cv2.setNumThreads(0) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + +def create_optimizer(optimizer_config, model, master_params=None): + """Creates optimizer and schedule from configuration + + Parameters + ---------- + optimizer_config : dict + Dictionary containing the configuration options for the optimizer. + model : Model + The network model. + + Returns + ------- + optimizer : Optimizer + The optimizer. + scheduler : LRScheduler + The learning rate scheduler. 
+ """ + if optimizer_config.get("classifier_lr", -1) != -1: + # Separate classifier parameters from all others + net_params = [] + classifier_params = [] + for k, v in model.named_parameters(): + if not v.requires_grad: + continue + if k.find("encoder") != -1: + net_params.append(v) + else: + classifier_params.append(v) + params = [ + {"params": net_params}, + {"params": classifier_params, "lr": optimizer_config["classifier_lr"]}, + ] + else: + if master_params: + params = master_params + else: + params = model.parameters() + + if optimizer_config["type"] == "SGD": + optimizer = optim.SGD(params, + lr=optimizer_config["learning_rate"], + momentum=optimizer_config["momentum"], + weight_decay=optimizer_config["weight_decay"], + nesterov=optimizer_config["nesterov"]) + elif optimizer_config["type"] == "FusedSGD": + optimizer = FusedSGD(params, + lr=optimizer_config["learning_rate"], + momentum=optimizer_config["momentum"], + weight_decay=optimizer_config["weight_decay"], + nesterov=optimizer_config["nesterov"]) + elif optimizer_config["type"] == "Adam": + optimizer = optim.Adam(params, + lr=optimizer_config["learning_rate"], + weight_decay=optimizer_config["weight_decay"]) + elif optimizer_config["type"] == "FusedAdam": + optimizer = FusedAdam(params, + lr=optimizer_config["learning_rate"], + weight_decay=optimizer_config["weight_decay"]) + elif optimizer_config["type"] == "AdamW": + optimizer = AdamW(params, + lr=optimizer_config["learning_rate"], + weight_decay=optimizer_config["weight_decay"]) + elif optimizer_config["type"] == "RmsProp": + optimizer = RMSprop(params, + lr=optimizer_config["learning_rate"], + weight_decay=optimizer_config["weight_decay"]) + else: + raise KeyError("unrecognized optimizer {}".format(optimizer_config["type"])) + + if optimizer_config["schedule"]["type"] == "step": + scheduler = LRStepScheduler(optimizer, **optimizer_config["schedule"]["params"]) + elif optimizer_config["schedule"]["type"] == "clr": + scheduler = CyclicLR(optimizer, **optimizer_config["schedule"]["params"]) + elif optimizer_config["schedule"]["type"] == "multistep": + scheduler = MultiStepLR(optimizer, **optimizer_config["schedule"]["params"]) + elif optimizer_config["schedule"]["type"] == "exponential": + scheduler = ExponentialLRScheduler(optimizer, **optimizer_config["schedule"]["params"]) + elif optimizer_config["schedule"]["type"] == "poly": + scheduler = PolyLR(optimizer, **optimizer_config["schedule"]["params"]) + elif optimizer_config["schedule"]["type"] == "constant": + scheduler = lr_scheduler.LambdaLR(optimizer, lambda epoch: 1.0) + elif optimizer_config["schedule"]["type"] == "linear": + def linear_lr(it): + return it * optimizer_config["schedule"]["params"]["alpha"] + optimizer_config["schedule"]["params"]["beta"] + + scheduler = lr_scheduler.LambdaLR(optimizer, linear_lr) + + return optimizer, scheduler diff --git a/training/transforms/__init__.py b/training/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/transforms/__pycache__/__init__.cpython-39.pyc b/training/transforms/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87bbb17a98dc2e0d857a4eed2589124fcf6ff2fb Binary files /dev/null and b/training/transforms/__pycache__/__init__.cpython-39.pyc differ diff --git a/training/transforms/__pycache__/albu.cpython-39.pyc b/training/transforms/__pycache__/albu.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..33f325df8eab2a643b039adff2686c0617b5de74 Binary files /dev/null and b/training/transforms/__pycache__/albu.cpython-39.pyc differ diff --git a/training/transforms/albu.py b/training/transforms/albu.py new file mode 100644 index 0000000000000000000000000000000000000000..931a1caf95543b5bb73811e718d25d00c9c073e1 --- /dev/null +++ b/training/transforms/albu.py @@ -0,0 +1,99 @@ +import random + +import cv2 +import numpy as np +from albumentations import DualTransform, ImageOnlyTransform +from albumentations.augmentations.crops.functional import crop + + +def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC): + h, w = img.shape[:2] + if max(w, h) == size: + return img + if w > h: + scale = size / w + h = h * scale + w = size + else: + scale = size / h + w = w * scale + h = size + interpolation = interpolation_up if scale > 1 else interpolation_down + resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation) + return resized + + +class IsotropicResize(DualTransform): + def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, + always_apply=False, p=1): + super(IsotropicResize, self).__init__(always_apply, p) + self.max_side = max_side + self.interpolation_down = interpolation_down + self.interpolation_up = interpolation_up + + def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params): + return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down, + interpolation_up=interpolation_up) + + def apply_to_mask(self, img, **params): + return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params) + + def get_transform_init_args_names(self): + return ("max_side", "interpolation_down", "interpolation_up") + + +class Resize4xAndBack(ImageOnlyTransform): + def __init__(self, always_apply=False, p=0.5): + super(Resize4xAndBack, self).__init__(always_apply, p) + + def apply(self, img, **params): + h, w = img.shape[:2] + scale = random.choice([2, 4]) + img = cv2.resize(img, (w // scale, h // scale), interpolation=cv2.INTER_AREA) + img = cv2.resize(img, (w, h), + interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST])) + return img + + +class RandomSizedCropNonEmptyMaskIfExists(DualTransform): + + def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5): + super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p) + + self.min_max_height = min_max_height + self.w2h_ratio = w2h_ratio + + def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params): + cropped = crop(img, x_min, y_min, x_max, y_max) + return cropped + + @property + def targets_as_params(self): + return ["mask"] + + def get_params_dependent_on_targets(self, params): + mask = params["mask"] + mask_height, mask_width = mask.shape[:2] + crop_height = int(mask_height * random.uniform(self.min_max_height[0], self.min_max_height[1])) + w2h_ratio = random.uniform(*self.w2h_ratio) + crop_width = min(int(crop_height * w2h_ratio), mask_width - 1) + if mask.sum() == 0: + x_min = random.randint(0, mask_width - crop_width + 1) + y_min = random.randint(0, mask_height - crop_height + 1) + else: + mask = mask.sum(axis=-1) if mask.ndim == 3 else mask + non_zero_yx = np.argwhere(mask) + y, x = random.choice(non_zero_yx) + x_min = x - random.randint(0, crop_width - 1) + y_min = y - random.randint(0, crop_height - 1) + 
x_min = np.clip(x_min, 0, mask_width - crop_width)
+ y_min = np.clip(y_min, 0, mask_height - crop_height)
+
+ x_max = x_min + crop_width
+ y_max = y_min + crop_height
+ y_max = min(mask_height, y_max)
+ x_max = min(mask_width, x_max)
+ return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}
+
+ def get_transform_init_args_names(self):
+ return "min_max_height", "w2h_ratio" \ No newline at end of file
diff --git a/training/zoo/__init__.py b/training/zoo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/training/zoo/__pycache__/__init__.cpython-39.pyc b/training/zoo/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f89b0e9115ec1ef8112ba06c68695e82c2d677d Binary files /dev/null and b/training/zoo/__pycache__/__init__.cpython-39.pyc differ diff --git a/training/zoo/__pycache__/classifiers.cpython-39.pyc b/training/zoo/__pycache__/classifiers.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..307fde28758976d6a3196b2a77f60bf55e2aed90 Binary files /dev/null and b/training/zoo/__pycache__/classifiers.cpython-39.pyc differ diff --git a/training/zoo/classifiers.py b/training/zoo/classifiers.py new file mode 100644 index 0000000000000000000000000000000000000000..f5899c3ee9d71d3f9ea7ad31c53ce6ed3f9c7e2c --- /dev/null +++ b/training/zoo/classifiers.py @@ -0,0 +1,172 @@
+from functools import partial
+
+import numpy as np
+import torch
+from timm.models.efficientnet import tf_efficientnet_b4_ns, tf_efficientnet_b3_ns, \
+ tf_efficientnet_b5_ns, tf_efficientnet_b2_ns, tf_efficientnet_b6_ns, tf_efficientnet_b7_ns
+from torch import nn
+from torch.nn.modules.dropout import Dropout
+from torch.nn.modules.linear import Linear
+from torch.nn.modules.pooling import AdaptiveAvgPool2d
+
+encoder_params = {
+ "tf_efficientnet_b3_ns": {
+ "features": 1536,
+ "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2)
+ },
+ "tf_efficientnet_b2_ns": {
+ "features": 1408,
+ "init_op": partial(tf_efficientnet_b2_ns, pretrained=False, drop_path_rate=0.2)
+ },
+ "tf_efficientnet_b4_ns": {
+ "features": 1792,
+ "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.5)
+ },
+ "tf_efficientnet_b5_ns": {
+ "features": 2048,
+ "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2)
+ },
+ "tf_efficientnet_b4_ns_03d": {
+ "features": 1792,
+ "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.3)
+ },
+ "tf_efficientnet_b5_ns_03d": {
+ "features": 2048,
+ "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.3)
+ },
+ "tf_efficientnet_b5_ns_04d": {
+ "features": 2048,
+ "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.4)
+ },
+ "tf_efficientnet_b6_ns": {
+ "features": 2304,
+ "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.2)
+ },
+ "tf_efficientnet_b7_ns": {
+ "features": 2560,
+ "init_op": partial(tf_efficientnet_b7_ns, pretrained=True, drop_path_rate=0.2)
+ },
+ "tf_efficientnet_b6_ns_04d": {
+ "features": 2304,
+ "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.4)
+ },
+}
+
+
+def setup_srm_weights(input_channels: int = 3) -> torch.Tensor:
+ """Creates the SRM kernels for noise analysis."""
+ # note: values taken from Zhou et al., "Learning Rich Features for Image Manipulation Detection", CVPR2018
+ srm_kernel =
torch.from_numpy(np.array([ + [ # srm 1/2 horiz + [0., 0., 0., 0., 0.], # noqa: E241,E201 + [0., 0., 0., 0., 0.], # noqa: E241,E201 + [0., 1., -2., 1., 0.], # noqa: E241,E201 + [0., 0., 0., 0., 0.], # noqa: E241,E201 + [0., 0., 0., 0., 0.], # noqa: E241,E201 + ], [ # srm 1/4 + [0., 0., 0., 0., 0.], # noqa: E241,E201 + [0., -1., 2., -1., 0.], # noqa: E241,E201 + [0., 2., -4., 2., 0.], # noqa: E241,E201 + [0., -1., 2., -1., 0.], # noqa: E241,E201 + [0., 0., 0., 0., 0.], # noqa: E241,E201 + ], [ # srm 1/12 + [-1., 2., -2., 2., -1.], # noqa: E241,E201 + [2., -6., 8., -6., 2.], # noqa: E241,E201 + [-2., 8., -12., 8., -2.], # noqa: E241,E201 + [2., -6., 8., -6., 2.], # noqa: E241,E201 + [-1., 2., -2., 2., -1.], # noqa: E241,E201 + ] + ])).float() + srm_kernel[0] /= 2 + srm_kernel[1] /= 4 + srm_kernel[2] /= 12 + return srm_kernel.view(3, 1, 5, 5).repeat(1, input_channels, 1, 1) + + +def setup_srm_layer(input_channels: int = 3) -> torch.nn.Module: + """Creates a SRM convolution layer for noise analysis.""" + weights = setup_srm_weights(input_channels) + conv = torch.nn.Conv2d(input_channels, out_channels=3, kernel_size=5, stride=1, padding=2, bias=False) + with torch.no_grad(): + conv.weight = torch.nn.Parameter(weights, requires_grad=False) + return conv + + +class DeepFakeClassifierSRM(nn.Module): + def __init__(self, encoder, dropout_rate=0.5) -> None: + super().__init__() + self.encoder = encoder_params[encoder]["init_op"]() + self.avg_pool = AdaptiveAvgPool2d((1, 1)) + self.srm_conv = setup_srm_layer(3) + self.dropout = Dropout(dropout_rate) + self.fc = Linear(encoder_params[encoder]["features"], 1) + + def forward(self, x): + noise = self.srm_conv(x) + x = self.encoder.forward_features(noise) + x = self.avg_pool(x).flatten(1) + x = self.dropout(x) + x = self.fc(x) + return x + + +class GlobalWeightedAvgPool2d(nn.Module): + """ + Global Weighted Average Pooling from paper "Global Weighted Average + Pooling Bridges Pixel-level Localization and Image-level Classification" + """ + + def __init__(self, features: int, flatten=False): + super().__init__() + self.conv = nn.Conv2d(features, 1, kernel_size=1, bias=True) + self.flatten = flatten + + def fscore(self, x): + m = self.conv(x) + m = m.sigmoid().exp() + return m + + def norm(self, x: torch.Tensor): + return x / x.sum(dim=[2, 3], keepdim=True) + + def forward(self, x): + input_x = x + x = self.fscore(x) + x = self.norm(x) + x = x * input_x + x = x.sum(dim=[2, 3], keepdim=not self.flatten) + return x + + +class DeepFakeClassifier(nn.Module): + def __init__(self, encoder, dropout_rate=0.0) -> None: + super().__init__() + self.encoder = encoder_params[encoder]["init_op"]() + self.avg_pool = AdaptiveAvgPool2d((1, 1)) + self.dropout = Dropout(dropout_rate) + self.fc = Linear(encoder_params[encoder]["features"], 1) + + def forward(self, x): + x = self.encoder.forward_features(x) + x = self.avg_pool(x).flatten(1) + x = self.dropout(x) + x = self.fc(x) + return x + + + + +class DeepFakeClassifierGWAP(nn.Module): + def __init__(self, encoder, dropout_rate=0.5) -> None: + super().__init__() + self.encoder = encoder_params[encoder]["init_op"]() + self.avg_pool = GlobalWeightedAvgPool2d(encoder_params[encoder]["features"]) + self.dropout = Dropout(dropout_rate) + self.fc = Linear(encoder_params[encoder]["features"], 1) + + def forward(self, x): + x = self.encoder.forward_features(x) + x = self.avg_pool(x).flatten(1) + x = self.dropout(x) + x = self.fc(x) + return x \ No newline at end of file diff --git a/training/zoo/unet.py b/training/zoo/unet.py new 
file mode 100644 index 0000000000000000000000000000000000000000..e190ea7f341a176d44aa5da472a2753502163b77 --- /dev/null +++ b/training/zoo/unet.py @@ -0,0 +1,151 @@ +from functools import partial + +import torch +from timm.models.efficientnet import tf_efficientnet_b3_ns, tf_efficientnet_b5_ns +from torch import nn +from torch.nn import Dropout2d, Conv2d +from torch.nn.modules.dropout import Dropout +from torch.nn.modules.linear import Linear +from torch.nn.modules.pooling import AdaptiveAvgPool2d +from torch.nn.modules.upsampling import UpsamplingBilinear2d + +encoder_params = { + "tf_efficientnet_b3_ns": { + "features": 1536, + "filters": [40, 32, 48, 136, 1536], + "decoder_filters": [64, 128, 256, 256], + "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2) + }, + "tf_efficientnet_b5_ns": { + "features": 2048, + "filters": [48, 40, 64, 176, 2048], + "decoder_filters": [64, 128, 256, 256], + "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2) + }, +} + + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + self.layer = nn.Sequential( + nn.Upsample(scale_factor=2), + nn.Conv2d(in_channels, out_channels, 3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.layer(x) + + +class ConcatBottleneck(nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + self.seq = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, dec, enc): + x = torch.cat([dec, enc], dim=1) + return self.seq(x) + + +class Decoder(nn.Module): + def __init__(self, decoder_filters, filters, upsample_filters=None, + decoder_block=DecoderBlock, bottleneck=ConcatBottleneck, dropout=0): + super().__init__() + self.decoder_filters = decoder_filters + self.filters = filters + self.decoder_block = decoder_block + self.decoder_stages = nn.ModuleList([self._get_decoder(idx) for idx in range(0, len(decoder_filters))]) + self.bottlenecks = nn.ModuleList([bottleneck(self.filters[-i - 2] + f, f) + for i, f in enumerate(reversed(decoder_filters))]) + self.dropout = Dropout2d(dropout) if dropout > 0 else None + self.last_block = None + if upsample_filters: + self.last_block = decoder_block(decoder_filters[0], out_channels=upsample_filters) + else: + self.last_block = UpsamplingBilinear2d(scale_factor=2) + + def forward(self, encoder_results: list): + x = encoder_results[0] + bottlenecks = self.bottlenecks + for idx, bottleneck in enumerate(bottlenecks): + rev_idx = - (idx + 1) + x = self.decoder_stages[rev_idx](x) + x = bottleneck(x, encoder_results[-rev_idx]) + if self.last_block: + x = self.last_block(x) + if self.dropout: + x = self.dropout(x) + return x + + def _get_decoder(self, layer): + idx = layer + 1 + if idx == len(self.decoder_filters): + in_channels = self.filters[idx] + else: + in_channels = self.decoder_filters[idx] + return self.decoder_block(in_channels, self.decoder_filters[max(layer, 0)]) + + +def _initialize_weights(module): + for m in module.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Linear): + m.weight.data = nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + +class EfficientUnetClassifier(nn.Module): + def __init__(self, encoder, dropout_rate=0.5) -> None: + super().__init__() + self.decoder = 
Decoder(decoder_filters=encoder_params[encoder]["decoder_filters"], + filters=encoder_params[encoder]["filters"]) + self.avg_pool = AdaptiveAvgPool2d((1, 1)) + self.dropout = Dropout(dropout_rate) + self.fc = Linear(encoder_params[encoder]["features"], 1) + self.final = Conv2d(encoder_params[encoder]["decoder_filters"][0], out_channels=1, kernel_size=1, bias=False) + _initialize_weights(self) + self.encoder = encoder_params[encoder]["init_op"]() + + def get_encoder_features(self, x): + encoder_results = [] + x = self.encoder.conv_stem(x) + x = self.encoder.bn1(x) + x = self.encoder.act1(x) + encoder_results.append(x) + x = self.encoder.blocks[:2](x) + encoder_results.append(x) + x = self.encoder.blocks[2:3](x) + encoder_results.append(x) + x = self.encoder.blocks[3:5](x) + encoder_results.append(x) + x = self.encoder.blocks[5:](x) + x = self.encoder.conv_head(x) + x = self.encoder.bn2(x) + x = self.encoder.act2(x) + encoder_results.append(x) + encoder_results = list(reversed(encoder_results)) + return encoder_results + + def forward(self, x): + encoder_results = self.get_encoder_features(x) + seg = self.final(self.decoder(encoder_results)) + x = encoder_results[0] + x = self.avg_pool(x).flatten(1) + x = self.dropout(x) + x = self.fc(x) + return x, seg + + +if __name__ == '__main__': + model = EfficientUnetClassifier("tf_efficientnet_b5_ns") + model.eval() + with torch.no_grad(): + input = torch.rand(4, 3, 224, 224) + print(model(input))
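
A few usage sketches follow; none of them appear in the diff above. First, the training entry point is driven entirely by a JSON config, but no config file is included in this changeset. The example below is a hypothetical config assembled from the DEFAULTS in training/tools/config.py and the conf[...] keys that train_classifier.py reads (network, encoder, size, fp16, batches_per_epoch, losses, ohem_samples, optimizer); every concrete value is illustrative, not taken from the author's real configs.

```python
import json

# Hypothetical config for training/pipelines/train_classifier.py; values are examples only.
example_conf = {
    "network": "DeepFakeClassifier",         # class name looked up in training.zoo.classifiers.__dict__
    "encoder": "tf_efficientnet_b7_ns",      # key of encoder_params in training/zoo/classifiers.py
    "size": 380,                             # crop size passed to create_train/val_transforms
    "fp16": True,                            # wrap model/optimizer with apex.amp
    "batches_per_epoch": 2500,
    "losses": {"BinaryCrossentropy": 1.0},   # class name from training/losses.py -> weight
    "ohem_samples": 16,                      # optional: keep only the top-k hardest samples per class
    "optimizer": {
        "batch_size": 12,
        "type": "SGD",
        "learning_rate": 0.01,
        "momentum": 0.9,
        "weight_decay": 1e-4,
        "nesterov": True,
        "schedule": {
            "type": "step",                  # handled by LRStepScheduler
            "mode": "epoch",
            "epochs": 40,
            "params": {"steps": [[0, 0.01], [20, 0.001], [30, 0.0001]]}  # (epoch, lr) pairs
        }
    },
    "normalize": {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}
}

with open("example_b7.json", "w") as f:
    json.dump(example_conf, f, indent=2)
# python training/pipelines/train_classifier.py --config example_b7.json --fold 0
```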
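The validation transform is IsotropicResize followed by PadIfNeeded: the longest side is scaled to the target size with the aspect ratio preserved, and the result is padded to a square. A minimal check of that behaviour, assuming an arbitrary 1280x720 dummy image:

```python
import cv2
import numpy as np
from albumentations import Compose, PadIfNeeded
from training.transforms.albu import IsotropicResize

aug = Compose([
    IsotropicResize(max_side=380),
    PadIfNeeded(min_height=380, min_width=380, border_mode=cv2.BORDER_CONSTANT),
])
img = np.zeros((720, 1280, 3), dtype=np.uint8)   # dummy frame, wider side 1280
out = aug(image=img)["image"]
print(out.shape)   # (380, 380, 3): 1280 -> 380, 720 -> 213, then padded to a square
```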
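setup_srm_layer wraps the three fixed 5x5 SRM kernels in a frozen Conv2d, so DeepFakeClassifierSRM feeds noise residuals rather than raw RGB into the encoder. A small sanity check with dummy tensors:

```python
import torch
from training.zoo.classifiers import setup_srm_layer

srm = setup_srm_layer(input_channels=3)    # fixed, non-trainable high-pass filters
x = torch.rand(2, 3, 380, 380)             # dummy face crops
noise = srm(x)                             # spatial size preserved thanks to padding=2
print(noise.shape)                                        # torch.Size([2, 3, 380, 380])
print(any(p.requires_grad for p in srm.parameters()))     # False: SRM weights stay frozen
```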
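Finally, validate() aggregates frame-level predictions into one score per video and averages the log loss of fake and real videos separately, so checkpoint selection is not skewed by how many frames each class contributes. A minimal reproduction of that metric with made-up numbers:

```python
import numpy as np
from sklearn.metrics import log_loss

# Frame-level sigmoid outputs and labels grouped by video, as collected in validate();
# the video names and probabilities below are invented for illustration.
probs = {"vid_fake": [0.9, 0.8, 0.95], "vid_real": [0.2, 0.1, 0.05]}
targets = {"vid_fake": [1.0, 1.0, 1.0], "vid_real": [0.0, 0.0, 0.0]}

data_x, data_y = [], []
for vid, score in probs.items():
    data_x.append(np.mean(score))          # one averaged probability per video
    data_y.append(np.mean(targets[vid]))   # one averaged label per video
x, y = np.array(data_x), np.array(data_y)

fake_idx, real_idx = y > 0.1, y < 0.1
fake_loss = log_loss(y[fake_idx], x[fake_idx], labels=[0, 1])
real_loss = log_loss(y[real_idx], x[real_idx], labels=[0, 1])
print((fake_loss + real_loss) / 2)         # the "bce" value compared against bce_best
```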