Upload 17 files
- app.py +195 -0
- classifiers.py +136 -0
- networks/drn.py +416 -0
- networks/drn_seg.py +95 -0
- pipeline.py +172 -0
- requirements.txt +40 -0
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-312.pyc +0 -0
- utils/__pycache__/preprocessing.cpython-312.pyc +0 -0
- utils/__pycache__/tools.cpython-312.pyc +0 -0
- utils/__pycache__/visualization.cpython-312.pyc +0 -0
- utils/__pycache__/visualize.cpython-312.pyc +0 -0
- utils/download_weights.py +45 -0
- utils/preprocessing.py +98 -0
- utils/tools.py +143 -0
- utils/visualization.py +14 -0
- utils/visualize.py +61 -0
app.py
ADDED
@@ -0,0 +1,195 @@
import streamlit as st
from PIL import Image
from models import mesonet, mesoinception, fal_detector, local_detector
from utils.visualization import display_results
from utils.preprocessing import preprocess_image, preprocess_video, generate_local_image
import numpy as np
import cv2
import tempfile
import os

# Initialize session state for tabs and uploaded files
if "active_tab" not in st.session_state:
    st.session_state["active_tab"] = "Face Photoshop Detection"

if "uploaded_file" not in st.session_state:
    st.session_state["uploaded_file"] = None

# Load models
models = {
    "MesoNet": mesonet.load_mesonet("models/weights/Meso4_DF.h5"),
    "MesoInception": mesoinception.load_mesonetInception("models/weights/MesoInception_DF.h5"),
    "Photoshop FALdetector Global": fal_detector.load_fal_detector("models/weights/global.pth"),
    "Photoshop FALdetector Local": local_detector.load_local_detector("models/weights/local.pth", gpu_id=-1),
}


st.title("DeepSAIF")
# Create tabs for different functionalities
tab1, tab2, tab3 = st.tabs(["Face Photoshop Detection", "DeepFake Detection for Images", "DeepFake Detection for Videos"])

# Tab 1: Photoshop Detection
with tab1:
    if st.session_state["active_tab"] != "Face Photoshop Detection":
        st.session_state["uploaded_file"] = None
        st.session_state["active_tab"] = "Face Photoshop Detection"

    st.header("Face Photoshop Detection")
    uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "png"], key="photoshop")

    if uploaded_file:
        st.session_state["uploaded_file"] = uploaded_file
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)
        local_image = generate_local_image(image)

        # Run inference on all models
        results = {}
        for model_name, model in models.items():
            if model_name == "Photoshop FALdetector Global":
                results[model_name] = fal_detector.predict_fal_detector(model, image)
            elif model_name == "Photoshop FALdetector Local":
                heatmap_path, prediction = local_detector.predict_and_generate_heatmap(model, image)
                if heatmap_path:
                    # Display the heatmap using Streamlit
                    st.image(heatmap_path, caption=f"Heatmap for {model_name}", use_container_width=True)

                    # Delete the temporary heatmap file after display
                    os.remove(heatmap_path)
                    os.remove('cropped_input.jpg')
                    os.remove('warped.jpg')
                else:
                    st.error(f"Failed to generate heatmap for {model_name}")
                results[model_name] = prediction
            # elif model_name == "Global Classifier":
            #     results[model_name] = global_classifier.classify_fake(model, image)

        # Display results
        display_results(results)

# Tab 2: DeepFake Detection for Images
with tab2:
    if st.session_state["active_tab"] != "DeepFake Detection for Images":
        st.session_state["uploaded_file"] = None
        st.session_state["active_tab"] = "DeepFake Detection for Images"

    st.header("DeepFake Detection for Images")

    uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "png"], key="deepfake_image")

    if uploaded_file:
        st.session_state["uploaded_file"] = uploaded_file
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)
        local_image = generate_local_image(image)
        # Preprocess the image
        # preprocessed_image = preprocess_image(uploaded_file)

        # Run inference on all models
        results = {}
        for model_name, model in models.items():
            if model_name == "MesoNet":
                results[model_name] = mesonet.predict_mesonet(model, image)
            elif model_name == "MesoInception":
                results[model_name] = mesoinception.predict_mesonetInception(model, image)

        # Display results
        display_results(results)


def confident_strategy(pred, t=0.8):
    """
    Implements the confident averaging strategy for predictions.
    Args:
        pred (list[float]): List of predictions for each frame.
        t (float): Threshold for high-confidence fake detection.
    Returns:
        float: Final confidence score for the video.
    """
    if len(pred) == 0:
        return np.nan
    pred = np.array(pred)
    sz = len(pred)
    fakes = np.count_nonzero(pred > t)
    if fakes > sz // 2.5 and fakes > 11:
        return np.mean(pred[pred > t])
    elif np.count_nonzero(pred < 0.2) > 0.9 * sz:
        return np.mean(pred[pred < 0.2])
    else:
        return np.mean(pred)

# Tab 3: DeepFake Detection for Videos
with tab3:
    if st.session_state["active_tab"] != "DeepFake Detection for Videos":
        st.session_state["uploaded_file"] = None
        st.session_state["active_tab"] = "DeepFake Detection for Videos"

    st.header("DeepFake Detection for Videos")
    uploaded_file = st.file_uploader("Upload Video", type=["mp4", "avi", "mov"], key="deepfake_video")
    if uploaded_file:
        st.session_state["uploaded_file"] = uploaded_file
        with st.spinner("Processing video..."):
            # Save uploaded file to a temporary location
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
                temp_file.write(uploaded_file.getbuffer())
                video_path = temp_file.name

            try:
                # Test video accessibility
                cap = cv2.VideoCapture(video_path)
                if not cap.isOpened():
                    st.error("Failed to open video file.")
                else:
                    st.success("Video file opened successfully!")

                    # Extract frames from video
                    frames = preprocess_video(video_path, frame_count=32)
                    if len(frames) == 0:
                        st.error("Failed to extract frames from the video.")
                    else:
                        # st.success(f"Extracted {len(frames)} frames.")
                        # for frame in frames[:5]:  # Display first 5 frames
                        #     st.image(frame, caption="Extracted Frame")

                        # Dictionary to store model predictions
                        model_results = {
                            "MesoNet": [],
                            "Photoshop FALdetector Global": []
                        }

                        # Iterate over frames and make predictions for each model
                        for frame in frames:
                            preprocessed_frame = preprocess_image(frame)  # Preprocess frame
                            local_image = generate_local_image(preprocessed_frame)

                            # Predictions for MesoNet and Photoshop FALdetector Global
                            model_results["MesoNet"].append(
                                mesonet.predict_mesonet(models["MesoNet"], preprocessed_frame)
                            )
                            model_results["Photoshop FALdetector Global"].append(
                                fal_detector.predict_fal_detector(models["Photoshop FALdetector Global"], local_image)
                            )

                        # Apply the confident averaging strategy for each model
                        final_results = {}
                        for model_name, predictions in model_results.items():
                            final_results[model_name] = confident_strategy(predictions)

                        # Display results
                        st.write("### Video Analysis Results")
                        display_results(final_results)

                        # Optionally show detailed frame predictions per model
                        if st.checkbox("Show Detailed Frame Predictions"):
                            for model_name, predictions in model_results.items():
                                st.write(f"### Predictions for {model_name}")
                                st.bar_chart(predictions)

            finally:
                # Clean up temporary file
                os.remove(video_path)
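Note: app.py imports a `models` package (mesonet, mesoinception, fal_detector, local_detector) that is not part of this upload. Below is a minimal sketch of the loader/predictor interface those calls assume, built on the Meso4 class from classifiers.py; the function names match the calls in app.py, but the module body itself is an illustrative assumption, not the committed code.

# models/mesonet.py -- hypothetical wrapper matching the calls made in app.py
import numpy as np
from classifiers import Meso4

def load_mesonet(weights_path):
    """Build a Meso4 classifier and load pretrained weights (path layout assumed from app.py)."""
    model = Meso4()
    model.load(weights_path)
    return model

def predict_mesonet(model, image):
    """Resize a PIL image to 256x256, scale to [0, 1], and return a fake probability in percent."""
    arr = np.asarray(image.resize((256, 256)), dtype=np.float32) / 255.0
    prob = float(model.predict(np.expand_dims(arr, axis=0))[0][0])
    return round(prob * 100, 2)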
classifiers.py
ADDED
@@ -0,0 +1,136 @@
# -*- coding:utf-8 -*-

from tensorflow.keras.models import Model as KerasModel
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Dropout, Reshape, Concatenate, LeakyReLU
from tensorflow.keras.optimizers import Adam

IMGWIDTH = 256

class Classifier:
    def __init__(self):
        self.model = 0

    def predict(self, x):
        if x.size == 0:
            return []
        return self.model.predict(x)

    def fit(self, x, y):
        return self.model.train_on_batch(x, y)

    def get_accuracy(self, x, y):
        return self.model.test_on_batch(x, y)

    def load(self, path):
        self.model.load_weights(path)


class Meso1(Classifier):
    """
    Feature extraction + Classification
    """
    def __init__(self, learning_rate = 0.001, dl_rate = 1):
        self.model = self.init_model(dl_rate)
        optimizer = Adam(learning_rate = learning_rate)
        self.model.compile(optimizer = optimizer, loss = 'mean_squared_error', metrics = ['accuracy'])

    def init_model(self, dl_rate):
        x = Input(shape = (IMGWIDTH, IMGWIDTH, 3))

        x1 = Conv2D(16, (3, 3), dilation_rate = dl_rate, strides = 1, padding='same', activation = 'relu')(x)
        x1 = Conv2D(4, (1, 1), padding='same', activation = 'relu')(x1)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(8, 8), padding='same')(x1)

        y = Flatten()(x1)
        y = Dropout(0.5)(y)
        y = Dense(1, activation = 'sigmoid')(y)
        return KerasModel(inputs = x, outputs = y)


class Meso4(Classifier):
    def __init__(self, learning_rate = 0.001):
        self.model = self.init_model()
        optimizer = Adam(learning_rate = learning_rate)
        self.model.compile(optimizer = optimizer, loss = 'mean_squared_error', metrics = ['accuracy'])

    def init_model(self):
        x = Input(shape = (IMGWIDTH, IMGWIDTH, 3))

        x1 = Conv2D(8, (3, 3), padding='same', activation = 'relu')(x)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)

        x2 = Conv2D(8, (5, 5), padding='same', activation = 'relu')(x1)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

        x3 = Conv2D(16, (5, 5), padding='same', activation = 'relu')(x2)
        x3 = BatchNormalization()(x3)
        x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)

        x4 = Conv2D(16, (5, 5), padding='same', activation = 'relu')(x3)
        x4 = BatchNormalization()(x4)
        x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)

        y = Flatten()(x4)
        y = Dropout(0.5)(y)
        y = Dense(16)(y)
        y = LeakyReLU(negative_slope=0.1)(y)
        y = Dropout(0.5)(y)
        y = Dense(1, activation = 'sigmoid')(y)

        return KerasModel(inputs = x, outputs = y)


class MesoInception4(Classifier):
    def __init__(self, learning_rate = 0.001):
        self.model = self.init_model()
        optimizer = Adam(learning_rate = learning_rate)
        self.model.compile(optimizer = optimizer, loss = 'mean_squared_error', metrics = ['accuracy'])

    def InceptionLayer(self, a, b, c, d):
        def func(x):
            x1 = Conv2D(a, (1, 1), padding='same', activation='relu')(x)

            x2 = Conv2D(b, (1, 1), padding='same', activation='relu')(x)
            x2 = Conv2D(b, (3, 3), padding='same', activation='relu')(x2)

            x3 = Conv2D(c, (1, 1), padding='same', activation='relu')(x)
            x3 = Conv2D(c, (3, 3), dilation_rate = 2, strides = 1, padding='same', activation='relu')(x3)

            x4 = Conv2D(d, (1, 1), padding='same', activation='relu')(x)
            x4 = Conv2D(d, (3, 3), dilation_rate = 3, strides = 1, padding='same', activation='relu')(x4)

            y = Concatenate(axis = -1)([x1, x2, x3, x4])

            return y
        return func

    def init_model(self):
        x = Input(shape = (IMGWIDTH, IMGWIDTH, 3))

        x1 = self.InceptionLayer(1, 4, 4, 2)(x)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)

        x2 = self.InceptionLayer(2, 4, 4, 2)(x1)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

        x3 = Conv2D(16, (5, 5), padding='same', activation = 'relu')(x2)
        x3 = BatchNormalization()(x3)
        x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)

        x4 = Conv2D(16, (5, 5), padding='same', activation = 'relu')(x3)
        x4 = BatchNormalization()(x4)
        x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)

        y = Flatten()(x4)
        y = Dropout(0.5)(y)
        y = Dense(16)(y)
        y = LeakyReLU(negative_slope=0.1)(y)
        y = Dropout(0.5)(y)
        y = Dense(1, activation = 'sigmoid')(y)

        return KerasModel(inputs = x, outputs = y)
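For reference, a minimal usage sketch of these classifiers; the weight path follows the layout assumed by app.py and utils/download_weights.py and is otherwise an assumption.

import numpy as np
from classifiers import Meso4

# Instantiate the network and load pretrained weights
# (path assumed from app.py; adjust to wherever the .h5 files actually live).
classifier = Meso4()
classifier.load('models/weights/Meso4_DF.h5')

# Predict on a batch of 256x256 RGB images scaled to [0, 1].
batch = np.random.rand(4, 256, 256, 3).astype(np.float32)
scores = classifier.predict(batch)  # shape (4, 1), sigmoid outputs in [0, 1]
print(scores.ravel())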
networks/drn.py
ADDED
@@ -0,0 +1,416 @@
import pdb

import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo

torch.backends.cudnn.benchmark = True
BatchNorm = nn.BatchNorm2d


# __all__ = ['DRN', 'drn26', 'drn42', 'drn58']


webroot = 'https://tigress-web.princeton.edu/~fy/drn/models/'

model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'drn-c-26': webroot + 'drn_c_26-ddedf421.pth',
    'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth',
    'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth',
    'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth',
    'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth',
    'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth',
    'drn-d-105': webroot + 'drn_d_105-12b40979.pth'
}


def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=padding, bias=False, dilation=dilation)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride,
                             padding=dilation[0], dilation=dilation[0])
        self.bn1 = BatchNorm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes,
                             padding=dilation[1], dilation=dilation[1])
        self.bn2 = BatchNorm(planes)
        self.downsample = downsample
        self.stride = stride
        self.residual = residual

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)
        if self.residual:
            out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=dilation[1], bias=False,
                               dilation=dilation[1])
        self.bn2 = BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class DRN(nn.Module):

    def __init__(self, block, layers, num_classes=1000,
                 channels=(16, 32, 64, 128, 256, 512, 512, 512),
                 out_map=False, out_middle=False, pool_size=28, arch='D'):
        super(DRN, self).__init__()
        self.inplanes = channels[0]
        self.out_map = out_map
        self.out_dim = channels[-1]
        self.out_middle = out_middle
        self.arch = arch

        if arch == 'C':
            self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
                                   padding=3, bias=False)
            self.bn1 = BatchNorm(channels[0])
            self.relu = nn.ReLU(inplace=True)

            self.layer1 = self._make_layer(
                BasicBlock, channels[0], layers[0], stride=1)
            self.layer2 = self._make_layer(
                BasicBlock, channels[1], layers[1], stride=2)
        elif arch == 'D':
            self.layer0 = nn.Sequential(
                nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3,
                          bias=False),
                BatchNorm(channels[0]),
                nn.ReLU(inplace=True)
            )

            self.layer1 = self._make_conv_layers(
                channels[0], layers[0], stride=1)
            self.layer2 = self._make_conv_layers(
                channels[1], layers[1], stride=2)

        self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
        self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2)
        self.layer5 = self._make_layer(block, channels[4], layers[4],
                                       dilation=2, new_level=False)
        self.layer6 = None if layers[5] == 0 else \
            self._make_layer(block, channels[5], layers[5], dilation=4,
                             new_level=False)

        if arch == 'C':
            self.layer7 = None if layers[6] == 0 else \
                self._make_layer(BasicBlock, channels[6], layers[6], dilation=2,
                                 new_level=False, residual=False)
            self.layer8 = None if layers[7] == 0 else \
                self._make_layer(BasicBlock, channels[7], layers[7], dilation=1,
                                 new_level=False, residual=False)
        elif arch == 'D':
            self.layer7 = None if layers[6] == 0 else \
                self._make_conv_layers(channels[6], layers[6], dilation=2)
            self.layer8 = None if layers[7] == 0 else \
                self._make_conv_layers(channels[7], layers[7], dilation=1)

        if num_classes > 0:
            self.avgpool = nn.AvgPool2d(pool_size)
            self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1,
                                stride=1, padding=0, bias=True)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1,
                    new_level=True, residual=True):
        assert dilation == 1 or dilation % 2 == 0
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm(planes * block.expansion),
            )

        layers = list()
        layers.append(block(
            self.inplanes, planes, stride, downsample,
            dilation=(1, 1) if dilation == 1 else (
                dilation // 2 if new_level else dilation, dilation),
            residual=residual))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, residual=residual,
                                dilation=(dilation, dilation)))

        return nn.Sequential(*layers)

    def _make_conv_layers(self, channels, convs, stride=1, dilation=1):
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2d(self.inplanes, channels, kernel_size=3,
                          stride=stride if i == 0 else 1,
                          padding=dilation, bias=False, dilation=dilation),
                BatchNorm(channels),
                nn.ReLU(inplace=True)])
            self.inplanes = channels
        return nn.Sequential(*modules)

    def forward(self, x):
        y = list()

        if self.arch == 'C':
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
        elif self.arch == 'D':
            x = self.layer0(x)

        x = self.layer1(x)
        y.append(x)
        x = self.layer2(x)
        y.append(x)

        x = self.layer3(x)
        y.append(x)

        x = self.layer4(x)
        y.append(x)

        x = self.layer5(x)
        y.append(x)

        if self.layer6 is not None:
            x = self.layer6(x)
            y.append(x)

        if self.layer7 is not None:
            x = self.layer7(x)
            y.append(x)

        if self.layer8 is not None:
            x = self.layer8(x)
            y.append(x)

        if self.out_map:
            x = self.fc(x)
        else:
            x = self.avgpool(x)
            x = self.fc(x)
            x = x.view(x.size(0), -1)

        if self.out_middle:
            return x, y
        else:
            return x


class DRN_A(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(DRN_A, self).__init__()
        self.out_dim = 512 * block.expansion
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilation=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=4)
        self.avgpool = nn.AvgPool2d(28, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # for m in self.modules():
        #     if isinstance(m, nn.Conv2d):
        #         nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        #     elif isinstance(m, nn.BatchNorm2d):
        #         nn.init.constant_(m.weight, 1)
        #         nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes,
                                dilation=(dilation, dilation)))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def drn_a_50(pretrained=False, **kwargs):
    model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model


def drn_c_26(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-26']))
    return model


def drn_c_42(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-42']))
    return model


def drn_c_58(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-58']))
    return model


def drn_d_22(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-22']))
    return model


def drn_d_24(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-24']))
    return model


def drn_d_38(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-38']))
    return model


def drn_d_40(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-40']))
    return model


def drn_d_54(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-54']))
    return model


def drn_d_56(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-56']))
    return model


def drn_d_105(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-105']))
    return model


def drn_d_107(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-107']))
    return model
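A quick smoke test of the constructors above. With the default pool_size=28, the classification head expects 224x224 inputs; no pretrained weights are downloaded here.

import torch
from networks.drn import drn_c_26

# Build a DRN-C-26 with the default 1000-class head and run a dummy forward pass.
model = drn_c_26(pretrained=False)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])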
networks/drn_seg.py
ADDED
@@ -0,0 +1,95 @@
import math
import torch
import torch.nn as nn
from networks.drn import drn_c_26


def fill_up_weights(up):
    w = up.weight.data
    f = math.ceil(w.size(2) / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(w.size(2)):
        for j in range(w.size(3)):
            w[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, w.size(0)):
        w[c, 0, :, :] = w[0, 0, :, :]


class DRNSeg(nn.Module):
    def __init__(self, classes, pretrained_drn=False,
                 pretrained_model=None, use_torch_up=False):
        super(DRNSeg, self).__init__()

        model = drn_c_26(pretrained=pretrained_drn)
        self.base = nn.Sequential(*list(model.children())[:-2])
        if pretrained_model:
            self.load_pretrained(pretrained_model)

        self.seg = nn.Conv2d(model.out_dim, classes,
                             kernel_size=1, bias=True)

        m = self.seg
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        m.bias.data.zero_()
        if use_torch_up:
            self.up = nn.UpsamplingBilinear2d(scale_factor=8)
        else:
            up = nn.ConvTranspose2d(classes, classes, 16, stride=8, padding=4,
                                    output_padding=0, groups=classes,
                                    bias=False)
            fill_up_weights(up)
            up.weight.requires_grad = False
            self.up = up

    def forward(self, x):
        x = self.base(x)
        x = self.seg(x)
        y = self.up(x)
        return y

    def optim_parameters(self, memo=None):
        for param in self.base.parameters():
            yield param
        for param in self.seg.parameters():
            yield param

    def load_pretrained(self, pretrained_model):
        print("loading the pretrained drn model from %s" % pretrained_model)
        state_dict = torch.load(pretrained_model, map_location='cpu')
        if hasattr(state_dict, '_metadata'):
            del state_dict._metadata

        # filter out unnecessary keys
        pretrained_dict = state_dict['model']
        pretrained_dict = {k[5:]: v for k, v in pretrained_dict.items() if k.split('.')[0] == 'base'}

        # load the pretrained state dict
        self.base.load_state_dict(pretrained_dict)


class DRNSub(nn.Module):
    def __init__(self, num_classes, pretrained_model=None, fix_base=False):
        super(DRNSub, self).__init__()

        drnseg = DRNSeg(2)
        if pretrained_model:
            print("loading the pretrained drn model from %s" % pretrained_model)
            state_dict = torch.load(pretrained_model, map_location='cpu')
            drnseg.load_state_dict(state_dict['model'])

        self.base = drnseg.base
        if fix_base:
            for param in self.base.parameters():
                param.requires_grad = False

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.base(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
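DRNSub appears to be the image-level classifier used for the global FALdetector weights referenced in app.py and utils/download_weights.py; a minimal instantiation sketch (the checkpoint argument is optional and omitted here):

import torch
from networks.drn_seg import DRNSeg, DRNSub

# Binary real/fake classifier built on the DRN-C-26 backbone.
classifier = DRNSub(num_classes=2)
classifier.eval()
with torch.no_grad():
    out = classifier(torch.randn(1, 3, 400, 400))
print(out.shape)  # torch.Size([1, 2])

# Per-pixel head: 2 output channels, upsampled 8x by the transposed convolution.
seg = DRNSeg(2)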
pipeline.py
ADDED
@@ -0,0 +1,172 @@
from os import listdir
from os.path import isfile, join

import numpy as np
from math import floor
from scipy.ndimage import zoom, rotate

import imageio
import cv2


## Face extraction

class Video:
    def __init__(self, path):
        self.path = path
        self.container = imageio.get_reader(path, 'ffmpeg')
        self.length = self.container.count_frames()
        self.fps = self.container.get_meta_data()['fps']

    def init_head(self):
        self.container.set_image_index(0)

    def next_frame(self):
        self.container.get_next_data()

    def get(self, key):
        return self.container.get_data(key)

    def __call__(self, key):
        return self.get(key)

    def __len__(self):
        return self.length


class FaceFinder(Video):
    def __init__(self, path, load_first_face=True):
        super().__init__(path)
        self.faces = {}
        self.coordinates = {}  # stores the face (locations center, rotation, length)
        self.last_frame = self.get(0)
        self.frame_shape = self.last_frame.shape[:2]
        self.last_location = (0, 200, 200, 0)

        # Initialize OpenCV's Haar Cascade for face detection
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

        if load_first_face:
            face_positions = self.detect_faces(self.last_frame)
            if len(face_positions) > 0:
                self.last_location = self.expand_location_zone(face_positions[0])

    def detect_faces(self, frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)):
        """Detect faces using Haar Cascade."""
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        faces = self.face_cascade.detectMultiScale(gray_frame, scaleFactor=scaleFactor, minNeighbors=minNeighbors, minSize=minSize)
        return faces

    def expand_location_zone(self, loc, margin=0.2):
        """Adds a margin around a frame slice."""
        x, y, w, h = loc
        offset_x = round(margin * w)
        offset_y = round(margin * h)
        y0 = max(y - offset_y, 0)
        x1 = min(x + w + offset_x, self.frame_shape[1])
        y1 = min(y + h + offset_y, self.frame_shape[0])
        x0 = max(x - offset_x, 0)
        return (y0, x1, y1, x0)

    def find_faces(self, resize=0.5, stop=0, skipstep=0, cut_left=0, cut_right=-1):
        """The core function to extract faces from frames."""
        # Frame iteration setup
        if stop != 0:
            finder_frameset = range(0, min(self.length, stop), skipstep + 1)
        else:
            finder_frameset = range(0, self.length, skipstep + 1)

        # Loop through frames
        for i in finder_frameset:
            frame = self.get(i)
            if cut_left != 0 or cut_right != -1:
                frame[:, :cut_left] = 0
                frame[:, cut_right:] = 0

            # Detect faces in the current frame
            face_positions = self.detect_faces(frame)
            if len(face_positions) > 0:
                # Use the largest detected face
                largest_face = max(face_positions, key=lambda f: f[2] * f[3])
                self.faces[i] = self.expand_location_zone(largest_face)
                self.last_location = self.faces[i]
            else:
                print(f"No face detected in frame {i}")

        print(f"Face extraction completed: {len(self.faces)} faces detected.")

    def get_face(self, i):
        """Extract the face region for the given frame index."""
        frame = self.get(i)
        if i in self.faces:
            y0, x1, y1, x0 = self.faces[i]
            return frame[y0:y1, x0:x1]
        return frame

## Face prediction

class FaceBatchGenerator:
    '''
    Made to deal with framesubsets of video.
    '''
    def __init__(self, face_finder, target_size = 256):
        self.finder = face_finder
        self.target_size = target_size
        self.head = 0
        self.length = int(face_finder.length)

    def resize_patch(self, patch):
        m, n = patch.shape[:2]
        return zoom(patch, (self.target_size / m, self.target_size / n, 1))

    def next_batch(self, batch_size = 50):
        batch = np.zeros((1, self.target_size, self.target_size, 3))
        stop = min(self.head + batch_size, self.length)
        i = 0
        while (i < batch_size) and (self.head < self.length):
            # Use the Haar-cascade face crops stored in finder.faces
            # (the refactored FaceFinder no longer provides aligned faces).
            if self.head in self.finder.faces:
                patch = self.finder.get_face(self.head)
                batch = np.concatenate((batch, np.expand_dims(self.resize_patch(patch), axis = 0)),
                                       axis = 0)
                i += 1
            self.head += 1
        return batch[1:]


def predict_faces(generator, classifier, batch_size = 50, output_size = 1):
    '''
    Compute predictions for a face batch generator
    '''
    n = len(generator.finder.faces)
    profile = np.zeros((1, output_size))
    for epoch in range(n // batch_size + 1):
        face_batch = generator.next_batch(batch_size = batch_size)
        prediction = classifier.predict(face_batch)
        if (len(prediction) > 0):
            profile = np.concatenate((profile, prediction))
    return profile[1:]


def compute_accuracy(classifier, dirname, frame_subsample_count = 30):
    '''
    Extraction + Prediction over a video
    '''
    filenames = [f for f in listdir(dirname) if isfile(join(dirname, f)) and ((f[-4:] == '.mp4') or (f[-4:] == '.avi') or (f[-4:] == '.mov'))]
    predictions = {}

    for vid in filenames:
        print('Dealing with video ', vid)

        # Compute face locations and store them in the face finder
        face_finder = FaceFinder(join(dirname, vid), load_first_face = False)
        skipstep = max(floor(face_finder.length / frame_subsample_count), 0)
        face_finder.find_faces(resize=0.5, skipstep = skipstep)

        print('Predicting ', vid)
        gen = FaceBatchGenerator(face_finder)
        p = predict_faces(gen, classifier)

        predictions[vid[:-4]] = (np.mean(p > 0.5), p)
    return predictions
requirements.txt
ADDED
@@ -0,0 +1,40 @@
absl-py
altair
beautifulsoup4
cachetools
certifi
charset-normalizer
click
decorator
ffmpeg
flatbuffers
fsspec
gdown
gitpython
grpcio
h5py
huggingface-hub
idna
jinja2
jsonschema
keras
matplotlib
numpy
opencv-python
packaging
pandas
pillow
protobuf
pytz
PyYAML
requests
scipy
streamlit
tensorboard
tensorflow
torch
torchaudio
torchvision
tqdm
typing_extensions
urllib3
utils/__init__.py
ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (143 Bytes)

utils/__pycache__/preprocessing.cpython-312.pyc
ADDED
Binary file (4.33 kB)

utils/__pycache__/tools.cpython-312.pyc
ADDED
Binary file (6.62 kB)

utils/__pycache__/visualization.cpython-312.pyc
ADDED
Binary file (1.07 kB)

utils/__pycache__/visualize.cpython-312.pyc
ADDED
Binary file (3.81 kB)
utils/download_weights.py
ADDED
@@ -0,0 +1,45 @@
import os
import gdown

def download_weights():
    """
    Downloads the required model weights into the 'models/weights/' directory.
    """

    # Directory for storing weights
    output_dir = "models/weights"
    os.makedirs(output_dir, exist_ok=True)

    # URLs for the weights
    weights = {
        "MesoNet": {
            "Meso4_DF": "https://github.com/DariusAf/MesoNet/raw/master/weights/Meso4_DF.h5",
            "MesoInception_DF": "https://github.com/DariusAf/MesoNet/raw/master/weights/MesoInception_DF.h5",
        },
        "EfficientNet (DFDC)": {
            "EfficientNet-B0": "https://drive.google.com/uc?id=1LqRbCDNf9Ob7DFexCtE230FW6hhtLw0M",
        },
        "FALdetector": {
            "global": "https://www.dropbox.com/s/rb8zpvrbxbbutxc/global.pth?dl=0",
            "local": "https://www.dropbox.com/s/pby9dhpr6cqziyl/local.pth?dl=0",
        },

        "Vision Transformer (CViT)": {
            "CViT": "https://github.com/erprogs/CViT/blob/main/weight/deepdeepfake_cvit_gpu_ep50.pkl",
        },
    }

    # Download each weight file
    for model_name, files in weights.items():
        print(f"Downloading weights for {model_name}...")
        for weight_name, url in files.items():
            # Keep the original file extension (.h5, .pth, .pkl) so the saved
            # names match what app.py loads; fall back to .pth when the URL
            # does not expose one (e.g. Google Drive links).
            ext = os.path.splitext(url.split("?")[0])[1] or ".pth"
            output_path = os.path.join(output_dir, f"{weight_name}{ext}")
            if not os.path.exists(output_path):
                print(f" - Downloading {weight_name}...")
                gdown.download(url, output_path, quiet=False)
            else:
                print(f" - {weight_name} already exists. Skipping download.")
    print("All weights downloaded successfully.")

if __name__ == "__main__":
    download_weights()
utils/preprocessing.py
ADDED
@@ -0,0 +1,98 @@
import cv2
from PIL import Image
import numpy as np


def preprocess_image(image):
    """
    Preprocesses an input image for prediction.

    Args:
        image (Union[str, numpy.ndarray]): File path to an image or a numpy array.

    Returns:
        PIL.Image: Preprocessed image.
    """
    if isinstance(image, str):  # Handle file path
        image = Image.open(image).convert("RGB")
    elif isinstance(image, np.ndarray):  # Handle numpy array
        image = Image.fromarray(image).convert("RGB")
    else:
        raise ValueError("Unsupported image type. Must be a file path or numpy array.")
    return image


def resize_shorter_side(img, min_length):
    """
    Resize the shorter side of img to min_length while
    preserving the aspect ratio.
    """
    ow, oh = img.size
    mult = 8
    if ow < oh:
        if ow == min_length and oh % mult == 0:
            return img, (ow, oh)
        w = min_length
        h = int(min_length * oh / ow)
    else:
        if oh == min_length and ow % mult == 0:
            return img, (ow, oh)
        h = min_length
        w = int(min_length * ow / oh)
    return img.resize((w, h), Image.BICUBIC), (w, h)


def generate_local_image(image):
    """
    Detects the face in the input image and extracts it as a 'local image'.
    If no face is detected, returns the global image as the local image.

    Args:
        image (Union[PIL.Image, numpy.ndarray]): The input image.

    Returns:
        PIL.Image: The cropped face or the original image if no face is detected.
    """
    # Convert numpy array to PIL.Image if necessary
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Convert PIL image to OpenCV format for face detection
    image_cv = np.array(image)
    image_gray = cv2.cvtColor(image_cv, cv2.COLOR_RGB2GRAY)

    # Load OpenCV's pre-trained Haar Cascade for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    faces = face_cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) == 0:
        print("No face detected. Using the global image as the local image.")
        return image  # Return the global image as fallback

    # Use the first detected face
    x, y, w, h = faces[0]

    # Crop the face region
    face_image = image.crop((x, y, x + w, y + h))
    return face_image


def preprocess_video(video_path, frame_count=32):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return []  # Return an empty list if video can't be opened

    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        return []  # Handle videos with no frames

    interval = max(1, total_frames // frame_count)
    for i in range(frame_count):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames
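A short usage sketch of these helpers; the input file paths are placeholders.

from PIL import Image
from utils.preprocessing import preprocess_image, generate_local_image, preprocess_video

# Crop a face region out of a still image; falls back to the full image
# when no face is found.
image = Image.open('example.jpg').convert('RGB')  # hypothetical input file
face = generate_local_image(image)

# Sample up to 32 evenly spaced RGB frames from a video, then wrap each as a PIL image.
frames = preprocess_video('example.mp4', frame_count=32)  # hypothetical input file
pil_frames = [preprocess_image(frame) for frame in frames]
print(len(pil_frames))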
utils/tools.py
ADDED
@@ -0,0 +1,143 @@
import os
import cv2
import torch
import numpy as np
from PIL import Image
# from dlib import cnn_face_detection_model_v1 as face_detect_model
from utils.preprocessing import generate_local_image as face_detect_model


def center_crop(im, length):
    w, h = im.size
    left = w//2 - length//2
    right = w//2 + length//2
    top = h//2 - length//2
    bottom = h//2 + length//2
    return im.crop((left, top, right, bottom)), (left, top)


def remove_boundary(img):
    """
    Remove boundary artifacts that FAL causes.
    """
    w, h = img.size
    left = w//80
    top = h//50
    right = w*79//80
    bottom = h*24//25
    return img.crop((left, top, right, bottom))


def resize_shorter_side(img, min_length):
    """
    Resize the shorter side of img to min_length while
    preserving the aspect ratio.
    """
    ow, oh = img.size
    mult = 8
    if ow < oh:
        if ow == min_length and oh % mult == 0:
            return img, (ow, oh)
        w = min_length
        h = int(min_length * oh / ow)
    else:
        if oh == min_length and ow % mult == 0:
            return img, (ow, oh)
        h = min_length
        w = int(min_length * ow / oh)
    return img.resize((w, h), Image.BICUBIC), (w, h)


def flow_resize(flow, sz):
    oh, ow, _ = flow.shape
    w, h = sz
    u_ = cv2.resize(flow[:,:,0], (w, h))
    v_ = cv2.resize(flow[:,:,1], (w, h))
    u_ *= w / float(ow)
    v_ *= h / float(oh)
    return np.dstack((u_,v_))


def warp(im, flow, alpha=1, interp=cv2.INTER_CUBIC):
    height, width, _ = flow.shape
    cart = np.dstack(np.meshgrid(np.arange(width), np.arange(height)))
    pixel_map = (cart + alpha * flow).astype(np.float32)
    warped = cv2.remap(
        im,
        pixel_map[:, :, 0],
        pixel_map[:, :, 1],
        interp,
        borderMode=cv2.BORDER_REPLICATE)
    return warped


cnn_face_detector = None
def face_detection(
        img_path,
        verbose=False,
        model_file='utils/dlib_face_detector/mmod_human_face_detector.dat'):
    """
    Detects faces using dlib cnn face detection, and extend the bounding box
    to include the entire face.
    """
    def shrink(img, max_length=2048):
        ow, oh = img.size
        if max_length >= max(ow, oh):
            return img, 1.0

        if ow > oh:
            mult = max_length / ow
        else:
            mult = max_length / oh
        w = int(ow * mult)
        h = int(oh * mult)
        return img.resize((w, h), Image.BILINEAR), mult

    global cnn_face_detector
    if cnn_face_detector is None:
        cnn_face_detector = face_detect_model(model_file)

    img = Image.open(img_path).convert('RGB')
    w, h = img.size
    img_shrinked, mult = shrink(img)

    im = np.asarray(img_shrinked)
    if len(im.shape) != 3 or im.shape[2] != 3:
        return []

    crop_ims = []
    dets = cnn_face_detector(im, 0)
    for k, d in enumerate(dets):
        top = d.rect.top() / mult
        bottom = d.rect.bottom() / mult
        left = d.rect.left() / mult
        right = d.rect.right() / mult

        wid = right - left
        left = max(0, left - wid // 2.5)
        top = max(0, top - wid // 1.5)
        right = min(w - 1, right + wid // 2.5)
        bottom = min(h - 1, bottom + wid // 2.5)

        if d.confidence > 1:
            if verbose:
                print("%d-th face detected: (%d, %d, %d, %d)" %
                      (k, left, top, right, bottom))
            crop_im = img.crop((left, top, right, bottom))
            crop_ims.append((crop_im, (left, top, right, bottom)))

    return crop_ims


def mkdirs(paths):
    if isinstance(paths, list) and not isinstance(paths, str):
        for path in paths:
            mkdir(path)
    else:
        mkdir(paths)


def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)
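A small sketch of warping an image with a dense flow field using the helpers above; the inputs are dummy arrays.

import numpy as np
from utils.tools import warp, flow_resize

# A dummy 2-channel flow field: a constant horizontal displacement of 3 px.
img = np.zeros((120, 160, 3), dtype=np.uint8)
flow = np.zeros((120, 160, 2), dtype=np.float32)
flow[..., 0] = 3.0

warped = warp(img, flow)             # remap the image along the flow field
half = flow_resize(flow, (80, 60))   # resize the flow to 80x60 and rescale its magnitudes
print(warped.shape, half.shape)      # (120, 160, 3) (60, 80, 2)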
utils/visualization.py
ADDED
@@ -0,0 +1,14 @@
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st

def display_results(results):
    st.write("### Detection Results")
    df = pd.DataFrame(results.items(), columns=["Model", "Probability (%)"])
    st.table(df)

    st.write("### Visualization")
    fig, ax = plt.subplots()
    df.plot.bar(x="Model", y="Probability (%)", ax=ax, legend=False)
    ax.set_ylabel("Probability (%)")
    st.pyplot(fig)
utils/visualize.py
ADDED
@@ -0,0 +1,61 @@
import os
import cv2
import torch
import numpy as np
import torchvision
from PIL import Image


def unnormalize(tens, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    # assume tensor of shape NxCxHxW
    return tens * torch.Tensor(std)[None, :, None, None] + torch.Tensor(
        mean)[None, :, None, None]


def get_heatmap_cv(img, magn, max_flow_mag):
    min_flow_mag = .5
    cv_magn = np.clip(
        255 * (magn - min_flow_mag) / (max_flow_mag - min_flow_mag),
        a_min=0,
        a_max=255).astype(np.uint8)
    if img.dtype != np.uint8:
        img = (255 * img).astype(np.uint8)

    heatmap_img = cv2.applyColorMap(cv_magn, cv2.COLORMAP_JET)
    heatmap_img = heatmap_img[..., ::-1]

    h, w = magn.shape
    img_alpha = np.ones((h, w), dtype=np.double)[:, :, None]
    heatmap_alpha = np.clip(
        magn / max_flow_mag, a_min=0, a_max=1)[:, :, None]**.7
    heatmap_alpha[heatmap_alpha < .2]**.5
    pm_hm = heatmap_img * heatmap_alpha
    pm_img = img * img_alpha
    cv_out = pm_hm + pm_img * (1 - heatmap_alpha)
    cv_out = np.clip(cv_out, a_min=0, a_max=255).astype(np.uint8)

    return cv_out


def get_heatmap_batch(img_batch, pred_batch):
    imgrid = torchvision.utils.make_grid(img_batch).cpu()
    magn_batch = torch.norm(pred_batch, p=2, dim=1, keepdim=True)
    magngrid = torchvision.utils.make_grid(magn_batch)
    magngrid = magngrid[0, :, :]
    imgrid = unnormalize(imgrid).squeeze_()

    cv_magn = magngrid.detach().cpu().numpy()
    cv_img = imgrid.permute(1, 2, 0).detach().cpu().numpy()
    cv_out = get_heatmap_cv(cv_img, cv_magn, max_flow_mag=9)
    out = np.asarray(cv_out).astype(np.double) / 255.0

    out = torch.from_numpy(out).permute(2, 0, 1)
    return out


def save_heatmap_cv(img, magn, path, max_flow_mag=7):
    cv_out = get_heatmap_cv(img, magn, max_flow_mag)
    out = Image.fromarray(cv_out)
    out.save(path, quality=95)