Upload 6 files
- utils/__init__.py +1 -0
- utils/basicblocks.py +32 -0
- utils/classifier.py +32 -0
- utils/config.py +38 -0
- utils/data_transforms.py +33 -0
- utils/feature_fusion_block.py +46 -0
utils/__init__.py
ADDED
@@ -0,0 +1 @@
+import os
utils/basicblocks.py
ADDED
@@ -0,0 +1,32 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+BatchNorm2d = nn.BatchNorm2d
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution without padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
+                     padding=0, bias=False)
+
+class BasicBlock(nn.Module):
+    def __init__(self, inplanes, outplanes, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(inplanes, outplanes, stride)
+        self.bn1 = BatchNorm2d(outplanes)
+        self.relu = nn.ReLU(inplace=True)
+        # Second conv runs at double the stride, so the block downsamples
+        self.conv2 = conv3x3(outplanes, outplanes, 2 * stride)
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+
+        return out
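For reference, a minimal usage sketch (the tensor shapes are illustrative, not taken from the repo): with the default stride=1, conv2 runs at stride 2, so each BasicBlock halves the spatial resolution.

import torch
from utils.basicblocks import BasicBlock

block = BasicBlock(inplanes=3, outplanes=64)
x = torch.randn(1, 3, 224, 224)   # dummy RGB batch
out = block(x)
print(out.shape)                  # torch.Size([1, 64, 112, 112])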
utils/classifier.py
ADDED
@@ -0,0 +1,32 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class ClassifierModel(nn.Module):
+    def __init__(self, num_classes):
+        super(ClassifierModel, self).__init__()
+        # Adaptive average pooling converts (512, 14, 14) feature maps to (512, 1, 1)
+        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
+
+        # Fully connected classification head
+        self.fc1 = nn.Linear(512, 256)  # first FC layer, reducing to 256 features
+        self.fc2 = nn.Linear(256, 128)  # second FC layer, reducing to 128 features
+        self.fc3 = nn.Linear(128, num_classes)  # final FC layer, one score per class
+
+        # Dropout for regularization
+        self.dropout = nn.Dropout(0.2)
+
+    def forward(self, x):
+        # Pool and flatten to a (batch, 512) vector
+        x = self.adaptive_pool(x)
+        x = torch.flatten(x, 1)
+
+        # Pass through the fully connected layers with ReLU activations and dropout
+        x = F.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = F.relu(self.fc2(x))
+        x = self.dropout(x)
+        x = self.fc3(x)  # raw class scores (logits)
+        # Softmax converts logits to probabilities; remove this line if training
+        # with nn.CrossEntropyLoss, which expects raw logits
+        x = F.softmax(x, dim=1)
+
+        return x
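A quick sanity check of the head, assuming the upstream network produces (512, 14, 14) feature maps as the in-code comment states (the batch size of 4 is arbitrary):

import torch
from utils.classifier import ClassifierModel

model = ClassifierModel(num_classes=2)
features = torch.randn(4, 512, 14, 14)  # dummy fused features
probs = model(features)                 # shape (4, 2); each row sums to 1

If the model is trained with nn.CrossEntropyLoss, the softmax in forward should be dropped so the loss receives raw logits, as noted in the code comment.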
utils/config.py
ADDED
@@ -0,0 +1,38 @@
+from easydict import EasyDict as edict
+import numpy as np
+
+__C = edict()
+cfg = __C
+
+# 0. basic config
+__C.TAG = 'default'
+__C.CLASSES = ['Real', 'Fake']
+
+# config of network input
+__C.MULTIMODAL_FUSION = edict()
+__C.MULTIMODAL_FUSION.IMG_CHANNELS = [3, 64, 128, 256, 512]
+__C.MULTIMODAL_FUSION.DCT_CHANNELS = [1, 64, 128, 256, 512]
+
+# training
+__C.NUM_EPOCHS = 100
+__C.BATCH_SIZE = 64
+__C.NUM_WORKERS = 4
+__C.LEARNING_RATE = 0.0001
+__C.PRETRAINED = False
+__C.PRETRAINED_PATH = "/home/user/Documents/Real_and_DeepFake/src/best_model.pth"
+
+# evaluation
+__C.TEST_BATCH_SIZE = 512
+__C.TEST_CSV = "/home/user/Documents/Real_and_DeepFake/src/dataset/extended_val.csv"
+__C.MODEL_PATH = "/home/user/Documents/Real_and_DeepFake/src/best_model.pth"
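The cfg object is a shared EasyDict, so any module can import it and read or override settings; a brief sketch (the override shown is hypothetical):

from utils.config import cfg

print(cfg.CLASSES)                         # ['Real', 'Fake']
print(cfg.MULTIMODAL_FUSION.DCT_CHANNELS)  # [1, 64, 128, 256, 512]
cfg.BATCH_SIZE = 32                        # hypothetical runtime override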
utils/data_transforms.py
ADDED
@@ -0,0 +1,33 @@
+from torchvision import transforms
+
+
+def get_transforms_train():
+    # Training pipeline: tensor conversion, resize, augmentation, normalization
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x.float()),
+        transforms.Resize((224, 224)),
+        transforms.RandomHorizontalFlip(),
+        transforms.RandomRotation(10),
+        # Single-value mean/std: the ImageNet per-channel statistics averaged
+        # into one value, broadcast across all input channels
+        transforms.Normalize(mean=[(0.485 + 0.456 + 0.406) / 3], std=[(0.229 + 0.224 + 0.225) / 3]),
+    ])
+
+    return transform
+
+
+def get_transforms_val():
+    # Validation pipeline: no augmentation, same normalization as training
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Lambda(lambda x: x.float()),
+        transforms.Resize((224, 224)),
+        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        transforms.Normalize(mean=[(0.485 + 0.456 + 0.406) / 3], std=[(0.229 + 0.224 + 0.225) / 3]),
+    ])
+
+    return transform
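A usage sketch, assuming a PIL image as input ("sample.jpg" is a placeholder filename); note that Resize here operates on a tensor (it follows ToTensor), which requires torchvision 0.8 or newer:

from PIL import Image
from utils.data_transforms import get_transforms_val

img = Image.open("sample.jpg")  # hypothetical input file
tensor = get_transforms_val()(img)
print(tensor.shape)             # torch.Size([3, 224, 224]) for an RGB input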
utils/feature_fusion_block.py
ADDED
@@ -0,0 +1,46 @@
+from torch import nn
+from torch.nn import functional as F
+
+class SpatialAttention(nn.Module):
+    def __init__(self, in_channels):
+        super(SpatialAttention, self).__init__()
+        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1, stride=1, padding=0)
+
+    def forward(self, x):
+        # Calculate a single-channel attention map
+        attention_scores = self.conv1(x)
+        # Normalize over all spatial positions (flatten H*W before the softmax)
+        b, _, h, w = attention_scores.shape
+        attention_scores = F.softmax(attention_scores.view(b, 1, -1), dim=2).view(b, 1, h, w)
+
+        # Apply attention to input features
+        attended_features = x * attention_scores
+
+        return attended_features
+
+class DCT_Attention_Fusion_Conv(nn.Module):
+    def __init__(self, channels):
+        super(DCT_Attention_Fusion_Conv, self).__init__()
+        self.rgb_attention = SpatialAttention(channels)
+        self.depth_attention = SpatialAttention(channels)
+        self.rgb_pooling = nn.AdaptiveAvgPool2d(1)
+        self.depth_pooling = nn.AdaptiveAvgPool2d(1)
+
+    def forward(self, rgb_features, DCT_features):
+        # Spatial attention for both modalities
+        rgb_attended_features = self.rgb_attention(rgb_features)
+        depth_attended_features = self.depth_attention(DCT_features)
+
+        # Global average pooling for both modalities
+        rgb_pooled = self.rgb_pooling(rgb_attended_features)
+        depth_pooled = self.depth_pooling(depth_attended_features)
+
+        # Upsample the pooled descriptors back to the original spatial size
+        rgb_upsampled = F.interpolate(rgb_pooled, size=rgb_features.size()[2:], mode='bilinear', align_corners=False)
+        depth_upsampled = F.interpolate(depth_pooled, size=DCT_features.size()[2:], mode='bilinear', align_corners=False)
+
+        # Fuse the two modalities by element-wise addition
+        fused_features = F.relu(rgb_upsampled + depth_upsampled)
+        # fused_features = fused_features.sum(dim=1)
+
+        return fused_features
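A shape check for the fusion block, with dummy tensors standing in for the RGB and DCT branch outputs (channel count and spatial size are illustrative):

import torch
from utils.feature_fusion_block import DCT_Attention_Fusion_Conv

fusion = DCT_Attention_Fusion_Conv(channels=512)
rgb = torch.randn(2, 512, 14, 14)
dct = torch.randn(2, 512, 14, 14)
fused = fusion(rgb, dct)
print(fused.shape)  # torch.Size([2, 512, 14, 14])

Because both branches are globally pooled to 1x1 before being upsampled back, each channel of the fused map is spatially uniform, and the element-wise addition keeps the channel count unchanged.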