ybbwcwaps committed on
Commit
711b041
1 Parent(s): 3cc4a06

some FakeVD

FakeVD/code_test/C3D_Feature_Extractor/C3D_model.py ADDED
@@ -0,0 +1,74 @@
+ # coding: utf-8
+
+ import torch.nn as nn
+
+
+ class C3D(nn.Module):
+     """
+     C3D network for video clips.
+     nb_classes: number of classes in the classification task, e.g. 101 for the UCF101 dataset.
+     """
+
+     def __init__(self, nb_classes):
+         super(C3D, self).__init__()
+
+         self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
+
+         self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
+
+         self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
+
+         self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
+
+         self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
+         self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))
+
+         self.fc6 = nn.Linear(8192, 4096)
+         self.fc7 = nn.Linear(4096, 4096)
+         self.fc8 = nn.Linear(4096, nb_classes)
+
+         self.dropout = nn.Dropout(p=0.5)
+
+         self.relu = nn.ReLU()
+
+     def forward(self, x, feature_layer):
+         # x: (batch, 3, nb_frames, H, W); feature_layer in {5, 6, 7} selects which activation to return.
+         h = self.relu(self.conv1(x))
+         h = self.pool1(h)
+         h = self.relu(self.conv2(h))
+         h = self.pool2(h)
+
+         h = self.relu(self.conv3a(h))
+         h = self.relu(self.conv3b(h))
+         h = self.pool3(h)
+
+         h = self.relu(self.conv4a(h))
+         h = self.relu(self.conv4b(h))
+         h = self.pool4(h)
+
+         h = self.relu(self.conv5a(h))
+         h = self.relu(self.conv5b(h))
+         h = self.pool5(h)
+
+         h = h.reshape(-1, 8192)
+         out = h if feature_layer == 5 else None
+         h = self.relu(self.fc6(h))
+         out = h if feature_layer == 6 and out is None else out
+         h = self.dropout(h)
+         h = self.relu(self.fc7(h))
+         out = h if feature_layer == 7 and out is None else out
+         h = self.dropout(h)
+         logits = self.fc8(h)
+         return logits, out
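For orientation (not part of the commit), a minimal sketch of how this module is driven by the extractor scripts below; the random clip is a stand-in for a real 16-frame 112x112 crop, and no pretrained weights are loaded here:

import torch
from C3D_model import C3D

net = C3D(487)  # 487 classes, matching the Sports-1M checkpoint the extractor scripts load
net.eval()
clip = torch.randn(1, 3, 16, 112, 112)  # (batch, channels, frames, height, width)
with torch.no_grad():
    logits, feat = net(clip, feature_layer=6)  # feat is the fc6 activation
print(logits.shape, feat.shape)  # torch.Size([1, 487]) torch.Size([1, 4096])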
FakeVD/code_test/C3D_Feature_Extractor/error.txt ADDED
File without changes
FakeVD/code_test/C3D_Feature_Extractor/feature_extractor_frm.py ADDED
@@ -0,0 +1,119 @@
+ # coding: utf-8
+ # from data_provider import *
+ from C3D_model import *
+ import json
+ import torchvision
+ import torch.optim as optim
+ import torch
+ from torch.autograd import Variable
+ import torch.nn.functional as F
+ import argparse
+ import os
+ from torch import save, load
+ import pickle
+ import time
+ import numpy as np
+ import PIL.Image as Image
+ import collections
+ #import imageio  # read video
+ import skimage.io as io
+ from skimage.transform import resize
+ import h5py
+ import fnmatch
+ from PIL import Image
+
+ def feature_extractor():
+     #trainloader = Train_Data_Loader(VIDEO_DIR, resize_w=128, resize_h=171, crop_w=112, crop_h=112, nb_frames=16)
+     net = C3D(487)
+     print('net', net)
+     ## Load the model pretrained on Sports-1M; only the chosen layer's activations are kept as features.
+     net.load_state_dict(torch.load('/data1/miayuan/pretrained_models/c3d.pickle'))
+     if RUN_GPU:
+         net.cuda(0)
+     net.eval()
+     print('net', net)
+     feature_dim = 4096 if EXTRACTED_LAYER != 5 else 8192
+     video_list = os.listdir(VIDEO_DIR)
+     print('video_list', video_list)
+     if not os.path.isdir(OUTPUT_DIR):
+         os.mkdir(OUTPUT_DIR)
+     f = h5py.File(os.path.join(OUTPUT_DIR, OUTPUT_NAME), 'w')
+
+     def count_files(directory, prefix_list):
+         lst = os.listdir(directory)
+         cnt_list = [len(fnmatch.filter(lst, x + '*')) for x in prefix_list]
+         return cnt_list
+
+     for video_name in video_list:
+         video_path = os.path.join(VIDEO_DIR, video_name)
+         print('video_path', video_path)
+         #video = imageio.get_reader(video_path, 'ffmpeg')
+         #print('video', video)
+         all_cnt = count_files(video_path, ['image_'])  # list of prefixes, one count per prefix
+         total_frames = all_cnt[0]
+         print('Total frames: %d' % total_frames)
+         valid_frames = total_frames // nb_frames * nb_frames
+         print('Total validated frames: %d' % valid_frames)
+         index_w = np.random.randint(resize_w - crop_w)  ## random crop offset (width)
+         index_h = np.random.randint(resize_h - crop_h)  ## random crop offset (height)
+         #features = np.array((valid_frames/nb_frames, feature_dim))
+         features = []
+         #print('features', features)
+         print('NB features: %d' % (valid_frames // nb_frames))
+         #print(io.imread(os.path.join(video_path, 'image_{:04d}.jpg'.format(1))).shape)
+         for i in range(valid_frames // nb_frames):
+             clip = np.array([resize(io.imread(os.path.join(video_path, 'image_{:04d}.jpg'.format(j))), output_shape=(resize_w, resize_h), preserve_range=True) for j in range(i * nb_frames + 1, (i + 1) * nb_frames + 1)])
+             #clip = np.array([resize(video.get_data(j), output_shape=(resize_w, resize_h), preserve_range=True) for j in range(i * nb_frames, (i + 1) * nb_frames)])
+             clip = clip[:, index_w: index_w + crop_w, index_h: index_h + crop_h, :]
+             clip = torch.from_numpy(np.float32(clip.transpose(3, 0, 1, 2)))
+             clip = Variable(clip).cuda() if RUN_GPU else Variable(clip)
+             clip = clip.resize(1, 3, nb_frames, crop_w, crop_h)
+             #print('clip', clip)
+             _, clip_output = net(clip, EXTRACTED_LAYER)
+             #print('clip_output', clip_output)
+             clip_feature = (clip_output.data).cpu()
+             features.append(clip_feature)
+             #features[i] = np.array(clip_feature)
+         features = torch.cat(features, 0)
+         features = features.numpy()
+         print('features', features)
+
+         fgroup = f.create_group(video_name)
+         fgroup.create_dataset('c3d_features', data=features)
+         fgroup.create_dataset('total_frames', data=np.array(total_frames))
+         fgroup.create_dataset('valid_frames', data=np.array(valid_frames))
+
+         #with open(os.path.join(OUTPUT_DIR, video_name[:-4]), 'wb') as f:
+         #    pickle.dump(features, f)
+         print('%s has been processed...' % video_name)
+
+ if __name__ == "__main__":
+
+     parser = argparse.ArgumentParser()
+
+     print('******--------- Extract C3D features ------*******')
+     parser.add_argument('-o', '--OUTPUT_DIR', dest='OUTPUT_DIR', type=str, default='./output_frm/', help='Output directory')
+     parser.add_argument('-l', '--EXTRACTED_LAYER', dest='EXTRACTED_LAYER', type=int, choices=[5, 6, 7], default=5, help='Feature extractor layer')
+     parser.add_argument('-i', '--VIDEO_DIR', dest='VIDEO_DIR', type=str, help='Input video directory')
+     parser.add_argument('-gpu', '--gpu', dest='GPU', action='store_true', help='Run on GPU?')
+     parser.add_argument('--OUTPUT_NAME', default='c3d_features.hdf5', help='The output name of the hdf5 features')
+
+     args = parser.parse_args()
+     params = vars(args)  # convert to an ordinary dict
+     print('parsed parameters:')
+     print(json.dumps(params, indent=2))
+
+     OUTPUT_DIR = params['OUTPUT_DIR']
+     EXTRACTED_LAYER = params['EXTRACTED_LAYER']
+     VIDEO_DIR = params['VIDEO_DIR']
+     RUN_GPU = params['GPU']
+     OUTPUT_NAME = params['OUTPUT_NAME']
+     crop_w = 112
+     resize_w = 128
+     crop_h = 112
+     resize_h = 171
+     nb_frames = 16
+     feature_extractor()
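Usage note (illustrative, not part of the commit): the script expects frames already dumped as image_0001.jpg, image_0002.jpg, ... into one sub-folder per video under the input directory, and the hard-coded checkpoint path above to exist. A typical invocation would be

python feature_extractor_frm.py -i ./frames/ -o ./output_frm/ -l 6 -gpu

which writes one HDF5 group per video (datasets c3d_features, total_frames, valid_frames) into ./output_frm/c3d_features.hdf5.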
FakeVD/code_test/C3D_Feature_Extractor/feature_extractor_vid.py ADDED
@@ -0,0 +1,209 @@
1
+ # coding: utf-8
2
+ # from data_provider import *
3
+ from .C3D_model import *
4
+ import torchvision
5
+ import torch
6
+ from torch.autograd import Variable
7
+ import torch.nn.functional as F
8
+ import argparse
9
+ import os
10
+ from torch import save, load
11
+ import pickle
12
+ import time
13
+ import numpy as np
14
+ import PIL.Image as Image
15
+ import skimage.io as io
16
+ from skimage.transform import resize
17
+ import h5py
18
+ from PIL import Image
19
+
20
+ def load_model_c3d(RUN_GPU = False):
21
+ net = C3D(487)
22
+ # print('net', net)
23
+ ## Loading pretrained model from sports and finetune the last layer
24
+ net.load_state_dict(torch.load('./FakeVD/code_test/C3D_Feature_Extractor/c3d.pickle'))
25
+ if RUN_GPU :
26
+ net.cuda(0)
27
+ net.eval()
28
+ # print('net', net)
29
+
30
+ return net
31
+
32
+
33
+ def feature_extractor(net, OUTPUT_DIR,VIDEO_DIR,video_path=None):
34
+ crop_w = 112
35
+ resize_w = 128
36
+ crop_h = 112
37
+ resize_h = 171
38
+ nb_frames = 16
39
+ BATCH_SIZE = 10
40
+ EXTRACTED_LAYER = 6
41
+ RUN_GPU = False
42
+
43
+ #trainloader = Train_Data_Loader( VIDEO_DIR, resize_w=128, resize_h=171, crop_w = 112, crop_h = 112, nb_frames=16)
44
+
45
+
46
+ feature_dim = 4096 if EXTRACTED_LAYER != 5 else 8192
47
+
48
+ # read video list from the folder
49
+ if video_path:
50
+ video_list = [video_path]
51
+ else:
52
+ video_list = [f for f in os.listdir(VIDEO_DIR) if os.path.isfile(os.path.join(VIDEO_DIR, f))]
53
+
54
+
55
+ if not os.path.isdir(OUTPUT_DIR):
56
+ os.mkdir(OUTPUT_DIR)
57
+ # f = h5py.File(os.path.join(OUTPUT_DIR, OUTPUT_NAME), 'w')
58
+
59
+ # current location
60
+ temp_path = os.path.join(os.getcwd(), 'temp')
61
+ if not os.path.exists(temp_path):
62
+ os.mkdir(temp_path)
63
+
64
+ error_fid = open('error.txt', 'w')
65
+ for video_name in video_list:
66
+ output_file_name = video_name.split('.')[0] + '.hdf5'
67
+ print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
68
+ print(output_file_name)
69
+ f = h5py.File(os.path.join(OUTPUT_DIR, output_file_name), 'w')
70
+
71
+ video_path = os.path.join(VIDEO_DIR, video_name)
72
+ print('video_path', video_path)
73
+ frame_path = os.path.join(temp_path, video_name)
74
+ if not os.path.exists(frame_path):
75
+ os.mkdir(frame_path)
76
+
77
+
78
+ print('Extracting video frames ...')
79
+ # using ffmpeg to extract video frames into a temporary folder
80
+ # example: ffmpeg -i video_validation_0000051.mp4 -q:v 2 -f image2 output/image%5d.jpg
81
+ os.system('ffmpeg -i ' + video_path + ' -q:v 2 -f image2 ' + frame_path + '/image_%5d.jpg')
82
+
83
+
84
+ print('Extracting features ...')
85
+ total_frames = len(os.listdir(frame_path))
86
+ if total_frames == 0:
87
+ error_fid.write(video_name+'\n')
88
+ print('Fail to extract frames for video: %s'%video_name)
89
+ continue
90
+
91
+ valid_frames = total_frames // nb_frames * nb_frames
92
+ n_feat = valid_frames // nb_frames # 可提取的特征数,每个特征由 nb_frames 帧组成
93
+ n_batch = n_feat // BATCH_SIZE
94
+ if n_feat - n_batch*BATCH_SIZE > 0:
95
+ n_batch = n_batch + 1
96
+ print('n_frames: %d; n_feat: %d; n_batch: %d'%(total_frames, n_feat, n_batch))
97
+
98
+ #print 'Total frames: %d'%total_frames
99
+ #print 'Total validated frames: %d'%valid_frames
100
+ #print 'NB features: %d' %(valid_frames/nb_frames)
101
+ # 随机裁剪
102
+ index_w = np.random.randint(resize_w - crop_w) ## crop
103
+ index_h = np.random.randint(resize_h - crop_h) ## crop
104
+
105
+ features = []
106
+
107
+ for i in range(n_batch-1):
108
+ input_blobs = []
109
+ for j in range(BATCH_SIZE):
110
+ clip = []
111
+ clip = np.array([resize(io.imread(os.path.join(frame_path, 'image_{:05d}.jpg'.format(k))), output_shape=(resize_w, resize_h), preserve_range=True) for k in range((i*BATCH_SIZE+j) * nb_frames+1, min((i*BATCH_SIZE+j+1) * nb_frames+1, valid_frames+1))])
112
+ # print('clip_shape', clip.shape)
113
+ clip = clip[:, index_w: index_w+ crop_w, index_h: index_h+ crop_h, :]
114
+ #print('clip_shape',clip.shape)
115
+ #print('range', range((i*BATCH_SIZE+j) * nb_frames+1, min((i*BATCH_SIZE+j+1) * nb_frames+1, valid_frames+1)))
116
+ input_blobs.append(clip)
117
+ input_blobs = np.array(input_blobs, dtype='float32')
118
+ #print('input_blobs_shape', input_blobs.shape)
119
+ input_blobs = torch.from_numpy(np.float32(input_blobs.transpose(0, 4, 1, 2, 3)))
120
+ input_blobs = Variable(input_blobs).cuda() if RUN_GPU else Variable(input_blobs)
121
+ _, batch_output = net(input_blobs, EXTRACTED_LAYER) # 输入后提取某一层
122
+ batch_feature = (batch_output.data).cpu()
123
+ features.append(batch_feature)
124
+
125
+ # The last batch
126
+ input_blobs = []
127
+ for j in range(n_feat-(n_batch-1)*BATCH_SIZE):
128
+ clip = []
129
+ clip = np.array([resize(io.imread(os.path.join(frame_path, 'image_{:05d}.jpg'.format(k))), output_shape=(resize_w, resize_h), preserve_range=True) for k in range(((n_batch-1)*BATCH_SIZE+j) * nb_frames+1, min(((n_batch-1)*BATCH_SIZE+j+1) * nb_frames+1, valid_frames+1))])
130
+
131
+ clip = clip[:, index_w: index_w+ crop_w, index_h: index_h+ crop_h, :]
132
+ #print('range', range(((n_batch-1)*BATCH_SIZE+j) * nb_frames+1, min(((n_batch-1)*BATCH_SIZE+j+1) * nb_frames+1, valid_frames+1)))
133
+ input_blobs.append(clip)
134
+ input_blobs = np.array(input_blobs, dtype='float32')
135
+ #print('input_blobs_shape', input_blobs.shape)
136
+ input_blobs = torch.from_numpy(np.float32(input_blobs.transpose(0, 4, 1, 2, 3)))
137
+ input_blobs = Variable(input_blobs).cuda() if RUN_GPU else Variable(input_blobs)
138
+ _, batch_output = net(input_blobs, EXTRACTED_LAYER)
139
+ batch_feature = (batch_output.data).cpu()
140
+ features.append(batch_feature)
141
+
142
+ features = torch.cat(features, 0)
143
+ features = features.numpy()
144
+ print('features', features.shape)
145
+ fgroup = f.create_group(video_name.split('.')[0])
146
+ fgroup.create_dataset('c3d_features', data=features)
147
+ # fgroup.create_dataset('total_frames', data=np.array(total_frames))
148
+ # fgroup.create_dataset('valid_frames', data=np.array(valid_frames))
149
+
150
+ print('%s has been processed...'%video_name)
151
+
152
+
153
+ # clear temp frame folders
154
+ try:
155
+ os.system('rm -rf ' + frame_path)
156
+ except:
157
+ pass
158
+
159
+
160
+ # for i in range(valid_frames/nb_frames) :
161
+ # clip = np.array([resize(io.imread(os.path.join(video_path, 'image_{:05d}.jpg'.format(j))), output_shape=(resize_w, resize_h), preserve_range=True) for j in range(i * nb_frames+1, (i+1) * nb_frames+1)])
162
+ # #clip = np.array([resize(video.get_data(j), output_shape=(resize_w, resize_h), preserve_range=True) for j in range(i * nb_frames, (i+1) * nb_frames)])
163
+ # clip = clip[:, index_w: index_w+ crop_w, index_h: index_h+ crop_h, :]
164
+ # clip = torch.from_numpy(np.float32(clip.transpose(3, 0, 1, 2)))
165
+ # clip = Variable(clip).cuda() if RUN_GPU else Variable(clip)
166
+ # clip = clip.resize(1, 3, nb_frames, crop_w, crop_h)
167
+ # #print('clip', clip)
168
+ # _, clip_output = net(clip, EXTRACTED_LAYER)
169
+ # #print('clip_output', clip_output)
170
+ # clip_feature = (clip_output.data).cpu()
171
+ # features.append(clip_feature)
172
+ # #features[i] = np.array(clip_feature)
173
+ # features = torch.cat(features, 0)
174
+ # features = features.numpy()
175
+ # print('features', features)
176
+ # fgroup = f.create_group(video_name)
177
+ # fgroup.create_dataset('c3d_features', data=features)
178
+ # fgroup.create_dataset('total_frames', data=np.array(total_frames))
179
+ # fgroup.create_dataset('valid_frames', data=np.array(valid_frames))
180
+ #
181
+ # #with open(os.path.join(OUTPUT_DIR, video_name[:-4]), 'wb') as f :
182
+ # # pickle.dump( features, f )
183
+ # print '%s has been processed...'%video_name
184
+
185
+
186
+ if __name__ == "__main__":
187
+
188
+ # parser = argparse.ArgumentParser()
189
+ # parser.add_argument('-o', '--OUTPUT_DIR', dest='OUTPUT_DIR', type=str, default='./output_frm/', help='Output file name')
190
+ # parser.add_argument('-l', '--EXTRACTED_LAYER', dest='EXTRACTED_LAYER', type=int, choices=[5, 6, 7], default=6, help='Feature extractor layer')
191
+ # parser.add_argument('-i', '--VIDEO_DIR', dest='VIDEO_DIR', type = str, default='./raw_video/', help='Input Video directory')
192
+ # parser.add_argument('-v', '--VIDEO_PATH', dest='VIDEO_PATH', type=str, help='Path to a single video file to process')
193
+ # parser.add_argument('-gpu', '--gpu', dest='GPU', action = 'store_true', help='Run GPU?')
194
+ # # parser.add_argument('--OUTPUT_NAME', default='c3d_features.hdf5', help='The output name of the hdf5 features')
195
+ # parser.add_argument('-b', '--BATCH_SIZE', default=10, help='the batch size')
196
+ # parser.add_argument('-id', '--gpu_id', default=0, type=int)
197
+ # args = parser.parse_args()
198
+ # params = vars(args) # convert to ordinary dict
199
+ # print('parsed parameters:')
200
+
201
+ OUTPUT_DIR = './FakeVD/code/C3D_Feature_Extractor/output_frm'
202
+ VIDEO_DIR = './FakeVD/code/C3D_Feature_Extractor/raw_video'
203
+ VIDEO_PATH = 'douyin_6571001202379590925.mp4'
204
+ # OUTPUT_NAME = params['OUTPUT_NAME']
205
+
206
+ net = load_model_c3d()
207
+ feature_extractor(net, OUTPUT_DIR, VIDEO_DIR, video_path=VIDEO_PATH)
208
+
209
+
FakeVD/code_test/C3D_Feature_Extractor/output_frm/douyin_6571001202379590925.hdf5 ADDED
Binary file (248 kB). View file
 
FakeVD/code_test/C3D_Feature_Extractor/raw_video/douyin_6571001202379590925.mp4 ADDED
Binary file (820 kB). View file
 
FakeVD/code_test/C3D_Feature_Extractor/raw_video/douyin_6583481991964921092.mp4 ADDED
Binary file (386 kB). View file
 
FakeVD/code_test/main.py ADDED
@@ -0,0 +1,67 @@
+
+ import argparse
+ import os
+ import random
+ import warnings
+ warnings.filterwarnings('ignore')
+ import numpy as np
+ import torch
+ from run import Run
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--model_name', default='SVFEND', help='SVFEND/FANVM/C3D/VGG/Bbox/Vggish/Bert/TextCNN/Comments/TikTec')
+ parser.add_argument('--mode_eval', default='nocv', help='nocv/cv/temporal')
+ parser.add_argument('--fold', type=int, default=1, help='needed when mode_eval=nocv')
+
+ parser.add_argument('--epoches', type=int, default=30)
+ parser.add_argument('--batch_size', type=int, default=128)
+ parser.add_argument('--num_workers', type=int, default=0)
+ parser.add_argument('--epoch_stop', type=int, default=5)
+ parser.add_argument('--seed', type=int, default=2022)
+ parser.add_argument('--gpu', type=int, required=True)
+ parser.add_argument('--lr', type=float, default=0.0001)
+ parser.add_argument('--lambd', type=float, default=0.1)
+ parser.add_argument('--dropout', type=float, default=0.1)
+ parser.add_argument('--weight_decay', type=float, default=5e-5)
+
+ parser.add_argument('--path_param', default='./checkpoints/')
+ parser.add_argument('--path_tensorboard', default='./tb/')
+
+ args = parser.parse_args()
+
+ # os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
+
+ # Fix all random seeds for reproducibility.
+ seed = args.seed
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed(seed)
+ torch.backends.cudnn.benchmark = False
+ torch.backends.cudnn.deterministic = True
+
+ print(args)
+
+ config = {
+     'model_name': args.model_name,
+     'mode_eval': args.mode_eval,
+     'fold': args.fold,
+
+     'epoches': args.epoches,
+     'batch_size': args.batch_size,
+     'num_workers': args.num_workers,
+     'epoch_stop': args.epoch_stop,
+     'seed': args.seed,
+     'device': args.gpu,
+     'lr': args.lr,
+     'lambd': args.lambd,
+     'dropout': args.dropout,
+     'weight_decay': args.weight_decay,
+
+     'path_param': args.path_param,
+     'path_tensorboard': args.path_tensorboard,
+ }
+
+
+ if __name__ == '__main__':
+     Run(config=config).main()
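For reference (illustrative, not part of the commit): --gpu is the only required argument, so a typical run of this entry point looks like

python main.py --model_name SVFEND --gpu 0 --batch_size 128 --lr 0.0001

assuming it is launched from the directory that contains run.py (not included in this commit), since main.py imports Run from it.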
FakeVD/code_test/models/Baselines.py ADDED
@@ -0,0 +1,160 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from einops import rearrange
5
+ from transformers import BertModel
6
+ from .layers import Attention
7
+
8
+
9
+ class bBbox(torch.nn.Module):
10
+ def __init__(self,fea_dim):
11
+ super(bBbox, self).__init__()
12
+ self.img_dim = 4096
13
+ self.attention1 = Attention(dim=128,heads=4)
14
+ self.attention2 = Attention(dim=128,heads=4)
15
+
16
+ self.linear_img = nn.Sequential(torch.nn.Linear(self.img_dim, fea_dim),torch.nn.ReLU())
17
+
18
+ self.classifier = nn.Linear(fea_dim,2)
19
+
20
+ def forward(self, **kwargs):
21
+ frames=kwargs['bbox_vgg']
22
+ fea_img = self.linear_img(frames)
23
+ fea_img = torch.reshape(fea_img, (-1, 45, 128))
24
+ fea_img = self.attention1(fea_img)
25
+ fea_img = torch.mean(fea_img, -2)
26
+ fea_img = torch.reshape(fea_img, (-1, 83, 128))
27
+ fea_img = self.attention2(fea_img)
28
+ fea_img = torch.mean(fea_img, -2)
29
+ output = self.classifier(fea_img)
30
+ return output, fea_img
31
+
32
+ class bC3D(torch.nn.Module):
33
+ def __init__(self,fea_dim):
34
+ super(bC3D, self).__init__()
35
+ # self.video_dim = 4096
36
+ self.video_dim = 2048
37
+ self.attention = Attention(dim=128,heads=4)
38
+
39
+ self.linear_video = nn.Sequential(torch.nn.Linear(self.video_dim, fea_dim),torch.nn.ReLU())
40
+
41
+ self.classifier = nn.Linear(fea_dim,2)
42
+
43
+ def forward(self, **kwargs):
44
+ c3d = kwargs['c3d']
45
+ fea_video = self.linear_video(c3d)
46
+ fea_video = self.attention(fea_video)
47
+ fea_video = torch.mean(fea_video, -2)
48
+ output = self.classifier(fea_video)
49
+ return output
50
+
51
+ class bVGG(torch.nn.Module):
52
+ def __init__(self,fea_dim):
53
+ super(bVGG, self).__init__()
54
+ # self.img_dim = 4096
55
+ self.img_dim = 2048
56
+ self.attention = Attention(dim=128,heads=4)
57
+
58
+ self.linear_img = nn.Sequential(torch.nn.Linear(self.img_dim, fea_dim),torch.nn.ReLU())
59
+
60
+ self.classifier = nn.Linear(fea_dim,2)
61
+
62
+ def forward(self, **kwargs):
63
+ frames=kwargs['frames']
64
+ fea_img = self.linear_img(frames)
65
+ fea_img = self.attention(fea_img)
66
+ fea_img = torch.mean(fea_img, -2)
67
+ output = self.classifier(fea_img)
68
+ return output
69
+
70
+ class bVggish(torch.nn.Module):
71
+ def __init__(self,fea_dim):
72
+ super(bVggish, self).__init__()
73
+ # self.audio_dim = 128
74
+ self.attention = Attention(dim=128,heads=4)
75
+
76
+ self.vggish_layer = torch.hub.load('./torchvggish/', 'vggish', source = 'local')
77
+ net_structure = list(self.vggish_layer.children())
78
+ self.vggish_modified = nn.Sequential(*net_structure[-2:-1])
79
+
80
+ self.classifier = nn.Linear(fea_dim,2)
81
+
82
+ def forward(self, **kwargs):
83
+ audioframes=kwargs['audioframes']
84
+ fea_audio = self.vggish_modified(audioframes)
85
+ fea_audio = self.attention(fea_audio)
86
+ fea_audio = torch.mean(fea_audio, -2)
87
+ print (fea_audio.shape)
88
+ output = self.classifier(fea_audio)
89
+ return output, fea_audio
90
+
91
+
92
+ class bBert(torch.nn.Module):
93
+ def __init__(self,bert_model,fea_dim, dropout):
94
+ super(bBert, self).__init__()
95
+ self.text_dim = 768
96
+
97
+ self.bert = BertModel.from_pretrained(bert_model).requires_grad_(False)
98
+
99
+ self.linear_text = nn.Sequential(torch.nn.Linear(self.text_dim, fea_dim),torch.nn.ReLU())
100
+ self.classifier = nn.Linear(fea_dim,2)
101
+
102
+ def forward(self, **kwargs):
103
+ title_inputid = kwargs['title_inputid']
104
+ title_mask=kwargs['title_mask']
105
+ fea_text=self.bert(title_inputid,attention_mask=title_mask)[1]
106
+ fea_text=self.linear_text(fea_text)
107
+ output = self.classifier(fea_text)
108
+ return output,fea_text
109
+
110
+ class bTextCNN(nn.Module):
111
+ def __init__(self, fea_dim, vocab_size):
112
+ super(bTextCNN, self).__init__()
113
+ self.vocab_size = vocab_size
114
+ self.fea_dim=fea_dim
115
+
116
+ self.channel_in = 1
117
+ self.filter_num = 14
118
+ self.window_size = [3,4,5]
119
+
120
+ self.textcnn =nn.ModuleList([nn.Conv2d(self.channel_in, self.filter_num, (K,self.vocab_size)) for K in self.window_size])
121
+ self.linear = nn.Sequential(torch.nn.Linear(len(self.window_size) * self.filter_num, self.fea_dim),torch.nn.ReLU())
122
+ self.classifier = nn.Linear(self.fea_dim,2)
123
+
124
+ def forward(self, **kwargs):
125
+ title_w2v = kwargs['title_w2v']
126
+ text = title_w2v.unsqueeze(1)
127
+ text = [F.relu(conv(text)).squeeze(3) for conv in self.textcnn]
128
+ text = [F.max_pool1d(i.squeeze(2), i.shape[-1]).squeeze(2) for i in text]
129
+ fea_text = torch.cat(text, 1)
130
+ fea_text = self.linear(fea_text)
131
+
132
+ output = self.classifier(fea_text)
133
+
134
+ return output
135
+
136
+ class bComments(torch.nn.Module):
137
+ def __init__(self,bert_model,fea_dim):
138
+ super(bComments, self).__init__()
139
+ self.comment_dim = 768
140
+ self.bert = BertModel.from_pretrained(bert_model).requires_grad_(False)
141
+ self.attention = Attention(dim=128,heads=4)
142
+ self.linear_comment = nn.Sequential(torch.nn.Linear(self.comment_dim, fea_dim),torch.nn.ReLU())
143
+ self.classifier = nn.Linear(fea_dim,2)
144
+
145
+ def forward(self, **kwargs):
146
+ comments_inputid = kwargs['comments_inputid']
147
+ comments_mask=kwargs['comments_mask']
148
+ comments_feature=[]
149
+ for i in range(comments_inputid.shape[0]):
150
+ bert_fea=self.bert(comments_inputid[i], attention_mask=comments_mask[i])[1]
151
+ comments_feature.append(bert_fea)
152
+ comments_feature=torch.stack(comments_feature)
153
+ fea_comments=self.linear_comment(comments_feature)
154
+ print (fea_comments.shape)
155
+ fea_comments = self.attention(fea_comments)
156
+ fea_comments = torch.mean(fea_comments, -2)
157
+ output = self.classifier(fea_comments)
158
+ return output
159
+
160
+
FakeVD/code_test/models/FANVM.py ADDED
@@ -0,0 +1,133 @@
1
+
2
+
3
+ import os
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import tqdm
10
+ from sklearn.metrics import *
11
+ from transformers import BertModel
12
+ from zmq import device
13
+
14
+ from .layers import *
15
+
16
+
17
+ class TextCNN(nn.Module):
18
+ def __init__(self, fea_dim, vocab_size):
19
+ super(TextCNN, self).__init__()
20
+ self.vocab_size = vocab_size
21
+ self.fea_dim=fea_dim
22
+
23
+ self.channel_in = 1
24
+ self.filter_num = 14
25
+ self.window_size = [3,4,5]
26
+
27
+ self.textcnn =nn.ModuleList([nn.Conv2d(self.channel_in, self.filter_num, (K,self.vocab_size)) for K in self.window_size])
28
+ self.linear = nn.Sequential(torch.nn.Linear(len(self.window_size) * self.filter_num, self.fea_dim),torch.nn.ReLU())
29
+
30
+ def forward(self, inputs):
31
+ text = inputs.unsqueeze(1)
32
+ text = [F.relu(conv(text)).squeeze(3) for conv in self.textcnn]
33
+ text = [F.max_pool1d(i.squeeze(2), i.shape[-1]).squeeze(2) for i in text]
34
+ fea_text = torch.cat(text, 1)
35
+ fea_text = self.linear(fea_text)
36
+
37
+ return fea_text
38
+
39
+
40
+ class VideoEncoder(nn.Module):
41
+ def __init__(self,emb_dim,fea_dim):
42
+ super(VideoEncoder, self).__init__()
43
+
44
+ self.emb_dim = emb_dim
45
+ self.linear1 = torch.nn.Linear(self.emb_dim, self.emb_dim, bias=False)
46
+ self.linear2 = nn.Sequential(torch.nn.Linear(self.emb_dim, fea_dim),torch.nn.ReLU())
47
+
48
+ def forward(self, input_thumb, input_L):
49
+ input_ALL = torch.cat((input_L, input_thumb),1) #(bs,len+1,4096)
50
+ fea_A = torch.bmm(input_thumb,self.linear1(input_ALL).permute(0,2,1)) # (bs, 1, len+1)
51
+ fea_alpha = F.softmax(fea_A) # (bs, 1, len+1)
52
+ fea_V = torch.matmul(fea_alpha,input_ALL).squeeze() # (bs, 4096)
53
+ fea = self.linear2(fea_V)
54
+ return fea
55
+
56
+ class ReverseLayerF(Function):
57
+ #@staticmethod
58
+ def forward(self, x):
59
+ self.lambd = 1
60
+ return x.view_as(x)
61
+
62
+ #@staticmethod
63
+ def backward(self, grad_output):
64
+ return (grad_output * -self.lambd)
65
+
66
+ def grad_reverse(x):
67
+ return ReverseLayerF.apply(x)
68
+
69
+
70
+ class FANVMModel(torch.nn.Module):
71
+ def __init__(self,bert_model,fea_dim):
72
+ super(FANVMModel, self).__init__()
73
+ self.text_dim = 768
74
+ self.img_dim = 4096
75
+ self.topic_dim = 15
76
+
77
+ self.bert = BertModel.from_pretrained(bert_model).requires_grad_(False)
78
+ self.title_encoder = TextCNN(fea_dim, self.text_dim)
79
+ # self.comments_encoder = BiLSTM(self.text_dim,300,fea_dim)
80
+ self.video_encoder = VideoEncoder(self.img_dim,fea_dim)
81
+
82
+ self.gate_m1 = torch.nn.Linear(fea_dim*2,1)
83
+ self.gate_m2 = torch.nn.Linear(fea_dim*2,1)
84
+
85
+ self.classifier = nn.Linear(fea_dim*2,2)
86
+ self.classifier_topic = nn.Linear(fea_dim*3,self.topic_dim)
87
+
88
+ def forward(self, **kwargs):
89
+ title_inputid = kwargs['title_inputid']#(batch,512)
90
+ title_mask = kwargs['title_mask']#(batch,512)
91
+ fea_text = self.bert(title_inputid,attention_mask=title_mask)[0] #(bs,seq,768)
92
+ fea_text = self.title_encoder(fea_text)
93
+ fea_R = fea_text # (bs, 128)
94
+
95
+ comments_inputid = kwargs['comments_inputid']#(batch,20,250)
96
+ comments_mask=kwargs['comments_mask']#(batch,20,250)
97
+ comments_like=kwargs['comments_like']
98
+ comments_feature=[]
99
+ for i in range(comments_inputid.shape[0]):
100
+ bert_fea=self.bert(comments_inputid[i], attention_mask=comments_mask[i])[0]
101
+ comments_feature.append(self.comments_encoder(bert_fea))
102
+ comments_feature=torch.stack(comments_feature) #(batch,seq,fea_dim)
103
+ fea_comments =[]
104
+ for v in range(comments_like.shape[0]): # batch内循环
105
+ # print (reviews_like[v])
106
+ comments_weight=torch.stack([torch.true_divide((i+1),(comments_like[v].shape[0]+comments_like[v].sum())) for i in comments_like[v]])
107
+ comments_fea_reweight = torch.sum(comments_feature[v]*(comments_weight.reshape(comments_weight.shape[0],1)),dim=0)
108
+ fea_comments.append(comments_fea_reweight)
109
+ fea_comments = torch.stack(fea_comments)
110
+ fea_H = fea_comments # (bs, 600)
111
+
112
+ frames = kwargs['frames'] # (bs, 30, 4096)
113
+ frame_thumb = kwargs['frame_thmub'] # (bs,1,4096)
114
+ fea_video = self.video_encoder(frame_thumb, frames)
115
+ fea_V = fea_video # (bs, 128)
116
+
117
+ s = kwargs['s']
118
+
119
+ ## fusion: title, frames
120
+ m1 = self.gate_m1(torch.cat((fea_V, fea_R),1))
121
+ fea_P = torch.add(torch.mul(m1,fea_V),torch.mul((1-m1),fea_R))
122
+ ## fusion: comments, title
123
+ m2 = s.reshape((s.shape[0],1))
124
+ fea_E = torch.add(torch.mul(fea_H,m2),torch.mul(fea_R,(1-m2)))
125
+
126
+ fea_fnd = torch.cat((fea_P,fea_E),1).to(torch.float32)
127
+ output = self.classifier(fea_fnd)
128
+
129
+ fea_topic = torch.cat((fea_H, fea_R, fea_V),1)
130
+ fea_reverse = grad_reverse(fea_topic)
131
+ output_topic = self.classifier_topic(fea_reverse)
132
+
133
+ return output,output_topic,fea_fnd
FakeVD/code_test/models/SVFEND.py ADDED
@@ -0,0 +1,110 @@
1
+ import copy
2
+ import json
3
+ import os
4
+ import time
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torchvision.transforms as transforms
11
+ import tqdm
12
+ from sklearn.metrics import *
13
+ from tqdm import tqdm
14
+ from transformers import AutoConfig, BertModel
15
+ from transformers.models.bert.modeling_bert import BertLayer
16
+ from zmq import device
17
+
18
+ from .coattention import *
19
+ from .layers import *
20
+ from FakeVD.code_test.utils.metrics import *
21
+
22
+
23
+ class SVFENDModel(torch.nn.Module):
24
+ def __init__(self,bert_model,fea_dim,dropout):
25
+ super(SVFENDModel, self).__init__()
26
+ self.bert = BertModel.from_pretrained("./FakeVD/Models/bert-base-chinese/").requires_grad_(False)
27
+
28
+ self.text_dim = 768
29
+ self.comment_dim = 768
30
+ self.img_dim = 4096
31
+ self.video_dim = 4096
32
+ self.num_frames = 83
33
+ self.num_audioframes = 50
34
+ self.num_comments = 23
35
+ self.dim = fea_dim
36
+ self.num_heads = 4
37
+
38
+ self.dropout = dropout
39
+
40
+ self.vggish_layer = torch.hub.load('./FakeVD/Models/torchvggish/', 'vggish', source = 'local')
41
+ net_structure = list(self.vggish_layer.children())
42
+ self.vggish_modified = nn.Sequential(*net_structure[-2:-1])
43
+
44
+ self.co_attention_ta = co_attention(d_k=fea_dim, d_v=fea_dim, n_heads=self.num_heads, dropout=self.dropout, d_model=fea_dim,
45
+ visual_len=self.num_audioframes, sen_len=512, fea_v=self.dim, fea_s=self.dim, pos=False)
46
+ self.co_attention_tv = co_attention(d_k=fea_dim, d_v=fea_dim, n_heads=self.num_heads, dropout=self.dropout, d_model=fea_dim,
47
+ visual_len=self.num_frames, sen_len=512, fea_v=self.dim, fea_s=self.dim, pos=False)
48
+ self.trm = nn.TransformerEncoderLayer(d_model = self.dim, nhead = 2, batch_first = True)
49
+
50
+
51
+ self.linear_text = nn.Sequential(torch.nn.Linear(self.text_dim, fea_dim), torch.nn.ReLU(),nn.Dropout(p=self.dropout))
52
+ self.linear_comment = nn.Sequential(torch.nn.Linear(self.comment_dim, fea_dim), torch.nn.ReLU(),nn.Dropout(p=self.dropout))
53
+ self.linear_img = nn.Sequential(torch.nn.Linear(self.img_dim, fea_dim), torch.nn.ReLU(),nn.Dropout(p=self.dropout))
54
+ self.linear_video = nn.Sequential(torch.nn.Linear(self.video_dim, fea_dim), torch.nn.ReLU(),nn.Dropout(p=self.dropout))
55
+ self.linear_intro = nn.Sequential(torch.nn.Linear(self.text_dim, fea_dim),torch.nn.ReLU(),nn.Dropout(p=self.dropout))
56
+ self.linear_audio = nn.Sequential(torch.nn.Linear(fea_dim, fea_dim), torch.nn.ReLU(),nn.Dropout(p=self.dropout))
57
+
58
+ self.classifier = nn.Linear(fea_dim,2)
59
+
60
+ def forward(self, **kwargs):
61
+
62
+ ### User Intro ###
63
+
64
+
65
+ ### Title ###
66
+ title_inputid = kwargs['title_inputid']#(batch,512)
67
+ title_mask=kwargs['title_mask']#(batch,512)
68
+
69
+ fea_text=self.bert(title_inputid,attention_mask=title_mask)['last_hidden_state']#(batch,sequence,768)
70
+ fea_text=self.linear_text(fea_text)
71
+
72
+ ### Audio Frames ###
73
+ audioframes=kwargs['audioframes']#(batch,36,12288)
74
+ audioframes_masks = kwargs['audioframes_masks']
75
+ fea_audio = self.vggish_modified(audioframes) #(batch, frames, 128)
76
+ fea_audio = self.linear_audio(fea_audio)
77
+ fea_audio, fea_text = self.co_attention_ta(v=fea_audio, s=fea_text, v_len=fea_audio.shape[1], s_len=fea_text.shape[1])
78
+ fea_audio = torch.mean(fea_audio, -2)
79
+
80
+ ### Image Frames ###
81
+ frames=kwargs['frames']#(batch,30,4096)
82
+ frames_masks = kwargs['frames_masks']
83
+ fea_img = self.linear_img(frames)
84
+ fea_img, fea_text = self.co_attention_tv(v=fea_img, s=fea_text, v_len=fea_img.shape[1], s_len=fea_text.shape[1])
85
+ fea_img = torch.mean(fea_img, -2)
86
+
87
+ fea_text = torch.mean(fea_text, -2)
88
+
89
+ ### C3D ###
90
+ c3d = kwargs['c3d'] # (batch, 36, 4096)
91
+ c3d_masks = kwargs['c3d_masks']
92
+ fea_video = self.linear_video(c3d) #(batch, frames, 128)
93
+ fea_video = torch.mean(fea_video, -2)
94
+
95
+ ### Comment ###
96
+
97
+ fea_text = fea_text.unsqueeze(1)
98
+ # fea_comments = fea_comments.unsqueeze(1)
99
+ fea_img = fea_img.unsqueeze(1)
100
+ fea_audio = fea_audio.unsqueeze(1)
101
+ fea_video = fea_video.unsqueeze(1)
102
+ # fea_intro = fea_intro.unsqueeze(1)
103
+
104
+ fea=torch.cat((fea_text, fea_audio, fea_video,fea_img),1) # (bs, 6, 128)
105
+ fea = self.trm(fea)
106
+ fea = torch.mean(fea, -2)
107
+
108
+ output = self.classifier(fea)
109
+
110
+ return output, fea
FakeVD/code_test/models/TikTec.py ADDED
@@ -0,0 +1,140 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+ class MLP(nn.Module):
5
+ def __init__(self, input_dim, hidden_dims, output_dim, dropout):
6
+ super(MLP, self).__init__()
7
+ layers = list()
8
+ curr_dim = input_dim
9
+ for hidden_dim in hidden_dims:
10
+ layers.append(nn.Linear(curr_dim, hidden_dim))
11
+ layers.append(nn.BatchNorm1d(hidden_dim))
12
+ layers.append(nn.ReLU())
13
+ layers.append(nn.Dropout(p=dropout))
14
+ curr_dim = hidden_dim
15
+ layers.append(nn.Linear(curr_dim, output_dim))
16
+ self.mlp = nn.Sequential(*layers)
17
+
18
+ def forward(self, input):
19
+ return self.mlp(input)
20
+
21
+ class MaskAvg(nn.Module):
22
+ def __init__(self):
23
+ super(MaskAvg, self).__init__()
24
+
25
+ def forward(self, input, mask):
26
+ score = torch.ones((input.shape[0], input.shape[1]), device=input.device)
27
+ score = score.masked_fill(mask == 0, float('-inf'))
28
+ score = torch.softmax(score, dim=-1).unsqueeze(1)
29
+ output = torch.matmul(score, input).squeeze(1)
30
+ return output
31
+
32
+ class CVRL(nn.Module):
33
+ def __init__(self, d_w, d_f, obj_num, gru_dim):
34
+ super(CVRL, self).__init__()
35
+ self.gru = nn.GRU(d_w, gru_dim, batch_first=True, bidirectional=True)
36
+
37
+ self.linear_r = nn.Linear(d_f, 1)
38
+ self.linear_h = nn.Linear(2*gru_dim, obj_num)
39
+
40
+ def forward(self, caption_feature, visual_feature):
41
+ # IN: caption_feature: (bs, K, S, d_w), visual_feature: (bs, K, obj_num, d_f)
42
+ # OUT: frame_visual_rep: (bs, K, d_f)
43
+ encoded_caption, _ = self.gru(caption_feature.view(-1, caption_feature.shape[-2], caption_feature.shape[-1])) # (bs*K, S, 2*gru_dim)
44
+ encoded_caption = encoded_caption.view(-1, caption_feature.shape[-3], caption_feature.shape[-2], encoded_caption.shape[-1]) # (bs, K, S, 2*gru_dim)
45
+ frame_caption_rep = encoded_caption.max(dim=2).values # (bs, K, 2*gru_dim)
46
+
47
+ alpha = self.linear_r(visual_feature).squeeze() + self.linear_h(frame_caption_rep) # (bs, K, obj_num)
48
+ alpha = torch.softmax(torch.tanh(alpha), dim=-1).unsqueeze(dim=-2) # (bs, K, 1, obj_num)
49
+ frame_visual_rep = alpha.matmul(visual_feature) # (bs, K, 1, d_f)
50
+ frame_visual_rep = frame_visual_rep.squeeze() # (bs, K, d_f)
51
+ return frame_visual_rep
52
+
53
+ class ASRL(nn.Module):
54
+ def __init__(self, d_w, gru_dim):
55
+ super(ASRL, self).__init__()
56
+ self.gru = nn.GRU(d_w, gru_dim, batch_first=True, bidirectional=True)
57
+
58
+ def forward(self, asr_feature):
59
+ # IN: asr_feature: (bs, N, d_w)
60
+ # OUT: text_audio_rep: (bs, N, 2*gru_dim)
61
+ text_audio_rep, _ = self.gru(asr_feature)
62
+ return text_audio_rep
63
+
64
+ class VCIF(nn.Module):
65
+ def __init__(self, d_f, d_w, d_H, gru_f_dim, gru_w_dim, dropout):
66
+ super(VCIF, self).__init__()
67
+
68
+ self.param_D = nn.Parameter(torch.empty((d_f, d_w)))
69
+ self.param_Df = nn.Parameter(torch.empty((d_f, d_H)))
70
+ self.param_Dw = nn.Parameter(torch.empty((d_w, d_H)))
71
+ self.param_df = nn.Parameter(torch.empty(d_H))
72
+ self.param_dw = nn.Parameter(torch.empty(d_H))
73
+
74
+ self.gru_f = nn.GRU(d_f, gru_f_dim, batch_first=True)
75
+ self.gru_w = nn.GRU(d_w, gru_w_dim, batch_first=True)
76
+ self.mask_avg = MaskAvg()
77
+ self.dropout = nn.Dropout(p=dropout)
78
+
79
+ self.reset_parameters()
80
+
81
+ def reset_parameters(self):
82
+ nn.init.xavier_uniform_(self.param_D)
83
+ nn.init.xavier_uniform_(self.param_Df)
84
+ nn.init.xavier_uniform_(self.param_Dw)
85
+ nn.init.uniform_(self.param_df)
86
+ nn.init.uniform_(self.param_dw)
87
+
88
+ def forward(self, frame_visual_rep, text_audio_rep, mask_K, mask_N):
89
+ # IN: frame_visual_rep: (bs, K, d_f), text_audio_rep: (bs, N, d_w)
90
+ # OUT: video_rep: (bs, gru_f_dim + gru_w_dim)
91
+ affinity_matrix = torch.tanh(frame_visual_rep.matmul(self.param_D).matmul(text_audio_rep.transpose(-1, -2)))
92
+ affinity_matrix = self.dropout(affinity_matrix)
93
+
94
+ frame_co_att_map = torch.tanh(frame_visual_rep.matmul(self.param_Df) + affinity_matrix.matmul(text_audio_rep).matmul(self.param_Dw))
95
+ word_co_att_map = torch.tanh(text_audio_rep.matmul(self.param_Dw) + affinity_matrix.transpose(-1, -2).matmul(frame_visual_rep).matmul(self.param_Df))
96
+ frame_co_att_map = self.dropout(frame_co_att_map)
97
+ word_co_att_map = self.dropout(word_co_att_map)
98
+
99
+ frame_att_weight = torch.softmax(frame_co_att_map.matmul(self.param_df), dim=-1)
100
+ word_att_weight = torch.softmax(word_co_att_map.matmul(self.param_dw), dim=-1)
101
+
102
+ frame_visual_weighted_rep = frame_att_weight.unsqueeze(dim=-1) * frame_visual_rep
103
+ text_audio_weighted_rep = word_att_weight.unsqueeze(dim=-1) * text_audio_rep
104
+
105
+ encoded_visual_rep, _ = self.gru_f(frame_visual_weighted_rep)
106
+ encoded_speech_rep, _ = self.gru_w(text_audio_weighted_rep)
107
+
108
+ visual_rep = self.mask_avg(encoded_visual_rep, mask_K) # (bs, gru_f_dim)
109
+ speech_rep = self.mask_avg(encoded_speech_rep, mask_N) # (bs, gru_w_dim)
110
+
111
+ video_rep = torch.cat([visual_rep, speech_rep], dim=-1)
112
+ return video_rep
113
+
114
+ class TikTecModel(nn.Module):
115
+ def __init__(self, word_dim=300, mfcc_dim=650, visual_dim=1000, obj_num=45, CVRL_gru_dim=200, ASRL_gru_dim=500, VCIF_d_H=200, VCIF_gru_f_dim=200, VCIF_gru_w_dim=100, VCIF_dropout=0.2, MLP_hidden_dims=[512], MLP_dropout=0.2):
116
+ super(TikTecModel, self).__init__()
117
+ self.CVRL = CVRL(d_w=word_dim, d_f=visual_dim, obj_num=obj_num, gru_dim=CVRL_gru_dim)
118
+ self.ASRL = ASRL(d_w=(word_dim + mfcc_dim), gru_dim=ASRL_gru_dim)
119
+ self.VCIF = VCIF(d_f=visual_dim, d_w=2*ASRL_gru_dim, d_H=VCIF_d_H, gru_f_dim=VCIF_gru_f_dim, gru_w_dim=VCIF_gru_w_dim, dropout=VCIF_dropout)
120
+ self.MLP = MLP(VCIF_gru_f_dim + VCIF_gru_w_dim, MLP_hidden_dims, 2, MLP_dropout)
121
+
122
+ def forward(self, **kwargs):
123
+ # IN:
124
+ # caption_feature: (bs, K, S, word_dim) = (bs, 200, 100, 300)
125
+ # visual_feature: (bs, K, obj_num, visual_dim) = (bs, 200, 45, 1000)
126
+ # asr_feature: (bs, N, word_dim + mfcc_dim) = (bs, 500, 300 + 650)
127
+ # mask_K: (bs, K) = (bs, 200)
128
+ # mask_N: (bs, N) = (bs, 500)
129
+ # OUT: (bs, 2)
130
+ caption_feature = kwargs['caption_feature']
131
+ visual_feature = kwargs['visual_feature']
132
+ asr_feature = kwargs['asr_feature']
133
+ mask_K = kwargs['mask_K']
134
+ mask_N = kwargs['mask_N']
135
+
136
+ frame_visual_rep = self.CVRL(caption_feature, visual_feature)
137
+ text_audio_rep = self.ASRL(asr_feature)
138
+ video_rep = self.VCIF(frame_visual_rep, text_audio_rep, mask_K, mask_N)
139
+ output = self.MLP(video_rep)
140
+ return output
FakeVD/code_test/models/Trainer.py ADDED
@@ -0,0 +1,235 @@
1
+ import copy
2
+ import json
3
+ import os
4
+ import time
5
+ from tkinter import E
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ import torchvision.transforms as transforms
12
+ import tqdm
13
+ from sklearn.metrics import *
14
+ from tqdm import tqdm
15
+ from transformers import BertModel
16
+ from FakeVD.code_test.utils.metrics import *
17
+ from zmq import device
18
+
19
+ from .coattention import *
20
+ from .layers import *
21
+
22
+
23
+
24
+ class Trainer():
25
+ def __init__(self,
26
+ model,
27
+ device,
28
+ lr,
29
+ dropout,
30
+ dataloaders,
31
+ weight_decay,
32
+ save_param_path,
33
+ writer,
34
+ epoch_stop,
35
+ epoches,
36
+ mode,
37
+ model_name,
38
+ event_num,
39
+ save_threshold = 0.0,
40
+ start_epoch = 0,
41
+ ):
42
+
43
+ self.model = model
44
+ self.device = device
45
+ self.mode = mode
46
+ self.model_name = model_name
47
+ self.event_num = event_num
48
+
49
+ self.dataloaders = dataloaders
50
+ self.start_epoch = start_epoch
51
+ self.num_epochs = epoches
52
+ self.epoch_stop = epoch_stop
53
+ self.save_threshold = save_threshold
54
+ self.writer = writer
55
+
56
+ if os.path.exists(save_param_path):
57
+ self.save_param_path = save_param_path
58
+ else:
59
+ self.save_param_path = os.makedirs(save_param_path)
60
+ self.save_param_path= save_param_path
61
+
62
+ self.lr = lr
63
+ self.weight_decay = weight_decay
64
+ self.dropout = dropout
65
+
66
+ self.criterion = nn.CrossEntropyLoss()
67
+
68
+
69
+ def train(self):
70
+
71
+ since = time.time()
72
+
73
+ self.model.cuda()
74
+
75
+ best_model_wts_test = copy.deepcopy(self.model.state_dict())
76
+ best_acc_test = 0.0
77
+ best_epoch_test = 0
78
+ is_earlystop = False
79
+
80
+ if self.mode == "eann":
81
+ best_acc_test_event = 0.0
82
+ best_epoch_test_event = 0
83
+
84
+ for epoch in range(self.start_epoch, self.start_epoch+self.num_epochs):
85
+ if is_earlystop:
86
+ break
87
+ print('-' * 50)
88
+ print('Epoch {}/{}'.format(epoch+1, self.start_epoch+self.num_epochs))
89
+ print('-' * 50)
90
+
91
+ p = float(epoch) / 100
92
+ lr = self.lr / (1. + 10 * p) ** 0.75
93
+ self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=lr)
94
+
95
+ for phase in ['train', 'test']:
96
+ if phase == 'train':
97
+ self.model.train()
98
+ else:
99
+ self.model.eval()
100
+ print('-' * 10)
101
+ print (phase.upper())
102
+ print('-' * 10)
103
+
104
+ running_loss_fnd = 0.0
105
+ running_loss = 0.0
106
+ tpred = []
107
+ tlabel = []
108
+
109
+ if self.mode == "eann":
110
+ running_loss_event = 0.0
111
+ tpred_event = []
112
+ tlabel_event = []
113
+
114
+ for batch in tqdm(self.dataloaders[phase]):
115
+ batch_data=batch
116
+ for k,v in batch_data.items():
117
+ batch_data[k]=v.cuda()
118
+ label = batch_data['label']
119
+ if self.mode == "eann":
120
+ label_event = batch_data['label_event']
121
+
122
+
123
+ with torch.set_grad_enabled(phase == 'train'):
124
+ if self.mode == "eann":
125
+ outputs, outputs_event,fea = self.model(**batch_data)
126
+ loss_fnd = self.criterion(outputs, label)
127
+ loss_event = self.criterion(outputs_event, label_event)
128
+ loss = loss_fnd + loss_event
129
+ _, preds = torch.max(outputs, 1)
130
+ _, preds_event = torch.max(outputs_event, 1)
131
+ else:
132
+ outputs,fea = self.model(**batch_data)
133
+ _, preds = torch.max(outputs, 1)
134
+ loss = self.criterion(outputs, label)
135
+
136
+ if phase == 'train':
137
+ loss.backward()
138
+ self.optimizer.step()
139
+ self.optimizer.zero_grad()
140
+
141
+ tlabel.extend(label.detach().cpu().numpy().tolist())
142
+ tpred.extend(preds.detach().cpu().numpy().tolist())
143
+ running_loss += loss.item() * label.size(0)
144
+
145
+ if self.mode == "eann":
146
+ tlabel_event.extend(label_event.detach().cpu().numpy().tolist())
147
+ tpred_event.extend(preds_event.detach().cpu().numpy().tolist())
148
+ running_loss_event += loss_event.item() * label_event.size(0)
149
+ running_loss_fnd += loss_fnd.item() * label.size(0)
150
+
151
+ epoch_loss = running_loss / len(self.dataloaders[phase].dataset)
152
+ print('Loss: {:.4f} '.format(epoch_loss))
153
+ results = metrics(tlabel, tpred)
154
+ print (results)
155
+ self.writer.add_scalar('Loss/'+phase, epoch_loss, epoch+1)
156
+ self.writer.add_scalar('Acc/'+phase, results['acc'], epoch+1)
157
+ self.writer.add_scalar('F1/'+phase, results['f1'], epoch+1)
158
+
159
+ if self.mode == "eann":
160
+ epoch_loss_fnd = running_loss_fnd / len(self.dataloaders[phase].dataset)
161
+ print('Loss_fnd: {:.4f} '.format(epoch_loss_fnd))
162
+ epoch_loss_event = running_loss_event / len(self.dataloaders[phase].dataset)
163
+ print('Loss_event: {:.4f} '.format(epoch_loss_event))
164
+ self.writer.add_scalar('Loss_fnd/'+phase, epoch_loss_fnd, epoch+1)
165
+ self.writer.add_scalar('Loss_event/'+phase, epoch_loss_event, epoch+1)
166
+
167
+ if phase == 'test':
168
+ if results['acc'] > best_acc_test:
169
+ best_acc_test = results['acc']
170
+ best_model_wts_test = copy.deepcopy(self.model.state_dict())
171
+ best_epoch_test = epoch+1
172
+ if best_acc_test > self.save_threshold:
173
+ torch.save(self.model.state_dict(), self.save_param_path + "_test_epoch" + str(best_epoch_test) + "_{0:.4f}".format(best_acc_test))
174
+ print ("saved " + self.save_param_path + "_test_epoch" + str(best_epoch_test) + "_{0:.4f}".format(best_acc_test) )
175
+ else:
176
+ if epoch-best_epoch_test >= self.epoch_stop-1:
177
+ is_earlystop = True
178
+ print ("early stopping...")
179
+
180
+ time_elapsed = time.time() - since
181
+ print('Training complete in {:.0f}m {:.0f}s'.format(
182
+ time_elapsed // 60, time_elapsed % 60))
183
+ print("Best model on test: epoch" + str(best_epoch_test) + "_" + str(best_acc_test))
184
+
185
+ if self.mode == "eann":
186
+ print("Event: Best model on test: epoch" + str(best_epoch_test_event) + "_" + str(best_acc_test_event))
187
+
188
+ self.model.load_state_dict(best_model_wts_test)
189
+ return self.test()
190
+
191
+
192
+ def test(self):
193
+ since = time.time()
194
+
195
+ self.model.cuda()
196
+ self.model.eval()
197
+
198
+ pred = []
199
+ label = []
200
+
201
+ if self.mode == "eann":
202
+ pred_event = []
203
+ label_event = []
204
+
205
+ for batch in tqdm(self.dataloaders['test']):
206
+ with torch.no_grad():
207
+ batch_data=batch
208
+ for k,v in batch_data.items():
209
+ batch_data[k]=v.cuda()
210
+ batch_label = batch_data['label']
211
+
212
+ if self.mode == "eann":
213
+ batch_label_event = batch_data['label_event']
214
+ batch_outputs, batch_outputs_event, fea = self.model(**batch_data)
215
+ _, batch_preds_event = torch.max(batch_outputs_event, 1)
216
+
217
+ label_event.extend(batch_label_event.detach().cpu().numpy().tolist())
218
+ pred_event.extend(batch_preds_event.detach().cpu().numpy().tolist())
219
+ else:
220
+ batch_outputs,fea = self.model(**batch_data)
221
+
222
+ _, batch_preds = torch.max(batch_outputs, 1)
223
+
224
+ label.extend(batch_label.detach().cpu().numpy().tolist())
225
+ pred.extend(batch_preds.detach().cpu().numpy().tolist())
226
+
227
+ print (get_confusionmatrix_fnd(np.array(pred), np.array(label)))
228
+ print (metrics(label, pred))
229
+
230
+ if self.mode == "eann" and self.model_name != "FANVM":
231
+ print ("event:")
232
+ print (accuracy_score(np.array(label_event), np.array(pred_event)))
233
+
234
+ return metrics(label, pred)
235
+
FakeVD/code_test/models/Trainer_3set.py ADDED
@@ -0,0 +1,241 @@
1
+ import copy
2
+ import json
3
+ import os
4
+ import time
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torchvision.transforms as transforms
11
+ import tqdm
12
+ from sklearn.metrics import *
13
+ from tqdm import tqdm
14
+ from transformers import BertModel
15
+ from FakeVD.code_test.utils.metrics import *
16
+ from zmq import device
17
+
18
+ from .coattention import *
19
+ from .layers import *
20
+
21
+
22
+ class Trainer3():
23
+ def __init__(self,
24
+ model,
25
+ device,
26
+ lr,
27
+ dropout,
28
+ dataloaders,
29
+ weight_decay,
30
+ save_param_path,
31
+ writer,
32
+ epoch_stop,
33
+ epoches,
34
+ mode,
35
+ model_name,
36
+ event_num,
37
+ save_threshold = 0.0,
38
+ start_epoch = 0,
39
+ ):
40
+
41
+ self.model = model
42
+
43
+ self.device = device
44
+ self.mode = mode
45
+ self.model_name = model_name
46
+ self.event_num = event_num
47
+
48
+ self.dataloaders = dataloaders
49
+ self.start_epoch = start_epoch
50
+ self.num_epochs = epoches
51
+ self.epoch_stop = epoch_stop
52
+ self.save_threshold = save_threshold
53
+ self.writer = writer
54
+
55
+ if os.path.exists(save_param_path):
56
+ self.save_param_path = save_param_path
57
+ else:
58
+ self.save_param_path = os.makedirs(save_param_path)
59
+ self.save_param_path= save_param_path
60
+
61
+ self.lr = lr
62
+ self.weight_decay = weight_decay
63
+ self.dropout = dropout
64
+
65
+ self.criterion = nn.CrossEntropyLoss()
66
+
67
+
68
+ def train(self):
69
+
70
+ since = time.time()
71
+
72
+ self.model.cuda()
73
+
74
+ best_model_wts_val = copy.deepcopy(self.model.state_dict())
75
+ best_acc_val = 0.0
76
+ best_epoch_val = 0
77
+
78
+ is_earlystop = False
79
+
80
+ if self.mode == "eann":
81
+ best_acc_val_event = 0.0
82
+ best_epoch_val_event = 0
83
+
84
+ for epoch in range(self.start_epoch, self.start_epoch+self.num_epochs):
85
+ if is_earlystop:
86
+ break
87
+ print('-' * 50)
88
+ print('Epoch {}/{}'.format(epoch+1, self.start_epoch+self.num_epochs))
89
+ print('-' * 50)
90
+
91
+ p = float(epoch) / 100
92
+ lr = self.lr / (1. + 10 * p) ** 0.75
93
+ self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=lr)
94
+
95
+ for phase in ['train', 'val', 'test']:
96
+ if phase == 'train':
97
+ self.model.train()
98
+ else:
99
+ self.model.eval()
100
+ print('-' * 10)
101
+ print (phase.upper())
102
+ print('-' * 10)
103
+
104
+ running_loss_fnd = 0.0
105
+ running_loss = 0.0
106
+ tpred = []
107
+ tlabel = []
108
+
109
+ if self.mode == "eann":
110
+ running_loss_event = 0.0
111
+ tpred_event = []
112
+ tlabel_event = []
113
+
114
+ for batch in tqdm(self.dataloaders[phase]):
115
+ batch_data=batch
116
+ for k,v in batch_data.items():
117
+ batch_data[k]=v.cuda()
118
+ label = batch_data['label']
119
+ if self.mode == "eann":
120
+ label_event = batch_data['label_event']
121
+
122
+ self.optimizer.zero_grad()
123
+
124
+ with torch.set_grad_enabled(phase == 'train'):
125
+ if self.mode == "eann":
126
+ outputs, outputs_event,fea = self.model(**batch_data)
127
+ loss_fnd = self.criterion(outputs, label)
128
+ loss_event = self.criterion(outputs_event, label_event)
129
+ loss = loss_fnd + loss_event
130
+ _, preds = torch.max(outputs, 1)
131
+ _, preds_event = torch.max(outputs_event, 1)
132
+ else:
133
+ outputs,fea = self.model(**batch_data)
134
+ _, preds = torch.max(outputs, 1)
135
+ loss = self.criterion(outputs, label)
136
+
137
+ if phase == 'train':
138
+ loss.backward()
139
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
140
+ self.optimizer.step()
141
+ self.optimizer.zero_grad()
142
+
143
+ tlabel.extend(label.detach().cpu().numpy().tolist())
144
+ tpred.extend(preds.detach().cpu().numpy().tolist())
145
+ running_loss += loss.item() * label.size(0)
146
+
147
+ if self.mode == "eann":
148
+ tlabel_event.extend(label_event.detach().cpu().numpy().tolist())
149
+ tpred_event.extend(preds_event.detach().cpu().numpy().tolist())
150
+ running_loss_event += loss_event.item() * label_event.size(0)
151
+ running_loss_fnd += loss_fnd.item() * label.size(0)
152
+
153
+ epoch_loss = running_loss / len(self.dataloaders[phase].dataset)
154
+ print('Loss: {:.4f} '.format(epoch_loss))
155
+ results = metrics(tlabel, tpred)
156
+ print (results)
157
+ self.writer.add_scalar('Loss/'+phase, epoch_loss, epoch+1)
158
+ self.writer.add_scalar('Acc/'+phase, results['acc'], epoch+1)
159
+ self.writer.add_scalar('F1/'+phase, results['f1'], epoch+1)
160
+
161
+ if self.mode == "eann":
162
+ epoch_loss_fnd = running_loss_fnd / len(self.dataloaders[phase].dataset)
163
+ print('Loss_fnd: {:.4f} '.format(epoch_loss_fnd))
164
+ epoch_loss_event = running_loss_event / len(self.dataloaders[phase].dataset)
165
+ print('Loss_event: {:.4f} '.format(epoch_loss_event))
166
+ self.writer.add_scalar('Loss_fnd/'+phase, epoch_loss_fnd, epoch+1)
167
+ self.writer.add_scalar('Loss_event/'+phase, epoch_loss_event, epoch+1)
168
+
169
+ if phase == 'val' and results['acc'] > best_acc_val:
170
+ best_acc_val = results['acc']
171
+ best_model_wts_val = copy.deepcopy(self.model.state_dict())
172
+ best_epoch_val = epoch+1
173
+ if best_acc_val > self.save_threshold:
174
+ torch.save(self.model.state_dict(), self.save_param_path + "_val_epoch" + str(best_epoch_val) + "_{0:.4f}".format(best_acc_val))
175
+ print ("saved " + self.save_param_path + "_val_epoch" + str(best_epoch_val) + "_{0:.4f}".format(best_acc_val) )
176
+ else:
177
+ if epoch-best_epoch_val >= self.epoch_stop-1:
178
+ is_earlystop = True
179
+ print ("early stopping...")
180
+
181
+ time_elapsed = time.time() - since
182
+ print('Training complete in {:.0f}m {:.0f}s'.format(
183
+ time_elapsed // 60, time_elapsed % 60))
184
+ print("Best model on val: epoch" + str(best_epoch_val) + "_" + str(best_acc_val))
185
+
186
+ if self.mode == "eann":
187
+ print("Event: Best model on val: epoch" + str(best_epoch_val_event) + "_" + str(best_acc_val_event))
188
+
189
+
190
+ self.model.load_state_dict(best_model_wts_val)
191
+
192
+ print ("test result when using best model on val")
193
+ return self.test()
194
+
195
+
196
+
197
+ def test(self):
198
+ since = time.time()
199
+
200
+ self.model.cuda()
201
+ self.model.eval()
202
+
203
+ pred = []
204
+ label = []
205
+
206
+ if self.mode == "eann":
207
+ pred_event = []
208
+ label_event = []
209
+
210
+ for batch in tqdm(self.dataloaders['test']):
211
+ with torch.no_grad():
212
+ batch_data=batch
213
+ for k,v in batch_data.items():
214
+ batch_data[k]=v.cuda()
215
+ batch_label = batch_data['label']
216
+
217
+ if self.mode == "eann":
218
+ batch_label_event = batch_data['label_event']
219
+ batch_outputs, batch_outputs_event, fea = self.model(**batch_data)
220
+ _, batch_preds_event = torch.max(batch_outputs_event, 1)
221
+
222
+ label_event.extend(batch_label_event.detach().cpu().numpy().tolist())
223
+ pred_event.extend(batch_preds_event.detach().cpu().numpy().tolist())
224
+ else:
225
+ batch_outputs,fea = self.model(**batch_data)
226
+
227
+ _, batch_preds = torch.max(batch_outputs, 1)
228
+
229
+ label.extend(batch_label.detach().cpu().numpy().tolist())
230
+ pred.extend(batch_preds.detach().cpu().numpy().tolist())
231
+
232
+
233
+ print (get_confusionmatrix_fnd(np.array(pred), np.array(label)))
234
+ print (metrics(label, pred))
235
+
236
+ if self.mode == "eann" and self.model_name != "FANVM":
237
+ print ("event:")
238
+ print (accuracy_score(np.array(label_event), np.array(pred_event)))
239
+
240
+ return metrics(label, pred)
241
+
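Note that Trainer.train() above rebuilds the Adam optimizer at every epoch with a decayed learning rate, lr / (1 + 10 * p) ** 0.75 with p = epoch / 100. A minimal sketch of that schedule, assuming a hypothetical base rate of 1e-4 (in practice the value comes from the run config):

# Hypothetical illustration of the per-epoch learning-rate decay used in Trainer.train()
base_lr = 1e-4                            # assumed base rate; config['lr'] in practice
for epoch in (0, 10, 30, 90):
    p = float(epoch) / 100
    lr = base_lr / (1. + 10 * p) ** 0.75
    print(epoch, lr)                      # 1.0e-4, ~5.9e-5, ~3.5e-5, ~1.8e-5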
FakeVD/code_test/models/coattention.py ADDED
@@ -0,0 +1,122 @@
1
+
2
+ import torch.nn as nn
3
+
4
+ from .trm import *
5
+
6
+
7
+ class _MultiHeadAttention(nn.Module):
8
+ def __init__(self, d_k, d_v, d_model, n_heads, dropout):
9
+ super(_MultiHeadAttention, self).__init__()
10
+ self.d_k = d_k
11
+ self.d_v = d_v
12
+ self.d_model = d_model
13
+ self.n_heads = n_heads
14
+
15
+ self.w_q = Linear(d_model, d_k * n_heads)
16
+ self.w_k = Linear(d_model, d_k * n_heads)
17
+ self.w_v = Linear(d_model, d_v * n_heads)
18
+
19
+ def forward(self, q, k, v):
20
+ # q: [b_size x len_q x d_model]
21
+ # k: [b_size x len_k x d_model]
22
+ # v: [b_size x len_k x d_model]
23
+ b_size = q.size(0)
24
+
25
+ # q_s: [b_size x n_heads x len_q x d_k]
26
+ # k_s: [b_size x n_heads x len_k x d_k]
27
+ # v_s: [b_size x n_heads x len_k x d_v]
28
+ q_s = self.w_q(q).view(b_size, -1, self.n_heads, self.d_k).transpose(1, 2)
29
+ k_s = self.w_k(k).view(b_size, -1, self.n_heads, self.d_k).transpose(1, 2)
30
+ v_s = self.w_v(v).view(b_size, -1, self.n_heads, self.d_v).transpose(1, 2)
31
+ return q_s, k_s, v_s
32
+
33
+ class PoswiseFeedForwardNet(nn.Module):
34
+ def __init__(self, d_model, d_ff, dropout=0.1):
35
+ super(PoswiseFeedForwardNet, self).__init__()
36
+ self.relu = nn.ReLU()
37
+ self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
38
+ self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
39
+ self.dropout = nn.Dropout(dropout)
40
+ self.layer_norm = LayerNormalization(d_model)
41
+
42
+ def forward(self, inputs):
43
+ # inputs: [b_size x len_q x d_model]
44
+ residual = inputs
45
+ output = self.relu(self.conv1(inputs.transpose(1, 2)))
46
+
47
+ # outputs: [b_size x len_q x d_model]
48
+ output = self.conv2(output).transpose(1, 2)
49
+ output = self.dropout(output)
50
+
51
+ return self.layer_norm(residual + output)
52
+
53
+ class MultiHeadAttention(nn.Module):
54
+ def __init__(self, d_k, d_v, n_heads, dropout, d_model, visual_len, sen_len, fea_v, fea_s, pos):
55
+ super(MultiHeadAttention, self).__init__()
56
+ self.n_heads = n_heads
57
+ self.multihead_attn_v = _MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
58
+ self.multihead_attn_s = _MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)
59
+ self.pos_emb_v = PosEncoding(visual_len * 10, d_model)
60
+ self.pos_emb_s = PosEncoding(sen_len * 10, d_model)
61
+ self.linear_v = nn.Linear(in_features=fea_v, out_features=d_model)
62
+ self.linear_s = nn.Linear(in_features=fea_s, out_features=d_model)
63
+ self.proj_v = Linear(n_heads * d_v, d_model)
64
+ self.proj_s = Linear(n_heads * d_v, d_model)
65
+ self.d_v = d_v
66
+ self.dropout = nn.Dropout(dropout)
67
+ self.layer_norm_v = LayerNormalization(d_model)
68
+ self.layer_norm_s = LayerNormalization(d_model)
69
+ self.attention = ScaledDotProductAttention(d_k, dropout)
70
+ self.pos = pos
71
+
72
+ def forward(self, v, s, v_len, s_len):
73
+ b_size = v.size(0)
74
+ # q: [b_size x len_q x d_model]
75
+ # k: [b_size x len_k x d_model]
76
+ # v: [b_size x len_v x d_model] note (len_k == len_v)
77
+ v, s = self.linear_v(v), self.linear_s(s)
78
+ if self.pos:
79
+ pos_v, pos_s = self.pos_emb_v(v_len), self.pos_emb_s(s_len)
80
+ residual_v, residual_s = v + pos_v, s + pos_s
81
+ else:
82
+ residual_v, residual_s = v, s
83
+ # context: a tensor of shape [b_size x len_q x n_heads * d_v]
84
+ q_v, k_v, v_v = self.multihead_attn_v(v, v, v)
85
+ q_s, k_s, v_s = self.multihead_attn_s(s, s, s)
86
+ context_v, attn_v = self.attention(q_v, k_s, v_s)
87
+ context_s, attn_s = self.attention(q_s, k_v, v_v)
88
+ context_v = context_v.transpose(1, 2).contiguous().view(b_size, -1, self.n_heads * self.d_v)
89
+ context_s = context_s.transpose(1, 2).contiguous().view(b_size, -1, self.n_heads * self.d_v)
90
+ # project back to the residual size, outputs: [b_size x len_q x d_model]
91
+ output_v = self.dropout(self.proj_v(context_v))
92
+ output_s = self.dropout(self.proj_s(context_s))
93
+ return self.layer_norm_v(residual_v + output_v), self.layer_norm_s(residual_s + output_s)
94
+
95
+ class co_attention(nn.Module):
96
+ def __init__(self, d_k, d_v, n_heads, dropout, d_model, visual_len, sen_len, fea_v, fea_s, pos):
97
+ super(co_attention, self).__init__()
98
+ # self.layer_num = layer_num
99
+ # self.multi_head = MultiHeadAttention(d_k=d_k, d_v=d_v, n_heads=n_heads, dropout=dropout, d_model=d_model,
100
+ # visual_len=visual_len, sen_len=sen_len, fea_v=fea_v, fea_s=fea_s, pos=False)
101
+ # self.PoswiseFeedForwardNet_v = nn.ModuleList([PoswiseFeedForwardNet(d_model=d_model, d_ff=256)])
102
+ # self.PoswiseFeedForwardNet_s = nn.ModuleList([PoswiseFeedForwardNet(d_model=d_model, d_ff=256)])
103
+ # self.multi_head = nn.ModuleList([MultiHeadAttention(d_k=d_k, d_v=d_v, n_heads=n_heads, dropout=dropout, d_model=d_model,
104
+ # visual_len=visual_len, sen_len=sen_len, fea_v=fea_v, fea_s=fea_s, pos=False)])
105
+ # for i in range(1, layer_num):
106
+ # self.PoswiseFeedForwardNet_v.append(PoswiseFeedForwardNet(d_model=d_model, d_ff=256))
107
+ # self.PoswiseFeedForwardNet_s.append(PoswiseFeedForwardNet(d_model=d_model, d_ff=256))
108
+ # self.multi_head.append(MultiHeadAttention(d_k=d_k, d_v=d_v, n_heads=n_heads, dropout=dropout, d_model=d_model,
109
+ # visual_len=visual_len, sen_len=sen_len, fea_v=d_model, fea_s=d_model, pos=True))
110
+ self.multi_head = MultiHeadAttention(d_k=d_k, d_v=d_v, n_heads=n_heads, dropout=dropout, d_model=d_model,
111
+ visual_len=visual_len, sen_len=sen_len, fea_v=fea_v, fea_s=fea_s, pos=pos)
112
+ self.PoswiseFeedForwardNet_v = PoswiseFeedForwardNet(d_model=d_model, d_ff=128, dropout=dropout)
113
+ self.PoswiseFeedForwardNet_s = PoswiseFeedForwardNet(d_model=d_model, d_ff=128,dropout=dropout)
114
+ def forward(self, v, s, v_len, s_len):
115
+ # for i in range(self.layer_num):
116
+ # v, s = self.multi_head[i](v, s, v_len, s_len)
117
+ # v = self.PoswiseFeedForwardNet_v[i](v)
118
+ # s = self.PoswiseFeedForwardNet_s[i](s)
119
+ v, s = self.multi_head(v, s, v_len, s_len)
120
+ v = self.PoswiseFeedForwardNet_v(v)
121
+ s = self.PoswiseFeedForwardNet_s(s)
122
+ return v, s
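A minimal usage sketch of the co_attention block defined above, assuming the repo root is on PYTHONPATH; the dimensions are illustrative (4096-d frame features and 768-d token features projected into a shared 128-d space), not values prescribed by this repo:

import torch
from FakeVD.code_test.models.coattention import co_attention

co_attn = co_attention(d_k=64, d_v=64, n_heads=4, dropout=0.1, d_model=128,
                       visual_len=83, sen_len=512, fea_v=4096, fea_s=768, pos=False)
v = torch.randn(2, 83, 4096)    # per-frame visual features (assumed shapes)
s = torch.randn(2, 512, 768)    # per-token text features (assumed shapes)
v_out, s_out = co_attn(v, s, v_len=None, s_len=None)   # lengths are ignored when pos=False
print(v_out.shape, s_out.shape)  # torch.Size([2, 83, 128]) torch.Size([2, 512, 128])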
FakeVD/code_test/models/layers.py ADDED
@@ -0,0 +1,54 @@
1
+ import math
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ from einops import rearrange
8
+ from torch.autograd import Function
9
+
10
+
11
+ class ReverseLayerF(Function):
12
+ @staticmethod
13
+ def forward(ctx, input_, alpha):
14
+ ctx.alpha = alpha
15
+ return input_
16
+
17
+ @staticmethod
18
+ def backward(ctx, grad_output):
19
+ output = grad_output.neg() * ctx.alpha
20
+ return output, None
21
+
22
+
23
+ class Attention(nn.Module):
24
+ def __init__(self, dim, heads = 2, dim_head = 64, dropout = 0.):
25
+ super().__init__()
26
+ inner_dim = dim_head * heads
27
+ project_out = not (heads == 1 and dim_head == dim)
28
+
29
+ self.heads = heads
30
+ self.scale = dim_head ** -0.5
31
+
32
+ self.attend = nn.Softmax(dim = -1)
33
+ self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
34
+
35
+ self.to_out = nn.Sequential(
36
+ nn.Linear(inner_dim, dim),
37
+ nn.Dropout(dropout)
38
+ ) if project_out else nn.Identity()
39
+
40
+ def forward(self, x):
41
+ qkv = self.to_qkv(x).chunk(3, dim = -1)
42
+ q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv)
43
+
44
+ dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
45
+
46
+ attn = self.attend(dots)
47
+
48
+ out = torch.matmul(attn, v)
49
+ out = rearrange(out, 'b h n d -> b n (h d)')
50
+ return self.to_out(out)
51
+
52
+
53
+
54
+
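Two quick sketches for the helpers above, with hypothetical shapes: ReverseLayerF is an identity in the forward pass and flips the gradient sign (scaled by alpha) in the backward pass, the kind of gradient reversal typically used for event-adversarial training; Attention is a standard multi-head self-attention block.

import torch
from FakeVD.code_test.models.layers import ReverseLayerF, Attention

x = torch.randn(4, 16, requires_grad=True)
y = ReverseLayerF.apply(x, 1.0)       # forward pass: identity
y.sum().backward()
print(x.grad.unique())                # tensor([-1.]): gradients are negated

attn = Attention(dim=128, heads=2, dim_head=64, dropout=0.0)
out = attn(torch.randn(2, 10, 128))   # self-attention over a length-10 sequence
print(out.shape)                      # torch.Size([2, 10, 128])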
FakeVD/code_test/models/trm.py ADDED
@@ -0,0 +1,80 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.init as init
6
+
7
+
8
+ class Linear(nn.Module):
9
+ def __init__(self, in_features, out_features, bias=True):
10
+ super(Linear, self).__init__()
11
+ self.linear = nn.Linear(in_features, out_features, bias=bias)
12
+ init.xavier_normal_(self.linear.weight)
13
+ init.zeros_(self.linear.bias)
14
+
15
+ def forward(self, inputs):
16
+ return self.linear(inputs)
17
+
18
+
19
+ class ScaledDotProductAttention(nn.Module):
20
+ def __init__(self, d_k, dropout=.1):
21
+ super(ScaledDotProductAttention, self).__init__()
22
+ self.scale_factor = np.sqrt(d_k)
23
+ self.softmax = nn.Softmax(dim=-1)
24
+ self.dropout = nn.Dropout(dropout)
25
+
26
+ def forward(self, q, k, v, attn_mask=None):
27
+ # q: [b_size x n_heads x len_q x d_k]
28
+ # k: [b_size x n_heads x len_k x d_k]
29
+ # v: [b_size x n_heads x len_v x d_v] note: (len_k == len_v)
30
+
31
+ # attn: [b_size x n_heads x len_q x len_k]
32
+ scores = torch.matmul(q, k.transpose(-1, -2)) / self.scale_factor
33
+ if attn_mask is not None:
34
+ assert attn_mask.size() == scores.size()
35
+ scores.masked_fill_(attn_mask, -1e9)
36
+ attn = self.dropout(self.softmax(scores))
37
+
38
+ # outputs: [b_size x n_heads x len_q x d_v]
39
+ context = torch.matmul(attn, v)
40
+
41
+ return context, attn
42
+
43
+
44
+ class LayerNormalization(nn.Module):
45
+ def __init__(self, d_hid, eps=1e-6):
46
+ super(LayerNormalization, self).__init__()
47
+ self.gamma = nn.Parameter(torch.ones(d_hid))
48
+ self.beta = nn.Parameter(torch.zeros(d_hid))
49
+ self.eps = eps
50
+
51
+ def forward(self, z):
52
+ mean = z.mean(dim=-1, keepdim=True,)
53
+ std = z.std(dim=-1, keepdim=True,)
54
+ ln_out = (z - mean) / (std + self.eps)
55
+ ln_out = self.gamma * ln_out + self.beta
56
+
57
+ return ln_out
58
+
59
+
60
+ class PosEncoding(nn.Module):
61
+ def __init__(self, max_seq_len, d_word_vec):
62
+ super(PosEncoding, self).__init__()
63
+ pos_enc = np.array(
64
+ [[pos / np.power(10000, 2.0 * (j // 2) / d_word_vec) for j in range(d_word_vec)]
65
+ for pos in range(max_seq_len)])
66
+ pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2])
67
+ pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2])
68
+ pad_row = np.zeros([1, d_word_vec])
69
+ pos_enc = np.concatenate([pad_row, pos_enc]).astype(np.float32)
70
+
71
+ # additional single row for PAD idx
72
+ self.pos_enc = nn.Embedding(max_seq_len + 1, d_word_vec)
73
+ # fix positional encoding: exclude weight from grad computation
74
+ self.pos_enc.weight = nn.Parameter(torch.from_numpy(pos_enc), requires_grad=False)
75
+ self.max_len = int(max_seq_len/10)
76
+ def forward(self, input_len):
77
+ max_len = self.max_len # torch.max(input_len)
78
+ tensor = torch.cuda.LongTensor if input_len.is_cuda else torch.LongTensor
79
+ input_pos = tensor([list(range(1, int(length) + 1)) + [0] * (max_len - int(length)) for length in input_len])
80
+ return self.pos_enc(input_pos)
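A small sketch of the attention primitive defined above, with hypothetical shapes (batch of 2, 4 heads, query length 10, key/value length 12, d_k = d_v = 64); it assumes the repo root is importable:

import torch
from FakeVD.code_test.models.trm import ScaledDotProductAttention, LayerNormalization

attn = ScaledDotProductAttention(d_k=64, dropout=0.0)
q = torch.randn(2, 4, 10, 64)             # [b_size, n_heads, len_q, d_k]
k = torch.randn(2, 4, 12, 64)             # [b_size, n_heads, len_k, d_k]
v = torch.randn(2, 4, 12, 64)             # [b_size, n_heads, len_k, d_v]
context, attn_weights = attn(q, k, v)
print(context.shape, attn_weights.shape)  # [2, 4, 10, 64] and [2, 4, 10, 12]

ln = LayerNormalization(d_hid=64)
print(ln(context).shape)                  # unchanged: [2, 4, 10, 64]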
FakeVD/code_test/predict.py ADDED
@@ -0,0 +1,162 @@
1
+ import os
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from torch.utils.data import DataLoader
5
+ import numpy as np
6
+
7
+ from tqdm import tqdm
8
+ from FakeVD.code_test.utils.metrics import *
9
+
10
+ from FakeVD.code_test.models.SVFEND import SVFENDModel
11
+ from FakeVD.code_test.utils.dataloader import SVFENDDataset
12
+ from FakeVD.code_test.run import _init_fn, SVFEND_collate_fn
13
+
14
+ # from VGGish_Feature_Extractor.my_vggish_folder_fun import vggish_audio
15
+ from FakeVD.code_test.VGGish_Feature_Extractor.my_vggish_fun import vggish_audio, load_model_vggish
16
+ from FakeVD.code_test.VGG19_Feature_Extractor.vgg19_feature import process_video as vgg19_frame
17
+ from FakeVD.code_test.VGG19_Feature_Extractor.vgg19_feature import load_model_vgg19
18
+ from FakeVD.code_test.C3D_Feature_Extractor.feature_extractor_vid import feature_extractor as c3d_video
19
+ from FakeVD.code_test.C3D_Feature_Extractor.feature_extractor_vid import load_model_c3d
20
+ from FakeVD.code_test.Text_Feature_Extractor.main import video_work as asr_text
21
+ from FakeVD.code_test.Text_Feature_Extractor.wav2text import wav2text
22
+
23
+ def load_model(checkpoint_path):
24
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
+ model = SVFENDModel(bert_model='bert-base-chinese', fea_dim=128,dropout=0.1)
26
+ # model.load_state_dict(torch.load(checkpoint_path))
27
+ model.load_state_dict(torch.load(checkpoint_path, map_location=device), strict=False)
28
+ model.eval()
29
+ return model
30
+
31
+ def get_model(checkpoint_path='./FakeVD/code_test/checkpoints/SVFEND/SVFEND/_test_epoch4_0.7943'):
32
+ # Load the detection model; checkpoint_path is where the checkpoint is stored
33
+ model_main = load_model(checkpoint_path)
34
+ model_vggish = load_model_vggish()
35
+ model_vgg19 = load_model_vgg19()
36
+ model_c3d = load_model_c3d()
37
+ model_text = wav2text()
38
+
39
+ models = {
40
+ 'model_main': model_main,
41
+ 'model_vggish': model_vggish,
42
+ 'model_vgg19': model_vgg19,
43
+ 'model_c3d' : model_c3d,
44
+ 'model_text' : model_text
45
+ }
46
+
47
+ return models
48
+
49
+
50
+
51
+ # label = 0 if item['annotation']=='真' else 1
52
+ def test(model, dataloader):
53
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
54
+ model.to(device)
55
+ # model.cuda()
56
+ model.eval()
57
+
58
+ pred = []
59
+ label = []
60
+ prob = []
61
+
62
+ for batch in tqdm(dataloader):
63
+ with torch.no_grad():
64
+ batch_data = batch
65
+ for k, v in batch_data.items():
66
+ batch_data[k] = v.to(device)
67
+ batch_label = batch_data['label']
68
+
69
+ batch_outputs, fea = model(**batch_data)
70
+
71
+ _, batch_preds = torch.max(batch_outputs, 1)
72
+
73
+ softmax_probs = F.softmax(batch_outputs, dim=1) # compute softmax probabilities
74
+
75
+ label.extend(batch_label.detach().cpu().numpy().tolist())
76
+ pred.extend(batch_preds.detach().cpu().numpy().tolist())
77
+ prob.extend(softmax_probs.detach().cpu().numpy().tolist()) # collect softmax probabilities
78
+
79
+ return (label, pred, prob)
80
+
81
+ def main(models,
82
+ video_file_path,
83
+ preprocessed_flag=False,
84
+ feature_path='./FakeVD/code_test/preprocessed_feature'):
85
+ # preprocessed_flag: whether the video has already been preprocessed
86
+ # feature_path: directory where the extracted features are stored
87
+
88
+ # Fetch the models
89
+ model_main = models['model_main']
90
+ model_vggish = models['model_vggish']
91
+ model_vgg19 = models['model_vgg19']
92
+ model_c3d = models['model_c3d']
93
+ model_text = models['model_text']
94
+
95
+ # Get the directory containing the video
96
+ video_folder_path = os.path.dirname(video_file_path)
97
+
98
+ # Get the video file name (with extension)
99
+ video_file_name = os.path.basename(video_file_path)
100
+
101
+ # Use the file name without its extension as the video ID
102
+ vids = []
103
+ vid = os.path.splitext(video_file_name)[0]
104
+ vids.append(vid)
105
+ # video_file_name = os.path.basename(video_file_path)
106
+ # vids.append(os.path.splitext(video_file_name)[0])
107
+ # # vids.append(video_file_name.split('_')[1].split('.')[0]
108
+
109
+ # VGGish audio feature path
110
+ VGGish_audio_feature_path = os.path.join(feature_path, vid+'.pkl')
111
+ # C3D video feature directory
112
+ C3D_video_feature_path = os.path.join(feature_path, 'C3D/')
113
+ # VGG19 frame feature directory
114
+ VGG19_frame_feature_path = os.path.join(feature_path, 'VGG19/')
115
+ # ASR text feature path
116
+ asr_text_feature_path = os.path.join(feature_path, 'ASR/'+vid+'.json')
117
+
118
+ # Feature extraction
119
+ if not preprocessed_flag:
120
+ vggish_audio(model_vggish, video_file_path, VGGish_audio_feature_path)
121
+ vgg19_frame(model_vgg19, video_file_name, video_folder_path, VGG19_frame_feature_path)
122
+ c3d_video(model_c3d, C3D_video_feature_path, video_folder_path, video_file_name)
123
+ asr_text(model_text, model_vggish, video_file_path, asr_text_feature_path)
124
+
125
+ # Data paths
126
+ data = vids
127
+ data_paths = {
128
+ 'VGGish_audio' : VGGish_audio_feature_path,
129
+ 'C3D_video' : C3D_video_feature_path,
130
+ 'VGG19_frame' : VGG19_frame_feature_path,
131
+ 'ASR_text' : asr_text_feature_path
132
+ }
133
+
134
+ # Build the Dataset and DataLoader
135
+ dataset = SVFENDDataset(data, data_paths)
136
+
137
+ dataloader=DataLoader(dataset, batch_size=1,
138
+ num_workers=0,
139
+ pin_memory=True,
140
+ shuffle=False,
141
+ worker_init_fn=_init_fn,
142
+ collate_fn=SVFEND_collate_fn)
143
+
144
+ # Run prediction
145
+ predictions = test(model_main, dataloader)
146
+ annotation = '真' if predictions[1][0]==0 else '假'
147
+ prob_softmax = predictions[2]
148
+ # annotation_prob = max(prob_softmax[0])
149
+ annotation_prob = prob_softmax[0][0] # probability of being real
150
+ annotation_prob1 = prob_softmax[0][1] # probability of being fake
151
+ # Print the prediction
152
+ print(annotation, annotation_prob, annotation_prob1)
153
+
154
+ return annotation_prob1
155
+
156
+
157
+ if __name__ == "__main__":
158
+ # Whether the video has already been preprocessed
159
+ preprocessed_flag = False
160
+ video_file_path = "./FakeVD/dataset/videos_1/douyin_6700861687563570439.mp4"
161
+ models = get_model()
162
+ main(models, video_file_path, preprocessed_flag)
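For reference, a hedged sketch of driving the prediction pipeline from another script instead of the __main__ block; it assumes the default checkpoint and feature directories exist and that the video's features were already cached by a previous run (hence preprocessed_flag=True):

from FakeVD.code_test.predict import get_model, main

models = get_model()   # SVFEND detector plus the VGGish / VGG19 / C3D / ASR extractors
fake_prob = main(models,
                 "./FakeVD/dataset/videos_1/douyin_6700861687563570439.mp4",
                 preprocessed_flag=True)   # skip feature extraction, reuse cached features
print(f"probability the video is fake: {fake_prob:.4f}")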
FakeVD/code_test/run.py ADDED
@@ -0,0 +1,500 @@
1
+
2
+ import collections
3
+ import json
4
+ import os
5
+ import time
6
+
7
+ import torch
8
+ from torch.utils.data import DataLoader
9
+ from torch.utils.tensorboard import SummaryWriter
10
+ # from gensim.models import KeyedVectors
11
+
12
+ from FakeVD.code_test.models.Baselines import *
13
+ from FakeVD.code_test.models.FANVM import FANVMModel
14
+ from FakeVD.code_test.models.SVFEND import SVFENDModel
15
+ from FakeVD.code_test.models.TikTec import TikTecModel
16
+
17
+ from FakeVD.code_test.utils.dataloader import *
18
+ from FakeVD.code_test.models.Trainer import Trainer
19
+ from FakeVD.code_test.models.Trainer_3set import Trainer3
20
+
21
+
22
+ def pad_sequence(seq_len,lst, emb):
23
+ result=[]
24
+ for video in lst:
25
+ if isinstance(video, list):
26
+ video = torch.stack(video)
27
+ ori_len=video.shape[0]
28
+ if ori_len == 0:
29
+ video = torch.zeros([seq_len,emb],dtype=torch.long)
30
+ elif ori_len>=seq_len:
31
+ if emb == 200:
32
+ video=torch.FloatTensor(video[:seq_len])
33
+ else:
34
+ video=torch.LongTensor(video[:seq_len])
35
+ else:
36
+ video=torch.cat([video,torch.zeros([seq_len-ori_len,video.shape[1]],dtype=torch.long)],dim=0)
37
+ if emb == 200:
38
+ video=torch.FloatTensor(video)
39
+ else:
40
+ video=torch.LongTensor(video)
41
+ result.append(video)
42
+ return torch.stack(result)
43
+
44
+ def pad_sequence_bbox(seq_len,lst):
45
+ result=[]
46
+ for video in lst:
47
+ if isinstance(video, list):
48
+ video = torch.stack(video)
49
+ ori_len=video.shape[0]
50
+ if ori_len == 0:
51
+ video = torch.zeros([seq_len,45,4096],dtype=torch.float)
52
+ elif ori_len>=seq_len:
53
+ video=torch.FloatTensor(video[:seq_len])
54
+ else:
55
+ video=torch.cat([video,torch.zeros([seq_len-ori_len,45,4096],dtype=torch.float)],dim=0)
56
+ result.append(video)
57
+ return torch.stack(result)
58
+
59
+ def pad_frame_sequence(seq_len,lst):
60
+ attention_masks = []
61
+ result=[]
62
+ for video in lst:
63
+ video=torch.FloatTensor(video)
64
+ ori_len=video.shape[0]
65
+ if ori_len>=seq_len:
66
+ gap=ori_len//seq_len
67
+ video=video[::gap][:seq_len]
68
+ mask = np.ones((seq_len))
69
+ else:
70
+ video=torch.cat((video,torch.zeros([seq_len-ori_len,video.shape[1]],dtype=torch.float)),dim=0)
71
+ mask = np.append(np.ones(ori_len), np.zeros(seq_len-ori_len))
72
+ result.append(video)
73
+ mask = torch.IntTensor(mask)
74
+ attention_masks.append(mask)
75
+ return torch.stack(result), torch.stack(attention_masks)
76
+
77
+
78
+ def _init_fn(worker_id):
79
+ np.random.seed(2022)
80
+
81
+ def SVFEND_collate_fn(batch):
82
+ num_frames = 83
83
+ num_audioframes = 50
84
+
85
+ title_inputid = [item['title_inputid'] for item in batch]
86
+ title_mask = [item['title_mask'] for item in batch]
87
+
88
+ frames = [item['frames'] for item in batch]
89
+ frames, frames_masks = pad_frame_sequence(num_frames, frames)
90
+
91
+ audioframes = [item['audioframes'] for item in batch]
92
+ audioframes, audioframes_masks = pad_frame_sequence(num_audioframes, audioframes)
93
+
94
+ c3d = [item['c3d'] for item in batch]
95
+ c3d, c3d_masks = pad_frame_sequence(num_frames, c3d)
96
+
97
+ label = [item['label'] for item in batch]
98
+
99
+ return {
100
+ 'label': torch.stack(label),
101
+ 'title_inputid': torch.stack(title_inputid),
102
+ 'title_mask': torch.stack(title_mask),
103
+ 'audioframes': audioframes,
104
+ 'audioframes_masks': audioframes_masks,
105
+ 'frames':frames,
106
+ 'frames_masks': frames_masks,
107
+ 'c3d': c3d,
108
+ 'c3d_masks': c3d_masks,
109
+ }
110
+
111
+ def FANVM_collate_fn(batch):
112
+ num_comments = 23
113
+ num_frames = 83
114
+
115
+ title_inputid = [item['title_inputid'] for item in batch]
116
+ title_mask = [item['title_mask'] for item in batch]
117
+
118
+ comments_like = [item['comments_like'] for item in batch]
119
+ comments_inputid = [item['comments_inputid'] for item in batch]
120
+ comments_mask = [item['comments_mask'] for item in batch]
121
+
122
+ comments_inputid_resorted = []
123
+ comments_mask_resorted = []
124
+ comments_like_resorted = []
125
+
126
+ for idx in range(len(comments_like)):
127
+ comments_like_one = comments_like[idx]
128
+ comments_inputid_one = comments_inputid[idx]
129
+ comments_mask_one = comments_mask[idx]
130
+ if comments_like_one.shape != torch.Size([0]):
131
+ comments_inputid_one, comments_mask_one, comments_like_one = (list(t) for t in zip(*sorted(zip(comments_inputid_one, comments_mask_one, comments_like_one), key=lambda s: s[2], reverse=True)))
132
+ comments_inputid_resorted.append(comments_inputid_one)
133
+ comments_mask_resorted.append(comments_mask_one)
134
+ comments_like_resorted.append(comments_like_one)
135
+
136
+ comments_inputid = pad_sequence(num_comments,comments_inputid_resorted,250)
137
+ comments_mask = pad_sequence(num_comments,comments_mask_resorted,250)
138
+ comments_like=[]
139
+ for idx in range(len(comments_like_resorted)):
140
+ comments_like_resorted_one = comments_like_resorted[idx]
141
+ if len(comments_like_resorted_one)>=num_comments:
142
+ comments_like.append(torch.tensor(comments_like_resorted_one[:num_comments]))
143
+ else:
144
+ if isinstance(comments_like_resorted_one, list):
145
+ comments_like.append(torch.tensor(comments_like_resorted_one+[0]*(num_comments-len(comments_like_resorted_one))))
146
+ else:
147
+ comments_like.append(torch.tensor(comments_like_resorted_one.tolist()+[0]*(num_comments-len(comments_like_resorted_one))))
148
+
149
+ frames = [item['frames'] for item in batch]
150
+ frames, frames_masks = pad_frame_sequence(num_frames, frames)
151
+ frame_thmub = [item['frame_thmub'] for item in batch]
152
+
153
+ label = [item['label'] for item in batch]
154
+ label_event = [item['label_event'] for item in batch]
155
+ s = [item['s'] for item in batch]
156
+
157
+ return {
158
+ 'label': torch.stack(label),
159
+ 'title_inputid': torch.stack(title_inputid),
160
+ 'title_mask': torch.stack(title_mask),
161
+ 'comments_inputid': comments_inputid,
162
+ 'comments_mask': comments_mask,
163
+ 'comments_like': torch.stack(comments_like),
164
+ 'frames':frames,
165
+ 'frames_masks': frames_masks,
166
+ 'frame_thmub': torch.stack(frame_thmub),
167
+ 's': torch.stack(s),
168
+ 'label_event':torch.stack(label_event),
169
+ }
170
+
171
+ def bbox_collate_fn(batch):
172
+ num_frames = 83
173
+
174
+ bbox_vgg = [item['bbox_vgg'] for item in batch]
175
+ bbox_vgg = pad_sequence_bbox(num_frames,bbox_vgg)
176
+
177
+ label = [item['label'] for item in batch]
178
+
179
+ return {
180
+ 'label': torch.stack(label),
181
+ 'bbox_vgg': bbox_vgg,
182
+ }
183
+
184
+ def c3d_collate_fn(batch):
185
+ num_frames = 83
186
+
187
+ c3d = [item['c3d'] for item in batch]
188
+ c3d, c3d_masks = pad_frame_sequence(num_frames, c3d)
189
+
190
+ label = [item['label'] for item in batch]
191
+
192
+ return {
193
+ 'label': torch.stack(label),
194
+ 'c3d': c3d,
195
+ 'c3d_masks': c3d_masks,
196
+ }
197
+
198
+ def vgg_collate_fn(batch):
199
+ num_frames = 83
200
+
201
+ frames = [item['frames'] for item in batch]
202
+ frames, frames_masks = pad_frame_sequence(num_frames, frames)
203
+
204
+ label = [item['label'] for item in batch]
205
+
206
+ return {
207
+ 'label': torch.stack(label),
208
+ 'frames':frames,
209
+ 'frames_masks': frames_masks,
210
+ }
211
+
212
+ def comments_collate_fn(batch):
213
+ num_comments = 23
214
+
215
+ comments_like = [item['comments_like'] for item in batch]
216
+ comments_inputid = [item['comments_inputid'] for item in batch]
217
+ comments_mask = [item['comments_mask'] for item in batch]
218
+
219
+ comments_inputid_resorted = []
220
+ comments_mask_resorted = []
221
+ comments_like_resorted = []
222
+
223
+ for idx in range(len(comments_like)):
224
+ comments_like_one = comments_like[idx]
225
+ comments_inputid_one = comments_inputid[idx]
226
+ comments_mask_one = comments_mask[idx]
227
+ if comments_like_one.shape != torch.Size([0]):
228
+ comments_inputid_one, comments_mask_one, comments_like_one = (list(t) for t in zip(*sorted(zip(comments_inputid_one, comments_mask_one, comments_like_one), key=lambda s: s[2], reverse=True)))
229
+ comments_inputid_resorted.append(comments_inputid_one)
230
+ comments_mask_resorted.append(comments_mask_one)
231
+ comments_like_resorted.append(comments_like_one)
232
+
233
+ comments_inputid = pad_sequence(num_comments,comments_inputid_resorted,250)
234
+ comments_mask = pad_sequence(num_comments,comments_mask_resorted,250)
235
+ comments_like=[]
236
+ for idx in range(len(comments_like_resorted)):
237
+ comments_like_resorted_one = comments_like_resorted[idx]
238
+ if len(comments_like_resorted_one)>=num_comments:
239
+ comments_like.append(torch.tensor(comments_like_resorted_one[:num_comments]))
240
+ else:
241
+ if isinstance(comments_like_resorted_one, list):
242
+ comments_like.append(torch.tensor(comments_like_resorted_one+[0]*(num_comments-len(comments_like_resorted_one))))
243
+ else:
244
+ comments_like.append(torch.tensor(comments_like_resorted_one.tolist()+[0]*(num_comments-len(comments_like_resorted_one))))
245
+
246
+ label = [item['label'] for item in batch]
247
+
248
+ return {
249
+ 'label': torch.stack(label),
250
+ 'comments_inputid': comments_inputid,
251
+ 'comments_mask': comments_mask,
252
+ 'comments_like': torch.stack(comments_like),
253
+ }
254
+
255
+ def title_w2v_collate_fn(batch):
256
+ length_title = 128
257
+ title_w2v = [item['title_w2v'] for item in batch]
258
+ title_w2v = pad_sequence(length_title, title_w2v, 100)
259
+
260
+ label = [item['label'] for item in batch]
261
+
262
+ return {
263
+ 'label': torch.stack(label),
264
+ 'title_w2v': title_w2v,
265
+ }
266
+
267
+ def tictec_collate_fn(batch):
268
+ """
269
+ 将一批样本组合成一个批次。
270
+
271
+ Args:
272
+ batch (list of dict): 包含单个样本的列表,每个样本是一个字典,包含 'label'、'caption_feature'、'visual_feature'、'asr_feature'、'mask_K' 和 'mask_N'。
273
+
274
+ Returns:
275
+ dict: 包含批次数据的字典,'labels' 是一个张量,其他特征和掩码也是张量。
276
+ """
277
+ num_frames = 83
278
+
279
+
280
+ labels = torch.stack([item['label'] for item in batch])
281
+ caption_features = torch.stack([item['caption_feature'] for item in batch])
282
+ visual_features = torch.stack([item['visual_feature'] for item in batch])
283
+ asr_features = torch.stack([item['asr_feature'] for item in batch])
284
+ mask_Ks = torch.stack([item['mask_K'] for item in batch])
285
+ mask_Ns = torch.stack([item['mask_N'] for item in batch])
286
+
287
+ return {
288
+ 'label': labels,
289
+ 'caption_feature': caption_features,
290
+ 'visual_feature': visual_features,
291
+ 'asr_feature': asr_features,
292
+ 'mask_K': mask_Ks,
293
+ 'mask_N': mask_Ns,
294
+ }
295
+
296
+
297
+ class Run():
298
+ def __init__(self,
299
+ config
300
+ ):
301
+
302
+ self.model_name = config['model_name']
303
+ self.mode_eval = config['mode_eval']
304
+ self.fold = config['fold']
305
+ self.data_type = 'SVFEND'
306
+
307
+ self.epoches = config['epoches']
308
+ self.batch_size = config['batch_size']
309
+ self.num_workers = config['num_workers']
310
+ self.epoch_stop = config['epoch_stop']
311
+ self.seed = config['seed']
312
+ self.device = config['device']
313
+ self.lr = config['lr']
314
+ self.lambd=config['lambd']
315
+ self.save_param_dir = config['path_param']
316
+ self.path_tensorboard = config['path_tensorboard']
317
+ self.dropout = config['dropout']
318
+ self.weight_decay = config['weight_decay']
319
+ self.event_num = 616
320
+ self.mode ='normal'
321
+
322
+
323
+ def get_dataloader(self,data_type,data_fold):
324
+ collate_fn=None
325
+
326
+ if data_type=='SVFEND':
327
+ dataset_train = SVFENDDataset('vid_fold_1.txt')
328
+ dataset_test = SVFENDDataset('vid_fold_2.txt')
329
+ collate_fn=SVFEND_collate_fn
330
+ elif data_type=='FANVM':
331
+ dataset_train = FANVMDataset_train(f'vid_fold_no_{data_fold}.txt')
332
+ dataset_test = FANVMDataset_test(path_vid_train=f'vid_fold_no_{data_fold}.txt', path_vid_test=f'vid_fold_{data_fold}.txt')
333
+ collate_fn = FANVM_collate_fn
334
+ elif data_type=='c3d':
335
+ dataset_train = C3DDataset(f'vid_fold_no_{data_fold}.txt')
336
+ dataset_test = C3DDataset(f'vid_fold_{data_fold}.txt')
337
+ collate_fn = c3d_collate_fn
338
+ elif data_type=='vgg':
339
+ dataset_train = VGGDataset(f'vid_fold_no_{data_fold}.txt')
340
+ dataset_test = VGGDataset(f'vid_fold_{data_fold}.txt')
341
+ collate_fn = vgg_collate_fn
342
+ elif data_type=='bbox':
343
+ dataset_train = BboxDataset('vid_fold_no1.txt')
344
+ dataset_test = BboxDataset('vid_fold_1.txt')
345
+ collate_fn = bbox_collate_fn
346
+ elif data_type=='comments':
347
+ dataset_train = CommentsDataset(f'vid_fold_no_{data_fold}.txt')
348
+ dataset_test = CommentsDataset(f'vid_fold_{data_fold}.txt')
349
+ collate_fn = comments_collate_fn
350
+ elif data_type=='TikTec':
351
+ dataset_train = TikTecDataset(f'vid_fold_no_{data_fold}.txt')
352
+ dataset_test = TikTecDataset(f'vid_fold_{data_fold}.txt')
353
+ collate_fn = tictec_collate_fn
354
+ # elif data_type=='w2v':
355
+ # wv_from_text = KeyedVectors.load_word2vec_format("./stores/tencent-ailab-embedding-zh-d100-v0.2.0-s/tencent-ailab-embedding-zh-d100-v0.2.0-s.txt", binary=False)
356
+ # dataset_train = Title_W2V_Dataset(f'vid_fold_no{data_fold}.txt', wv_from_text)
357
+ # dataset_test = Title_W2V_Dataset(f'vid_fold_{data_fold}.txt', wv_from_text)
358
+ # collate_fn = title_w2v_collate_fn
359
+
360
+ train_dataloader = DataLoader(dataset_train, batch_size=self.batch_size,
361
+ num_workers=self.num_workers,
362
+ pin_memory=True,
363
+ shuffle=True,
364
+ worker_init_fn=_init_fn,
365
+ collate_fn=collate_fn)
366
+
367
+ test_dataloader=DataLoader(dataset_test, batch_size=self.batch_size,
368
+ num_workers=self.num_workers,
369
+ pin_memory=True,
370
+ shuffle=False,
371
+ worker_init_fn=_init_fn,
372
+ collate_fn=collate_fn)
373
+
374
+ dataloaders = dict(zip(['train', 'test'],[train_dataloader, test_dataloader]))
375
+
376
+ return dataloaders
377
+
378
+
379
+ def get_dataloader_temporal(self, data_type):
380
+ collate_fn=None
381
+ if data_type=='SVFEND':
382
+ dataset_train = SVFENDDataset('vid_time3_train.txt')
383
+ dataset_val = SVFENDDataset('vid_time3_val.txt')
384
+ dataset_test = SVFENDDataset('vid_time3_test.txt')
385
+ collate_fn=SVFEND_collate_fn
386
+ elif data_type=='FANVM':
387
+ dataset_train = FANVMDataset_train('vid_time3_train.txt')
388
+ dataset_val = FANVMDataset_test(path_vid_train='vid_time3_train.txt', path_vid_test='vid_time3_valid.txt')
389
+ dataset_test = FANVMDataset_test(path_vid_train='vid_time3_train.txt', path_vid_test='vid_time3_test.txt')
390
+ collate_fn = FANVM_collate_fn
391
+ else:
392
+ # can be added
393
+ print ("Not available")
394
+
395
+ train_dataloader = DataLoader(dataset_train, batch_size=self.batch_size,
396
+ num_workers=self.num_workers,
397
+ pin_memory=True,
398
+ shuffle=True,
399
+ worker_init_fn=_init_fn,
400
+ collate_fn=collate_fn)
401
+ val_dataloader = DataLoader(dataset_val, batch_size=self.batch_size,
402
+ num_workers=self.num_workers,
403
+ pin_memory=True,
404
+ shuffle=False,
405
+ worker_init_fn=_init_fn,
406
+ collate_fn=collate_fn)
407
+ test_dataloader=DataLoader(dataset_test, batch_size=self.batch_size,
408
+ num_workers=self.num_workers,
409
+ pin_memory=True,
410
+ shuffle=False,
411
+ worker_init_fn=_init_fn,
412
+ collate_fn=collate_fn)
413
+
414
+ dataloaders = dict(zip(['train', 'val', 'test'],[train_dataloader, val_dataloader, test_dataloader]))
415
+
416
+ return dataloaders
417
+
418
+
419
+ def get_model(self):
420
+ if self.model_name == 'SVFEND':
421
+ self.model = SVFENDModel(bert_model='bert-base-chinese', fea_dim=128,dropout=self.dropout)
422
+ elif self.model_name == 'FANVM':
423
+ self.model = FANVMModel(bert_model='bert-base-chinese', fea_dim=128)
424
+ self.data_type = "FANVM"
425
+ self.mode = 'eann'
426
+ elif self.model_name == 'C3D':
427
+ self.model = bC3D(fea_dim=128)
428
+ self.data_type = "c3d"
429
+ elif self.model_name == 'VGG':
430
+ self.model = bVGG(fea_dim=128)
431
+ self.data_type = "vgg"
432
+ elif self.model_name == 'Bbox':
433
+ self.model = bBbox(fea_dim=128)
434
+ self.data_type = "bbox"
435
+ elif self.model_name == 'Vggish':
436
+ self.model = bVggish(fea_dim=128)
437
+ elif self.model_name == 'Bert':
438
+ self.model = bBert(bert_model='bert-base-chinese', fea_dim=128,dropout=self.dropout)
439
+ elif self.model_name == 'TextCNN':
440
+ self.model = bTextCNN(fea_dim=128, vocab_size=100)
441
+ self.data_type = "w2v"
442
+ elif self.model_name == 'Comments':
443
+ self.model = bComments(bert_model='bert-base-chinese', fea_dim=128)
444
+ self.data_type = "comments"
445
+ elif self.model_name == 'TikTec':
446
+ self.model = TikTecModel(VCIF_dropout=self.dropout, MLP_dropout=self.dropout)
447
+ self.data_type = 'TikTec'
448
+
449
+ return self.model
450
+
451
+
452
+ def main(self):
453
+ if self.mode_eval == "nocv":
454
+ self.model = self.get_model()
455
+ dataloaders = self.get_dataloader(data_type=self.data_type, data_fold=self.fold)
456
+ trainer = Trainer(model=self.model, device = self.device, lr = self.lr, dataloaders = dataloaders, epoches = self.epoches, dropout = self.dropout, weight_decay = self.weight_decay, mode = self.mode, model_name = self.model_name, event_num = self.event_num,
457
+ epoch_stop = self.epoch_stop, save_param_path = self.save_param_dir+self.data_type+"/"+self.model_name+"/", writer = SummaryWriter(self.path_tensorboard))
458
+ result=trainer.train()
459
+ for metric in ['acc', 'f1', 'precision', 'recall', 'auc']:
460
+ print ('%s : %.4f' % (metric, result[metric]))
461
+
462
+ elif self.mode_eval == "temporal":
463
+ self.model = self.get_model()
464
+ dataloaders = self.get_dataloader_temporal(data_type=self.data_type)
465
+ trainer = Trainer3(model=self.model, device = self.device, lr = self.lr, dataloaders = dataloaders, epoches = self.epoches, dropout = self.dropout, weight_decay = self.weight_decay, mode = self.mode, model_name = self.model_name, event_num = self.event_num,
466
+ epoch_stop = self.epoch_stop, save_param_path = self.save_param_dir+self.data_type+"/"+self.model_name+"/", writer = SummaryWriter(self.path_tensorboard))
467
+ result=trainer.train()
468
+ for metric in ['acc', 'f1', 'precision', 'recall', 'auc']:
469
+ print ('%s : %.4f' % (metric, result[metric]))
470
+ return result
471
+
472
+ elif self.mode_eval == "cv":
473
+ collate_fn=None
474
+ # if self.model_name == 'TextCNN':
475
+ # wv_from_text = KeyedVectors.load_word2vec_format("./stores/tencent-ailab-embedding-zh-d100-v0.2.0-s/tencent-ailab-embedding-zh-d100-v0.2.0-s.txt", binary=False)
476
+
477
+ history = collections.defaultdict(list)
478
+ for fold in range(1, 6):
479
+ print('-' * 50)
480
+ print ('fold %d:' % fold)
481
+ print('-' * 50)
482
+ self.model = self.get_model()
483
+ dataloaders = self.get_dataloader(data_type=self.data_type, data_fold=fold)
484
+ trainer = Trainer(model = self.model, device = self.device, lr = self.lr, dataloaders = dataloaders, epoches = self.epoches, dropout = self.dropout, weight_decay = self.weight_decay, mode = self.mode, model_name = self.model_name, event_num = self.event_num,
485
+ epoch_stop = self.epoch_stop, save_param_path = self.save_param_dir+self.data_type+"/"+self.model_name+"/", writer = SummaryWriter(self.path_tensorboard+"fold_"+str(fold)+"/"))
486
+
487
+ result = trainer.train()
488
+
489
+ history['auc'].append(result['auc'])
490
+ history['f1'].append(result['f1'])
491
+ history['recall'].append(result['recall'])
492
+ history['precision'].append(result['precision'])
493
+ history['acc'].append(result['acc'])
494
+
495
+ print ('results on 5-fold cross-validation: ')
496
+ for metric in ['acc', 'f1', 'precision', 'recall', 'auc']:
497
+ print ('%s : %.4f +/- %.4f' % (metric, np.mean(history[metric]), np.std(history[metric])))
498
+
499
+ else:
500
+ print ("Not Available")
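Run expects a plain config dict whose keys match what __init__ reads above; a hypothetical example follows (the values are placeholders, and training additionally needs the dataset fold files and a CUDA device):

from FakeVD.code_test.run import Run

config = {
    'model_name': 'SVFEND', 'mode_eval': 'nocv', 'fold': 1,
    'epoches': 30, 'batch_size': 16, 'num_workers': 0, 'epoch_stop': 5,
    'seed': 2022, 'device': 0, 'lr': 1e-4, 'lambd': 0.1,
    'path_param': './checkpoints/', 'path_tensorboard': './tensorboard/',
    'dropout': 0.1, 'weight_decay': 5e-5,
}
Run(config).main()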
FakeVD/code_test/test.py ADDED
@@ -0,0 +1,13 @@
1
+ # cPickle was only used on Python 2; on Python 3 just use pickle
2
+ import pickle
3
+
4
+ # The key point is 'rb' vs 'r': 'rb' opens a binary file, use 'r' for text files
5
+ f = open('/mnt/data10t/dazuoye/GROUP2024-GEN6/FakeSV/code_test/preprocessed_feature/douyin_6559701594739313923.pkl','rb')
6
+ data = pickle.load(f)
7
+
8
+ # Print the first two key-value pairs
9
+ for i, (key, value) in enumerate(data.items()):
10
+ if i >= 2:
11
+ break
12
+ print(f"Key: {key}")
13
+ print(f"Value: {value}\n")