LEGIONM36 committed on
Commit
283b625
·
verified ·
1 Parent(s): b081ac9

Upload 4 files

Browse files
Files changed (4) hide show
  1. best_model_slowfast.pth +3 -0
  2. model.py +112 -0
  3. readme.md +20 -0
  4. train.py +295 -0
best_model_slowfast.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a4912e91148f7a80cec9d10bb6b4c2ce2dde42f1923efecf8d7942ebee14e09
3
+ size 2321299
model.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ ALPHA = 8
5
+
6
class LateralConnection(nn.Module):
    """
    Fuse Fast-pathway features into the Slow pathway.

    A strided 3D convolution compresses the Fast pathway's temporal axis by a
    factor of ``alpha`` so its feature maps line up with the Slow pathway's,
    while expanding the channel count to ``slow_channels * 2`` for
    concatenation.

    NOTE(review): ``SlowFastNetwork`` below builds its own raw ``nn.Conv3d``
    laterals (``lateral1``/``lateral2``) instead of using this module, so this
    class is currently unused by the rest of the file.
    """

    def __init__(self, fast_channels, slow_channels, alpha=ALPHA):
        super(LateralConnection, self).__init__()
        # Temporal kernel 5 with padding 2 pools neighbouring frames; the
        # temporal stride of `alpha` downsamples T to match the Slow pathway.
        self.conv = nn.Conv3d(
            fast_channels,
            slow_channels * 2,
            kernel_size=(5, 1, 1),
            stride=(alpha, 1, 1),
            padding=(2, 0, 0),
            bias=False,
        )

    def forward(self, x_fast):
        # (B, C_fast, T, H, W) -> (B, 2 * slow_channels, T // alpha, H, W)
        fused = self.conv(x_fast)
        return fused
20
+
21
class SlowFastNetwork(nn.Module):
    """
    Minimal two-pathway SlowFast video classifier.

    Fast pathway: all input frames (high temporal resolution, few channels),
    capturing motion. Slow pathway: every ``alpha``-th frame (low temporal
    resolution, many channels), capturing spatial detail. Strided lateral
    convolutions fuse Fast features into the Slow pathway at two stages, and
    the globally pooled features of both pathways are concatenated for
    classification.

    Args:
        num_classes: number of output classes. Defaults to 2, matching the
            original hard-coded violence / no-violence head.
        alpha: temporal ratio fast_T // slow_T between the two inputs; must
            equal the frame-sampling stride used to build the Slow input.
            Defaults to 8 (the original module-level ALPHA).

    Forward inputs (with the defaults and 112x112 frames):
        slow_input: (B, 3, T, H, W),       e.g. (B, 3, 4, 112, 112)
        fast_input: (B, 3, alpha*T, H, W), e.g. (B, 3, 32, 112, 112)

    Returns:
        (B, num_classes) logits.
    """

    def __init__(self, num_classes=2, alpha=8):
        super(SlowFastNetwork, self).__init__()

        # --- Fast Pathway (high frame rate, low channel capacity) ---
        # Shape comments assume the default (B, 3, 32, 112, 112) Fast input.
        self.fast_conv1 = nn.Conv3d(3, 8, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
        self.fast_bn1 = nn.BatchNorm3d(8)
        self.fast_pool1 = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1))
        # -> (B, 8, 32, 28, 28)

        self.fast_conv2 = nn.Conv3d(8, 16, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1), bias=False)
        self.fast_bn2 = nn.BatchNorm3d(16)
        # -> (B, 16, 32, 14, 14)

        self.fast_conv3 = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        self.fast_bn3 = nn.BatchNorm3d(32)
        # -> (B, 32, 32, 14, 14)

        # --- Slow Pathway (low frame rate, high channel capacity) ---
        # Shape comments assume the default (B, 3, 4, 112, 112) Slow input.
        self.slow_conv1 = nn.Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
        self.slow_bn1 = nn.BatchNorm3d(64)
        self.slow_pool1 = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1))
        # -> (B, 64, 4, 28, 28)

        # +16 input channels come from the first lateral connection.
        self.slow_conv2 = nn.Conv3d(64 + 16, 128, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), bias=False)
        self.slow_bn2 = nn.BatchNorm3d(128)
        # -> (B, 128, 4, 14, 14)

        # +64 input channels come from the second lateral connection.
        self.slow_conv3 = nn.Conv3d(128 + 64, 256, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False)
        self.slow_bn3 = nn.BatchNorm3d(256)
        # -> (B, 256, 4, 14, 14)

        # --- Lateral Connections (Fast -> Slow) ---
        # The temporal stride `alpha` collapses the Fast time axis onto the
        # Slow one so the feature maps can be concatenated channel-wise.
        self.lateral1 = nn.Conv3d(8, 16, kernel_size=(5, 1, 1), stride=(alpha, 1, 1), padding=(2, 0, 0), bias=False)
        self.lateral2 = nn.Conv3d(16, 64, kernel_size=(5, 1, 1), stride=(alpha, 1, 1), padding=(2, 0, 0), bias=False)

        self.relu = nn.ReLU(inplace=True)
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

        # Classification head over the concatenated pathway features:
        # Fast final channels (32) + Slow final channels (256).
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(32 + 256, num_classes)

    def forward(self, slow_input, fast_input):
        """Run both pathways, fuse laterally, and return (B, num_classes) logits."""
        # Fast pathway stem.
        f1 = self.relu(self.fast_bn1(self.fast_conv1(fast_input)))
        f1_p = self.fast_pool1(f1)

        # Slow pathway stem.
        s1 = self.relu(self.slow_bn1(self.slow_conv1(slow_input)))
        s1_p = self.slow_pool1(s1)

        # Lateral fusion 1: e.g. (B, 8, 32, 28, 28) -> (B, 16, 4, 28, 28),
        # concatenated onto the Slow features along the channel axis.
        l1 = self.lateral1(f1_p)
        s2_input = torch.cat([s1_p, l1], dim=1)  # (64 + 16) channels

        # Stage 2 of each pathway.
        f2 = self.relu(self.fast_bn2(self.fast_conv2(f1_p)))
        s2 = self.relu(self.slow_bn2(self.slow_conv2(s2_input)))

        # Lateral fusion 2: e.g. (B, 16, 32, 14, 14) -> (B, 64, 4, 14, 14).
        l2 = self.lateral2(f2)
        s3_input = torch.cat([s2, l2], dim=1)  # (128 + 64) channels

        # Stage 3 of each pathway.
        f3 = self.relu(self.fast_bn3(self.fast_conv3(f2)))
        s3 = self.relu(self.slow_bn3(self.slow_conv3(s3_input)))

        # Global average pooling per pathway, then concatenate and classify.
        f_out = self.avg_pool(f3).view(f3.size(0), -1)  # (B, 32)
        s_out = self.avg_pool(s3).view(s3.size(0), -1)  # (B, 256)

        x = torch.cat([s_out, f_out], dim=1)
        x = self.dropout(x)
        return self.fc(x)
readme.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SlowFast Network
2
+
3
+ ## Model Architecture
4
+ - **Type**: Two-Stream Pathway (Slow + Fast)
5
+ - **Fast Pathway**: High temporal resolution (all 32 frames), low channel capacity. Captures motion.
6
+ - **Slow Pathway**: Low temporal resolution (4 frames, stride 8), high channel capacity. Captures spatial details.
7
+ - **Fusion**: Lateral connections fuse Fast features into Slow pathway at multiple stages to integrate motion information.
8
+ - **Input**: 32 Frames.
9
+
10
+ ## Dataset Structure
11
+ Expects `Dataset` folder in parent directory.
12
+ ```
13
+ Dataset/
14
+ ├── violence/
15
+ └── no-violence/
16
+ ```
17
+
18
+ ## How to Run
19
+ 1. Install dependencies: `torch`, `opencv-python`, `scikit-learn`, `numpy`.
20
+ 2. Run `python train.py`.
train.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ from torch.utils.data import Dataset, DataLoader
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
10
+ import time
11
+ from model import SlowFastNetwork
12
+
13
+ # --- Configuration ---
14
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
15
+ DATASET_DIR = os.path.join(BASE_DIR, "Dataset")
16
+ MODEL_SAVE_PATH = "best_model_slowfast.pth"
17
+
18
+ # Hyperparameters
19
+ IMG_SIZE = 112
20
+ SEQ_LEN = 32 # 32 frames for Fast path
21
+ ALPHA = 8 # Alpha to define Slow path (32/8 = 4 frames)
22
+ BATCH_SIZE = 16
23
+ EPOCHS = 80
24
+ LEARNING_RATE = 1e-4
25
+ PATIENCE = 5
26
+
27
+ # --- 1. Data Augmentation ---
28
def augment_video_frames(frames):
    """
    Apply one consistent random augmentation to every frame of a clip.

    The random decisions (flip, rotation angle, brightness/contrast factors)
    are drawn once, before the loop, so the whole clip stays temporally
    coherent.

    Args:
        frames: iterable of HxWxC uint8 images (OpenCV BGR frames).

    Returns:
        np.ndarray of augmented frames, same length and shape as the input.
    """
    # Draw all random parameters up front. The draw order (flip, rotate,
    # conditional angle, brightness, contrast) is part of the np.random
    # stream and must not change.
    do_flip = np.random.random() > 0.5
    do_rotate = np.random.random() > 0.5
    # NOTE(review): randint's upper bound is exclusive, so the rotation range
    # is actually [-15, 14] degrees — confirm whether +15 was intended.
    angle = np.random.randint(-15, 15) if do_rotate else 0
    brightness = np.random.uniform(0.8, 1.2)
    contrast = np.random.uniform(0.8, 1.2)

    def _transform(frame):
        # Apply the shared augmentation parameters to a single frame.
        out = frame.copy()
        if do_flip:
            out = cv2.flip(out, 1)  # horizontal mirror
        if do_rotate:
            h, w = out.shape[:2]
            rot = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
            out = cv2.warpAffine(out, rot, (w, h))
        # alpha scales contrast; beta shifts brightness by up to +/-10 levels.
        return cv2.convertScaleAbs(out, alpha=contrast, beta=(brightness - 1) * 50)

    return np.array([_transform(frame) for frame in frames])
54
+
55
+ # --- Dataset Class ---
56
class SlowFastDataset(Dataset):
    """
    Video dataset yielding (slow_input, fast_input, label) triples.

    Each clip is decoded and normalised to exactly SEQ_LEN frames. The Fast
    pathway receives every frame; the Slow pathway receives every
    ``alpha``-th frame. Both are returned as float32 tensors in (C, T, H, W)
    layout, scaled to [0, 1].
    """

    def __init__(self, video_paths, labels, alpha=ALPHA, transform=None, augment=False):
        # NOTE(review): `transform` is accepted for API compatibility but is
        # currently unused; augmentation is controlled by `augment` alone.
        self.video_paths = video_paths
        self.labels = labels
        self.augment = augment
        self.alpha = alpha

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        target = self.labels[idx]

        # Fall back to an all-black clip on decode failure so a single
        # corrupt file does not abort the whole training run.
        try:
            clip = self._load_video(video_path)
        except Exception as err:
            print(f"Error loading {video_path}: {err}")
            clip = np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if self.augment:
            clip = augment_video_frames(clip)

        # Fast pathway: every frame (high temporal resolution).
        # Slow pathway: every `alpha`-th frame (low temporal resolution).
        slow_clip = clip[np.arange(0, SEQ_LEN, self.alpha)]

        slow_input = self._process_frames(slow_clip)
        fast_input = self._process_frames(clip)
        return slow_input, fast_input, target

    def _process_frames(self, frames):
        # uint8 (T, H, W, C) -> float32 (C, T, H, W) scaled to [0, 1].
        out = torch.tensor(frames, dtype=torch.float32) / 255.0
        return out.permute(3, 0, 1, 2)

    def _load_video(self, path):
        """Decode `path` with OpenCV and normalise it to exactly SEQ_LEN frames."""
        capture = cv2.VideoCapture(path)
        decoded = []
        try:
            while True:
                ok, frame = capture.read()
                if not ok:
                    break
                decoded.append(cv2.resize(frame, (IMG_SIZE, IMG_SIZE)))
        finally:
            # Always free the decoder handle, even if resize throws.
            capture.release()

        if not decoded:
            # Unreadable/empty video: return a black clip of the right shape.
            return np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if len(decoded) < SEQ_LEN:
            # Pad short clips by repeating the final frame.
            decoded.extend([decoded[-1]] * (SEQ_LEN - len(decoded)))
        elif len(decoded) > SEQ_LEN:
            # Uniformly subsample SEQ_LEN frames from longer clips.
            keep = np.linspace(0, len(decoded) - 1, SEQ_LEN, dtype=int)
            decoded = [decoded[i] for i in keep]

        return np.array(decoded)
128
+
129
+ # --- 2. Data Splitting ---
130
def prepare_data():
    """
    Index the dataset and produce stratified train/val/test splits.

    Scans `DATASET_DIR/violence` and `DATASET_DIR/no-violence` for video
    files and splits them 70/15/15 with a fixed random seed.

    Returns:
        ((X_train, y_train), (X_val, y_val), (X_test, y_test)) where each X_*
        is a list of file paths and each y_* a list of labels (1 = violence,
        0 = no-violence).

    Raises:
        FileNotFoundError: if either class directory is missing.
    """
    violence_dir = os.path.join(DATASET_DIR, 'violence')
    no_violence_dir = os.path.join(DATASET_DIR, 'no-violence')

    if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
        # Include the paths so the failure is actionable.
        raise FileNotFoundError(
            f"Dataset directories not found: {violence_dir!r}, {no_violence_dir!r}"
        )

    # Match video extensions case-insensitively ('.AVI', '.MP4', ...).
    video_extensions = ('.avi', '.mp4')

    def _list_videos(directory):
        # sorted() makes the file order -- and therefore the seeded split
        # below -- reproducible across filesystems; os.listdir order is
        # arbitrary.
        return sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.lower().endswith(video_extensions)
        )

    violence_files = _list_videos(violence_dir)
    no_violence_files = _list_videos(no_violence_dir)

    X = violence_files + no_violence_files
    y = [1] * len(violence_files) + [0] * len(no_violence_files)

    # 70% train, then split the remaining 30% evenly into val and test,
    # preserving the class ratio in every split.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)

    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
147
+
148
+ # --- 4. Early Stopping ---
149
class EarlyStopping:
    """
    Stop training once validation loss fails to improve for `patience` epochs.

    Each time the validation loss reaches a new minimum the model is
    serialised to `path`; otherwise an internal counter is incremented, and
    `early_stop` flips to True once the counter reaches `patience`.
    """

    def __init__(self, patience=5, verbose=False, path='checkpoint.pth'):
        self.patience = patience    # epochs to tolerate without improvement
        self.verbose = verbose      # print progress messages if True
        self.counter = 0            # consecutive non-improving epochs so far
        self.best_score = None      # negated best validation loss seen
        self.early_stop = False     # set True once patience is exhausted
        self.val_loss_min = np.inf  # lowest validation loss observed
        self.path = path            # checkpoint destination

    def __call__(self, val_loss, model):
        # Higher score means better, so score is the negated loss.
        score = -val_loss

        if self.best_score is None:
            # First observation: anything counts as an improvement.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            return

        if score < self.best_score:
            # Strictly worse than the best so far (ties count as improvement).
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Persist the model and record the new validation-loss minimum."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        # NOTE(review): this pickles the entire nn.Module, not a state_dict;
        # the training script relies on torch.load() returning the model object.
        torch.save(model, self.path)
        self.val_loss_min = val_loss
180
+
181
+ # --- Main Execution ---
182
if __name__ == "__main__":
    # Training / evaluation driver: split the data, train SlowFastNetwork
    # with early stopping, then report test-set metrics.
    start_time = time.time()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Index and split the dataset; abort with a non-zero exit code if the
    # expected directories are missing or unreadable.
    try:
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = prepare_data()
        print(f"Dataset Split Stats:")
        print(f"Train: {len(X_train)} samples")
        print(f"Val: {len(X_val)} samples")
        print(f"Test: {len(X_test)} samples")
    except Exception as e:
        print(f"Data preparation failed: {e}")
        exit(1)

    # Augmentation is enabled only for training data.
    train_dataset = SlowFastDataset(X_train, y_train, augment=True)
    val_dataset = SlowFastDataset(X_val, y_val, augment=False)
    test_dataset = SlowFastDataset(X_test, y_test, augment=False)

    # num_workers=0: decode videos in the main process (OpenCV capture
    # objects are not always fork-safe).
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    model = SlowFastNetwork().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Saves the best model (by validation loss) to MODEL_SAVE_PATH and stops
    # after PATIENCE epochs without improvement.
    early_stopping = EarlyStopping(patience=PATIENCE, verbose=True, path=MODEL_SAVE_PATH)

    print("\nStarting SlowFast Training...")

    for epoch in range(EPOCHS):
        # --- Training phase ---
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (slow_in, fast_in, labels) in enumerate(train_loader):
            slow_in, fast_in, labels = slow_in.to(device), fast_in.to(device), labels.to(device)

            # Standard step: clear grads, forward both pathways, backprop.
            optimizer.zero_grad()
            outputs = model(slow_in, fast_in)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate running loss and accuracy over the epoch.
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Lightweight progress display; '\r' overwrites the same line.
            if batch_idx % 10 == 0:
                print(f"Epoch {epoch+1} Batch {batch_idx}/{len(train_loader)} Loss: {loss.item():.4f}", end='\r')

        train_acc = 100 * correct / total
        avg_train_loss = train_loss / len(train_loader)

        # --- Validation phase (no gradients, eval-mode BN/dropout) ---
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for slow_in, fast_in, labels in val_loader:
                slow_in, fast_in, labels = slow_in.to(device), fast_in.to(device), labels.to(device)
                outputs = model(slow_in, fast_in)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_acc = 100 * correct_val / total_val
        avg_val_loss = val_loss / len(val_loader)

        print(f'\nEpoch [{epoch+1}/{EPOCHS}] '
              f'Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.2f}% '
              f'Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.2f}%')

        # Checkpoint on improvement; stop once patience is exhausted.
        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    # --- Test evaluation on the best checkpoint ---
    print("\nLoading best SlowFast model for evaluation...")
    if os.path.exists(MODEL_SAVE_PATH):
        # EarlyStopping pickled the full nn.Module, so torch.load returns the
        # model object directly (already on the device it was saved from).
        # NOTE(review): no map_location is passed — loading a GPU-saved
        # checkpoint on a CPU-only machine would fail; harmless within a
        # single run.
        model = torch.load(MODEL_SAVE_PATH)
    else:
        print("Warning: Using last epoch model.")

    model.eval()
    all_preds = []
    all_labels = []

    print("Evaluating on Test set...")
    with torch.no_grad():
        for slow_in, fast_in, labels in test_loader:
            slow_in, fast_in, labels = slow_in.to(device), fast_in.to(device), labels.to(device)
            outputs = model(slow_in, fast_in)
            _, predicted = torch.max(outputs.data, 1)
            # Collect on CPU for the sklearn metrics below.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Per-class precision/recall/F1, confusion matrix, and overall accuracy.
    print("\n=== SlowFast Model Evaluation Report ===")
    print(classification_report(all_labels, all_preds, target_names=['No Violence', 'Violence']))
    print("Confusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))
    acc = accuracy_score(all_labels, all_preds)
    print(f"\nFinal Test Accuracy: {acc*100:.2f}%")

    elapsed = time.time() - start_time
    print(f"\nTotal execution time: {elapsed/60:.2f} minutes")