LEGIONM36 committed on
Commit 81d7389 · verified · 1 Parent(s): 1632fa6

Upload 4 files

Files changed (4)
  1. Two Stream Prototype1.pth +3 -0
  2. model.py +32 -0
  3. readme.md +22 -0
  4. train.py +260 -0
Two Stream Prototype1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2dd0130b66c7581d9f8f2eb36693151bacfbee29ca7e182cee4b72a18bac8d9e
+ size 267603657
model.py ADDED
@@ -0,0 +1,32 @@
+ import torch
+ import torch.nn as nn
+ import torchvision.models.video as models
+
+ class TwoStreamNetwork(nn.Module):
+     def __init__(self):
+         super(TwoStreamNetwork, self).__init__()
+
+         # Stream 1: RGB (appearance)
+         self.rgb_backbone = models.r3d_18(weights=None)
+         self.rgb_backbone.fc = nn.Identity()  # Remove classification head
+
+         # Stream 2: Optical Flow (motion)
+         self.flow_backbone = models.r3d_18(weights=None)
+         self.flow_backbone.fc = nn.Identity()
+
+         # Fusion head
+         # R3D-18 output dim is 512 per stream
+         self.fusion_fc = nn.Sequential(
+             nn.Linear(512 * 2, 512),
+             nn.ReLU(),
+             nn.Dropout(0.5),
+             nn.Linear(512, 2)
+         )
+
+     def forward(self, rgb, flow):
+         feat_rgb = self.rgb_backbone(rgb)
+         feat_flow = self.flow_backbone(flow)
+
+         combined = torch.cat((feat_rgb, feat_flow), dim=1)  # late fusion of the two 512-d features
+         out = self.fusion_fc(combined)
+         return out
readme.md ADDED
@@ -0,0 +1,22 @@
+ # Two-Stream Violence Detection Network
+
+ ## Model Architecture
+ - **Type**: Two-Stream Network (Spatial + Temporal)
+ - **Streams**:
+   1. **RGB Stream**: ResNet3D (r3d_18) that processes the raw video frames and captures appearance information.
+   2. **Optical Flow Stream**: ResNet3D (r3d_18) that processes dense optical flow and captures motion information.
+ - **Fusion**: Features from both streams are concatenated and passed through fully connected layers.
+ - **Input**: 16 RGB frames + 16 flow fields, computed on the fly; the expected tensor shapes are sketched below.
+ - **Computation**: Optical flow is computed with the Farneback algorithm inside the DataLoader.
+
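+ For reference, a minimal sketch of the input/output shapes used by the scripts (clips of 16 frames at 112×112; the three flow channels are dx, dy and flow magnitude, as built in `train.py`):
+
+ ```python
+ import torch
+ from model import TwoStreamNetwork
+
+ model = TwoStreamNetwork().eval()
+
+ # One clip per stream: (batch, channels, frames, height, width)
+ rgb = torch.randn(1, 3, 16, 112, 112)
+ flow = torch.randn(1, 3, 16, 112, 112)    # channels = (dx, dy, magnitude)
+
+ with torch.no_grad():
+     logits = model(rgb, flow)              # shape (1, 2): [no-violence, violence]
+     pred = logits.argmax(dim=1).item()     # 1 = violence, 0 = no violence
+ ```
+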
+ ## Dataset Structure
+ Expects a `Dataset` folder in the parent directory:
+ ```
+ Dataset/
+ ├── violence/
+ └── no-violence/
+ ```
+
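+ For reference, `train.py` discovers and labels the videos roughly as in the sketch below (folder names and the stratified 70/15/15 split match `prepare_data()`; the relative `../Dataset` path is a simplification, since the script resolves the folder from its own location):
+
+ ```python
+ import os
+ from sklearn.model_selection import train_test_split
+
+ DATASET_DIR = "../Dataset"                 # parent directory, as expected by train.py
+ classes = {"violence": 1, "no-violence": 0}
+
+ X, y = [], []
+ for name, label in classes.items():
+     folder = os.path.join(DATASET_DIR, name)
+     for f in os.listdir(folder):
+         if f.endswith((".avi", ".mp4")):   # only .avi and .mp4 files are picked up
+             X.append(os.path.join(folder, f))
+             y.append(label)
+
+ # Stratified 70 / 15 / 15 train / val / test split (random_state=42)
+ X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
+ X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)
+ ```
+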
+ ## How to Run
+ 1. Install dependencies: `torch`, `torchvision`, `opencv-python` (Farneback is included in the standard build, so the contrib package is not required), `scikit-learn`, and `numpy`.
+ 2. Run `python train.py`.
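+
+ After training, the best weights are written to `best_model_twostream.pth`. A minimal loading sketch for inference (this assumes a `state_dict`-style checkpoint as saved by `train.py`; the bundled `Two Stream Prototype1.pth` may instead be a full pickled model, in which case load it with `torch.load(path, weights_only=False)`):
+
+ ```python
+ import torch
+ from model import TwoStreamNetwork
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Rebuild the architecture, then load the saved weights
+ model = TwoStreamNetwork().to(device)
+ state = torch.load("best_model_twostream.pth", map_location=device)
+ model.load_state_dict(state)
+ model.eval()
+ ```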
train.py ADDED
@@ -0,0 +1,260 @@
+ import os
+ import cv2
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torch.utils.data import Dataset, DataLoader
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
+ import time
+ from model import TwoStreamNetwork
+
+ # --- Configuration ---
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ DATASET_DIR = os.path.join(BASE_DIR, "Dataset")
+ MODEL_SAVE_PATH = "best_model_twostream.pth"
+
+ IMG_SIZE = 112
+ SEQ_LEN = 16
+ BATCH_SIZE = 16
+ EPOCHS = 80
+ LEARNING_RATE = 1e-4
+ PATIENCE = 5
+
+ class TwoStreamDataset(Dataset):
+     def __init__(self, video_paths, labels):
+         self.video_paths = video_paths
+         self.labels = labels
+
+     def __len__(self):
+         return len(self.video_paths)
+
+     def __getitem__(self, idx):
+         path = self.video_paths[idx]
+         label = self.labels[idx]
+
+         frames, flows = self._load_data(path)
+
+         # To Tensor: (T, H, W, C) -> (C, T, H, W)
+         frames = torch.tensor(frames, dtype=torch.float32).permute(3, 0, 1, 2)
+         flows = torch.tensor(flows, dtype=torch.float32).permute(3, 0, 1, 2)
+
+         return frames, flows, label
+
+     def _load_data(self, path):
+         cap = cv2.VideoCapture(path)
+         frames = []
+         try:
+             while True:
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+                 frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
+                 frames.append(frame)
+         finally:
+             cap.release()
+
+         if len(frames) == 0:
+             # Unreadable video: return all-zero RGB and flow clips
+             dummy_f = np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
+             dummy_opt = np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
+             return dummy_f, dummy_opt
+
+         # Sampling: pad short clips by repeating the last frame, subsample long clips uniformly
+         if len(frames) < SEQ_LEN:
+             while len(frames) < SEQ_LEN:
+                 frames.append(frames[-1])
+         elif len(frames) > SEQ_LEN:
+             indices = np.linspace(0, len(frames) - 1, SEQ_LEN, dtype=int)
+             frames = [frames[i] for i in indices]
+
+         rgb_frames = np.array(frames, dtype=np.float32) / 255.0
+
+         # Calculate dense optical flow with OpenCV's Farneback algorithm
+         prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
+         flows = []
+         for i in range(len(frames)):
+             curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
+             if i == 0:
+                 # First frame has no predecessor, use zero flow
+                 flow = np.zeros((IMG_SIZE, IMG_SIZE, 2), dtype=np.float32)
+             else:
+                 flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+             prev_gray = curr_gray
+
+             # Stack (dx, dy, magnitude) into a 3-channel "flow image" to match the 3-channel backbone input
+             mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
+             flow_img = np.dstack((flow[..., 0], flow[..., 1], mag))
+
+             # Roughly map flow values from about [-20, 20] pixels into [0, 1]
+             flow_img = (flow_img + 20) / 40.0
+
+             flows.append(flow_img)
+
+         return rgb_frames, np.array(flows, dtype=np.float32)
+
+ # --- Data Preparation ---
+ def prepare_data():
+     violence_dir = os.path.join(DATASET_DIR, 'violence')
+     no_violence_dir = os.path.join(DATASET_DIR, 'no-violence')
+
+     if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
+         raise FileNotFoundError("Dataset directories not found.")
+
+     violence_files = [os.path.join(violence_dir, f) for f in os.listdir(violence_dir) if f.endswith(('.avi', '.mp4'))]
+     no_violence_files = [os.path.join(no_violence_dir, f) for f in os.listdir(no_violence_dir) if f.endswith(('.avi', '.mp4'))]
+
+     X = violence_files + no_violence_files
+     y = [1] * len(violence_files) + [0] * len(no_violence_files)
+
+     # Stratified 70 / 15 / 15 split into train / validation / test
+     X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
+     X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)
+
+     return (X_train, y_train), (X_val, y_val), (X_test, y_test)
+
+ # --- Early Stopping ---
+ class EarlyStopping:
+     def __init__(self, patience=5, verbose=False, path='checkpoint.pth'):
+         self.patience = patience
+         self.verbose = verbose
+         self.counter = 0
+         self.best_score = None
+         self.early_stop = False
+         self.val_loss_min = np.inf
+         self.path = path
+
+     def __call__(self, val_loss, model):
+         score = -val_loss
+         if self.best_score is None:
+             self.best_score = score
+             self.save_checkpoint(val_loss, model)
+         elif score < self.best_score:
+             self.counter += 1
+             if self.verbose:
+                 print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
+             if self.counter >= self.patience:
+                 self.early_stop = True
+         else:
+             self.best_score = score
+             self.save_checkpoint(val_loss, model)
+             self.counter = 0
+
+     def save_checkpoint(self, val_loss, model):
+         if self.verbose:
+             print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
+         # Save only the weights (state_dict); safer and more portable than pickling the whole model
+         torch.save(model.state_dict(), self.path)
+         self.val_loss_min = val_loss
+
+ if __name__ == "__main__":
+     start_time = time.time()
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     print(f"Using device: {device}")
+
+     try:
+         (X_train, y_train), (X_val, y_val), (X_test, y_test) = prepare_data()
+         print("Dataset Split Stats:")
+         print(f"Train: {len(X_train)} samples")
+         print(f"Val: {len(X_val)} samples")
+         print(f"Test: {len(X_test)} samples")
+     except Exception as e:
+         print(f"Data preparation failed: {e}")
+         exit(1)
+
+     train_dataset = TwoStreamDataset(X_train, y_train)
+     val_dataset = TwoStreamDataset(X_val, y_val)
+     test_dataset = TwoStreamDataset(X_test, y_test)
+
+     train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
+     val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
+     test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
+
+     model = TwoStreamNetwork().to(device)
+     criterion = nn.CrossEntropyLoss()
+     optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+     early_stopping = EarlyStopping(patience=PATIENCE, verbose=True, path=MODEL_SAVE_PATH)
+
+     print("\nStarting Two-Stream Network Training...")
+
+     for epoch in range(EPOCHS):
+         model.train()
+         train_loss = 0.0
+         correct = 0
+         total = 0
+
+         for batch_idx, (rgb_in, flow_in, labels) in enumerate(train_loader):
+             rgb_in, flow_in, labels = rgb_in.to(device), flow_in.to(device), labels.to(device)
+
+             optimizer.zero_grad()
+             outputs = model(rgb_in, flow_in)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+
+             train_loss += loss.item()
+             _, predicted = torch.max(outputs.data, 1)
+             total += labels.size(0)
+             correct += (predicted == labels).sum().item()
+
+             if batch_idx % 10 == 0:
+                 print(f"Epoch {epoch+1} Batch {batch_idx}/{len(train_loader)} Loss: {loss.item():.4f}", end='\r')
+
+         train_acc = 100 * correct / total
+         avg_train_loss = train_loss / len(train_loader)
+
+         # Validation pass
+         model.eval()
+         val_loss = 0.0
+         correct_val = 0
+         total_val = 0
+
+         with torch.no_grad():
+             for rgb_in, flow_in, labels in val_loader:
+                 rgb_in, flow_in, labels = rgb_in.to(device), flow_in.to(device), labels.to(device)
+                 outputs = model(rgb_in, flow_in)
+                 loss = criterion(outputs, labels)
+                 val_loss += loss.item()
+                 _, predicted = torch.max(outputs.data, 1)
+                 total_val += labels.size(0)
+                 correct_val += (predicted == labels).sum().item()
+
+         val_acc = 100 * correct_val / total_val
+         avg_val_loss = val_loss / len(val_loader)
+
+         print(f'\nEpoch [{epoch+1}/{EPOCHS}] '
+               f'Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.2f}% '
+               f'Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.2f}%')
+
+         early_stopping(avg_val_loss, model)
+         if early_stopping.early_stop:
+             print("Early stopping triggered")
+             break
+
+     print("\nLoading best Two-Stream model for evaluation...")
+     if os.path.exists(MODEL_SAVE_PATH):
+         # Load the best weights saved by EarlyStopping (state_dict checkpoint)
+         model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
+     else:
+         print("Warning: Model file not found. Evaluating the last-epoch weights instead.")
+
+     model.eval()
+     all_preds = []
+     all_labels = []
+
+     print("Evaluating on Test set...")
+     with torch.no_grad():
+         for rgb_in, flow_in, labels in test_loader:
+             rgb_in, flow_in, labels = rgb_in.to(device), flow_in.to(device), labels.to(device)
+             outputs = model(rgb_in, flow_in)
+             _, predicted = torch.max(outputs.data, 1)
+             all_preds.extend(predicted.cpu().numpy())
+             all_labels.extend(labels.cpu().numpy())
+
+     print("\n=== Two-Stream Model Evaluation Report ===")
+     print(classification_report(all_labels, all_preds, target_names=['No Violence', 'Violence']))
+     print("Confusion Matrix:")
+     print(confusion_matrix(all_labels, all_preds))
+     acc = accuracy_score(all_labels, all_preds)
+     print(f"\nFinal Test Accuracy: {acc*100:.2f}%")
+
+     elapsed = time.time() - start_time
+     print(f"\nTotal execution time: {elapsed/60:.2f} minutes")