makiisthebes committed on
Commit
4ec6f12
1 Parent(s): 7471e05

Upload 18 files

.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Cognitive[[:space:]]Robotics[[:space:]]Lit[[:space:]]Review.docx filter=lfs diff=lfs merge=lfs -text
+ PresentationFinal.pdf filter=lfs diff=lfs merge=lfs -text
+ PresentationFinal.pptx filter=lfs diff=lfs merge=lfs -text
+ video1.mp4 filter=lfs diff=lfs merge=lfs -text
Cognitive Robotics Lit Review.docx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f4013d74a3d0338d39e0d9cb76445f68269d81bb9a810d915aafb3fd1c1d1e8
+ size 1758570
PresentationFinal.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ec1857be3a65a35e3725886e11257c258c850d01c99cea7df70cd8c43504439
+ size 1890972
PresentationFinal.pptx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f8778fe1522e8cff628009e656b4f216981953b28faef2f0a17f18a4822aa62
+ size 21281231
alexnet_2.0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60e26575481de7adc0c4002a571c50aa0c7a8fcbdd147771b14be34befd0219a
+ size 14995442
alexnet_cognitive.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7a3223076a2081ff3bec8174f3961054a222d0a5295ba9cadad88a044efdc11
+ size 187026089
alexnet_cognitive_gap.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d158f502c1b7f68d8dfdb9f26964c016b110b3571ee9e3b5425c8912dacf2437
+ size 86362865
best_model.txt ADDED
@@ -0,0 +1 @@
+ 85
best_model_2.0.txt ADDED
@@ -0,0 +1 @@
+ 93.11740890688259
data_formater.py ADDED
@@ -0,0 +1,42 @@
+ # Convert the data into the layout expected by the DataLoader.
+ import os
+
+ # Check the root directory exists; if not, create it.
+ if not os.path.exists("dataset/root"):
+     os.makedirs("dataset/root")
+
+ # Check if the labels.csv file exists; if it does, delete it.
+ if os.path.exists("dataset/root/labels.csv"):
+     os.remove("dataset/root/labels.csv")
+
+ # Create a labels csv file.
+ print("Creating labels.csv file.")
+ classes_to_model_output = {"left": 0, "right": 1}
+ with open("dataset/root/labels.csv", "w") as file:
+     # file.write("image,class\n")
+     classes = ["left", "right"]
+     for class_name in classes:
+         image_files = os.listdir(os.path.join("dataset", class_name))
+         for image in image_files:
+             file.write(f"{image},{classes_to_model_output[class_name]}\n")
+
+ print("Creating uniform image dataset.")
+ # Create a uniform image dataset, named train.
+ if not os.path.exists("dataset/root/train"):
+     os.makedirs("dataset/root/train")
+
+ # Copy the images to the root directory.
+ for class_name in classes:
+     image_files = os.listdir(os.path.join("dataset", class_name))
+     for image in image_files:
+         os.system(f"cp dataset/{class_name}/{image} dataset/root/train/{image}")
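The script above assumes frames already sit under dataset/left and dataset/right (see video_preprocessing.py) and produces dataset/root/labels.csv plus dataset/root/train/. A quick sanity-check sketch for the generated index, assuming pandas is installed; the column names here are only illustrative, since the CSV is written without a header row:

# Sanity-check sketch: count labelled frames per class (layout as built by data_formater.py).
import pandas as pd

labels = pd.read_csv("dataset/root/labels.csv", header=None, names=["image", "class"])
print(labels["class"].value_counts())   # expect two classes: 0 (left) and 1 (right)
print(labels.head())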
dataset_creation.py ADDED
@@ -0,0 +1,84 @@
+ # Dataset creation for the frames produced by the video preprocessing step.
+ from torch.utils.data import DataLoader
+ from torchvision.io import read_image
+ from torch.utils.data import Dataset
+ from torchvision.transforms import v2
+ from torchvision import transforms
+ from torchvision import datasets
+ from PIL import Image
+ import pandas as pd
+ import idx2numpy, os
+ import torch
+
+ # Dataset creation: loading from a custom dataset.
+ IMAGE_DIMS = 224
+
+ normal_transforms = v2.Compose([
+     v2.Resize(size=(IMAGE_DIMS, IMAGE_DIMS)),
+     # convert to rgb from greyscale.
+     # v2.RandomHorizontalFlip(p=0.5),
+     v2.ToDtype(torch.float32),  # , scale=True),
+     # v2.RandomPerspective(distortion_scale=0.6, p=0.4),
+     # v2.GaussianBlur(kernel_size=(5, 11), sigma=(0.1, 0.2)),
+     v2.RandomRotation(degrees=(-15, 15)),
+     # v2.RandomAffine(degrees=(-15, 15)),
+     # v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+     transforms.Normalize((0.13066047430038452,), (0.30810782313346863,)),
+ ])
+
+
+ class CustomImageDataset(Dataset):
+     """
+     This class must inherit from the torch.utils.data.Dataset class
+     and contain the functions __init__, __len__, and __getitem__.
+     """
+     def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
+         self.img_labels = pd.read_csv(annotations_file)
+         self.img_dir = img_dir
+         self.transform = transform
+         self.target_transform = target_transform
+
+     def __len__(self):
+         return len(self.img_labels)
+
+     def __getitem__(self, idx):
+         """Get the image and label at the index idx."""
+         img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
+         Image.open(img_path).convert("RGB").save(img_path)
+         image = read_image(img_path)
+         label = self.img_labels.iloc[idx, 1]
+         if self.transform:
+             image = self.transform(image)
+         if self.target_transform:
+             label = self.target_transform(label)
+         return image, label
+
+
+ train_data = CustomImageDataset("./dataset/root/labels.csv", "./dataset/root/train/", transform=normal_transforms)
+
+ # Create a DataLoader, so we can iterate through the dataset in batches.
+ # train_loader = DataLoader(train_data, batch_size=64, shuffle=True, )
+
+ # Testing the dataloader.
+ # for i, (images, labels) in enumerate(train_loader):
+ #     print(i, images.shape, labels.shape)
+
+ train_size = int(0.8 * len(train_data))
+ test_size = len(train_data) - train_size
+ train_dataset, test_dataset = torch.utils.data.random_split(train_data, [train_size, test_size])
+
+ # Create DataLoaders for the train and test sets.
+ train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+ test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
+
+ print("Data loader and Test Loaders are ready to be used.")
+
+ # Create first stage labels,
+ # movement stage labels,
+ # final stage labels.
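A minimal smoke-test sketch for the loaders defined above, assuming dataset/root has already been built by data_formater.py; the printed shapes are what the transforms above should produce for a full batch:

# Smoke-test sketch: pull one batch from the training loader (requires dataset/root to exist).
from dataset_creation import train_loader

images, labels = next(iter(train_loader))
print(images.shape, labels.shape)   # expected: torch.Size([32, 3, 224, 224]) torch.Size([32])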
model.py ADDED
@@ -0,0 +1,193 @@
+ # Michael Peres 30/03/2024.
+
+ # Model for binary classification.
+ # Import statements for the model.
+ from torchvision.transforms import ToTensor
+ from torchvision.transforms import v2
+ from torchvision import transforms
+
+ import matplotlib.pyplot as plt
+ from time import time
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import torch, os
+ from torch.optim.lr_scheduler import ReduceLROnPlateau
+ from tqdm import tqdm
+
+ # Expected input shape (H, W, C) after preprocessing.
+ input_shape = (224, 224, 3)
+
+ # device = (
+ #     "cuda"
+ #     if torch.cuda.is_available()
+ #     else "mps"
+ #     if torch.backends.mps.is_available()
+ #     else "cpu"
+ # )
+
+ device = "cpu"  # Having trouble with MPS on Mac, so use CPU for now until the main PC is available.
+ print(f"Using {device} device for training/inference.")
+ if device == "cuda":
+     print(f"GPU being used: {torch.cuda.get_device_name(0)}")
+
+
+ # We have a custom dataset that we will be using in this example.
+ class MakiAlexNet(nn.Module):
+     def __init__(self, num_classes=2):
+         super(MakiAlexNet, self).__init__()
+         self.num_classes = num_classes
+         self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=1)  # (nn.LazyConv2d would determine the input channels automatically.)
+         self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
+         self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)  # 256, 384
+         self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)  # 384, 384
+         self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)  # 384, 256
+         self.activation = nn.ReLU()
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
+
+         # Replace Flatten with GlobalAvgPool2d.
+         self.gap = nn.AvgPool2d(5)  # Adjust output size if needed.
+
+         # In this case LazyLinear is really useful after flattening,
+         # such that abstraction is made from the initial output layer and the linear layer nodes.
+         self.fcc = nn.Sequential(
+             nn.Flatten(),
+             nn.Linear(6400, 4096),
+             # nn.Linear(in_features=256, out_features=4096),  # adaptation to code
+             # nn.LazyLinear(4096),  # this defines the output neuron size and takes in the leading input channels.
+             nn.ReLU(),
+             nn.Dropout(p=0.5),
+             # nn.LazyLinear(4096),
+             nn.Linear(4096, 4096),
+             nn.ReLU(),
+             nn.Dropout(p=0.5),
+             # nn.LazyLinear(self.num_classes),
+             nn.Linear(4096, self.num_classes)
+         )
+
+         # Create an empty dictionary to store layer outputs.
+         self.layer_outputs = {}
+
+         # Register hooks for desired layers.
+         self.conv5.register_forward_hook(self._save_layer_output)
+
+     def _save_layer_output(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = output
+
+     def forward(self, x):
+         """Defined forward pass of AlexNet for learning left or right prediction."""
+         x = self.conv1(x)    # wider
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv2(x)    # wider.
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv3(x)    # wider.
+         x = self.activation(x)
+
+         x = self.conv4(x)
+         x = self.activation(x)
+
+         x = self.conv5(x)
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         # x = self.gap(x).squeeze(-1).squeeze(-1)
+         x = self.fcc(x)  # Flatten and pass through the linear layers down to 2 classes.
+         return x
+
+
+ def init_weights(m):
+     if isinstance(m, nn.Conv2d):
+         nn.init.xavier_uniform_(m.weight)
+         if m.bias is not None:
+             m.bias.data.fill_(0.01)
+     elif isinstance(m, nn.Linear):
+         nn.init.xavier_uniform_(m.weight)
+         m.bias.data.fill_(0.01)
+
+
+ if __name__ == "__main__":
+     from dataset_creation import test_loader, train_loader  # Initiate the custom dataloaders and datasets here.
+     # Run the model training; also introduce features that help it learn better, like a learning-rate scheduler.
+
+     EPOCH = 35
+     model = MakiAlexNet()
+
+     # model.apply(init_weights)
+     # torch.load("alexnet_cognitive.pth", map_location=device)
+     model.to(device)
+     print(model)
+     print("Model has been tested and is working correctly.")
+     # Running the model with test data.
+     criterion = nn.CrossEntropyLoss()
+     optimizer = torch.optim.SGD(model.parameters(), lr=0.00001 * 5, weight_decay=0.0001, momentum=0.9)
+     # Define the learning rate scheduler.
+     scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
+     if os.path.exists("best_model.txt"):
+         with open("best_model.txt", "r") as file:
+             best_accuracy = float(file.read())
+     else:
+         best_accuracy = 0.0
+
+     for epoch in tqdm(range(EPOCH), desc="Training Epoch Cycle"):
+         model.train()  # Set model to training mode.
+         running_loss = 0.0
+
+         for i, data in enumerate(train_loader, 0):
+             if i % 10 == 0:
+                 print(f"Internal Loop of batches: {i}")
+             inputs, labels = data
+             # print(type(labels), labels)
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+
+             running_loss += loss.item()
+
+         train_loss = running_loss / len(train_loader)
+         print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')
+
+         # Validation phase.
+         model.eval()  # Set model to evaluation mode.
+         val_running_loss = 0.0
+         val_correct = 0
+         val_total = 0
+         with torch.no_grad():
+             for data in test_loader:  # test_loader is used as a validation loader.
+                 inputs, labels = data
+                 inputs, labels = inputs.to(device), labels.to(device)
+
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+
+                 val_running_loss += loss.item()
+                 _, predicted = torch.max(outputs.data, 1)
+                 val_total += labels.size(0)
+                 val_correct += (predicted == labels).sum().item()
+
+         val_loss = val_running_loss / len(test_loader)
+         val_accuracy = 100 * val_correct / val_total
+         print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')
+         if val_accuracy > best_accuracy:
+             best_accuracy = val_accuracy
+             torch.save(model.state_dict(), "alexnet_cognitive_gap.pth")
+             with open("best_model.txt", "w") as file:
+                 file.write(f"{best_accuracy}")
+
+         # Update the LR scheduler with the validation loss.
+         scheduler.step(val_loss)
+         # print(f'LR: {scheduler.get_last_lr()}')
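For reference, a single-image inference sketch against the checkpoint that the visualisation scripts below pair with this class (alexnet_cognitive.pth); the image path is one of the training frames and is purely illustrative:

# Hypothetical single-image inference sketch for MakiAlexNet (paths are illustrative).
import torch
from torchvision.io import read_image
from torchvision.transforms import v2
from model import MakiAlexNet

model = MakiAlexNet()
model.load_state_dict(torch.load("alexnet_cognitive.pth", map_location="cpu"))
model.eval()

img = read_image("dataset/root/train/left1_frame_0.jpg").float()   # (3, H, W)
img = v2.Resize((224, 224))(img).unsqueeze(0)                       # (1, 3, 224, 224)
with torch.no_grad():
    logits = model(img)
print({0: "left", 1: "right"}[logits.argmax(dim=1).item()])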
model_two.py ADDED
@@ -0,0 +1,162 @@
+ from torchvision.transforms import ToTensor
+ from torchvision.transforms import v2
+ from torchvision import transforms
+
+ import matplotlib.pyplot as plt
+ from time import time
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import torch, os
+ from torch.optim.lr_scheduler import ReduceLROnPlateau
+ from tqdm import tqdm
+
+ # Expected input shape (H, W, C) after preprocessing.
+ input_shape = (224, 224, 3)
+
+ device = (
+     "cuda"
+     if torch.cuda.is_available()
+     else "mps"
+     if torch.backends.mps.is_available()
+     else "cpu"
+ )
+
+
+ class MakiAlexNet(nn.Module):
+     def __init__(self, num_classes=2):
+         super(MakiAlexNet, self).__init__()
+         self.num_classes = num_classes
+         self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=1)  # (nn.LazyConv2d would determine the input channels automatically.)
+         self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
+         self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)  # 256, 384
+         self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)  # 384, 384
+         self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)  # 384, 256
+         self.activation = nn.ReLU()
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
+         self.dropout = nn.Dropout(p=0.5)
+         self.f_linear = nn.Linear(256, self.num_classes)
+
+         # Replace Flatten with global average pooling.
+         self.gap = nn.AvgPool2d(5)  # Adjust output size if needed.
+
+         # Create an empty dictionary to store layer outputs.
+         self.layer_outputs = {}
+
+         # Register hooks for desired layers.
+         self.conv5.register_forward_hook(self._save_layer_output)
+         self.f_linear.register_forward_hook(self._save_layer_output)
+
+     def _save_to_output_weights(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = {"input": input, "output": output, "weights": module.weight.data}
+
+     def _save_layer_output(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = output
+
+     def forward(self, x):
+         """Defined forward pass of AlexNet for learning left or right prediction."""
+         x = self.conv1(x)    # wider
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv2(x)    # wider.
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv3(x)    # wider.
+         x = self.activation(x)
+
+         x = self.conv4(x)
+         x = self.activation(x)
+
+         x = self.conv5(x)
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.gap(x).squeeze(-1).squeeze(-1)
+         # x = self.activation(x)
+         x = self.dropout(x)
+         x = self.f_linear(x)
+         return x
+
+
+ if __name__ == "__main__":
+     from dataset_creation import test_loader, train_loader  # Initiate the custom dataloaders and datasets here.
+
+     # Run the model training; also introduce features that help it learn better, like a learning-rate scheduler.
+     EPOCH = 35
+     model = MakiAlexNet()
+     model.to(device)
+     print(model)
+     criterion = nn.CrossEntropyLoss()
+     optimizer = torch.optim.SGD(model.parameters(), lr=0.00001 * 5, weight_decay=0.0001, momentum=0.9)
+     scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
+     if os.path.exists("best_model_2.0.txt"):
+         with open("best_model_2.0.txt", "r") as file:
+             best_accuracy = float(file.read())
+     else:
+         best_accuracy = 0.0
+         # Create a new file.
+         with open("best_model_2.0.txt", "w") as file:
+             file.write(f"{best_accuracy}")
+
+     for epoch in tqdm(range(EPOCH), desc="Training Epoch Cycle"):
+         model.train()  # Set model to training mode.
+         running_loss = 0.0
+
+         for i, data in enumerate(train_loader, 0):
+             if i % 10 == 0:
+                 print(f"Internal Loop of batches: {i}")
+             inputs, labels = data
+             # print(type(labels), labels)
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+
+             running_loss += loss.item()
+
+         train_loss = running_loss / len(train_loader)
+         print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')
+
+         # Validation phase.
+         model.eval()  # Set model to evaluation mode.
+         val_running_loss = 0.0
+         val_correct = 0
+         val_total = 0
+         with torch.no_grad():
+             for data in test_loader:  # test_loader is used as a validation loader.
+                 inputs, labels = data
+                 inputs, labels = inputs.to(device), labels.to(device)
+
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+
+                 val_running_loss += loss.item()
+                 _, predicted = torch.max(outputs.data, 1)
+                 val_total += labels.size(0)
+                 val_correct += (predicted == labels).sum().item()
+
+         val_loss = val_running_loss / len(test_loader)
+         val_accuracy = 100 * val_correct / val_total
+         print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')
+         if val_accuracy > best_accuracy:
+             best_accuracy = val_accuracy
+             torch.save(model.state_dict(), "alexnet_2.0.pth")
+             with open("best_model_2.0.txt", "w") as file:
+                 file.write(f"{best_accuracy}")
+
+         # Update the LR scheduler with the validation loss.
+         scheduler.step(val_loss)
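This second model swaps the 6400-4096-4096-2 fully connected head for global average pooling followed by a single 256-to-2 linear layer. A rough sketch of the size difference between the two heads, with shapes taken from the layer definitions above:

# Rough parameter-count comparison sketch between the two classifier heads defined above.
import torch
from torch import nn

fcc_head = nn.Sequential(nn.Flatten(), nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 2))
gap_head = nn.Sequential(nn.AvgPool2d(5), nn.Flatten(), nn.Dropout(0.5), nn.Linear(256, 2))

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(fcc_head), count(gap_head))      # the GAP head is orders of magnitude smaller

x = torch.randn(1, 256, 5, 5)                # conv5 output after the final max pool
print(fcc_head(x).shape, gap_head(x).shape)  # both produce (1, 2) logits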
model_visualisation.py ADDED
@@ -0,0 +1,138 @@
+ # Based on the learnt CNN kernels, this script will aid in generating a learnt kernel pattern.
+
+ # Attempt 1, did not work well.
+
+ import matplotlib.pyplot as plt
+ # Here we should be able to determine how much each part of the image contributes to detecting the goal,
+ # and how these contributions change over time.
+
+ # https://www.youtube.com/watch?v=ST9NjnKKvT8
+ # This video aims to solve this problem, by going over the heatmaps of CNNs.
+
+ from torchvision import transforms
+ from dataset_creation import normal_transforms
+ from model import MakiAlexNet
+ import numpy as np
+ import cv2, torch, os
+ from tqdm import tqdm
+ import time
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_0.jpg"
+ MODEL_PARAMS = "alexnet_cognitive.pth"
+ all_processing_files = os.listdir(os.path.join(os.getcwd(), "./dataset/root/train"))
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+ print("Model armed and ready for evaluation.")
+
+ # Print the model's state_dict.
+ print("Model's state_dict:")
+ for param_tensor in model.state_dict():
+     print(param_tensor, "\t", model.state_dict()[param_tensor].size())
+
+
+ for image_file in tqdm(all_processing_files):
+
+     # Load the image from file.
+     abs_file_path = os.path.join(os.getcwd(), "./dataset/root/train", image_file)
+     image = cv2.imread(abs_file_path)
+     # print(image.shape)
+     # cv2.imshow("test", image)
+     # cv2.waitKey(5000)
+
+     print("Image input shape of the matrix before: ", image.shape)
+     image = torch.unsqueeze(torch.tensor(image.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     image = torch.einsum("BWHC->BCWH", image)
+     print("Image input shape of the matrix after: ", image.shape)
+     conv1_output = model.conv1(image)
+     print("Output shape of the matrix: ", conv1_output.shape)
+
+     # Handling image convolutions.
+     conv1_formatted = torch.einsum("BCWH->WHC", conv1_output)
+     print(f"Formatted shape of matrix is: {conv1_formatted.shape}")
+
+     # Lay the channels out on a grid of subplots.
+     num_channels = conv1_formatted.shape[2]  # Get the number of channels (96).
+     max_rows = 5  # Set a maximum number of rows (optional).
+     rows = min(max_rows, int(np.sqrt(num_channels)))  # Limit rows to a maximum.
+     cols = int(np.ceil(num_channels / rows))
+
+     fig, axes = plt.subplots(rows, cols, figsize=(12, 12))  # Create a grid of subplots.
+
+     DATASET_OUTPUT_PATH = "./dataset/visualisation"
+     merged_frames = np.zeros((224, 224))
+     image_file_dir = abs_file_path.split(".jpg")[0].split("/")[-1]
+     if not os.path.isdir(os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir)):
+         os.mkdir(os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir))  # Make a new directory.
+
+     for i in range(rows):
+         for j in range(cols):
+             channel_idx = i * cols + j  # Calculate index based on row and column.
+             if channel_idx < num_channels:  # Check if within channel range.
+                 channel_data = conv1_formatted[:, :, channel_idx]
+                 channel_data = channel_data.detach().numpy()
+                 print(f"Channel Data shape dimension: {channel_data.shape}")
+                 # channel_data = np.mean(channel_data, axis=2)
+                 # Get the mean over the third dimension, so mean on channels: H,W,C -> H,W.
+                 channel_data = cv2.resize(channel_data, (224, 224))
+
+                 # Accumulate normalized channel data.
+                 # Take threshold values of channel data to add to merged frames, if above a specific point.
+                 # ret, channel_data = cv2.threshold(channel_data, 120, 255, cv2.THRESH_BINARY)
+                 merged_frames += channel_data
+
+                 # # Save the image data matrix.
+                 # image_filename = f"{int(time.time())}_output_{channel_idx}.jpg"
+                 # image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_filename)
+                 # plt.imsave(image_path, channel_data)
+                 # print(f"Image path saved at {image_path}")
+
+     # Ensure the final merged_frames is also normalised.
+     merged_frames /= (np.max(merged_frames) * .8)
+
+     # Thresholding the main image regions that cause this highlight.
+     merged_frames_gray = merged_frames.astype(np.uint8)  # No conversion needed, use as-is.
+     # merged_frames = cv2.adaptiveThreshold(merged_frames_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
+
+     image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_file_dir + "conv1_mask.jpg")
+     plt.imsave(image_path, merged_frames_gray, cmap='gray')
+
+     # merged_frames = merged_frames.astype(np.uint8)
+     heatmap_color = cv2.applyColorMap(merged_frames_gray, cv2.COLORMAP_JET)  # Apply a colormap.
+     # cv2.imshow("merged", heatmap_color)
+     image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_file_dir + "conv1_heatmap.jpg")
+     plt.imsave(image_path, heatmap_color)
+     # # Merge all images into one, normalising based on the highest value, and then upscaling from 54x54x1 to 224x224x1.
+     # cv2.waitKey(5000)
+     plt.close()
+
+     exit()
+
+ # image_tensor = normal_transforms(torch.tensor(image))
+ # print(image_tensor.shape)
+ # plt.imshow(image_tensor.squeeze())
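The script above calls model.conv1 directly on the input; an equivalent way to capture intermediate activations (and the mechanism MakiAlexNet already uses for conv5) is a forward hook. A minimal sketch, assuming a 224x224 RGB input:

# Forward-hook sketch: capture conv1 activations without calling the layer directly.
import torch
from model import MakiAlexNet

model = MakiAlexNet()
model.eval()

captured = {}
handle = model.conv1.register_forward_hook(lambda mod, inp, out: captured.update(conv1=out))

with torch.no_grad():
    model(torch.randn(1, 3, 224, 224))   # any 224x224 RGB batch
print(captured["conv1"].shape)            # torch.Size([1, 96, 54, 54])
handle.remove()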
test.py ADDED
@@ -0,0 +1,12 @@
+ from torch import nn
+ import torch
+
+
+ # Pool with a square window of size=3, stride=2.
+ m = nn.AvgPool2d(3, stride=2)
+ # Pool over the whole 5x5 window (global average pooling of the conv5 output).
+ m = nn.AvgPool2d(5)
+ input = torch.randn(32, 256, 5, 5)
+ output = m(input)
+ output = output.squeeze(-1).squeeze(-1)
+ print(output.shape)
video1.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:326d7cd53f18796b719112b90a8e165e02886d6500cf3a340334e2f503cc0d2a
+ size 2476455
video_preprocessing.py ADDED
@@ -0,0 +1,123 @@
+ # Take each video, crop it to 520x400x3, and obtain key frames for use in a dataset.
+ # Frames will be saved as individual images.
+
+ import numpy as np
+ import cv2, os
+ from tqdm import tqdm
+
+
+ # Obtain key frames from the video, equally spaced.
+ # To increase the dataset we raise the number of frames allowed to 50 per video: roughly 900 images for the dataset, 200 of which are for training.
+ def get_equal_elements(array, num_elements=12):
+     """
+     Takes a specific number of elements equally spaced from an array.
+
+     Args:
+         array: The input array.
+         num_elements: The number of elements to take (default 12).
+
+     Returns:
+         A list of elements from the array.
+     """
+     if num_elements > len(array):
+         print(f"Number of elements cannot be greater than array length : {len(array)}")
+         return []
+     step_size = len(array) // (num_elements - 1)  # Avoid an extra element with floor division.
+     return array[::step_size]  # Slice with step size.
+
+
+ def video_to_keyframes(video_filename):
+     cap = cv2.VideoCapture(video_filename)
+     frames = []
+     while cap.isOpened():
+         ret, frame = cap.read()
+         try:
+             # Frames are 720x1280x3; remove the top 200 pixels and crop the centre columns.
+             # print(frame.shape)  # (720, 1280, 3)
+             frame = frame[200:, 440:840]
+             # print(frame.shape)  # (520, 400, 3)
+             frames.append(frame)
+             # cv2.imshow('frame', frame)
+         except Exception as e:
+             print(f"Error is {e}")
+             if frame is None:
+                 break
+             continue
+         if cv2.waitKey(1) & 0xFF == ord('q'):
+             break
+
+     cap.release()
+     cv2.destroyAllWindows()
+     print("Done obtaining captured frames.")
+     selected_frames = get_equal_elements(frames, num_elements=50)
+     print("Obtained selected frames.")
+     # Extract filename without extension for directory creation.
+     filename_no_ext = video_filename.split('.')[0]
+     if "left" in filename_no_ext:
+         filename_no_ext = "left"
+     else:
+         filename_no_ext = "right"
+     # Create a directory for the video if it doesn't exist.
+     try:
+         os.makedirs(os.path.join("dataset", filename_no_ext))
+     except FileExistsError:
+         pass  # Directory already exists, ignore.
+
+     # Save each selected frame as an image in filename_frame_number.jpg format.
+     # Create images under the directory for that specific source video file name, like left for left1.mp4.
+     for i, frame in enumerate(selected_frames):
+         print(filename_no_ext)
+         file_name = f"{video_filename.split('.')[0].split('/')[-1]}_frame_{i}.jpg"
+         print(file_name)
+         image_path = os.path.join("dataset", filename_no_ext, file_name)
+         print(f"Write to disk. {image_path}")
+         print("Resized to 224,224")
+         target_height, target_width = 224, 224
+         if frame is not None:
+             frame = resize_with_aspect_ratio(frame, target_height, target_width)
+             # print(type(frame), frame)
+             cv2.imwrite(image_path, frame)
+         else:
+             continue
+
+     print("Saved images for all selected frames.")
+     return selected_frames
+
+
+ # Resize images to the target dimensions for AlexNet,
+ # preserving aspect ratio where possible.
+ def resize_with_aspect_ratio(image, target_height, target_width):
+     height, width = image.shape[:2]
+     if height == target_height and width == target_width:
+         return image
+     if height > width:
+         new_width = int(width * (target_height / height))
+         # resized_image = cv2.resize(image, (new_width, target_height))
+         resized_image = cv2.resize(image, (target_width, target_height))
+         return resized_image
+     else:
+         new_height = int(height * (target_width / width))
+         resized_image = cv2.resize(image, (target_width, new_height))
+         # Crop the centre of the resized image to match the target dimensions.
+         start_x = int((resized_image.shape[1] - target_width) / 2)
+         start_y = int((resized_image.shape[0] - target_height) / 2)
+         return resized_image[start_y:start_y + target_height, start_x:start_x + target_width]
+
+
+ if __name__ == "__main__":
+     # Load the source videos and extract resized key frames.
+     BASE_PATH = "dataset/src/"
+     videos = os.listdir(BASE_PATH)
+     # print(videos)
+     target_height, target_width = 224, 224
+     for video_file in tqdm(videos):
+         selected_frames = video_to_keyframes(os.path.join(BASE_PATH, video_file))
+
+     # image = cv2.imread(image_path)  # Or use your image data.
+     # resized_image = resize_with_aspect_ratio(image, target_height, target_width)
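A small illustrative check of resize_with_aspect_ratio on a synthetic frame shaped like the 520x400 crop produced above (the array is dummy data):

# Illustrative check of resize_with_aspect_ratio on a synthetic 520x400 cropped frame.
import numpy as np
from video_preprocessing import resize_with_aspect_ratio

frame = np.zeros((520, 400, 3), dtype=np.uint8)    # same shape as a cropped video frame
resized = resize_with_aspect_ratio(frame, 224, 224)
print(resized.shape)                                # (224, 224, 3): taller-than-wide frames are resized straight to 224x224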
visualise2.py ADDED
@@ -0,0 +1,156 @@
+ # https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34
+
+ import cv2, os, torch, re
+ import matplotlib.pyplot as plt
+ from scipy.ndimage import zoom
+ import numpy as np
+ from model import MakiAlexNet
+ from tqdm import tqdm
+
+ # from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
+ TOP_ACCURACY_PERCENTILE = 10
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
+ MODEL_PARAMS = "alexnet_cognitive.pth"
+ GIF_STORE = "dataset/gifs/"
+ TRAIN_STORE = "dataset/root/train/"
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+
+ # Make the model run on CUDA if available.
+ if torch.cuda.is_available():
+     model = model.cuda()
+     print("Running on cuda")
+
+
+ print(dir(model))
+
+ for name, module in model.named_modules():
+     # Print the layer name.
+     print(name)
+
+
+ def extract_file_paths(filename):
+     """With aid from https://regex101.com/, regex."""
+     extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
+     result = re.search(extractor_reg, filename)
+     frame_no = result.group(4)
+     frame_name = result.group(1)
+     video_no = result.group(2)
+     return frame_no, frame_name, video_no
+
+
+ def create_mp4_from_frames(file_name, frames):
+     """Generate an MP4/GIF file from the collection of frames given, lasting roughly 2000 msec."""
+     print("Sorted frames: ", sorted(frames))
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     height, width, _ = cv2.imread(frames[0]).shape
+     fps = 20  # Adjust the frames per second (FPS) as needed.
+     video_path = os.path.join(os.getcwd(), "dataset", "gifs", f"{file_name}.mp4")
+     video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
+     for frame_path in sorted(frames):
+         # Convert BGR to RGB.
+         image = cv2.imread(frame_path)
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         # if image.dtype != np.uint8:
+         #     image = (image * 255).astype(np.uint8)  # Convert to uint8.
+         video.write(image)
+
+     # Release the VideoWriter.
+     video.release()
+
+
+ current_video_name = None
+ selected_frames = []  # Stores frame file paths for the clip generation.
+ for image_filename in ["left1_frame_5.jpg"]:  # tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images"):
+
+     frame_no, frame_name, video_no = extract_file_paths(image_filename)
+     obtained_video_name = video_no + "vid" + frame_name
+     if current_video_name != obtained_video_name:
+         # We have a new video sequence, so save the current sequence and name.
+         if selected_frames:
+             filename = f"{current_video_name}"
+             # Create a clip from the frames.
+             if current_video_name:
+                 create_mp4_from_frames(filename, selected_frames)
+         # Clear frames and hand off to the new handle.
+         selected_frames = []
+         current_video_name = obtained_video_name
+
+     # With the number and name of the file paths, we can then determine which should be part of the specific GIF file.
+     # f"frame_no,fileno,video_no.gif"
+
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     img = torch.unsqueeze(torch.tensor(img.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     X = torch.einsum("BWHC->BCWH", img)
+     if torch.cuda.is_available():
+         X = X.cuda()
+
+     output = model(X)
+     # print(output)
+     # print(model.layer_outputs)
+     conv = model.layer_outputs['Conv2d']
+
+     conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()
+     # print(conv.shape)  # torch.Size([1, 256, 12, 12]) before the permute.
+     # conv = conv.squeeze(0)
+     # print(conv.shape)  # torch.Size([256, 12, 12])
+     scale = 224 / 12  # conv5 output is 256x12x12 (256x5x5 after the final max pool).
+
+     plt.figure(figsize=(16, 16))
+     total_mat = None
+     for i in range(256):
+         plt.subplot(16, 16, i + 1)
+         plt.imshow(img.squeeze(0))
+         plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+     plt.show()
+     # Wait for the user to press a key.
+
+     # mat = zoom(conv[0, :, :, i], zoom=(scale, scale))
+     # threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
+     # # The lower the threshold, the more specific the highlighted region.
+     #
+     # mask = mat > threshold
+     # # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
+     #
+     # # Rescale remaining values (adjust new_range if needed).
+     # new_range = 1  # Adjust based on your desired final range.
+     # filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
+     #
+     # # I just add all the maps together, which is really noisy.
+     # if type(total_mat) != type(None):
+     #     total_mat += filter_map
+     # else:
+     #     total_mat = filter_map
+     #
+     # # Normalise based on the largest value.
+     # # Store this image in a collection, from which a GIF will be made that lasts at least 2 seconds.
+     # total_mat = total_mat / abs(np.max(total_mat))
+     #
+     # image = img.squeeze(0)  # .detach().numpy().astype(np.float32)
+     #
+     # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
+     # plt.imshow(total_mat, cmap='jet', alpha=0.3)
+     #
+     # # selected_frames.append()
+     # filename = frame_name + frame_no + video_no + ".jpg"
+     # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
+     # plt.savefig(file_path)
+     # selected_frames.append(file_path)
+
+ exit()
+
+ # plt.figure(figsize=(16, 16))
+ # for i in range(36):
+ #     plt.subplot(6, 6, i + 1)
+ #     plt.imshow(cv2.imread(TEST_IMAGE))
+ #     plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+ #
+ # plt.show()
visualise3.py ADDED
@@ -0,0 +1,176 @@
+ # Michael Peres (c) 2024
+ # Inspiration from the code tutorial mentioned here: https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34
+
+ import cv2, os, torch, re
+ import matplotlib.pyplot as plt
+ from scipy.ndimage import zoom
+ import numpy as np
+ from model_two import MakiAlexNet
+ from tqdm import tqdm
+
+ # from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
+ TOP_ACCURACY_PERCENTILE = 10
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
+ MODEL_PARAMS = "alexnet_2.0.pth"
+ GIF_STORE = "dataset/gifs2/"
+ TRAIN_STORE = "dataset/root/train/"
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+
+ # Make the model run on CUDA if available.
+ if torch.cuda.is_available():
+     model = model.cuda()
+     print("Running on cuda")
+
+
+ print(dir(model))
+
+ for name, module in model.named_modules():
+     # Print the layer name.
+     print(name)
+
+
+ def extract_file_paths(filename):
+     """With aid from https://regex101.com/, regex."""
+     extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
+     result = re.search(extractor_reg, filename)
+     frame_no = result.group(4)
+     frame_name = result.group(1)
+     video_no = result.group(2)
+     return frame_no, frame_name, video_no
+
+
+ def create_mp4_from_frames(file_name, frames):
+     """Generate an MP4/GIF file from the collection of frames given, lasting roughly 2000 msec."""
+     print("Sorted frames: ", sorted(frames))
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     height, width, _ = cv2.imread(frames[0]).shape
+     fps = 20  # Adjust the frames per second (FPS) as needed.
+     video_path = os.path.join(os.getcwd(), "dataset", "gifs2", f"{file_name}.mp4")
+     video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
+     for frame_path in sorted(frames):
+         # Convert BGR to RGB.
+         image = cv2.imread(frame_path)
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         # if image.dtype != np.uint8:
+         #     image = (image * 255).astype(np.uint8)  # Convert to uint8.
+         video.write(image)
+
+     # Release the VideoWriter.
+     video.release()
+
+
+ current_video_name = None
+ selected_frames = []  # Stores frame file paths for the clip generation.
+ for image_filename in tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images"):
+
+     frame_no, frame_name, video_no = extract_file_paths(image_filename)
+     obtained_video_name = video_no + "vid" + frame_name
+     if current_video_name != obtained_video_name:
+         # We have a new video sequence, so save the current sequence and name.
+         if selected_frames:
+             filename = f"{current_video_name}"
+             # Create a clip from the frames.
+             if current_video_name:
+                 create_mp4_from_frames(filename, selected_frames)
+         # Clear frames and hand off to the new handle.
+         selected_frames = []
+         current_video_name = obtained_video_name
+
+     # With the number and name of the file paths, we can then determine which should be part of the specific GIF file.
+     # f"frame_no,fileno,video_no.gif"
+
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     img = torch.unsqueeze(torch.tensor(img.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     X = torch.einsum("BWHC->BCWH", img)
+     if torch.cuda.is_available():
+         X = X.cuda()
+
+     output = model(X)
+     # print(output)
+     # print("Model layer outputs: ")
+     # print(model.layer_outputs)
+     conv = model.layer_outputs['Conv2d']
+     pred = model.layer_outputs["Linear"]
+     pred_weights, pred_bias = model.f_linear.weight, model.f_linear.bias
+     # print(pred_weights.shape)
+
+     conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()
+     # print(conv.shape)  # torch.Size([1, 256, 12, 12]) before the permute.
+     # conv = conv.squeeze(0)
+     # print(conv.shape)  # torch.Size([256, 12, 12])
+     target = np.argmax(pred.cpu().detach().numpy(), axis=1).squeeze()
+
+     weights = pred_weights[target, :].cpu().detach().numpy()
+     # print("weights", weights.shape, "conv", conv.squeeze(0).shape)
+     heatmap = conv.squeeze(0) @ weights
+     # print(conv.shape)
+     # print(heatmap.shape)
+     scale = 224 / 12  # conv5 output is 256x12x12 (256x5x5 after the final max pool).
+     plt.figure(figsize=(12, 12))
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     plt.imshow(img)
+     plt.imshow(zoom(heatmap, zoom=(scale, scale)), cmap='jet', alpha=0.5)
+     # If frame_no is just 0-9, add a leading 0 so the frames sort correctly.
+     if len(frame_no) == 1:
+         frame_no = "0" + frame_no
+     filename = video_no + frame_name + frame_no + ".jpg"
+     file_path = os.path.join(os.getcwd(), "dataset/gifs2/raw/", filename)
+     plt.savefig(file_path)
+     selected_frames.append(file_path)
+     plt.close()
+
+ # Wait for the user to press a key.
+
+ # mat = zoom(conv[0, :, :, i], zoom=(scale, scale))
+ # threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
+ # # The lower the threshold, the more specific the highlighted region.
+ #
+ # mask = mat > threshold
+ # # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
+ #
+ # # Rescale remaining values (adjust new_range if needed).
+ # new_range = 1  # Adjust based on your desired final range.
+ # filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
+ #
+ # # I just add all the maps together, which is really noisy.
+ # if type(total_mat) != type(None):
+ #     total_mat += filter_map
+ # else:
+ #     total_mat = filter_map
+ #
+ # # Normalise based on the largest value.
+ # # Store this image in a collection, from which a GIF will be made that lasts at least 2 seconds.
+ # total_mat = total_mat / abs(np.max(total_mat))
+ #
+ # image = img.squeeze(0)  # .detach().numpy().astype(np.float32)
+ #
+ # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
+ # plt.imshow(total_mat, cmap='jet', alpha=0.3)
+ #
+ # # selected_frames.append()
+ # filename = frame_name + frame_no + video_no + ".jpg"
+ # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
+ # plt.savefig(file_path)
+ # selected_frames.append(file_path)
+
+ exit()
+
+ # plt.figure(figsize=(16, 16))
+ # for i in range(36):
+ #     plt.subplot(6, 6, i + 1)
+ #     plt.imshow(cv2.imread(TEST_IMAGE))
+ #     plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+ #
+ # plt.show()
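The heatmap step above is a CAM-style weighting of the hooked conv5 activations by the final linear layer's weights for the predicted class. A compact sketch of just that step, using random arrays whose shapes mirror the ones in this script:

# CAM-style weighting sketch with synthetic shapes matching the hooked conv5 output (1, 256, 12, 12)
# and the GAP head's linear weights (2, 256). Values are random; only the shapes mirror visualise3.py.
import numpy as np

conv = np.random.rand(12, 12, 256)    # conv5 activations, channels last
weights = np.random.rand(2, 256)      # f_linear.weight: one row of 256 weights per class
target = 1                            # pretend the model predicted class 1 ("right")

heatmap = conv @ weights[target]      # weighted sum over channels -> (12, 12)
print(heatmap.shape)                  # (12, 12); zoomed to 224x224 before overlaying on the frame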