makiisthebes committed on
Commit
4ec6f12
1 Parent(s): 7471e05

Upload 18 files

.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Cognitive[[:space:]]Robotics[[:space:]]Lit[[:space:]]Review.docx filter=lfs diff=lfs merge=lfs -text
+ PresentationFinal.pdf filter=lfs diff=lfs merge=lfs -text
+ PresentationFinal.pptx filter=lfs diff=lfs merge=lfs -text
+ video1.mp4 filter=lfs diff=lfs merge=lfs -text
Cognitive Robotics Lit Review.docx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f4013d74a3d0338d39e0d9cb76445f68269d81bb9a810d915aafb3fd1c1d1e8
+ size 1758570
PresentationFinal.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ec1857be3a65a35e3725886e11257c258c850d01c99cea7df70cd8c43504439
+ size 1890972
PresentationFinal.pptx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f8778fe1522e8cff628009e656b4f216981953b28faef2f0a17f18a4822aa62
+ size 21281231
alexnet_2.0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60e26575481de7adc0c4002a571c50aa0c7a8fcbdd147771b14be34befd0219a
+ size 14995442
alexnet_cognitive.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7a3223076a2081ff3bec8174f3961054a222d0a5295ba9cadad88a044efdc11
+ size 187026089
alexnet_cognitive_gap.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d158f502c1b7f68d8dfdb9f26964c016b110b3571ee9e3b5425c8912dacf2437
+ size 86362865
best_model.txt ADDED
@@ -0,0 +1 @@
+ 85
best_model_2.0.txt ADDED
@@ -0,0 +1 @@
+ 93.11740890688259
data_formater.py ADDED
@@ -0,0 +1,42 @@
+ # Convert the data into the layout expected by the DataLoader.
+ import os
+
+ # Check the root directory exists; if not, create it.
+ if not os.path.exists("dataset/root"):
+     os.makedirs("dataset/root")
+
+ # Check if the labels.csv file exists; if it does, delete it.
+ if os.path.exists("dataset/root/labels.csv"):
+     os.remove("dataset/root/labels.csv")
+
+ # Create a labels csv file.
+ print("Creating labels.csv file.")
+ classes_to_model_output = {"left": 0, "right": 1}
+ with open("dataset/root/labels.csv", "w") as file:
+     # file.write("image,class\n")
+     classes = ["left", "right"]
+     for class_name in classes:
+         image_files = os.listdir(os.path.join("dataset", class_name))
+         for image in image_files:
+             file.write(f"{image},{classes_to_model_output[class_name]}\n")
+
+ print("Creating uniform image dataset.")
+ # Create a uniform image dataset, named train.
+ if not os.path.exists("dataset/root/train"):
+     os.makedirs("dataset/root/train")
+
+ # Copy the images to the root directory.
+ for class_name in classes:
+     image_files = os.listdir(os.path.join("dataset", class_name))
+     for image in image_files:
+         os.system(f"cp dataset/{class_name}/{image} dataset/root/train/{image}")
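The script above assumes frames already sit under dataset/left and dataset/right (see video_preprocessing.py) and produces dataset/root/labels.csv plus dataset/root/train/. A quick sanity-check sketch for the generated index, assuming pandas is installed; the column names here are only illustrative, since the CSV is written without a header row:

# Sanity-check sketch: count labelled frames per class (layout as built by data_formater.py).
import pandas as pd

labels = pd.read_csv("dataset/root/labels.csv", header=None, names=["image", "class"])
print(labels["class"].value_counts())   # expect two classes: 0 (left) and 1 (right)
print(labels.head())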
dataset_creation.py ADDED
@@ -0,0 +1,84 @@
+ # Dataset creation for the frames produced by the video preprocessing step.
+ from torch.utils.data import DataLoader
+ from torchvision.io import read_image
+ from torch.utils.data import Dataset
+ from torchvision.transforms import v2
+ from torchvision import transforms
+ from torchvision import datasets
+ from PIL import Image
+ import pandas as pd
+ import idx2numpy, os
+ import torch
+
+ # Dataset creation: loading from a custom dataset.
+ IMAGE_DIMS = 224
+
+ normal_transforms = v2.Compose([
+     v2.Resize(size=(IMAGE_DIMS, IMAGE_DIMS)),
+     # convert to rgb from greyscale.
+     # v2.RandomHorizontalFlip(p=0.5),
+     v2.ToDtype(torch.float32),  # , scale=True),
+     # v2.RandomPerspective(distortion_scale=0.6, p=0.4),
+     # v2.GaussianBlur(kernel_size=(5, 11), sigma=(0.1, 0.2)),
+     v2.RandomRotation(degrees=(-15, 15)),
+     # v2.RandomAffine(degrees=(-15, 15)),
+     # v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+     transforms.Normalize((0.13066047430038452,), (0.30810782313346863,)),
+ ])
+
+
+ class CustomImageDataset(Dataset):
+     """
+     This class must inherit from the torch.utils.data.Dataset class
+     and contain the functions __init__, __len__, and __getitem__.
+     """
+     def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
+         self.img_labels = pd.read_csv(annotations_file)
+         self.img_dir = img_dir
+         self.transform = transform
+         self.target_transform = target_transform
+
+     def __len__(self):
+         return len(self.img_labels)
+
+     def __getitem__(self, idx):
+         """Get the image and label at the index idx."""
+         img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
+         Image.open(img_path).convert("RGB").save(img_path)
+         image = read_image(img_path)
+         label = self.img_labels.iloc[idx, 1]
+         if self.transform:
+             image = self.transform(image)
+         if self.target_transform:
+             label = self.target_transform(label)
+         return image, label
+
+
+ train_data = CustomImageDataset("./dataset/root/labels.csv", "./dataset/root/train/", transform=normal_transforms)
+
+ # Create a DataLoader, so we can iterate through the dataset in batches.
+ # train_loader = DataLoader(train_data, batch_size=64, shuffle=True, )
+
+ # Testing the dataloader.
+ # for i, (images, labels) in enumerate(train_loader):
+ #     print(i, images.shape, labels.shape)
+
+ train_size = int(0.8 * len(train_data))
+ test_size = len(train_data) - train_size
+ train_dataset, test_dataset = torch.utils.data.random_split(train_data, [train_size, test_size])
+
+ # Create DataLoaders for the train and test sets.
+ train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+ test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
+
+ print("Data loader and Test Loaders are ready to be used.")
+
+ # Create first stage labels,
+ # movement stage labels,
+ # final stage labels.
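A minimal smoke-test sketch for the loaders defined above, assuming dataset/root has already been built by data_formater.py; the printed shapes are what the transforms above should produce for a full batch:

# Smoke-test sketch: pull one batch from the training loader (requires dataset/root to exist).
from dataset_creation import train_loader

images, labels = next(iter(train_loader))
print(images.shape, labels.shape)   # expected: torch.Size([32, 3, 224, 224]) torch.Size([32])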
model.py ADDED
@@ -0,0 +1,193 @@
+ # Michael Peres 30/03/2024.
+
+ # Model for binary classification.
+ # Import statements for the model.
+ from torchvision.transforms import ToTensor
+ from torchvision.transforms import v2
+ from torchvision import transforms
+
+ import matplotlib.pyplot as plt
+ from time import time
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import torch, os
+ from torch.optim.lr_scheduler import ReduceLROnPlateau
+ from tqdm import tqdm
+
+ # Expected input shape (H, W, C) after preprocessing.
+ input_shape = (224, 224, 3)
+
+ # device = (
+ #     "cuda"
+ #     if torch.cuda.is_available()
+ #     else "mps"
+ #     if torch.backends.mps.is_available()
+ #     else "cpu"
+ # )
+
+ device = "cpu"  # Having trouble with MPS on Mac, so use CPU for now until the main PC is available.
+ print(f"Using {device} device for training/inference.")
+ if device == "cuda":
+     print(f"GPU being used: {torch.cuda.get_device_name(0)}")
+
+
+ # We have a custom dataset that we will be using in this example.
+ class MakiAlexNet(nn.Module):
+     def __init__(self, num_classes=2):
+         super(MakiAlexNet, self).__init__()
+         self.num_classes = num_classes
+         self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=1)  # (nn.LazyConv2d would determine the input channels automatically.)
+         self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
+         self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)  # 256, 384
+         self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)  # 384, 384
+         self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)  # 384, 256
+         self.activation = nn.ReLU()
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
+
+         # Replace Flatten with GlobalAvgPool2d.
+         self.gap = nn.AvgPool2d(5)  # Adjust output size if needed.
+
+         # In this case LazyLinear is really useful after flattening,
+         # such that abstraction is made from the initial output layer and the linear layer nodes.
+         self.fcc = nn.Sequential(
+             nn.Flatten(),
+             nn.Linear(6400, 4096),
+             # nn.Linear(in_features=256, out_features=4096),  # adaptation to code
+             # nn.LazyLinear(4096),  # this defines the output neuron size and takes in the leading input channels.
+             nn.ReLU(),
+             nn.Dropout(p=0.5),
+             # nn.LazyLinear(4096),
+             nn.Linear(4096, 4096),
+             nn.ReLU(),
+             nn.Dropout(p=0.5),
+             # nn.LazyLinear(self.num_classes),
+             nn.Linear(4096, self.num_classes)
+         )
+
+         # Create an empty dictionary to store layer outputs.
+         self.layer_outputs = {}
+
+         # Register hooks for desired layers.
+         self.conv5.register_forward_hook(self._save_layer_output)
+
+     def _save_layer_output(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = output
+
+     def forward(self, x):
+         """Defined forward pass of AlexNet for learning left or right prediction."""
+         x = self.conv1(x)    # wider
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv2(x)    # wider.
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv3(x)    # wider.
+         x = self.activation(x)
+
+         x = self.conv4(x)
+         x = self.activation(x)
+
+         x = self.conv5(x)
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         # x = self.gap(x).squeeze(-1).squeeze(-1)
+         x = self.fcc(x)  # Flatten and pass through the linear layers down to 2 classes.
+         return x
+
+
+ def init_weights(m):
+     if isinstance(m, nn.Conv2d):
+         nn.init.xavier_uniform_(m.weight)
+         if m.bias is not None:
+             m.bias.data.fill_(0.01)
+     elif isinstance(m, nn.Linear):
+         nn.init.xavier_uniform_(m.weight)
+         m.bias.data.fill_(0.01)
+
+
+ if __name__ == "__main__":
+     from dataset_creation import test_loader, train_loader  # Initiate the custom dataloaders and datasets here.
+     # Run the model training; also introduce features that help it learn better, like a learning-rate scheduler.
+
+     EPOCH = 35
+     model = MakiAlexNet()
+
+     # model.apply(init_weights)
+     # torch.load("alexnet_cognitive.pth", map_location=device)
+     model.to(device)
+     print(model)
+     print("Model has been tested and is working correctly.")
+     # Running the model with test data.
+     criterion = nn.CrossEntropyLoss()
+     optimizer = torch.optim.SGD(model.parameters(), lr=0.00001 * 5, weight_decay=0.0001, momentum=0.9)
+     # Define the learning rate scheduler.
+     scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
+     if os.path.exists("best_model.txt"):
+         with open("best_model.txt", "r") as file:
+             best_accuracy = float(file.read())
+     else:
+         best_accuracy = 0.0
+
+     for epoch in tqdm(range(EPOCH), desc="Training Epoch Cycle"):
+         model.train()  # Set model to training mode.
+         running_loss = 0.0
+
+         for i, data in enumerate(train_loader, 0):
+             if i % 10 == 0:
+                 print(f"Internal Loop of batches: {i}")
+             inputs, labels = data
+             # print(type(labels), labels)
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+
+             running_loss += loss.item()
+
+         train_loss = running_loss / len(train_loader)
+         print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')
+
+         # Validation phase.
+         model.eval()  # Set model to evaluation mode.
+         val_running_loss = 0.0
+         val_correct = 0
+         val_total = 0
+         with torch.no_grad():
+             for data in test_loader:  # test_loader is used as a validation loader.
+                 inputs, labels = data
+                 inputs, labels = inputs.to(device), labels.to(device)
+
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+
+                 val_running_loss += loss.item()
+                 _, predicted = torch.max(outputs.data, 1)
+                 val_total += labels.size(0)
+                 val_correct += (predicted == labels).sum().item()
+
+         val_loss = val_running_loss / len(test_loader)
+         val_accuracy = 100 * val_correct / val_total
+         print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')
+         if val_accuracy > best_accuracy:
+             best_accuracy = val_accuracy
+             torch.save(model.state_dict(), "alexnet_cognitive_gap.pth")
+             with open("best_model.txt", "w") as file:
+                 file.write(f"{best_accuracy}")
+
+         # Update the LR scheduler with the validation loss.
+         scheduler.step(val_loss)
+         # print(f'LR: {scheduler.get_last_lr()}')
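For reference, a single-image inference sketch against the checkpoint that the visualisation scripts below pair with this class (alexnet_cognitive.pth); the image path is one of the training frames and is purely illustrative:

# Hypothetical single-image inference sketch for MakiAlexNet (paths are illustrative).
import torch
from torchvision.io import read_image
from torchvision.transforms import v2
from model import MakiAlexNet

model = MakiAlexNet()
model.load_state_dict(torch.load("alexnet_cognitive.pth", map_location="cpu"))
model.eval()

img = read_image("dataset/root/train/left1_frame_0.jpg").float()   # (3, H, W)
img = v2.Resize((224, 224))(img).unsqueeze(0)                       # (1, 3, 224, 224)
with torch.no_grad():
    logits = model(img)
print({0: "left", 1: "right"}[logits.argmax(dim=1).item()])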
model_two.py ADDED
@@ -0,0 +1,162 @@
+ from torchvision.transforms import ToTensor
+ from torchvision.transforms import v2
+ from torchvision import transforms
+
+ import matplotlib.pyplot as plt
+ from time import time
+ from torch import nn
+ import pandas as pd
+ import numpy as np
+ import torch, os
+ from torch.optim.lr_scheduler import ReduceLROnPlateau
+ from tqdm import tqdm
+
+ # Expected input shape (H, W, C) after preprocessing.
+ input_shape = (224, 224, 3)
+
+ device = (
+     "cuda"
+     if torch.cuda.is_available()
+     else "mps"
+     if torch.backends.mps.is_available()
+     else "cpu"
+ )
+
+
+ class MakiAlexNet(nn.Module):
+     def __init__(self, num_classes=2):
+         super(MakiAlexNet, self).__init__()
+         self.num_classes = num_classes
+         self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=1)  # (nn.LazyConv2d would determine the input channels automatically.)
+         self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
+         self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)  # 256, 384
+         self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)  # 384, 384
+         self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)  # 384, 256
+         self.activation = nn.ReLU()
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
+         self.dropout = nn.Dropout(p=0.5)
+         self.f_linear = nn.Linear(256, self.num_classes)
+
+         # Replace Flatten with global average pooling.
+         self.gap = nn.AvgPool2d(5)  # Adjust output size if needed.
+
+         # Create an empty dictionary to store layer outputs.
+         self.layer_outputs = {}
+
+         # Register hooks for desired layers.
+         self.conv5.register_forward_hook(self._save_layer_output)
+         self.f_linear.register_forward_hook(self._save_layer_output)
+
+     def _save_to_output_weights(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = {"input": input, "output": output, "weights": module.weight.data}
+
+     def _save_layer_output(self, module, input, output):
+         self.layer_outputs[module.__class__.__name__] = output
+
+     def forward(self, x):
+         """Defined forward pass of AlexNet for learning left or right prediction."""
+         x = self.conv1(x)    # wider
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv2(x)    # wider.
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.conv3(x)    # wider.
+         x = self.activation(x)
+
+         x = self.conv4(x)
+         x = self.activation(x)
+
+         x = self.conv5(x)
+         x = self.activation(x)
+         x = self.maxpool(x)  # down sample.
+
+         x = self.gap(x).squeeze(-1).squeeze(-1)
+         # x = self.activation(x)
+         x = self.dropout(x)
+         x = self.f_linear(x)
+         return x
+
+
+ if __name__ == "__main__":
+     from dataset_creation import test_loader, train_loader  # Initiate the custom dataloaders and datasets here.
+
+     # Run the model training; also introduce features that help it learn better, like a learning-rate scheduler.
+     EPOCH = 35
+     model = MakiAlexNet()
+     model.to(device)
+     print(model)
+     criterion = nn.CrossEntropyLoss()
+     optimizer = torch.optim.SGD(model.parameters(), lr=0.00001 * 5, weight_decay=0.0001, momentum=0.9)
+     scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
+     if os.path.exists("best_model_2.0.txt"):
+         with open("best_model_2.0.txt", "r") as file:
+             best_accuracy = float(file.read())
+     else:
+         best_accuracy = 0.0
+         # Create a new file.
+         with open("best_model_2.0.txt", "w") as file:
+             file.write(f"{best_accuracy}")
+
+     for epoch in tqdm(range(EPOCH), desc="Training Epoch Cycle"):
+         model.train()  # Set model to training mode.
+         running_loss = 0.0
+
+         for i, data in enumerate(train_loader, 0):
+             if i % 10 == 0:
+                 print(f"Internal Loop of batches: {i}")
+             inputs, labels = data
+             # print(type(labels), labels)
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+
+             running_loss += loss.item()
+
+         train_loss = running_loss / len(train_loader)
+         print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')
+
+         # Validation phase.
+         model.eval()  # Set model to evaluation mode.
+         val_running_loss = 0.0
+         val_correct = 0
+         val_total = 0
+         with torch.no_grad():
+             for data in test_loader:  # test_loader is used as a validation loader.
+                 inputs, labels = data
+                 inputs, labels = inputs.to(device), labels.to(device)
+
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+
+                 val_running_loss += loss.item()
+                 _, predicted = torch.max(outputs.data, 1)
+                 val_total += labels.size(0)
+                 val_correct += (predicted == labels).sum().item()
+
+         val_loss = val_running_loss / len(test_loader)
+         val_accuracy = 100 * val_correct / val_total
+         print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')
+         if val_accuracy > best_accuracy:
+             best_accuracy = val_accuracy
+             torch.save(model.state_dict(), "alexnet_2.0.pth")
+             with open("best_model_2.0.txt", "w") as file:
+                 file.write(f"{best_accuracy}")
+
+         # Update the LR scheduler with the validation loss.
+         scheduler.step(val_loss)
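This second model swaps the 6400-4096-4096-2 fully connected head for global average pooling followed by a single 256-to-2 linear layer. A rough sketch of the size difference between the two heads, with shapes taken from the layer definitions above:

# Rough parameter-count comparison sketch between the two classifier heads defined above.
import torch
from torch import nn

fcc_head = nn.Sequential(nn.Flatten(), nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 2))
gap_head = nn.Sequential(nn.AvgPool2d(5), nn.Flatten(), nn.Dropout(0.5), nn.Linear(256, 2))

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(fcc_head), count(gap_head))      # the GAP head is orders of magnitude smaller

x = torch.randn(1, 256, 5, 5)                # conv5 output after the final max pool
print(fcc_head(x).shape, gap_head(x).shape)  # both produce (1, 2) logits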
model_visualisation.py ADDED
@@ -0,0 +1,138 @@
+ # Based on the learnt CNN kernels, this script will aid in generating a learnt kernel pattern.
+
+ # Attempt 1, did not work well.
+
+ import matplotlib.pyplot as plt
+ # Here we should be able to determine how much each part of the image contributes to detecting the goal,
+ # and how these contributions change over time.
+
+ # https://www.youtube.com/watch?v=ST9NjnKKvT8
+ # This video aims to solve this problem, by going over the heatmaps of CNNs.
+
+ from torchvision import transforms
+ from dataset_creation import normal_transforms
+ from model import MakiAlexNet
+ import numpy as np
+ import cv2, torch, os
+ from tqdm import tqdm
+ import time
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_0.jpg"
+ MODEL_PARAMS = "alexnet_cognitive.pth"
+ all_processing_files = os.listdir(os.path.join(os.getcwd(), "./dataset/root/train"))
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+ print("Model armed and ready for evaluation.")
+
+ # Print the model's state_dict.
+ print("Model's state_dict:")
+ for param_tensor in model.state_dict():
+     print(param_tensor, "\t", model.state_dict()[param_tensor].size())
+
+
+ for image_file in tqdm(all_processing_files):
+
+     # Load the image from file.
+     abs_file_path = os.path.join(os.getcwd(), "./dataset/root/train", image_file)
+     image = cv2.imread(abs_file_path)
+     # print(image.shape)
+     # cv2.imshow("test", image)
+     # cv2.waitKey(5000)
+
+     print("Image input shape of the matrix before: ", image.shape)
+     image = torch.unsqueeze(torch.tensor(image.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     image = torch.einsum("BWHC->BCWH", image)
+     print("Image input shape of the matrix after: ", image.shape)
+     conv1_output = model.conv1(image)
+     print("Output shape of the matrix: ", conv1_output.shape)
+
+     # Handling image convolutions.
+     conv1_formatted = torch.einsum("BCWH->WHC", conv1_output)
+     print(f"Formatted shape of matrix is: {conv1_formatted.shape}")
+
+     # Lay the channels out on a grid of subplots.
+     num_channels = conv1_formatted.shape[2]  # Get the number of channels (96).
+     max_rows = 5  # Set a maximum number of rows (optional).
+     rows = min(max_rows, int(np.sqrt(num_channels)))  # Limit rows to a maximum.
+     cols = int(np.ceil(num_channels / rows))
+
+     fig, axes = plt.subplots(rows, cols, figsize=(12, 12))  # Create a grid of subplots.
+
+     DATASET_OUTPUT_PATH = "./dataset/visualisation"
+     merged_frames = np.zeros((224, 224))
+     image_file_dir = abs_file_path.split(".jpg")[0].split("/")[-1]
+     if not os.path.isdir(os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir)):
+         os.mkdir(os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir))  # Make a new directory.
+
+     for i in range(rows):
+         for j in range(cols):
+             channel_idx = i * cols + j  # Calculate index based on row and column.
+             if channel_idx < num_channels:  # Check if within channel range.
+                 channel_data = conv1_formatted[:, :, channel_idx]
+                 channel_data = channel_data.detach().numpy()
+                 print(f"Channel Data shape dimension: {channel_data.shape}")
+                 # channel_data = np.mean(channel_data, axis=2)
+                 # Get the mean over the third dimension, so mean on channels: H,W,C -> H,W.
+                 channel_data = cv2.resize(channel_data, (224, 224))
+
+                 # Accumulate normalized channel data.
+                 # Take threshold values of channel data to add to merged frames, if above a specific point.
+                 # ret, channel_data = cv2.threshold(channel_data, 120, 255, cv2.THRESH_BINARY)
+                 merged_frames += channel_data
+
+                 # # Save the image data matrix.
+                 # image_filename = f"{int(time.time())}_output_{channel_idx}.jpg"
+                 # image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_filename)
+                 # plt.imsave(image_path, channel_data)
+                 # print(f"Image path saved at {image_path}")
+
+     # Ensure the final merged_frames is also normalised.
+     merged_frames /= (np.max(merged_frames) * .8)
+
+     # Thresholding the main image regions that cause this highlight.
+     merged_frames_gray = merged_frames.astype(np.uint8)  # No conversion needed, use as-is.
+     # merged_frames = cv2.adaptiveThreshold(merged_frames_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
+
+     image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_file_dir + "conv1_mask.jpg")
+     plt.imsave(image_path, merged_frames_gray, cmap='gray')
+
+     # merged_frames = merged_frames.astype(np.uint8)
+     heatmap_color = cv2.applyColorMap(merged_frames_gray, cv2.COLORMAP_JET)  # Apply a colormap.
+     # cv2.imshow("merged", heatmap_color)
+     image_path = os.path.join(os.getcwd(), DATASET_OUTPUT_PATH, image_file_dir, image_file_dir + "conv1_heatmap.jpg")
+     plt.imsave(image_path, heatmap_color)
+     # # Merge all images into one, normalising based on the highest value, and then upscaling from 54x54x1 to 224x224x1.
+     # cv2.waitKey(5000)
+     plt.close()
+
+     exit()
+
+ # image_tensor = normal_transforms(torch.tensor(image))
+ # print(image_tensor.shape)
+ # plt.imshow(image_tensor.squeeze())
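The script above calls model.conv1 directly on the input; an equivalent way to capture intermediate activations (and the mechanism MakiAlexNet already uses for conv5) is a forward hook. A minimal sketch, assuming a 224x224 RGB input:

# Forward-hook sketch: capture conv1 activations without calling the layer directly.
import torch
from model import MakiAlexNet

model = MakiAlexNet()
model.eval()

captured = {}
handle = model.conv1.register_forward_hook(lambda mod, inp, out: captured.update(conv1=out))

with torch.no_grad():
    model(torch.randn(1, 3, 224, 224))   # any 224x224 RGB batch
print(captured["conv1"].shape)            # torch.Size([1, 96, 54, 54])
handle.remove()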
test.py ADDED
@@ -0,0 +1,12 @@
+ from torch import nn
+ import torch
+
+
+ # Pool with a square window of size=3, stride=2.
+ m = nn.AvgPool2d(3, stride=2)
+ # Pool over the whole 5x5 window (global average pooling of the conv5 output).
+ m = nn.AvgPool2d(5)
+ input = torch.randn(32, 256, 5, 5)
+ output = m(input)
+ output = output.squeeze(-1).squeeze(-1)
+ print(output.shape)
video1.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:326d7cd53f18796b719112b90a8e165e02886d6500cf3a340334e2f503cc0d2a
+ size 2476455
video_preprocessing.py ADDED
@@ -0,0 +1,123 @@
+ # Take each video, crop it to 520x400x3, and obtain key frames for use in a dataset.
+ # Frames will be saved as individual images.
+
+ import numpy as np
+ import cv2, os
+ from tqdm import tqdm
+
+
+ # Obtain key frames from the video, equally spaced.
+ # To increase the dataset we raise the number of frames allowed to 50 per video: roughly 900 images for the dataset, 200 of which are for training.
+ def get_equal_elements(array, num_elements=12):
+     """
+     Takes a specific number of elements equally spaced from an array.
+
+     Args:
+         array: The input array.
+         num_elements: The number of elements to take (default 12).
+
+     Returns:
+         A list of elements from the array.
+     """
+     if num_elements > len(array):
+         print(f"Number of elements cannot be greater than array length : {len(array)}")
+         return []
+     step_size = len(array) // (num_elements - 1)  # Avoid an extra element with floor division.
+     return array[::step_size]  # Slice with step size.
+
+
+ def video_to_keyframes(video_filename):
+     cap = cv2.VideoCapture(video_filename)
+     frames = []
+     while cap.isOpened():
+         ret, frame = cap.read()
+         try:
+             # Frames are 720x1280x3; remove the top 200 pixels and crop the centre columns.
+             # print(frame.shape)  # (720, 1280, 3)
+             frame = frame[200:, 440:840]
+             # print(frame.shape)  # (520, 400, 3)
+             frames.append(frame)
+             # cv2.imshow('frame', frame)
+         except Exception as e:
+             print(f"Error is {e}")
+             if frame is None:
+                 break
+             continue
+         if cv2.waitKey(1) & 0xFF == ord('q'):
+             break
+
+     cap.release()
+     cv2.destroyAllWindows()
+     print("Done obtaining captured frames.")
+     selected_frames = get_equal_elements(frames, num_elements=50)
+     print("Obtained selected frames.")
+     # Extract filename without extension for directory creation.
+     filename_no_ext = video_filename.split('.')[0]
+     if "left" in filename_no_ext:
+         filename_no_ext = "left"
+     else:
+         filename_no_ext = "right"
+     # Create a directory for the video if it doesn't exist.
+     try:
+         os.makedirs(os.path.join("dataset", filename_no_ext))
+     except FileExistsError:
+         pass  # Directory already exists, ignore.
+
+     # Save each selected frame as an image in filename_frame_number.jpg format.
+     # Create images under the directory for that specific source video file name, like left for left1.mp4.
+     for i, frame in enumerate(selected_frames):
+         print(filename_no_ext)
+         file_name = f"{video_filename.split('.')[0].split('/')[-1]}_frame_{i}.jpg"
+         print(file_name)
+         image_path = os.path.join("dataset", filename_no_ext, file_name)
+         print(f"Write to disk. {image_path}")
+         print("Resized to 224,224")
+         target_height, target_width = 224, 224
+         if frame is not None:
+             frame = resize_with_aspect_ratio(frame, target_height, target_width)
+             # print(type(frame), frame)
+             cv2.imwrite(image_path, frame)
+         else:
+             continue
+
+     print("Saved images for all selected frames.")
+     return selected_frames
+
+
+ # Resize images to the target dimensions for AlexNet,
+ # preserving aspect ratio where possible.
+ def resize_with_aspect_ratio(image, target_height, target_width):
+     height, width = image.shape[:2]
+     if height == target_height and width == target_width:
+         return image
+     if height > width:
+         new_width = int(width * (target_height / height))
+         # resized_image = cv2.resize(image, (new_width, target_height))
+         resized_image = cv2.resize(image, (target_width, target_height))
+         return resized_image
+     else:
+         new_height = int(height * (target_width / width))
+         resized_image = cv2.resize(image, (target_width, new_height))
+         # Crop the centre of the resized image to match the target dimensions.
+         start_x = int((resized_image.shape[1] - target_width) / 2)
+         start_y = int((resized_image.shape[0] - target_height) / 2)
+         return resized_image[start_y:start_y + target_height, start_x:start_x + target_width]
+
+
+ if __name__ == "__main__":
+     # Load the source videos and extract resized key frames.
+     BASE_PATH = "dataset/src/"
+     videos = os.listdir(BASE_PATH)
+     # print(videos)
+     target_height, target_width = 224, 224
+     for video_file in tqdm(videos):
+         selected_frames = video_to_keyframes(os.path.join(BASE_PATH, video_file))
+
+     # image = cv2.imread(image_path)  # Or use your image data.
+     # resized_image = resize_with_aspect_ratio(image, target_height, target_width)
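A small illustrative check of resize_with_aspect_ratio on a synthetic frame shaped like the 520x400 crop produced above (the array is dummy data):

# Illustrative check of resize_with_aspect_ratio on a synthetic 520x400 cropped frame.
import numpy as np
from video_preprocessing import resize_with_aspect_ratio

frame = np.zeros((520, 400, 3), dtype=np.uint8)    # same shape as a cropped video frame
resized = resize_with_aspect_ratio(frame, 224, 224)
print(resized.shape)                                # (224, 224, 3): taller-than-wide frames are resized straight to 224x224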
visualise2.py ADDED
@@ -0,0 +1,156 @@
+ # https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34
+
+ import cv2, os, torch, re
+ import matplotlib.pyplot as plt
+ from scipy.ndimage import zoom
+ import numpy as np
+ from model import MakiAlexNet
+ from tqdm import tqdm
+
+ # from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
+ TOP_ACCURACY_PERCENTILE = 10
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
+ MODEL_PARAMS = "alexnet_cognitive.pth"
+ GIF_STORE = "dataset/gifs/"
+ TRAIN_STORE = "dataset/root/train/"
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+
+ # Make the model run on CUDA if available.
+ if torch.cuda.is_available():
+     model = model.cuda()
+     print("Running on cuda")
+
+
+ print(dir(model))
+
+ for name, module in model.named_modules():
+     # Print the layer name.
+     print(name)
+
+
+ def extract_file_paths(filename):
+     """With aid from https://regex101.com/, regex."""
+     extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
+     result = re.search(extractor_reg, filename)
+     frame_no = result.group(4)
+     frame_name = result.group(1)
+     video_no = result.group(2)
+     return frame_no, frame_name, video_no
+
+
+ def create_mp4_from_frames(file_name, frames):
+     """Generate an MP4/GIF file from the collection of frames given, lasting roughly 2000 msec."""
+     print("Sorted frames: ", sorted(frames))
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     height, width, _ = cv2.imread(frames[0]).shape
+     fps = 20  # Adjust the frames per second (FPS) as needed.
+     video_path = os.path.join(os.getcwd(), "dataset", "gifs", f"{file_name}.mp4")
+     video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
+     for frame_path in sorted(frames):
+         # Convert BGR to RGB.
+         image = cv2.imread(frame_path)
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         # if image.dtype != np.uint8:
+         #     image = (image * 255).astype(np.uint8)  # Convert to uint8.
+         video.write(image)
+
+     # Release the VideoWriter.
+     video.release()
+
+
+ current_video_name = None
+ selected_frames = []  # Stores frame file paths for the clip generation.
+ for image_filename in ["left1_frame_5.jpg"]:  # tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images"):
+
+     frame_no, frame_name, video_no = extract_file_paths(image_filename)
+     obtained_video_name = video_no + "vid" + frame_name
+     if current_video_name != obtained_video_name:
+         # We have a new video sequence, so save the current sequence and name.
+         if selected_frames:
+             filename = f"{current_video_name}"
+             # Create a clip from the frames.
+             if current_video_name:
+                 create_mp4_from_frames(filename, selected_frames)
+         # Clear frames and hand off to the new handle.
+         selected_frames = []
+         current_video_name = obtained_video_name
+
+     # With the number and name of the file paths, we can then determine which should be part of the specific GIF file.
+     # f"frame_no,fileno,video_no.gif"
+
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     img = torch.unsqueeze(torch.tensor(img.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     X = torch.einsum("BWHC->BCWH", img)
+     if torch.cuda.is_available():
+         X = X.cuda()
+
+     output = model(X)
+     # print(output)
+     # print(model.layer_outputs)
+     conv = model.layer_outputs['Conv2d']
+
+     conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()
+     # print(conv.shape)  # torch.Size([1, 256, 12, 12]) before the permute.
+     # conv = conv.squeeze(0)
+     # print(conv.shape)  # torch.Size([256, 12, 12])
+     scale = 224 / 12  # conv5 output is 256x12x12 (256x5x5 after the final max pool).
+
+     plt.figure(figsize=(16, 16))
+     total_mat = None
+     for i in range(256):
+         plt.subplot(16, 16, i + 1)
+         plt.imshow(img.squeeze(0))
+         plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+     plt.show()
+     # Wait for the user to press a key.
+
+     # mat = zoom(conv[0, :, :, i], zoom=(scale, scale))
+     # threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
+     # # The lower the threshold, the more specific the highlighted region.
+     #
+     # mask = mat > threshold
+     # # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
+     #
+     # # Rescale remaining values (adjust new_range if needed).
+     # new_range = 1  # Adjust based on your desired final range.
+     # filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
+     #
+     # # I just add all the maps together, which is really noisy.
+     # if type(total_mat) != type(None):
+     #     total_mat += filter_map
+     # else:
+     #     total_mat = filter_map
+     #
+     # # Normalise based on the largest value.
+     # # Store this image in a collection, from which a GIF will be made that lasts at least 2 seconds.
+     # total_mat = total_mat / abs(np.max(total_mat))
+     #
+     # image = img.squeeze(0)  # .detach().numpy().astype(np.float32)
+     #
+     # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
+     # plt.imshow(total_mat, cmap='jet', alpha=0.3)
+     #
+     # # selected_frames.append()
+     # filename = frame_name + frame_no + video_no + ".jpg"
+     # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
+     # plt.savefig(file_path)
+     # selected_frames.append(file_path)
+
+ exit()
+
+ # plt.figure(figsize=(16, 16))
+ # for i in range(36):
+ #     plt.subplot(6, 6, i + 1)
+ #     plt.imshow(cv2.imread(TEST_IMAGE))
+ #     plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+ #
+ # plt.show()
visualise3.py ADDED
@@ -0,0 +1,176 @@
+ # Michael Peres (c) 2024
+ # Inspiration from the code tutorial mentioned here: https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34
+
+ import cv2, os, torch, re
+ import matplotlib.pyplot as plt
+ from scipy.ndimage import zoom
+ import numpy as np
+ from model_two import MakiAlexNet
+ from tqdm import tqdm
+
+ # from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
+ TOP_ACCURACY_PERCENTILE = 10
+
+ TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
+ MODEL_PARAMS = "alexnet_2.0.pth"
+ GIF_STORE = "dataset/gifs2/"
+ TRAIN_STORE = "dataset/root/train/"
+
+ model = MakiAlexNet()
+ model.load_state_dict(torch.load(MODEL_PARAMS))
+ model.eval()
+
+ # Make the model run on CUDA if available.
+ if torch.cuda.is_available():
+     model = model.cuda()
+     print("Running on cuda")
+
+
+ print(dir(model))
+
+ for name, module in model.named_modules():
+     # Print the layer name.
+     print(name)
+
+
+ def extract_file_paths(filename):
+     """With aid from https://regex101.com/, regex."""
+     extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
+     result = re.search(extractor_reg, filename)
+     frame_no = result.group(4)
+     frame_name = result.group(1)
+     video_no = result.group(2)
+     return frame_no, frame_name, video_no
+
+
+ def create_mp4_from_frames(file_name, frames):
+     """Generate an MP4/GIF file from the collection of frames given, lasting roughly 2000 msec."""
+     print("Sorted frames: ", sorted(frames))
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     height, width, _ = cv2.imread(frames[0]).shape
+     fps = 20  # Adjust the frames per second (FPS) as needed.
+     video_path = os.path.join(os.getcwd(), "dataset", "gifs2", f"{file_name}.mp4")
+     video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
+     for frame_path in sorted(frames):
+         # Convert BGR to RGB.
+         image = cv2.imread(frame_path)
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         # if image.dtype != np.uint8:
+         #     image = (image * 255).astype(np.uint8)  # Convert to uint8.
+         video.write(image)
+
+     # Release the VideoWriter.
+     video.release()
+
+
+ current_video_name = None
+ selected_frames = []  # Stores frame file paths for the clip generation.
+ for image_filename in tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images"):
+
+     frame_no, frame_name, video_no = extract_file_paths(image_filename)
+     obtained_video_name = video_no + "vid" + frame_name
+     if current_video_name != obtained_video_name:
+         # We have a new video sequence, so save the current sequence and name.
+         if selected_frames:
+             filename = f"{current_video_name}"
+             # Create a clip from the frames.
+             if current_video_name:
+                 create_mp4_from_frames(filename, selected_frames)
+         # Clear frames and hand off to the new handle.
+         selected_frames = []
+         current_video_name = obtained_video_name
+
+     # With the number and name of the file paths, we can then determine which should be part of the specific GIF file.
+     # f"frame_no,fileno,video_no.gif"
+
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     img = torch.unsqueeze(torch.tensor(img.astype(np.float32)), 0)  # Convert image to a float32 tensor and add a batch dimension: (Batch, W, H, Channel).
+     X = torch.einsum("BWHC->BCWH", img)
+     if torch.cuda.is_available():
+         X = X.cuda()
+
+     output = model(X)
+     # print(output)
+     # print("Model layer outputs: ")
+     # print(model.layer_outputs)
+     conv = model.layer_outputs['Conv2d']
+     pred = model.layer_outputs["Linear"]
+     pred_weights, pred_bias = model.f_linear.weight, model.f_linear.bias
+     # print(pred_weights.shape)
+
+     conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()
+     # print(conv.shape)  # torch.Size([1, 256, 12, 12]) before the permute.
+     # conv = conv.squeeze(0)
+     # print(conv.shape)  # torch.Size([256, 12, 12])
+     target = np.argmax(pred.cpu().detach().numpy(), axis=1).squeeze()
+
+     weights = pred_weights[target, :].cpu().detach().numpy()
+     # print("weights", weights.shape, "conv", conv.squeeze(0).shape)
+     heatmap = conv.squeeze(0) @ weights
+     # print(conv.shape)
+     # print(heatmap.shape)
+     scale = 224 / 12  # conv5 output is 256x12x12 (256x5x5 after the final max pool).
+     plt.figure(figsize=(12, 12))
+     img = cv2.imread(os.path.join(TRAIN_STORE, image_filename))
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     plt.imshow(img)
+     plt.imshow(zoom(heatmap, zoom=(scale, scale)), cmap='jet', alpha=0.5)
+     # If frame_no is just 0-9, add a leading 0 so the frames sort correctly.
+     if len(frame_no) == 1:
+         frame_no = "0" + frame_no
+     filename = video_no + frame_name + frame_no + ".jpg"
+     file_path = os.path.join(os.getcwd(), "dataset/gifs2/raw/", filename)
+     plt.savefig(file_path)
+     selected_frames.append(file_path)
+     plt.close()
+
+ # Wait for the user to press a key.
+
+ # mat = zoom(conv[0, :, :, i], zoom=(scale, scale))
+ # threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
+ # # The lower the threshold, the more specific the highlighted region.
+ #
+ # mask = mat > threshold
+ # # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
+ #
+ # # Rescale remaining values (adjust new_range if needed).
+ # new_range = 1  # Adjust based on your desired final range.
+ # filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
+ #
+ # # I just add all the maps together, which is really noisy.
+ # if type(total_mat) != type(None):
+ #     total_mat += filter_map
+ # else:
+ #     total_mat = filter_map
+ #
+ # # Normalise based on the largest value.
+ # # Store this image in a collection, from which a GIF will be made that lasts at least 2 seconds.
+ # total_mat = total_mat / abs(np.max(total_mat))
+ #
+ # image = img.squeeze(0)  # .detach().numpy().astype(np.float32)
+ #
+ # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
+ # plt.imshow(total_mat, cmap='jet', alpha=0.3)
+ #
+ # # selected_frames.append()
+ # filename = frame_name + frame_no + video_no + ".jpg"
+ # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
+ # plt.savefig(file_path)
+ # selected_frames.append(file_path)
+
+ exit()
+
+ # plt.figure(figsize=(16, 16))
+ # for i in range(36):
+ #     plt.subplot(6, 6, i + 1)
+ #     plt.imshow(cv2.imread(TEST_IMAGE))
+ #     plt.imshow(zoom(conv[0, :, :, i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
+ #
+ # plt.show()
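The heatmap step above is a CAM-style weighting of the hooked conv5 activations by the final linear layer's weights for the predicted class. A compact sketch of just that step, using random arrays whose shapes mirror the ones in this script:

# CAM-style weighting sketch with synthetic shapes matching the hooked conv5 output (1, 256, 12, 12)
# and the GAP head's linear weights (2, 256). Values are random; only the shapes mirror visualise3.py.
import numpy as np

conv = np.random.rand(12, 12, 256)    # conv5 activations, channels last
weights = np.random.rand(2, 256)      # f_linear.weight: one row of 256 weights per class
target = 1                            # pretend the model predicted class 1 ("right")

heatmap = conv @ weights[target]      # weighted sum over channels -> (12, 12)
print(heatmap.shape)                  # (12, 12); zoomed to 224x224 before overlaying on the frame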