# Improved CNN from Scratch
#
# Trains a small four-stage convolutional classifier on an ImageFolder
# dataset, keeps the checkpoint with the best held-out accuracy, and reports
# accuracy / precision / recall / F1 for that checkpoint.
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Define data transforms
# Training transform: random augmentation followed by normalisation.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation transform: same input size and normalisation, but no randomness,
# so held-out metrics are reproducible and the "best" checkpoint is not chosen
# on the luck of an augmentation draw.
eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the dataset
image_dir = r'data/train'
dataset = ImageFolder(image_dir, transform=transform)
# Second view of the same folder with the deterministic transform. Both views
# index the same files as long as the directory is unchanged between scans.
eval_dataset = ImageFolder(image_dir, transform=eval_transform)

# Split the dataset
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_split = torch.utils.data.random_split(
    dataset, [train_size, test_size]
)
# Re-point the held-out indices at the non-augmented view.
test_dataset = Subset(eval_dataset, test_split.indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# Define an improved CNN architecture
class AdvancedCNN(nn.Module):
    """Four conv/batch-norm/ReLU/max-pool stages, global average pool, linear head.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.batchnorm2 = nn.BatchNorm2d(128)
        self.batchnorm3 = nn.BatchNorm2d(256)
        self.batchnorm4 = nn.BatchNorm2d(512)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images."""
        x = self.pool(F.relu(self.batchnorm1(self.conv1(x))))
        x = self.pool(F.relu(self.batchnorm2(self.conv2(x))))
        x = self.pool(F.relu(self.batchnorm3(self.conv3(x))))
        x = self.pool(F.relu(self.batchnorm4(self.conv4(x))))
        x = self.global_avg_pool(x)
        # Collapse the (512, 1, 1) pooled map to a 512-vector per sample.
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


def evaluate(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode without gradients.

    Args:
        model: Module mapping an input batch to per-class logits.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(accuracy, labels, preds)`` where ``accuracy`` is a
        percentage in ``[0, 100]`` (``0.0`` if the loader yields no samples)
        and ``labels`` / ``preds`` are flat lists of integer class indices.
    """
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().tolist())
            all_preds.extend(predicted.cpu().tolist())
    # Guard against an empty split instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    return accuracy, all_labels, all_preds


# Instantiate the model
model = AdvancedCNN(num_classes=len(dataset.classes))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Directory to save the model
model_dir = "saved_models"
os.makedirs(model_dir, exist_ok=True)

# File name for saving the model
model_path = os.path.join(model_dir, "best_model.pth")

# Training the model
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint; otherwise a run that never beats 0% would later load a missing
# or stale file from a previous run.
best_accuracy = -1.0  # To track the best accuracy during training

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # StepLR is stepped once per epoch, after the optimizer updates.
    scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # Evaluate on the test set after each epoch
    accuracy, _, _ = evaluate(model, test_loader, device)
    print(f"Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {accuracy:.2f}%")

    # Save the model if it achieves a new best accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), model_path)
        print(f"New best model saved with accuracy: {accuracy:.2f}%")

print("Training finished!")

# Evaluate the best model
# map_location keeps the load working when the checkpoint device differs from
# the current one.
model.load_state_dict(torch.load(model_path, map_location=device))
accuracy, all_labels, all_preds = evaluate(model, test_loader, device)

# 'binary' averaging is only defined for two-class targets; with more class
# folders sklearn raises ValueError, so fall back to the unweighted macro mean.
average = 'macro' if len(dataset.classes) > 2 else 'binary'
precision = precision_score(all_labels, all_preds, average=average)
recall = recall_score(all_labels, all_preds, average=average)
f1 = f1_score(all_labels, all_preds, average=average)

print(f"Final Test Accuracy (Best Model): {accuracy:.2f}%")
print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")