siddheshtv
committed on
Commit
·
1db42e4
1
Parent(s):
78f7d92
readme citation url change
Browse files- .gitignore +3 -0
- README.md +70 -0
- __init__.py +0 -0
- analytics.png +0 -0
- analytics.py +23 -0
- blocknet10.py +58 -0
- config.json +5 -0
- dataloader.py +30 -0
- figures/eq1.png +0 -0
- figures/fig1.png +0 -0
- main.py +102 -0
- model_state_dict.pth +3 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
data/
|
2 |
+
push_to_hf.py
|
3 |
+
*.ipynb
|
README.md
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BlockNet10 - CNN for CIFAR-10 dataset
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
|
5 |
+
BlockNet10 is a neural network architecture designed for image classification tasks using the CIFAR-10 dataset. This model implements a sequence of intermediate blocks (B1, B2, ..., BK) followed by an output block (O).
|
6 |
+
|
7 |
+
## Architecture Details
|
8 |
+
|
9 |
+
### Intermediate Block (Bi)
|
10 |
+
|
11 |
+
Each intermediate block receives an input image x and outputs an image x'. The block comprises L independent convolutional layers, denoted as C1, C2, ..., CL.
|
12 |
+
|
13 |
+
Each convolutional layer Cl in a block operates on the input image x and outputs an image Cl(x).
|
14 |
+
|
15 |
+
<div style="display: flex; justify-content: center;">
|
16 |
+
<img src="figures/eq1.png" alt="Equation 1" />
|
17 |
+
</div>
|
18 |
+
|
19 |
+
The output image x' is computed as $x' = a_1 C_1(x) + a_2 C_2(x) + \dots + a_L C_L(x)$, where $a = [a_1, a_2, \dots, a_L]^T$ is a vector computed by the block.
|
20 |
+
|
21 |
+
The vector a is obtained by computing the average value of each channel of x and passing it through a fully connected layer with the same number of units as convolutional layers in the block.
|
22 |
+
|
23 |
+
<div style="display: flex; justify-content: center;">
|
24 |
+
<img src="figures/fig1.png" alt="Figure 1" />
|
25 |
+
</div>
|
26 |
+
|
27 |
+
### Output Block (O)
|
28 |
+
|
29 |
+
The output block processes the final output image from the intermediate blocks for classification.
|
30 |
+
|
31 |
+
## Analytics
|
32 |
+
|
33 |
+
<div style="display: flex; justify-content: center; align-items: center;">
|
34 |
+
<table>
|
35 |
+
<tr>
|
36 |
+
<th>Epoch Number</th>
|
37 |
+
<th>Train Accuracy</th>
|
38 |
+
<th>Test Accuracy</th>
|
39 |
+
<th>Average Loss</th>
|
40 |
+
</tr>
|
41 |
+
<tr>
|
42 |
+
<td>50</td>
|
43 |
+
<td>75.43</td>
|
44 |
+
<td>80.56</td>
|
45 |
+
<td>0.685</td>
|
46 |
+
</tr>
|
47 |
+
</table>
|
48 |
+
</div>
|
49 |
+
|
50 |
+
## Clone on GitHub
|
51 |
+
|
52 |
+
You can contribute improvements to this architecture, experiment with its hyperparameters, or help resolve open issues <a href="https://github.com/siddheshtv/cifar10" target="_blank">here</a>.
|
53 |
+
|
54 |
+
## Citation
|
55 |
+
|
56 |
+
If you use BlockNet10 in your research or work, please cite it as follows:
|
57 |
+
|
58 |
+
```bibtex
|
59 |
+
@article{blocknet10,
|
60 |
+
title={BlockNet10: CIFAR-10 Image Classifier},
|
61 |
+
author={Siddhesh Kulthe},
|
62 |
+
year={2024},
|
63 |
+
publisher={Hugging Face},
|
64 |
+
url={https://huggingface.co/siddheshtv/BlockNet10}
|
65 |
+
}
|
66 |
+
```
|
67 |
+
|
68 |
+
---
|
69 |
+
|
70 |
+
## License: MIT
|
__init__.py
ADDED
File without changes
|
analytics.png
ADDED
![]() |
analytics.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
|
3 |
+
def model_analytics(train_losses, train_accuracies, test_accuracies):
    """Plot the training curves side by side and save them to ``analytics.png``.

    Args:
        train_losses: Sequence of training-loss values, drawn in order on the
            left panel.
        train_accuracies: Sequence of training accuracies (percent), drawn on
            the right panel.
        test_accuracies: Sequence of test accuracies (percent), drawn on the
            right panel next to the training accuracies.

    Returns:
        A fixed confirmation string once the figure has been written.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        # Left panel: loss curve.
        plt.subplot(1, 2, 1)
        plt.plot(train_losses, label='Training Loss')
        plt.xlabel('Batch')
        plt.ylabel('Loss')
        plt.title('Loss per Training Batch')
        plt.legend()

        # Right panel: train vs. test accuracy.
        plt.subplot(1, 2, 2)
        plt.plot(train_accuracies, label='Training Accuracy')
        plt.plot(test_accuracies, label='Test Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy (%)')
        plt.title('Training and Test Accuracies')
        plt.legend()

        plt.tight_layout()
        plt.savefig("analytics.png")
    finally:
        # pyplot keeps every figure registered until it is closed explicitly;
        # without this, each call would leave another figure in memory.
        plt.close(fig)
    return "✅ Figure saved successfully"
|
blocknet10.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from huggingface_hub import PyTorchModelHubMixin
|
5 |
+
|
6 |
+
class IntermediateBlock(nn.Module):
    """Bank of parallel convolutions whose summed output is gated per channel.

    Every convolution sees the same input. Their leaky-ReLU activations are
    summed, the sum is passed through each batch-norm layer and those results
    are summed again. Finally the result is scaled channel-wise by a gate
    computed from the spatial mean of each input channel.

    Args:
        in_channels: Number of channels of the incoming feature map.
        num_conv_layers: How many parallel convolutions (and batch-norm
            layers) the block holds.
        conv_params: Positional arguments forwarded to ``nn.Conv2d`` after
            ``in_channels``; the first entry is the output channel count.
    """

    def __init__(self, in_channels, num_conv_layers, conv_params):
        super().__init__()
        width = conv_params[0]

        # Submodules are created in the same order as before (convolutions,
        # batch norms, gate) so seeded initialisation is unaffected.
        convs = []
        for _ in range(num_conv_layers):
            convs.append(nn.Conv2d(in_channels, *conv_params))
        self.conv_layers = nn.ModuleList(convs)

        norms = []
        for _ in range(num_conv_layers):
            norms.append(nn.BatchNorm2d(width))
        self.batch_norms = nn.ModuleList(norms)

        self.fc = nn.Linear(in_channels, width)

    def forward(self, x):
        """Return the gated, normalised sum of the parallel convolutions."""
        # One gate value per output channel, derived from per-channel means.
        gate = self.fc(x.mean(dim=[2, 3]))
        gate = F.leaky_relu(gate.view(x.size(0), -1, 1, 1))

        activated = [F.leaky_relu(layer(x)) for layer in self.conv_layers]
        merged = torch.stack(activated, dim=-1).sum(dim=-1)

        normalised = [norm(merged) for norm in self.batch_norms]
        merged = torch.stack(normalised, dim=-1).sum(dim=-1)

        return merged * gate
|
21 |
+
|
22 |
+
class OutputBlock(nn.Module):
    """Classification head: spatial averaging followed by a small MLP.

    The input feature map is averaged over its two spatial dimensions, passed
    through a leaky ReLU, then through one ``Linear -> BatchNorm1d ->
    leaky ReLU`` stage per entry of ``hidden_sizes``, and finally through a
    plain ``Linear`` layer that produces ``num_classes`` raw logits.

    Args:
        in_channels: Number of channels of the incoming feature map.
        num_classes: Size of the output logit vector.
        hidden_sizes: Widths of the hidden stages. ``None`` or an empty
            sequence yields a single linear classifier.
    """

    def __init__(self, in_channels, num_classes, hidden_sizes=None):
        super().__init__()
        # Copy so the caller's sequence is never shared or mutated.
        hidden_sizes = list(hidden_sizes) if hidden_sizes else []
        widths = [in_channels, *hidden_sizes, num_classes]
        # Same layer order as before, so state-dict keys are unchanged:
        # fc_layers[:-1] are the hidden layers, fc_layers[-1] the classifier.
        self.fc_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        )
        self.batch_norms = nn.ModuleList([nn.BatchNorm1d(size) for size in hidden_sizes])

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        out = F.leaky_relu(x.mean(dim=[2, 3]))
        for fc, bn in zip(self.fc_layers[:-1], self.batch_norms):
            out = F.leaky_relu(bn(fc(out)))
        # The classifier layer has no matching batch norm, so zipping all of
        # fc_layers with batch_norms silently dropped it; apply it explicitly.
        # NOTE(review): checkpoints trained while this layer was skipped
        # presumably hold an untrained classifier — retrain or fine-tune.
        return self.fc_layers[-1](out)
|
34 |
+
|
35 |
+
class CustomCIFAR10Net(nn.Module, PyTorchModelHubMixin):
    """Five stacked ``IntermediateBlock`` stages feeding an ``OutputBlock``.

    Channel widths grow 3 -> 64 -> 128 -> 256 -> 512 -> 1024; every stage
    holds three parallel convolutions. Dropout (p=0.5) is applied to the last
    feature map before the output block, which uses hidden sizes 512 and 256.

    Args:
        num_classes: Number of classes handed to the output block.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        widths = (3, 64, 128, 256, 512, 1024)

        # NOTE(review): the list is forwarded positionally to nn.Conv2d, whose
        # order is (out_channels, kernel_size, stride, padding, dilation), so
        # this looks like stride=3 — confirm that is intended.
        stages = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stages.append(IntermediateBlock(fan_in, 3, [fan_out, 3, 3, 1, 1]))
        self.intermediate_blocks = nn.ModuleList(stages)

        self.output_block = OutputBlock(widths[-1], num_classes, [512, 256])
        self.dropout = nn.Dropout(0.5)

        # Re-initialise every convolution and linear weight with Kaiming normal.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        """Run ``x`` through all stages, dropout and the output block."""
        for stage in self.intermediate_blocks:
            x = stage(x)
        return self.output_block(self.dropout(x))
|
config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": ["BlockNet10"],
|
3 |
+
"hidden_size": 1024,
|
4 |
+
"num_classes": 10
|
5 |
+
}
|
dataloader.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torchvision import datasets
|
3 |
+
import torchvision.transforms as transforms
|
4 |
+
|
5 |
+
batch_size = 128
|
6 |
+
|
7 |
+
def data_transform():
    """Build the preprocessing pipelines for the training and test splits.

    The training pipeline applies random flip, rotation, padded crop and
    colour jitter before tensor conversion and normalisation; the test
    pipeline only converts to a tensor and normalises.

    Returns:
        A ``(transform_train, transform_test)`` tuple of composed transforms.
    """
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)

    def tensor_steps():
        # Fresh instances per pipeline, matching the separate objects
        # the two pipelines have always had.
        return [transforms.ToTensor(), transforms.Normalize(channel_mean, channel_std)]

    augmentations = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomCrop(32, padding=4),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ]

    transform_train = transforms.Compose(augmentations + tensor_steps())
    transform_test = transforms.Compose(tensor_steps())
    return transform_train, transform_test
|
23 |
+
|
24 |
+
def data_loader(transform_train, transform_test):
    """Create the CIFAR-10 training and test data loaders.

    Both splits are stored under ``./data`` and downloaded when missing.
    Batches use the module-level ``batch_size``; only the training loader
    shuffles.

    Args:
        transform_train: Transform applied to training samples.
        transform_test: Transform applied to test samples.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    def make_loader(dataset, shuffle):
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=2
        )

    # Build both datasets first, then both loaders.
    training_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    held_out_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

    return make_loader(training_set, True), make_loader(held_out_set, False)
|
figures/eq1.png
ADDED
![]() |
figures/fig1.png
ADDED
![]() |
main.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.optim as optim
|
4 |
+
import torch.optim.lr_scheduler as lr_scheduler
|
5 |
+
from dataloader import batch_size
|
6 |
+
from dataloader import data_transform, data_loader
|
7 |
+
from blocknet10 import CustomCIFAR10Net
|
8 |
+
from analytics import model_analytics
|
9 |
+
from push_to_hf import HF
|
10 |
+
|
11 |
+
# Seed torch's global RNG before anything random is created.
torch.manual_seed(42)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the preprocessing pipelines and the data loaders that use them.
transform_train, transform_test = data_transform()
train_loader, test_loader = data_loader(transform_train, transform_test)
|
18 |
+
|
19 |
+
def arch_tester():
    """Smoke-test the architecture with one random batch.

    A fresh ``CustomCIFAR10Net`` is built and fed a random
    ``(batch_size, 3, 32, 32)`` tensor.

    Returns:
        The shape of the model output for that batch.
    """
    # Build the network before drawing the probe so the RNG is consumed in
    # the same order as before.
    net = CustomCIFAR10Net()
    probe = torch.randn(batch_size, 3, 32, 32)
    return net(probe).shape
|
24 |
+
|
25 |
+
# Print the output shape of a throwaway model as a quick architecture check.
arch_tester_output = arch_tester()
print(arch_tester_output)

model = CustomCIFAR10Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.01, betas=(0.8, 0.95), weight_decay=0.0005, amsgrad=True, eps=1e-8)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


def _accuracy(net, loader):
    """Return the top-1 accuracy (in percent) of ``net`` over ``loader``.

    Puts ``net`` into eval mode and leaves it there; the training loop
    switches back to train mode at the start of every epoch.
    """
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, targets in loader:
            images, targets = images.to(device), targets.to(device)
            predicted = net(images).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return 100 * correct / total


train_losses = []
train_accuracies = []
test_accuracies = []

num_epochs = 50
total_steps = len(train_loader) * num_epochs
step_count = 0
log_every = 100

# Accumulated across epoch boundaries: the logging window is counted in global
# steps, so resetting this per epoch would average fewer than `log_every`
# batches while still dividing by `log_every`.
running_loss = 0.0

for epoch in range(num_epochs):
    # Evaluation below leaves the model in eval mode. Re-enable dropout and
    # batch-norm statistic updates for every epoch, not just the first one.
    model.train()

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        step_count += 1
        if step_count % log_every == 0:
            train_losses.append(running_loss / log_every)
            print(f'[Epoch: {epoch + 1}, Step: {step_count:5d}/{total_steps}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

    # Measure training accuracy once the epoch is finished, rather than
    # re-iterating train_loader from inside its own iteration.
    train_accuracy = _accuracy(model, train_loader)
    train_accuracies.append(train_accuracy)

    scheduler.step()

    test_accuracy = _accuracy(model, test_loader)
    test_accuracies.append(test_accuracy)

    print(f'Epoch {epoch + 1}: Test Accuracy = {test_accuracy:.2f}%')


print("Last Train Losses:" + str(train_losses[-1]))
print("Last Train Accuracy:" + str(train_accuracies[-1]))
print("Last Test Accuracy:" + str(test_accuracies[-1]))

analytics = model_analytics(train_losses, train_accuracies, test_accuracies)

# Upload the trained model through the local push_to_hf helper.
huggingface = HF()
push_to_face = huggingface.push_to_face(model=model)
print(push_to_face)
|
model_state_dict.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7be1849972498ca04e513cf1cf3a49d626f011f0ba337a404412791c810d0df4
|
3 |
+
size 80831147
|