equ1 committed on
Commit 569aa5e
Parent: 0ccdae5

Upload model.py

Files changed (1)
model.py +201 -0
model.py ADDED
import os
import time

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.optim as optim

from tqdm import tqdm

def get_mean_std(loader):
    '''
    Calculates the per-channel mean and std of the images in a loader.

    Uses the identity Var[X] = E[X^2] - (E[X])^2, so only running sums
    over batches are needed.

    Args:
        loader (torch.utils.data.DataLoader): Loader with images

    Returns:
        mean (torch.Tensor): Mean of images in loader
        std (torch.Tensor): Standard deviation of images in loader
    '''
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0

    for data, _ in loader:
        channels_sum += torch.mean(data, dim=[0, 2, 3])  # mean across [batch, height, width]
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3])  # mean of squares across [batch, height, width]
        num_batches += 1

    mean = channels_sum/num_batches
    std = (channels_squared_sum/num_batches - mean**2)**0.5  # sqrt(E[X^2] - (E[X])^2)

    return mean, std

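# For reference, MNIST's commonly cited statistics are mean ~= 0.1307 and
# std ~= 0.3081; the values computed here will differ slightly because the
# train loader below applies random affine augmentation before batching.
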
class Net(nn.Module):
    '''
    CNN for MNIST: four convolutional layers followed by four fully
    connected layers. Input is a 1x28x28 image; output is a vector of
    log-probabilities over the 10 digit classes.
    '''

    def __init__(self):
        super(Net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5),   # 28x28 -> 24x24
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=5, bias=False),  # 24x24 -> 20x20
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),              # 20x20 -> 10x10
            nn.Dropout2d(0.25),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),  # 10x10 -> 8x8
            nn.ReLU(),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, bias=False),  # 8x8 -> 6x6
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),              # 6x6 -> 3x3
            nn.Dropout2d(0.25),
            nn.Flatten(),                      # 64 * 3 * 3 = 576
        )
        self.layer5 = nn.Sequential(
            nn.Linear(576, 256, bias=False),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.layer6 = nn.Sequential(
            nn.Linear(256, 128, bias=False),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.layer7 = nn.Sequential(
            nn.Linear(128, 84, bias=False),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Dropout(0.25),
        )
        self.layer8 = nn.Sequential(
            nn.Linear(84, 10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        # normalizes with the dataset statistics computed at module level
        x = transforms.Normalize(mean, std)(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)

        return x

# downloads and loads the MNIST train set, with light affine augmentation
transform = transforms.Compose([transforms.ToTensor(), transforms.RandomAffine(degrees=10, translate=(0.1, 0.1))])
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True, pin_memory=True)

# downloads and loads the MNIST test set (no augmentation)
val_data = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
val_loader = DataLoader(dataset=val_data, batch_size=64, shuffle=False, pin_memory=True)

# uses GPU if available
if torch.cuda.is_available():
    dev = "cuda:0"
else:
    dev = "cpu"

device = torch.device(dev)

# gets mean and std of the (augmented) train set, used by Net.forward
mean, std = get_mean_std(train_loader)

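# quick shape sanity check (a sketch, not part of training; mean and std
# must already exist for Net.forward, as they do at this point):
#   net = Net().to(device).eval()
#   with torch.no_grad():
#       out = net(torch.zeros(2, 1, 28, 28).to(device))  # torch.Size([2, 10]) log-probabilities
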
def run_model():
    # defines model, optimizer, scheduler, and loss
    model = Net().to(device=device)
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=2)
    criterion = nn.NLLLoss()

    # iterates through epochs
    for epoch in range(30):
        print(f"\nEpoch {epoch+1}/30.")

        # train loop
        model.train()

        total_train_loss = 0
        total_correct = 0

        for i, (images, labels) in enumerate(tqdm(train_loader)):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)

            loss = criterion(outputs, labels)
            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()

            # calculates train accuracy
            outputs_probs = nn.functional.softmax(outputs, dim=1)  # gets probabilities
            for idx, preds in enumerate(outputs_probs):
                # counts a hit when the label with max probability matches the true label
                if labels[idx] == torch.argmax(preds.data):
                    total_correct += 1

        train_loss = total_train_loss/(i+1)
        train_accuracy = total_correct/len(train_data)

        print(f"Train set - Loss: {train_loss}, Accuracy: {train_accuracy}.")

        # saves model state
        if not os.path.exists("./saved_models"):
            os.mkdir("./saved_models")
        torch.save(model.state_dict(), f"./saved_models/mnist-cnn-{time.time()}.pt")

        # val loop
        model.eval()

        total_val_loss = 0
        total_correct = 0

        with torch.no_grad():
            for i, (images, labels) in enumerate(tqdm(val_loader)):
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)

                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

                outputs_probs = nn.functional.softmax(outputs, dim=1)
                for idx, preds in enumerate(outputs_probs):
                    if labels[idx] == torch.argmax(preds.data):
                        total_correct += 1

        val_loss = total_val_loss/(i+1)
        val_accuracy = total_correct/len(val_data)

        print(f"Val set - Loss: {val_loss}, Accuracy: {val_accuracy}.")

        # adjusts lr based on validation loss
        scheduler.step(val_loss)
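
# minimal entry point, assuming the script is meant to be run directly
if __name__ == "__main__":
    run_model()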