nofl committed
Commit 8f6465e · verified · 1 Parent(s): 677ea4a

Update Dockerfile

Files changed (1)
  1. Dockerfile +90 -21
Dockerfile CHANGED
@@ -1,30 +1,99 @@
- FROM python:3.9
-
- RUN useradd -m -u 1000 aim_user
-
- # Switch to the "aim_user" user
- USER aim_user
-
- # Set home to the user's home directory
- ENV HOME=/home/aim_user \
-     PATH=/home/aim_user/.local/bin:$PATH
-
- # Set the working directory to the user's home directory
- WORKDIR $HOME
-
- # install the `aim` package on the latest version
- RUN pip install aim
-
- RUN aim telemetry off
-
- ENTRYPOINT ["/bin/sh", "-c"]
-
- COPY aim_repo.tar.gz .
- RUN tar xvzf aim_repo.tar.gz
- # have to run `aim init` in the directory that stores aim data for
- # otherwise `aim up` will prompt for confirmation to create the directory itself.
- # We run aim listening on 0.0.0.0 to expose all ports. Also, we run
- # using `--dev` to print verbose logs. Port 43800 is the default port of
- # `aim up` but explicit is better than implicit.
- CMD ["aim up --host 0.0.0.0 --port 7860 --workers 2"]
-
+ from aim import Run
+ from aim.pytorch import track_gradients_dists, track_params_dists
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torchvision import datasets, transforms
+ from tqdm import tqdm
+
+ batch_size = 64
+ epochs = 10
+ learning_rate = 0.01
+
+ aim_run = Run()
+
+ class CNN(nn.Module):
+     def __init__(self):
+         super(CNN, self).__init__()
+         # padding=1 keeps the 28x28 maps, so two pools give 64 x 7 x 7 for fc1
+         self.conv1 = nn.Conv2d(1, 32, 3, 1, padding=1)
+         self.conv2 = nn.Conv2d(32, 64, 3, 1, padding=1)
+         self.pool = nn.MaxPool2d(2, 2)
+         self.fc1 = nn.Linear(64 * 7 * 7, 128)
+         self.fc2 = nn.Linear(128, 10)
+
+     def forward(self, x):
+         x = self.pool(torch.relu(self.conv1(x)))
+         x = self.pool(torch.relu(self.conv2(x)))
+         x = torch.flatten(x, 1)
+         x = torch.relu(self.fc1(x))
+         x = self.fc2(x)
+         return x
+
+ train_dataset = datasets.MNIST(root='./data',
+                                train=True,
+                                transform=transforms.ToTensor(),
+                                download=True)
+
+ test_dataset = datasets.MNIST(root='./data',
+                               train=False,
+                               transform=transforms.ToTensor())
+
+ train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                            batch_size=batch_size,
+                                            shuffle=True)
+
+ test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                           batch_size=batch_size,
+                                           shuffle=False)
+
+ model = CNN()
+ optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+ criterion = nn.CrossEntropyLoss()
+
+ for epoch in range(epochs):
+     model.train()
+     train_loss = 0
+     correct = 0
+     total = 0
+
+     for batch_idx, (data, target) in enumerate(tqdm(train_loader, desc="Training", leave=False)):
+         optimizer.zero_grad()
+         output = model(data)
+         loss = criterion(output, target)
+         loss.backward()
+         optimizer.step()
+
+         train_loss += loss.item()
+         _, predicted = torch.max(output.data, 1)
+         total += target.size(0)
+         correct += (predicted == target).sum().item()
+
+     acc = correct / total
+     items = {'accuracy': acc, 'loss': train_loss / len(train_loader)}
+     aim_run.track(items, epoch=epoch, context={'subset': 'train'})
+
+     track_params_dists(model, aim_run, epoch=epoch, context={'subset': 'train'})
+     track_gradients_dists(model, aim_run, epoch=epoch, context={'subset': 'train'})
+
+     model.eval()
+     test_loss = 0
+     correct = 0
+     total = 0
+
+     with torch.no_grad():
+         for batch_idx, (data, target) in enumerate(tqdm(test_loader, desc="Testing", leave=False)):
+             output = model(data)
+             loss = criterion(output, target)
+             test_loss += loss.item()
+             _, predicted = torch.max(output.data, 1)
+             total += target.size(0)
+             correct += (predicted == target).sum().item()
+
+     acc = correct / total
+     items = {'accuracy': acc, 'loss': test_loss / len(test_loader)}
+     aim_run.track(items, epoch=epoch, context={'subset': 'test'})
+
+     track_params_dists(model, aim_run, epoch=epoch, context={'subset': 'test'})
+     track_gradients_dists(model, aim_run, epoch=epoch, context={'subset': 'test'})
+
+ torch.save(model.state_dict(), 'mnist_cnn.pth')
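
The added script tracks per-epoch metrics and parameter/gradient distributions with Aim and finishes by writing the trained weights to mnist_cnn.pth. A minimal sketch of how that checkpoint could be reused afterwards (assuming the CNN class defined above is importable or in scope; the random tensor is only a placeholder for a real MNIST image):

import torch

# Rebuild the same architecture and load the saved state_dict
# (CNN is the class defined in the script above).
model = CNN()
model.load_state_dict(torch.load('mnist_cnn.pth', map_location='cpu'))
model.eval()

# Classify one 28x28 grayscale input; a random tensor stands in for a real image.
with torch.no_grad():
    example = torch.rand(1, 1, 28, 28)
    predicted_digit = model(example).argmax(dim=1).item()
print(predicted_digit)

The metrics tracked through aim_run are stored in the local Aim repository that Run() initializes, and they can be browsed in the Aim UI that the removed Dockerfile served with `aim up --host 0.0.0.0 --port 7860`.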