abhishekrs4
committed on
Commit
•
5f62bb5
1
Parent(s):
6d8cdf1
added docstrings for various scripts
Browse files- app.py +19 -0
- training/dataset.py +71 -0
- training/decoder_models.py +142 -0
- training/encoder_models.py +135 -15
- training/image_preprocessing.py +19 -0
- training/inference.py +29 -0
- training/logger_utils.py +16 -1
- training/metrics.py +50 -0
- training/seg_models.py +63 -0
- training/train.py +57 -0
app.py
CHANGED
@@ -19,6 +19,25 @@ from training.logger_utils import load_dict_from_json
|
|
19 |
from training.dataset import get_dataloader_for_inference
|
20 |
|
21 |
def run_inference(image_array, file_weights, num_classes=5, file_stats_json="training/image_stats.json"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
oil_spill_seg_model = ResNet50DeepLabV3Plus(
|
23 |
num_classes=num_classes, pretrained=True
|
24 |
)
|
|
|
19 |
from training.dataset import get_dataloader_for_inference
|
20 |
|
21 |
def run_inference(image_array, file_weights, num_classes=5, file_stats_json="training/image_stats.json"):
|
22 |
+
"""
|
23 |
+
---------
|
24 |
+
Arguments
|
25 |
+
---------
|
26 |
+
image_array : ndarray
|
27 |
+
a numpy array of the image
|
28 |
+
file_weights : str
|
29 |
+
full path to weights file
|
30 |
+
num_classes : int
|
31 |
+
number of classes in the dataset
|
32 |
+
file_stats_json : str
|
33 |
+
full path to the json stats file for preprocessing
|
34 |
+
|
35 |
+
-------
|
36 |
+
Returns
|
37 |
+
-------
|
38 |
+
pred_mask_arr : ndarray
|
39 |
+
a numpy array of the prediction mask
|
40 |
+
"""
|
41 |
oil_spill_seg_model = ResNet50DeepLabV3Plus(
|
42 |
num_classes=num_classes, pretrained=True
|
43 |
)
|
training/dataset.py
CHANGED
@@ -13,6 +13,21 @@ from logger_utils import load_dict_from_json
|
|
13 |
|
14 |
class M4DSAROilSpillDataset(Dataset):
|
15 |
def __init__(self, dir_data, list_images, which_set="train", file_stats_json="image_stats.json"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
self.dir_data = dir_data
|
17 |
self.which_set = which_set
|
18 |
self.file_stats_json = file_stats_json
|
@@ -56,9 +71,29 @@ class M4DSAROilSpillDataset(Dataset):
|
|
56 |
|
57 |
|
58 |
def __len__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
return len(self._list_images)
|
60 |
|
61 |
def __getitem__(self, idx):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
file_image = os.path.join(self._dir_images, self._list_images[idx])
|
63 |
file_label = os.path.join(self._dir_labels, self._list_labels[idx])
|
64 |
|
@@ -94,6 +129,25 @@ class M4DSAROilSpillDataset(Dataset):
|
|
94 |
return image, label
|
95 |
|
96 |
def get_dataloaders_for_training(dir_dataset, batch_size, random_state=None, num_workers=4):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
list_images = sorted(
|
98 |
[f for f in os.listdir(os.path.join(dir_dataset, "train", "images")) if f.endswith(".jpg")]
|
99 |
)
|
@@ -130,6 +184,23 @@ def get_dataloaders_for_training(dir_dataset, batch_size, random_state=None, num
|
|
130 |
return train_dataset_loader, valid_dataset_loader
|
131 |
|
132 |
def get_dataloader_for_inference(dir_dataset, batch_size=1, num_workers=4):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
list_inference_images = sorted(
|
134 |
[f for f in os.listdir(os.path.join(dir_dataset, "test", "images")) if f.endswith(".jpg")]
|
135 |
)
|
|
|
13 |
|
14 |
class M4DSAROilSpillDataset(Dataset):
|
15 |
def __init__(self, dir_data, list_images, which_set="train", file_stats_json="image_stats.json"):
|
16 |
+
"""
|
17 |
+
M4DSAROilSpillDataset class to load satellite image dataset
|
18 |
+
|
19 |
+
----------
|
20 |
+
Attributes
|
21 |
+
----------
|
22 |
+
dir_data : str
|
23 |
+
valid full directory path of the dataset
|
24 |
+
list_images : list
|
25 |
+
list of images in the directory
|
26 |
+
which_set : str
|
27 |
+
string indicates which set to be loaded (options = ["train", "test"])
|
28 |
+
file_stats_json : str
|
29 |
+
json file with image stats
|
30 |
+
"""
|
31 |
self.dir_data = dir_data
|
32 |
self.which_set = which_set
|
33 |
self.file_stats_json = file_stats_json
|
|
|
71 |
|
72 |
|
73 |
def __len__(self):
|
74 |
+
"""
|
75 |
+
-------
|
76 |
+
Returns
|
77 |
+
-------
|
78 |
+
length : int
|
79 |
+
number of images in the dataset list
|
80 |
+
"""
|
81 |
return len(self._list_images)
|
82 |
|
83 |
def __getitem__(self, idx):
|
84 |
+
"""
|
85 |
+
---------
|
86 |
+
Arguments
|
87 |
+
---------
|
88 |
+
idx : int
|
89 |
+
index of the file
|
90 |
+
|
91 |
+
-------
|
92 |
+
Returns
|
93 |
+
-------
|
94 |
+
(image, label) : tuple of torch tensors
|
95 |
+
tuple of normalized image and label torch tensors
|
96 |
+
"""
|
97 |
file_image = os.path.join(self._dir_images, self._list_images[idx])
|
98 |
file_label = os.path.join(self._dir_labels, self._list_labels[idx])
|
99 |
|
|
|
129 |
return image, label
|
130 |
|
131 |
def get_dataloaders_for_training(dir_dataset, batch_size, random_state=None, num_workers=4):
|
132 |
+
"""
|
133 |
+
---------
|
134 |
+
Arguments
|
135 |
+
---------
|
136 |
+
dir_dataset : str
|
137 |
+
full path to dataset directory
|
138 |
+
batch_size : int
|
139 |
+
batch size to be used
|
140 |
+
random_state : int
|
141 |
+
random state to be used for train / validation set split (default: None)
|
142 |
+
num_workers : int
|
143 |
+
number of workers to be used for dataloader (default: 4)
|
144 |
+
|
145 |
+
-------
|
146 |
+
Returns
|
147 |
+
-------
|
148 |
+
(train_dataset_loader, valid_dataset_loader) : tuple
|
149 |
+
tuple of torch dataloaders
|
150 |
+
"""
|
151 |
list_images = sorted(
|
152 |
[f for f in os.listdir(os.path.join(dir_dataset, "train", "images")) if f.endswith(".jpg")]
|
153 |
)
|
|
|
184 |
return train_dataset_loader, valid_dataset_loader
|
185 |
|
186 |
def get_dataloader_for_inference(dir_dataset, batch_size=1, num_workers=4):
|
187 |
+
"""
|
188 |
+
---------
|
189 |
+
Arguments
|
190 |
+
---------
|
191 |
+
dir_dataset : str
|
192 |
+
full path to dataset directory
|
193 |
+
batch_size : int
|
194 |
+
batch size to be used (default: 1)
|
195 |
+
num_workers : int
|
196 |
+
number of workers to be used for dataloader (default: 4)
|
197 |
+
|
198 |
+
-------
|
199 |
+
Returns
|
200 |
+
-------
|
201 |
+
(inference_dataset_loader, list_inference_images) : tuple
|
202 |
+
tuple of torch dataloader and a list of inference images
|
203 |
+
"""
|
204 |
list_inference_images = sorted(
|
205 |
[f for f in os.listdir(os.path.join(dir_dataset, "test", "images")) if f.endswith(".jpg")]
|
206 |
)
|
training/decoder_models.py
CHANGED
@@ -3,6 +3,23 @@ from torch import nn
|
|
3 |
from torch.nn import functional as F
|
4 |
|
5 |
class DeepLabV3(nn.Module):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
def __init__(self, in_channels, num_classes, aspp_out_channels=256, final_out_channels=256, aspp_dilate=[12, 24, 36]):
|
7 |
super().__init__()
|
8 |
self.aspp_block = ASPPBlock(in_channels, aspp_dilate, aspp_out_channels=aspp_out_channels)
|
@@ -27,11 +44,45 @@ class DeepLabV3(nn.Module):
|
|
27 |
return
|
28 |
|
29 |
def forward(self, encoded_features):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
aspp_output_feature = self.aspp_block(encoded_features)
|
31 |
final_output_feature = self.classifier_conv_block(aspp_output_feature)
|
32 |
return final_output_feature
|
33 |
|
34 |
class DeepLabV3Plus(nn.Module):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def __init__(self, in_channels, encoder_channels, num_classes, encoder_projection_channels=48,
|
36 |
aspp_out_channels=256, final_out_channels=256, aspp_dilate=[12, 24, 36]):
|
37 |
|
@@ -66,6 +117,21 @@ class DeepLabV3Plus(nn.Module):
|
|
66 |
return
|
67 |
|
68 |
def forward(self, encoded_features, block_1_features):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
encoder_connection = self.projection_conv(block_1_features)
|
70 |
aspp_output_feature = self.aspp_block(encoded_features)
|
71 |
aspp_output_feature = F.interpolate(
|
@@ -78,6 +144,19 @@ class DeepLabV3Plus(nn.Module):
|
|
78 |
return final_output_feature
|
79 |
|
80 |
class ASPPConvLayer(nn.Sequential):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def __init__(self, in_channels, out_channels, dilation):
|
82 |
super().__init__()
|
83 |
self.conv_block = nn.Sequential(
|
@@ -100,11 +179,35 @@ class ASPPConvLayer(nn.Sequential):
|
|
100 |
return
|
101 |
|
102 |
def forward(self, x):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
x = self.conv_block(x)
|
104 |
return x
|
105 |
|
106 |
class ASPPPoolingLayer(nn.Sequential):
|
107 |
def __init__(self, in_channels, out_channels):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
super().__init__()
|
109 |
self.avg_pool_block = nn.Sequential(
|
110 |
nn.AdaptiveAvgPool2d(1),
|
@@ -124,6 +227,19 @@ class ASPPPoolingLayer(nn.Sequential):
|
|
124 |
return
|
125 |
|
126 |
def forward(self, x):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
size = x.shape[2:]
|
128 |
x = self.avg_pool_block(x)
|
129 |
x = F.interpolate(x, size=size, mode="bilinear", align_corners=False)
|
@@ -131,6 +247,19 @@ class ASPPPoolingLayer(nn.Sequential):
|
|
131 |
|
132 |
class ASPPBlock(nn.Module):
|
133 |
def __init__(self, in_channels, atrous_rates, aspp_out_channels=256):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
super().__init__()
|
135 |
|
136 |
self.aspp_init_conv = nn.Sequential(
|
@@ -168,6 +297,19 @@ class ASPPBlock(nn.Module):
|
|
168 |
return
|
169 |
|
170 |
def forward(self, x):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
aspp_outputs = []
|
172 |
for aspp_layer in self.aspp_module_layers:
|
173 |
aspp_outputs.append(aspp_layer(x))
|
|
|
3 |
from torch.nn import functional as F
|
4 |
|
5 |
class DeepLabV3(nn.Module):
|
6 |
+
"""
|
7 |
+
DeepLabV3 class to build the DeepLabV3 decoder model
|
8 |
+
|
9 |
+
----------
|
10 |
+
Attributes
|
11 |
+
----------
|
12 |
+
in_channels : int
|
13 |
+
number of input channels to decoder model from the encoder model's output
|
14 |
+
num_classes : int
|
15 |
+
number of classes for which the decoder needs to be built
|
16 |
+
aspp_out_channels : int
|
17 |
+
number of output channels of the ASPP layer (default: 256)
|
18 |
+
final_out_channels : int
|
19 |
+
number of output channels before applying classification conv layer (default: 256)
|
20 |
+
aspp_dilate: list
|
21 |
+
a list of dilation rates to be used for conv layers in ASPP block (default: [12, 24, 36])
|
22 |
+
"""
|
23 |
def __init__(self, in_channels, num_classes, aspp_out_channels=256, final_out_channels=256, aspp_dilate=[12, 24, 36]):
|
24 |
super().__init__()
|
25 |
self.aspp_block = ASPPBlock(in_channels, aspp_dilate, aspp_out_channels=aspp_out_channels)
|
|
|
44 |
return
|
45 |
|
46 |
def forward(self, encoded_features):
|
47 |
+
"""
|
48 |
+
---------
|
49 |
+
Arguments
|
50 |
+
---------
|
51 |
+
encoded_features : torch tensor
|
52 |
+
a tensor of encoded features from the encoder
|
53 |
+
|
54 |
+
-------
|
55 |
+
Returns
|
56 |
+
-------
|
57 |
+
final_output_feature : torch tensor
|
58 |
+
a tensor of final output logits
|
59 |
+
"""
|
60 |
aspp_output_feature = self.aspp_block(encoded_features)
|
61 |
final_output_feature = self.classifier_conv_block(aspp_output_feature)
|
62 |
return final_output_feature
|
63 |
|
64 |
class DeepLabV3Plus(nn.Module):
|
65 |
+
"""
|
66 |
+
DeepLabV3Plus class to build the DeepLabV3+ decoder model
|
67 |
+
|
68 |
+
----------
|
69 |
+
Attributes
|
70 |
+
----------
|
71 |
+
in_channels : int
|
72 |
+
number of input channels to decoder model from the encoder model's output
|
73 |
+
encoder_channels : int
|
74 |
+
number of channels from the intermediate layer of the encoder for merging
|
75 |
+
num_classes : int
|
76 |
+
number of classes for which the decoder needs to be built
|
77 |
+
encoder_projection_channels : int
|
78 |
+
number of resulting projection channels from the intermediate layer of the encoder for merging (default: 48)
|
79 |
+
aspp_out_channels : int
|
80 |
+
number of output channels of the ASPP layer (default: 256)
|
81 |
+
final_out_channels : int
|
82 |
+
number of output channels before applying classification conv layer (default: 256)
|
83 |
+
aspp_dilate: list
|
84 |
+
a list of dilation rates to be used for conv layers in ASPP block (default: [12, 24, 36])
|
85 |
+
"""
|
86 |
def __init__(self, in_channels, encoder_channels, num_classes, encoder_projection_channels=48,
|
87 |
aspp_out_channels=256, final_out_channels=256, aspp_dilate=[12, 24, 36]):
|
88 |
|
|
|
117 |
return
|
118 |
|
119 |
def forward(self, encoded_features, block_1_features):
|
120 |
+
"""
|
121 |
+
---------
|
122 |
+
Arguments
|
123 |
+
---------
|
124 |
+
encoded_features : torch tensor
|
125 |
+
a tensor of encoded features from the encoder
|
126 |
+
block_1_features : torch tensor
|
127 |
+
a tensor of features from the intermediate layer from the encoder
|
128 |
+
|
129 |
+
-------
|
130 |
+
Returns
|
131 |
+
-------
|
132 |
+
final_output_feature : torch tensor
|
133 |
+
a tensor of final output logits
|
134 |
+
"""
|
135 |
encoder_connection = self.projection_conv(block_1_features)
|
136 |
aspp_output_feature = self.aspp_block(encoded_features)
|
137 |
aspp_output_feature = F.interpolate(
|
|
|
144 |
return final_output_feature
|
145 |
|
146 |
class ASPPConvLayer(nn.Sequential):
|
147 |
+
"""
|
148 |
+
ASPPConvLayer class to build the ASPPConvLayer used in ASPPBlock
|
149 |
+
|
150 |
+
----------
|
151 |
+
Attributes
|
152 |
+
----------
|
153 |
+
in_channels : int
|
154 |
+
number of input channels to ASPPConvLayer
|
155 |
+
out_channels : int
|
156 |
+
number of output channels from ASPPConvLayer
|
157 |
+
dilation : int
|
158 |
+
dilation rate
|
159 |
+
"""
|
160 |
def __init__(self, in_channels, out_channels, dilation):
|
161 |
super().__init__()
|
162 |
self.conv_block = nn.Sequential(
|
|
|
179 |
return
|
180 |
|
181 |
def forward(self, x):
|
182 |
+
"""
|
183 |
+
---------
|
184 |
+
Arguments
|
185 |
+
---------
|
186 |
+
x : torch tensor
|
187 |
+
a tensor of input features
|
188 |
+
|
189 |
+
-------
|
190 |
+
Returns
|
191 |
+
-------
|
192 |
+
x : torch tensor
|
193 |
+
output of the ASPPConvLayer
|
194 |
+
"""
|
195 |
x = self.conv_block(x)
|
196 |
return x
|
197 |
|
198 |
class ASPPPoolingLayer(nn.Sequential):
|
199 |
def __init__(self, in_channels, out_channels):
|
200 |
+
"""
|
201 |
+
ASPPPoolingLayer class to build the ASPPPoolingLayer used in ASPPBlock
|
202 |
+
|
203 |
+
----------
|
204 |
+
Attributes
|
205 |
+
----------
|
206 |
+
in_channels : int
|
207 |
+
number of input channels to ASPPPoolingLayer
|
208 |
+
out_channels : int
|
209 |
+
number of output channels from ASPPPoolingLayer
|
210 |
+
"""
|
211 |
super().__init__()
|
212 |
self.avg_pool_block = nn.Sequential(
|
213 |
nn.AdaptiveAvgPool2d(1),
|
|
|
227 |
return
|
228 |
|
229 |
def forward(self, x):
|
230 |
+
"""
|
231 |
+
---------
|
232 |
+
Arguments
|
233 |
+
---------
|
234 |
+
x : torch tensor
|
235 |
+
a tensor of input features
|
236 |
+
|
237 |
+
-------
|
238 |
+
Returns
|
239 |
+
-------
|
240 |
+
x : torch tensor
|
241 |
+
output of the ASPPPoolingLayer
|
242 |
+
"""
|
243 |
size = x.shape[2:]
|
244 |
x = self.avg_pool_block(x)
|
245 |
x = F.interpolate(x, size=size, mode="bilinear", align_corners=False)
|
|
|
247 |
|
248 |
class ASPPBlock(nn.Module):
|
249 |
def __init__(self, in_channels, atrous_rates, aspp_out_channels=256):
|
250 |
+
"""
|
251 |
+
ASPPBlock class to build the ASPPBlock
|
252 |
+
|
253 |
+
----------
|
254 |
+
Attributes
|
255 |
+
----------
|
256 |
+
in_channels : int
|
257 |
+
number of input channels to ASPPBlock
|
258 |
+
atrous_rates : list
|
259 |
+
list of dilation rates
|
260 |
+
aspp_out_channels : int
|
261 |
+
number of output channels of the ASPPBlock
|
262 |
+
"""
|
263 |
super().__init__()
|
264 |
|
265 |
self.aspp_init_conv = nn.Sequential(
|
|
|
297 |
return
|
298 |
|
299 |
def forward(self, x):
|
300 |
+
"""
|
301 |
+
---------
|
302 |
+
Arguments
|
303 |
+
---------
|
304 |
+
x : torch tensor
|
305 |
+
a tensor of input features
|
306 |
+
|
307 |
+
-------
|
308 |
+
Returns
|
309 |
+
-------
|
310 |
+
x : torch tensor
|
311 |
+
output of the ASPPBlock
|
312 |
+
"""
|
313 |
aspp_outputs = []
|
314 |
for aspp_layer in self.aspp_module_layers:
|
315 |
aspp_outputs.append(aspp_layer(x))
|
training/encoder_models.py
CHANGED
@@ -27,6 +27,29 @@ class CustomResNet(nn.Module):
|
|
27 |
replace_stride_with_dilation=None,
|
28 |
norm_layer=None,
|
29 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
super(CustomResNet, self).__init__()
|
32 |
|
@@ -120,6 +143,19 @@ class CustomResNet(nn.Module):
|
|
120 |
return nn.Sequential(*layers)
|
121 |
|
122 |
def forward(self, x):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
x = self.conv1(x)
|
124 |
x = self.bn1(x)
|
125 |
x = self.relu(x)
|
@@ -135,6 +171,25 @@ class CustomResNet(nn.Module):
|
|
135 |
return x
|
136 |
|
137 |
def _resnet(block_type, layers, weights=None, progress=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
model = CustomResNet(layers, block_type)
|
139 |
|
140 |
if weights is not None:
|
@@ -146,8 +201,11 @@ def resnet18(pretrained=True):
|
|
146 |
r"""ResNet-18 model from
|
147 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
148 |
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
151 |
"""
|
152 |
if pretrained:
|
153 |
weights = ResNet18_Weights.IMAGENET1K_V1
|
@@ -159,8 +217,11 @@ def resnet34(pretrained=True):
|
|
159 |
r"""ResNet-34 model from
|
160 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
161 |
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
164 |
"""
|
165 |
if pretrained:
|
166 |
weights = ResNet34_Weights.IMAGENET1K_V1
|
@@ -172,8 +233,11 @@ def resnet50(pretrained=True):
|
|
172 |
r"""ResNet-50 model from
|
173 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
174 |
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
177 |
"""
|
178 |
if pretrained:
|
179 |
weights = ResNet50_Weights.IMAGENET1K_V1
|
@@ -186,8 +250,11 @@ def resnet101(pretrained=True):
|
|
186 |
r"""ResNet-101 model from
|
187 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
188 |
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
191 |
"""
|
192 |
if pretrained:
|
193 |
weights = ResNet101_Weights.IMAGENET1K_V1
|
@@ -209,13 +276,22 @@ class CustomEfficientNet(nn.Module):
|
|
209 |
):
|
210 |
"""
|
211 |
EfficientNet V1 and V2 main class
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
"""
|
220 |
super().__init__()
|
221 |
self.dict_encoder_features = {}
|
@@ -321,6 +397,23 @@ def _efficientnet(
|
|
321 |
progress=True,
|
322 |
**kwargs: Any,
|
323 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
model = CustomEfficientNet(
|
325 |
inverted_residual_setting,
|
326 |
dropout,
|
@@ -335,6 +428,15 @@ def _efficientnet(
|
|
335 |
return model
|
336 |
|
337 |
def efficientnet_v2_s(pretrained=True, **kwargs: Any):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
which_efficientnet = "efficientnet_v2_s"
|
339 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
340 |
if pretrained:
|
@@ -350,6 +452,15 @@ def efficientnet_v2_s(pretrained=True, **kwargs: Any):
|
|
350 |
)
|
351 |
|
352 |
def efficientnet_v2_m(pretrained=True, **kwargs: Any):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
which_efficientnet = "efficientnet_v2_m"
|
354 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
355 |
if pretrained:
|
@@ -365,6 +476,15 @@ def efficientnet_v2_m(pretrained=True, **kwargs: Any):
|
|
365 |
)
|
366 |
|
367 |
def efficientnet_v2_l(pretrained=True, **kwargs: Any):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
368 |
which_efficientnet = "efficientnet_v2_l"
|
369 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
370 |
if pretrained:
|
|
|
27 |
replace_stride_with_dilation=None,
|
28 |
norm_layer=None,
|
29 |
):
|
30 |
+
"""
|
31 |
+
CustomResNet class to build the CustomResNet encoder model
|
32 |
+
|
33 |
+
----------
|
34 |
+
Attributes
|
35 |
+
----------
|
36 |
+
layers : list
|
37 |
+
list of number of layers in each residual block
|
38 |
+
block : object of block type
|
39 |
+
type of the residual block (options = [BasicBlock, Bottleneck])
|
40 |
+
zero_init_residual : bool
|
41 |
+
to indicate whether to use zero weights for BN
|
42 |
+
groups : int
|
43 |
+
indicates the number of groups (default: 1)
|
44 |
+
num_classes : int
|
45 |
+
indicates the number of classes (default: 1000)
|
46 |
+
width_per_group : int
|
47 |
+
indicates the width per group (default: 64)
|
48 |
+
replace_stride_with_dilation : list
|
49 |
+
a list indicating whether to replace stride with dilation (default: None)
|
50 |
+
norm_layer : object
|
51 |
+
object of type batch norm (default: None)
|
52 |
+
"""
|
53 |
|
54 |
super(CustomResNet, self).__init__()
|
55 |
|
|
|
143 |
return nn.Sequential(*layers)
|
144 |
|
145 |
def forward(self, x):
|
146 |
+
"""
|
147 |
+
---------
|
148 |
+
Arguments
|
149 |
+
---------
|
150 |
+
x : torch tensor
|
151 |
+
a tensor of input features
|
152 |
+
|
153 |
+
-------
|
154 |
+
Returns
|
155 |
+
-------
|
156 |
+
x : torch tensor
|
157 |
+
output of the CustomResNet
|
158 |
+
"""
|
159 |
x = self.conv1(x)
|
160 |
x = self.bn1(x)
|
161 |
x = self.relu(x)
|
|
|
171 |
return x
|
172 |
|
173 |
def _resnet(block_type, layers, weights=None, progress=True):
|
174 |
+
"""
|
175 |
+
---------
|
176 |
+
Arguments
|
177 |
+
---------
|
178 |
+
block_type : object
|
179 |
+
object of type block
|
180 |
+
layers : list
|
181 |
+
list of layers in each residual block
|
182 |
+
weights : object
|
183 |
+
object of type ResNet weights
|
184 |
+
progress : bool
|
185 |
+
indicates whether to show progress or not
|
186 |
+
|
187 |
+
-------
|
188 |
+
Returns
|
189 |
+
-------
|
190 |
+
model : object
|
191 |
+
model object of type CustomResNet
|
192 |
+
"""
|
193 |
model = CustomResNet(layers, block_type)
|
194 |
|
195 |
if weights is not None:
|
|
|
201 |
r"""ResNet-18 model from
|
202 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
203 |
|
204 |
+
---------
|
205 |
+
Arguments
|
206 |
+
---------
|
207 |
+
pretrained : bool
|
208 |
+
if True, returns a model pre-trained on ImageNet
|
209 |
"""
|
210 |
if pretrained:
|
211 |
weights = ResNet18_Weights.IMAGENET1K_V1
|
|
|
217 |
r"""ResNet-34 model from
|
218 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
219 |
|
220 |
+
---------
|
221 |
+
Arguments
|
222 |
+
---------
|
223 |
+
pretrained : bool
|
224 |
+
if True, returns a model pre-trained on ImageNet
|
225 |
"""
|
226 |
if pretrained:
|
227 |
weights = ResNet34_Weights.IMAGENET1K_V1
|
|
|
233 |
r"""ResNet-50 model from
|
234 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
235 |
|
236 |
+
---------
|
237 |
+
Arguments
|
238 |
+
---------
|
239 |
+
pretrained : bool
|
240 |
+
if True, returns a model pre-trained on ImageNet
|
241 |
"""
|
242 |
if pretrained:
|
243 |
weights = ResNet50_Weights.IMAGENET1K_V1
|
|
|
250 |
r"""ResNet-101 model from
|
251 |
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
252 |
|
253 |
+
---------
|
254 |
+
Arguments
|
255 |
+
---------
|
256 |
+
pretrained : bool
|
257 |
+
if True, returns a model pre-trained on ImageNet
|
258 |
"""
|
259 |
if pretrained:
|
260 |
weights = ResNet101_Weights.IMAGENET1K_V1
|
|
|
276 |
):
|
277 |
"""
|
278 |
EfficientNet V1 and V2 main class
|
279 |
+
|
280 |
+
----------
|
281 |
+
Attributes
|
282 |
+
----------
|
283 |
+
inverted_residual_setting : Sequence
|
284 |
+
network structure
|
285 |
+
dropout : float
|
286 |
+
the dropout probability
|
287 |
+
stochastic_depth_prob : float
|
288 |
+
the stochastic depth probability
|
289 |
+
num_classes : int
|
290 |
+
number of classes
|
291 |
+
norm_layer : object
|
292 |
+
object of type Module specifying the normalization layer to use
|
293 |
+
last_channel : int
|
294 |
+
the number of channels on the penultimate layer
|
295 |
"""
|
296 |
super().__init__()
|
297 |
self.dict_encoder_features = {}
|
|
|
397 |
progress=True,
|
398 |
**kwargs: Any,
|
399 |
):
|
400 |
+
"""
|
401 |
+
---------
|
402 |
+
Arguments
|
403 |
+
---------
|
404 |
+
inverted_residual_setting : Sequence
|
405 |
+
network structure
|
406 |
+
dropout : float
|
407 |
+
the dropout probability
|
408 |
+
last_channel : int
|
409 |
+
the number of channels on the penultimate layer
|
410 |
+
weights : object
|
411 |
+
object of type efficient_net weights
|
412 |
+
norm_layer : object
|
413 |
+
object of type Module specifying the normalization layer to use
|
414 |
+
progress : bool
|
415 |
+
indicates whether to show progress or not
|
416 |
+
"""
|
417 |
model = CustomEfficientNet(
|
418 |
inverted_residual_setting,
|
419 |
dropout,
|
|
|
428 |
return model
|
429 |
|
430 |
def efficientnet_v2_s(pretrained=True, **kwargs: Any):
|
431 |
+
"""
|
432 |
+
---------
|
433 |
+
Arguments
|
434 |
+
---------
|
435 |
+
pretrained : bool
|
436 |
+
if True, returns a model pre-trained on ImageNet
|
437 |
+
**kwargs :
|
438 |
+
additional arguments
|
439 |
+
"""
|
440 |
which_efficientnet = "efficientnet_v2_s"
|
441 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
442 |
if pretrained:
|
|
|
452 |
)
|
453 |
|
454 |
def efficientnet_v2_m(pretrained=True, **kwargs: Any):
|
455 |
+
"""
|
456 |
+
---------
|
457 |
+
Arguments
|
458 |
+
---------
|
459 |
+
pretrained : bool
|
460 |
+
if True, returns a model pre-trained on ImageNet
|
461 |
+
**kwargs :
|
462 |
+
additional arguments
|
463 |
+
"""
|
464 |
which_efficientnet = "efficientnet_v2_m"
|
465 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
466 |
if pretrained:
|
|
|
476 |
)
|
477 |
|
478 |
def efficientnet_v2_l(pretrained=True, **kwargs: Any):
|
479 |
+
"""
|
480 |
+
---------
|
481 |
+
Arguments
|
482 |
+
---------
|
483 |
+
pretrained : bool
|
484 |
+
if True, returns a model pre-trained on ImageNet
|
485 |
+
**kwargs :
|
486 |
+
additional arguments
|
487 |
+
"""
|
488 |
which_efficientnet = "efficientnet_v2_l"
|
489 |
inverted_residual_setting, last_channel = _efficientnet_conf(which_efficientnet)
|
490 |
if pretrained:
|
training/image_preprocessing.py
CHANGED
@@ -5,6 +5,25 @@ from skimage.io import imread
|
|
5 |
|
6 |
class ImagePadder:
|
7 |
def __init__(self, dir_images, pad_left=15, pad_right=15, pad_top=11, pad_bottom=11, file_anchor_image="img_0814.jpg"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
self._anchor_image = imread(os.path.join(dir_images, file_anchor_image))
|
9 |
self._anchor_image_shape = self._anchor_image.shape
|
10 |
self._pad_left = pad_left
|
|
|
5 |
|
6 |
class ImagePadder:
|
7 |
def __init__(self, dir_images, pad_left=15, pad_right=15, pad_top=11, pad_bottom=11, file_anchor_image="img_0814.jpg"):
|
8 |
+
"""
|
9 |
+
ImagePadder class for padding images
|
10 |
+
|
11 |
+
----------
|
12 |
+
Attributes
|
13 |
+
----------
|
14 |
+
dir_images : str
|
15 |
+
full directory path containing images
|
16 |
+
pad_left : int
|
17 |
+
number of pixels to be padded to the left of the input image (default: 15)
|
18 |
+
pad_right : int
|
19 |
+
number of pixels to be padded to the right of the input image (default: 15)
|
20 |
+
pad_top : int
|
21 |
+
number of pixels to be padded to the top of the input image (default: 11)
|
22 |
+
pad_bottom : int
|
23 |
+
number of pixels to be padded to the bottom of the input image (default: 11)
|
24 |
+
file_anchor_image : str
|
25 |
+
file with anchor image whose pixels will be used as a reference for padding (default: "img_0814.jpg")
|
26 |
+
"""
|
27 |
self._anchor_image = imread(os.path.join(dir_images, file_anchor_image))
|
28 |
self._anchor_image_shape = self._anchor_image.shape
|
29 |
self._pad_left = pad_left
|
training/inference.py
CHANGED
@@ -15,12 +15,41 @@ from seg_models import *
|
|
15 |
from dataset import get_dataloader_for_inference
|
16 |
|
17 |
def create_directory(dir_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
if not os.path.isdir(dir_path):
|
19 |
os.makedirs(dir_path)
|
20 |
print(f"Created directory: {dir_path}")
|
21 |
return
|
22 |
|
23 |
def inference_loop(dataset_loader, list_images, model, dir_labels, dir_masks, num_classes, device, image_format=".png"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# for lossless, always save labels and masks as png and not as jpeg
|
25 |
model.eval()
|
26 |
size = len(dataset_loader.dataset)
|
|
|
15 |
from dataset import get_dataloader_for_inference
|
16 |
|
17 |
def create_directory(dir_path):
|
18 |
+
"""
|
19 |
+
---------
|
20 |
+
Arguments
|
21 |
+
---------
|
22 |
+
dir_path : str
|
23 |
+
full directory path that needs to be created if it does not exist
|
24 |
+
"""
|
25 |
if not os.path.isdir(dir_path):
|
26 |
os.makedirs(dir_path)
|
27 |
print(f"Created directory: {dir_path}")
|
28 |
return
|
29 |
|
30 |
def inference_loop(dataset_loader, list_images, model, dir_labels, dir_masks, num_classes, device, image_format=".png"):
|
31 |
+
"""
|
32 |
+
---------
|
33 |
+
Arguments
|
34 |
+
---------
|
35 |
+
dataset_loader : object
|
36 |
+
object of type dataloader
|
37 |
+
list_images : list
|
38 |
+
list of images for which the inference needs to be run
|
39 |
+
model : object
|
40 |
+
object of type model
|
41 |
+
dir_labels : str
|
42 |
+
full directory path to save prediction labels
|
43 |
+
dir_masks : str
|
44 |
+
full directory path to save prediction masks
|
45 |
+
num_classes : int
|
46 |
+
number of classes in the dataset
|
47 |
+
device : str
|
48 |
+
device on which inference needs to be run
|
49 |
+
image_format : str
|
50 |
+
the extension format of the images (default: ".png")
|
51 |
+
"""
|
52 |
+
|
53 |
# for lossless, always save labels and masks as png and not as jpeg
|
54 |
model.eval()
|
55 |
size = len(dataset_loader.dataset)
|
training/logger_utils.py
CHANGED
@@ -36,7 +36,16 @@ def load_dict_from_json(file_json):
|
|
36 |
|
37 |
class CSVWriter:
|
38 |
"""
|
39 |
-
for writing tabular data to a csv file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
"""
|
41 |
def __init__(self, file_name, column_names):
|
42 |
self.file_name = file_name
|
@@ -58,6 +67,12 @@ class CSVWriter:
|
|
58 |
def write_row(self, row):
|
59 |
"""
|
60 |
writes a row into csv file
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
"""
|
62 |
self.writer.writerow(row)
|
63 |
return
|
|
|
36 |
|
37 |
class CSVWriter:
|
38 |
"""
|
39 |
+
CSVWriter class for writing tabular data to a csv file
|
40 |
+
|
41 |
+
----------
|
42 |
+
Attributes
|
43 |
+
----------
|
44 |
+
file_name : str
|
45 |
+
file name of the csv file
|
46 |
+
column_names : list
|
47 |
+
a list of column names
|
48 |
+
|
49 |
"""
|
50 |
def __init__(self, file_name, column_names):
|
51 |
self.file_name = file_name
|
|
|
67 |
def write_row(self, row):
|
68 |
"""
|
69 |
writes a row into csv file
|
70 |
+
|
71 |
+
---------
|
72 |
+
Arguments
|
73 |
+
---------
|
74 |
+
row : list
|
75 |
+
a list of row values
|
76 |
"""
|
77 |
self.writer.writerow(row)
|
78 |
return
|
training/metrics.py
CHANGED
@@ -6,6 +6,21 @@ import torch.nn.functional as F
|
|
6 |
|
7 |
# compute mean pixel accuracy
|
8 |
def compute_mean_pixel_acc(true_label, pred_label):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
if true_label.shape != pred_label.shape:
|
10 |
print("true_label has dimension", true_label.shape, ", pred_label values have shape", pred_label.shape)
|
11 |
return
|
@@ -33,6 +48,23 @@ def compute_mean_pixel_acc(true_label, pred_label):
|
|
33 |
|
34 |
# compute mean IOU
|
35 |
def compute_mean_IOU(true_label, pred_label, num_classes=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
iou_list = list()
|
37 |
present_iou_list = list()
|
38 |
|
@@ -55,6 +87,24 @@ def compute_mean_IOU(true_label, pred_label, num_classes=5):
|
|
55 |
return np.mean(present_iou_list)
|
56 |
|
57 |
def compute_class_IOU(true_label, pred_label, num_classes=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
iou_list = list()
|
59 |
present_iou_list = list()
|
60 |
|
|
|
6 |
|
7 |
# compute mean pixel accuracy
|
8 |
def compute_mean_pixel_acc(true_label, pred_label):
|
9 |
+
"""
|
10 |
+
---------
|
11 |
+
Arguments
|
12 |
+
---------
|
13 |
+
true_label : ndarray
|
14 |
+
a numpy array of groundtruth label
|
15 |
+
pred_label : ndarray
|
16 |
+
a numpy array of prediction label
|
17 |
+
|
18 |
+
-------
|
19 |
+
Returns
|
20 |
+
-------
|
21 |
+
mean_pixel_accuracy : float
|
22 |
+
mean pixel accuracy
|
23 |
+
"""
|
24 |
if true_label.shape != pred_label.shape:
|
25 |
print("true_label has dimension", true_label.shape, ", pred_label values have shape", pred_label.shape)
|
26 |
return
|
|
|
48 |
|
49 |
# compute mean IOU
|
50 |
def compute_mean_IOU(true_label, pred_label, num_classes=5):
|
51 |
+
"""
|
52 |
+
---------
|
53 |
+
Arguments
|
54 |
+
---------
|
55 |
+
true_label : ndarray
|
56 |
+
a numpy array of groundtruth label
|
57 |
+
pred_label : ndarray
|
58 |
+
a numpy array of prediction label
|
59 |
+
num_classes : int
|
60 |
+
number of classes in the dataset (default: 5)
|
61 |
+
|
62 |
+
-------
|
63 |
+
Returns
|
64 |
+
-------
|
65 |
+
mean_iou : float
|
66 |
+
mean IoU
|
67 |
+
"""
|
68 |
iou_list = list()
|
69 |
present_iou_list = list()
|
70 |
|
|
|
87 |
return np.mean(present_iou_list)
|
88 |
|
89 |
def compute_class_IOU(true_label, pred_label, num_classes=5):
|
90 |
+
"""
|
91 |
+
---------
|
92 |
+
Arguments
|
93 |
+
---------
|
94 |
+
true_label : ndarray
|
95 |
+
a numpy array of groundtruth label
|
96 |
+
pred_label : ndarray
|
97 |
+
a numpy array of prediction label
|
98 |
+
num_classes : int
|
99 |
+
number of classes in the dataset (default: 5)
|
100 |
+
|
101 |
+
|
102 |
+
-------
|
103 |
+
Returns
|
104 |
+
-------
|
105 |
+
per_class_iou : ndarray
|
106 |
+
a numpy array of per class IoU
|
107 |
+
"""
|
108 |
iou_list = list()
|
109 |
present_iou_list = list()
|
110 |
|
training/seg_models.py
CHANGED
@@ -8,6 +8,15 @@ from encoder_models import resnet18, resnet34, resnet50, resnet101, efficientnet
|
|
8 |
|
9 |
class ResNet18DeepLabV3Plus(nn.Module):
|
10 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
super().__init__()
|
12 |
|
13 |
self.encoder = resnet18(pretrained=pretrained)
|
@@ -22,6 +31,15 @@ class ResNet18DeepLabV3Plus(nn.Module):
|
|
22 |
|
23 |
class ResNet34DeepLabV3Plus(nn.Module):
|
24 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
super().__init__()
|
26 |
|
27 |
self.encoder = resnet34(pretrained=pretrained)
|
@@ -36,6 +54,15 @@ class ResNet34DeepLabV3Plus(nn.Module):
|
|
36 |
|
37 |
class ResNet50DeepLabV3Plus(nn.Module):
|
38 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
super().__init__()
|
40 |
|
41 |
self.encoder = resnet50(pretrained=pretrained)
|
@@ -50,6 +77,15 @@ class ResNet50DeepLabV3Plus(nn.Module):
|
|
50 |
|
51 |
class ResNet101DeepLabV3Plus(nn.Module):
|
52 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
super().__init__()
|
54 |
|
55 |
self.encoder = resnet101(pretrained=pretrained)
|
@@ -64,6 +100,15 @@ class ResNet101DeepLabV3Plus(nn.Module):
|
|
64 |
|
65 |
class EfficientNetSDeepLabV3(nn.Module):
|
66 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
super().__init__()
|
68 |
|
69 |
self.encoder = efficientnet_v2_s(pretrained=pretrained)
|
@@ -81,6 +126,15 @@ class EfficientNetSDeepLabV3(nn.Module):
|
|
81 |
|
82 |
class EfficientNetMDeepLabV3(nn.Module):
|
83 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
super().__init__()
|
85 |
|
86 |
self.encoder = efficientnet_v2_m(pretrained=pretrained)
|
@@ -98,6 +152,15 @@ class EfficientNetMDeepLabV3(nn.Module):
|
|
98 |
|
99 |
class EfficientNetLDeepLabV3(nn.Module):
|
100 |
def __init__(self, num_classes, pretrained=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
super().__init__()
|
102 |
|
103 |
self.encoder = efficientnet_v2_l(pretrained=pretrained)
|
|
|
8 |
|
9 |
class ResNet18DeepLabV3Plus(nn.Module):
|
10 |
def __init__(self, num_classes, pretrained=True):
|
11 |
+
"""
|
12 |
+
----------
|
13 |
+
Attributes
|
14 |
+
----------
|
15 |
+
num_classes : int
|
16 |
+
number of classes in the dataset
|
17 |
+
pretrained : bool
|
18 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
19 |
+
"""
|
20 |
super().__init__()
|
21 |
|
22 |
self.encoder = resnet18(pretrained=pretrained)
|
|
|
31 |
|
32 |
class ResNet34DeepLabV3Plus(nn.Module):
|
33 |
def __init__(self, num_classes, pretrained=True):
|
34 |
+
"""
|
35 |
+
----------
|
36 |
+
Attributes
|
37 |
+
----------
|
38 |
+
num_classes : int
|
39 |
+
number of classes in the dataset
|
40 |
+
pretrained : bool
|
41 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
42 |
+
"""
|
43 |
super().__init__()
|
44 |
|
45 |
self.encoder = resnet34(pretrained=pretrained)
|
|
|
54 |
|
55 |
class ResNet50DeepLabV3Plus(nn.Module):
|
56 |
def __init__(self, num_classes, pretrained=True):
|
57 |
+
"""
|
58 |
+
----------
|
59 |
+
Attributes
|
60 |
+
----------
|
61 |
+
num_classes : int
|
62 |
+
number of classes in the dataset
|
63 |
+
pretrained : bool
|
64 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
65 |
+
"""
|
66 |
super().__init__()
|
67 |
|
68 |
self.encoder = resnet50(pretrained=pretrained)
|
|
|
77 |
|
78 |
class ResNet101DeepLabV3Plus(nn.Module):
|
79 |
def __init__(self, num_classes, pretrained=True):
|
80 |
+
"""
|
81 |
+
----------
|
82 |
+
Attributes
|
83 |
+
----------
|
84 |
+
num_classes : int
|
85 |
+
number of classes in the dataset
|
86 |
+
pretrained : bool
|
87 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
88 |
+
"""
|
89 |
super().__init__()
|
90 |
|
91 |
self.encoder = resnet101(pretrained=pretrained)
|
|
|
100 |
|
101 |
class EfficientNetSDeepLabV3(nn.Module):
|
102 |
def __init__(self, num_classes, pretrained=True):
|
103 |
+
"""
|
104 |
+
----------
|
105 |
+
Attributes
|
106 |
+
----------
|
107 |
+
num_classes : int
|
108 |
+
number of classes in the dataset
|
109 |
+
pretrained : bool
|
110 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
111 |
+
"""
|
112 |
super().__init__()
|
113 |
|
114 |
self.encoder = efficientnet_v2_s(pretrained=pretrained)
|
|
|
126 |
|
127 |
class EfficientNetMDeepLabV3(nn.Module):
|
128 |
def __init__(self, num_classes, pretrained=True):
|
129 |
+
"""
|
130 |
+
----------
|
131 |
+
Attributes
|
132 |
+
----------
|
133 |
+
num_classes : int
|
134 |
+
number of classes in the dataset
|
135 |
+
pretrained : bool
|
136 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
137 |
+
"""
|
138 |
super().__init__()
|
139 |
|
140 |
self.encoder = efficientnet_v2_m(pretrained=pretrained)
|
|
|
152 |
|
153 |
class EfficientNetLDeepLabV3(nn.Module):
|
154 |
def __init__(self, num_classes, pretrained=True):
|
155 |
+
"""
|
156 |
+
----------
|
157 |
+
Attributes
|
158 |
+
----------
|
159 |
+
num_classes : int
|
160 |
+
number of classes in the dataset
|
161 |
+
pretrained : bool
|
162 |
+
indicates whether to load pretrained weights for the encoder model (default: True)
|
163 |
+
"""
|
164 |
super().__init__()
|
165 |
|
166 |
self.encoder = efficientnet_v2_l(pretrained=pretrained)
|
training/train.py
CHANGED
@@ -19,6 +19,23 @@ from logger_utils import CSVWriter, write_dict_to_json
|
|
19 |
from torch.optim.lr_scheduler import _LRScheduler
|
20 |
|
21 |
class PolynomialLR(_LRScheduler):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def __init__(self, optimizer, max_epochs, power=0.9, last_epoch=-1, min_lr=1e-6):
|
23 |
self.power = power
|
24 |
self.max_epochs = max_epochs
|
@@ -30,6 +47,25 @@ class PolynomialLR(_LRScheduler):
|
|
30 |
for base_lr in self.base_lrs]
|
31 |
|
32 |
def validation_loop(dataset_loader, model, ce_loss, device):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
model.eval()
|
34 |
size = len(dataset_loader.dataset)
|
35 |
num_batches = len(dataset_loader)
|
@@ -55,6 +91,27 @@ def validation_loop(dataset_loader, model, ce_loss, device):
|
|
55 |
return valid_loss, valid_acc, valid_IOU
|
56 |
|
57 |
def train_loop(dataset_loader, model, ce_loss, optimizer, device):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
model.train()
|
59 |
size = len(dataset_loader.dataset)
|
60 |
num_batches = len(dataset_loader)
|
|
|
19 |
from torch.optim.lr_scheduler import _LRScheduler
|
20 |
|
21 |
class PolynomialLR(_LRScheduler):
|
22 |
+
"""
|
23 |
+
PolynomialLR class for the polynomial learning rate scheduler
|
24 |
+
|
25 |
+
----------
|
26 |
+
Attributes
|
27 |
+
----------
|
28 |
+
optimizer : object
|
29 |
+
object of type optimizer
|
30 |
+
max_epochs : int
|
31 |
+
maximum number of epochs for which optimization needs to be run
|
32 |
+
power : float
|
33 |
+
the power term in the polynomial learning rate scheduler (default: 0.9)
|
34 |
+
last_epoch : int
|
35 |
+
last epoch in the optimization (default: -1)
|
36 |
+
min_lr : float
|
37 |
+
minimum value for the learning rate (default: 1e-6)
|
38 |
+
"""
|
39 |
def __init__(self, optimizer, max_epochs, power=0.9, last_epoch=-1, min_lr=1e-6):
|
40 |
self.power = power
|
41 |
self.max_epochs = max_epochs
|
|
|
47 |
for base_lr in self.base_lrs]
|
48 |
|
49 |
def validation_loop(dataset_loader, model, ce_loss, device):
|
50 |
+
"""
|
51 |
+
---------
|
52 |
+
Arguments
|
53 |
+
---------
|
54 |
+
dataset_loader : object
|
55 |
+
object of type dataloader
|
56 |
+
model : object
|
57 |
+
object of type model
|
58 |
+
ce_loss : object
|
59 |
+
object of type cross entropy loss
|
60 |
+
device : str
|
61 |
+
device on which validation needs to be run
|
62 |
+
|
63 |
+
-------
|
64 |
+
Returns
|
65 |
+
-------
|
66 |
+
(valid_loss, valid_acc, valid_IOU) : tuple
|
67 |
+
a tuple of torch floats of mean loss, mean accuracy, mean IoU for the validation set
|
68 |
+
"""
|
69 |
model.eval()
|
70 |
size = len(dataset_loader.dataset)
|
71 |
num_batches = len(dataset_loader)
|
|
|
91 |
return valid_loss, valid_acc, valid_IOU
|
92 |
|
93 |
def train_loop(dataset_loader, model, ce_loss, optimizer, device):
|
94 |
+
"""
|
95 |
+
---------
|
96 |
+
Arguments
|
97 |
+
---------
|
98 |
+
dataset_loader : object
|
99 |
+
object of type dataloader
|
100 |
+
model : object
|
101 |
+
object of type model
|
102 |
+
ce_loss : object
|
103 |
+
object of type cross entropy loss
|
104 |
+
optimizer : object
|
105 |
+
object of type optimizer
|
106 |
+
device : str
|
107 |
+
device on which training needs to be run
|
108 |
+
|
109 |
+
-------
|
110 |
+
Returns
|
111 |
+
-------
|
112 |
+
train_loss : torch float
|
113 |
+
mean loss for the training set
|
114 |
+
"""
|
115 |
model.train()
|
116 |
size = len(dataset_loader.dataset)
|
117 |
num_batches = len(dataset_loader)
|