ossaili committed on
Commit
9b43cf7
1 Parent(s): 080c67f
app.py CHANGED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import PIL
3
+ import cv2
4
+ import torch
5
+ import torchvision
6
+ import torch.nn as nn
7
+ from utils.save_load import load_model
8
+ import gradio as gr
9
+ from PIL import Image
10
+ from torchvision import transforms
11
+ import gradio as gr
12
+ from pytorch_grad_cam import GradCAM, AblationCAM, FullGrad, EigenGradCAM, LayerCAM
13
+ from pytorch_grad_cam.utils.image import show_cam_on_image
14
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
15
+ from pytorch_grad_cam import DeepFeatureFactorization
16
+ from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image, deprocess_image
17
+ import numpy as np
18
+ from typing import List
19
+ from matplotlib import pyplot as plt
20
+ from matplotlib.lines import Line2D
21
+
22
# Architectural style names. predict() pairs labels[i] with output unit i of
# the classifier, so the order here must match the order used at training time.
labels = [
    "Achaemenid architecture",
    "American craftsman style",
    "American Foursquare architecture",
    "Ancient Egyptian architecture",
    "Art Deco architecture",
    "Art Nouveau architecture",
    "Baroque architecture",
    "Bauhaus architecture",
    "Beaux-Arts architecture",
    "Brutalism architecture",
    "Byzantine architecture",
    "Chicago school architecture",
    "Colonial architecture",
    "Deconstructivism",
    "Edwardian architecture",
    "Georgian architecture",
    "Gothic architecture",
    "Greek Revival architecture",
    "International style",
    "Islamic architecture",
    "Novelty architecture",
    "Palladian architecture",
    "Postmodern architecture",
    "Queen Anne architecture",
    "Romanesque architecture",
    "Russian Revival architecture",
    "Tudor Revival architecture",
]

print(len(labels))

# EfficientNetV2-L constructed with no weights argument; its head is replaced
# by a dropout + linear layer with one output unit per entry in `labels`.
model = torchvision.models.efficientnet_v2_l()

model.classifier = nn.Sequential(
    nn.Dropout(p=0.4, inplace=True),
    nn.Linear(1280, len(labels), bias=True),
)

# NOTE(review): load_model comes from utils.save_load and is not visible here;
# presumably it loads the trained checkpoint into `model` in place - confirm.
load_model(model)


# Last feature stage of the backbone: the layer whose activations are explained.
target_layers = model.features[-1]
classifier = model.classifier
# The CAM classes iterate over `target_layers`, so the single layer must be
# wrapped in a list; passing the module itself would iterate its sub-modules.
cam = LayerCAM(model=model, target_layers=[target_layers], use_cuda=False)
# DeepFeatureFactorization takes one layer (singular `target_layer` keyword).
dff = DeepFeatureFactorization(
    model=model, target_layer=target_layers, computation_on_concepts=classifier)
68
+
69
+
70
def show_factorization_on_image(img: np.ndarray,
                                explanations: np.ndarray,
                                colors: List[np.ndarray] = None,
                                image_weight: float = 0.5,
                                concept_labels: List = None) -> np.ndarray:
    """Blend per-concept heatmaps over an image, optionally adding a legend.

    Each pixel is assigned to the concept with the largest explanation value
    there. That concept's colour is painted with a brightness proportional to
    the explanation value, and the coloured mask is mixed with ``img``.

    Args:
        img: Image array of shape (H, W, 3). The blend is multiplied by 255
            before the uint8 cast, so values are expected in [0, 1].
        explanations: Array whose first axis indexes concepts and whose
            remaining axes match the image height and width. Values are
            scaled by 255, so they are expected in [0, 1]. The array is not
            modified.
        colors: One colour per concept (only the first three channels are
            used). Defaults to evenly spaced samples of ``gist_rainbow``.
        image_weight: Weight of ``img`` in the blend; the mask gets
            ``1 - image_weight``.
        concept_labels: Legend text, one entry per concept. When given, a
            rendered legend of the same size as the image is stacked below it.

    Returns:
        uint8 RGB array: the blended image, with the legend panel appended
        vertically when ``concept_labels`` is provided.
    """
    n_components = explanations.shape[0]
    if colors is None:
        # taken from https://github.com/edocollins/DFF/blob/master/utils.py
        # plt.get_cmap is used because plt.cm.get_cmap is gone in newer
        # Matplotlib releases.
        _cmap = plt.get_cmap('gist_rainbow')
        colors = [np.array(_cmap(i))
                  for i in np.arange(0, 1, 1.0 / n_components)]

    # Index of the winning concept at every pixel.
    concept_per_pixel = explanations.argmax(axis=0)
    masks = []
    for i in range(n_components):
        mask = np.zeros(shape=(img.shape[0], img.shape[1], 3))
        mask[:, :, :] = colors[i][:3]
        # Keep this concept's values only where it wins. np.where allocates a
        # new array, so the caller's `explanations` is left untouched.
        explanation = np.where(concept_per_pixel == i, explanations[i], 0)
        mask = np.uint8(mask * 255)
        # Go through HSV so the explanation strength drives the V channel
        # (brightness) while hue and saturation keep the concept colour.
        mask = cv2.cvtColor(mask, cv2.COLOR_RGB2HSV)
        mask[:, :, 2] = np.uint8(255 * explanation)
        mask = cv2.cvtColor(mask, cv2.COLOR_HSV2RGB)
        mask = np.float32(mask) / 255
        masks.append(mask)

    # Masks are disjoint (one winner per pixel), so summing overlays them.
    mask = np.sum(np.float32(masks), axis=0)
    result = img * image_weight + mask * (1 - image_weight)
    result = np.uint8(result * 255)

    if concept_labels is not None:
        px = 1 / plt.rcParams['figure.dpi']  # pixel in inches
        fig = plt.figure(figsize=(result.shape[1] * px, result.shape[0] * px))
        # Font size and line width scale with image height (baseline 256 px).
        legend_fontsize = 6 * result.shape[0] / 256
        lw = 5 * result.shape[0] / 256
        lines = [Line2D([0], [0], color=colors[i], lw=lw)
                 for i in range(n_components)]
        # fontsize is passed directly instead of overwriting the global
        # 'legend.fontsize' rcParam, which would leak into later figures.
        plt.legend(lines,
                   concept_labels,
                   fontsize=legend_fontsize,
                   fancybox=False,
                   shadow=False,
                   frameon=False,
                   loc="center")

        plt.tight_layout(pad=0, w_pad=0, h_pad=0)
        plt.axis('off')
        fig.canvas.draw()
        # buffer_rgba replaces canvas.tostring_rgb, which newer Matplotlib
        # releases no longer provide; drop alpha and copy out of the canvas
        # buffer before the figure is closed.
        data = np.ascontiguousarray(
            np.asarray(fig.canvas.buffer_rgba())[:, :, :3])
        plt.close(fig=fig)
        data = cv2.resize(data, (result.shape[1], result.shape[0]))
        result = np.vstack((result, data))
    return result
128
+
129
+
130
def create_labels(concept_scores, top_k=2, class_names=None):
    """Build one legend string per concept from its top-scoring classes.

    Args:
        concept_scores: 2-D array; row ``i`` holds the per-class scores of
            concept ``i``. Scores are shown as percentages (``score * 100``).
        top_k: Number of best classes to list per concept. Coerced to ``int``
            so a float such as ``2.0`` can still be used as a slice bound.
        class_names: Sequence mapping a class index to its display name.
            Defaults to the module-level ``labels`` list.

    Returns:
        A list with one string per concept. Each string contains ``top_k``
        lines of the form ``"<name>:<score>%"``, best class first, joined by
        newlines.
    """
    if class_names is None:
        class_names = labels
    top_k = int(top_k)

    # Class indices of each row sorted by descending score, trimmed to top_k.
    top_categories = np.argsort(concept_scores, axis=1)[:, ::-1][:, :top_k]

    concept_labels_topk = []
    for row_scores, row_categories in zip(concept_scores, top_categories):
        # Only the text before the first comma of a class name is displayed.
        entries = [
            f"{class_names[category].split(',')[0]}:{row_scores[category]*100:.2f}%"
            for category in row_categories
        ]
        concept_labels_topk.append("\n".join(entries))
    return concept_labels_topk
143
+
144
+
145
def predict(rgb_img, top_k):
    """Classify an image and render a Deep Feature Factorization overlay.

    Args:
        rgb_img: Input picture; the Gradio interface supplies a PIL image.
        top_k: How many class names to list per concept in the legend.

    Returns:
        A pair ``(confidences, visualization)``: a dict mapping every entry
        of ``labels`` to its softmax probability, and the image produced by
        ``show_factorization_on_image``.
    """
    print(top_k)

    # ToTensor -> per-channel normalisation -> resize to the 224x224 input.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4937, 0.5060, 0.5030],
                             [0.2705, 0.2653, 0.2998]),
        transforms.Resize((224, 224)),
    ])
    inp_01 = preprocess(rgb_img)
    batch = inp_01.unsqueeze(0)  # add the batch dimension

    model.eval()
    with torch.no_grad():
        logits = model(batch)[0]
        prediction = torch.nn.functional.softmax(logits, dim=0)
        confidences = {
            name: float(prediction[index])
            for index, name in enumerate(labels)
        }

    # Factorise the target layer's activations into 5 concepts.
    _concepts, batch_explanations, concept_outputs = dff(batch, 5)

    # Turn the per-concept classifier outputs into probabilities.
    concept_probs = torch.from_numpy(concept_outputs)
    concept_outputs = torch.softmax(concept_probs, dim=-1).numpy()
    concept_label_strings = create_labels(concept_outputs, top_k=top_k)

    print(inp_01.shape)
    print(batch_explanations[0].shape)

    # cv2.resize wants channels last and a (width, height) target, so the
    # concept maps are transposed, scaled to the original image size, and
    # transposed back to concept-first layout.
    width, height = rgb_img.size[0], rgb_img.size[1]
    channels_last = np.transpose(batch_explanations[0], (1, 2, 0))
    res = cv2.resize(channels_last, (width, height))
    res = np.transpose(res, (2, 0, 1))
    print(res.shape)

    visualization_01 = show_factorization_on_image(
        np.float32(rgb_img) / 255.0,
        res,
        image_weight=0.3,
        concept_labels=concept_label_strings)

    return confidences, visualization_01
182
+
183
+
184
# Gradio front end: an image upload plus a slider for how many class names to
# list per concept; shows the top-5 class confidences and the overlay image.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(minimum=1, maximum=4, label="Number of top results", step=1),
    ],
    outputs=[gr.Label(num_top_classes=5), "image"],
    examples=[
        ["./assets/bauhaus.jpg", 1],
        ["./assets/frank_gehry.jpg", 2],
        ["./assets/pyramid.jpg", 3],
    ],
)
demo.launch()
191
+
192
+
193
+ # examples=["./assets/bauhaus.jpg", "./assets/frank_gehry.jpg", "./assets/pyramid.jpg"]
assets/bauhaus.jpg ADDED
assets/frank_gehry.jpg ADDED
assets/pyramid.jpg ADDED
models/model_weights_27_styles.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ca956f118139d5e28e3181e80cd5d408f1a090656c9dba0c58dc4e260619c7
3
+ size 471688845
network.txt ADDED
@@ -0,0 +1,1855 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EfficientNet(
2
+ (features): Sequential(
3
+ (0): Conv2dNormActivation(
4
+ (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
5
+ (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
6
+ (2): SiLU(inplace=True)
7
+ )
8
+ (1): Sequential(
9
+ (0): FusedMBConv(
10
+ (block): Sequential(
11
+ (0): Conv2dNormActivation(
12
+ (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
13
+ (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
14
+ (2): SiLU(inplace=True)
15
+ )
16
+ )
17
+ (stochastic_depth): StochasticDepth(p=0.0, mode=row)
18
+ )
19
+ (1): FusedMBConv(
20
+ (block): Sequential(
21
+ (0): Conv2dNormActivation(
22
+ (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
23
+ (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
24
+ (2): SiLU(inplace=True)
25
+ )
26
+ )
27
+ (stochastic_depth): StochasticDepth(p=0.002531645569620253, mode=row)
28
+ )
29
+ (2): FusedMBConv(
30
+ (block): Sequential(
31
+ (0): Conv2dNormActivation(
32
+ (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
33
+ (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
34
+ (2): SiLU(inplace=True)
35
+ )
36
+ )
37
+ (stochastic_depth): StochasticDepth(p=0.005063291139240506, mode=row)
38
+ )
39
+ (3): FusedMBConv(
40
+ (block): Sequential(
41
+ (0): Conv2dNormActivation(
42
+ (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
43
+ (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
44
+ (2): SiLU(inplace=True)
45
+ )
46
+ )
47
+ (stochastic_depth): StochasticDepth(p=0.007594936708860761, mode=row)
48
+ )
49
+ )
50
+ (2): Sequential(
51
+ (0): FusedMBConv(
52
+ (block): Sequential(
53
+ (0): Conv2dNormActivation(
54
+ (0): Conv2d(32, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
55
+ (1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
56
+ (2): SiLU(inplace=True)
57
+ )
58
+ (1): Conv2dNormActivation(
59
+ (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
60
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
61
+ )
62
+ )
63
+ (stochastic_depth): StochasticDepth(p=0.010126582278481013, mode=row)
64
+ )
65
+ (1): FusedMBConv(
66
+ (block): Sequential(
67
+ (0): Conv2dNormActivation(
68
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
69
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
70
+ (2): SiLU(inplace=True)
71
+ )
72
+ (1): Conv2dNormActivation(
73
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
74
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
75
+ )
76
+ )
77
+ (stochastic_depth): StochasticDepth(p=0.012658227848101266, mode=row)
78
+ )
79
+ (2): FusedMBConv(
80
+ (block): Sequential(
81
+ (0): Conv2dNormActivation(
82
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
83
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
84
+ (2): SiLU(inplace=True)
85
+ )
86
+ (1): Conv2dNormActivation(
87
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
88
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
89
+ )
90
+ )
91
+ (stochastic_depth): StochasticDepth(p=0.015189873417721522, mode=row)
92
+ )
93
+ (3): FusedMBConv(
94
+ (block): Sequential(
95
+ (0): Conv2dNormActivation(
96
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
97
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
98
+ (2): SiLU(inplace=True)
99
+ )
100
+ (1): Conv2dNormActivation(
101
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
102
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
103
+ )
104
+ )
105
+ (stochastic_depth): StochasticDepth(p=0.017721518987341773, mode=row)
106
+ )
107
+ (4): FusedMBConv(
108
+ (block): Sequential(
109
+ (0): Conv2dNormActivation(
110
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
111
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
112
+ (2): SiLU(inplace=True)
113
+ )
114
+ (1): Conv2dNormActivation(
115
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
116
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
117
+ )
118
+ )
119
+ (stochastic_depth): StochasticDepth(p=0.020253164556962026, mode=row)
120
+ )
121
+ (5): FusedMBConv(
122
+ (block): Sequential(
123
+ (0): Conv2dNormActivation(
124
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
125
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
126
+ (2): SiLU(inplace=True)
127
+ )
128
+ (1): Conv2dNormActivation(
129
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
130
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
131
+ )
132
+ )
133
+ (stochastic_depth): StochasticDepth(p=0.02278481012658228, mode=row)
134
+ )
135
+ (6): FusedMBConv(
136
+ (block): Sequential(
137
+ (0): Conv2dNormActivation(
138
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
139
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
140
+ (2): SiLU(inplace=True)
141
+ )
142
+ (1): Conv2dNormActivation(
143
+ (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
144
+ (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
145
+ )
146
+ )
147
+ (stochastic_depth): StochasticDepth(p=0.02531645569620253, mode=row)
148
+ )
149
+ )
150
+ (3): Sequential(
151
+ (0): FusedMBConv(
152
+ (block): Sequential(
153
+ (0): Conv2dNormActivation(
154
+ (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
155
+ (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
156
+ (2): SiLU(inplace=True)
157
+ )
158
+ (1): Conv2dNormActivation(
159
+ (0): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
160
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
161
+ )
162
+ )
163
+ (stochastic_depth): StochasticDepth(p=0.027848101265822787, mode=row)
164
+ )
165
+ (1): FusedMBConv(
166
+ (block): Sequential(
167
+ (0): Conv2dNormActivation(
168
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
169
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
170
+ (2): SiLU(inplace=True)
171
+ )
172
+ (1): Conv2dNormActivation(
173
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
174
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
175
+ )
176
+ )
177
+ (stochastic_depth): StochasticDepth(p=0.030379746835443044, mode=row)
178
+ )
179
+ (2): FusedMBConv(
180
+ (block): Sequential(
181
+ (0): Conv2dNormActivation(
182
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
183
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
184
+ (2): SiLU(inplace=True)
185
+ )
186
+ (1): Conv2dNormActivation(
187
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
188
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
189
+ )
190
+ )
191
+ (stochastic_depth): StochasticDepth(p=0.03291139240506329, mode=row)
192
+ )
193
+ (3): FusedMBConv(
194
+ (block): Sequential(
195
+ (0): Conv2dNormActivation(
196
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
197
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
198
+ (2): SiLU(inplace=True)
199
+ )
200
+ (1): Conv2dNormActivation(
201
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
202
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
203
+ )
204
+ )
205
+ (stochastic_depth): StochasticDepth(p=0.035443037974683546, mode=row)
206
+ )
207
+ (4): FusedMBConv(
208
+ (block): Sequential(
209
+ (0): Conv2dNormActivation(
210
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
211
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
212
+ (2): SiLU(inplace=True)
213
+ )
214
+ (1): Conv2dNormActivation(
215
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
216
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
217
+ )
218
+ )
219
+ (stochastic_depth): StochasticDepth(p=0.0379746835443038, mode=row)
220
+ )
221
+ (5): FusedMBConv(
222
+ (block): Sequential(
223
+ (0): Conv2dNormActivation(
224
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
225
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
226
+ (2): SiLU(inplace=True)
227
+ )
228
+ (1): Conv2dNormActivation(
229
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
230
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
231
+ )
232
+ )
233
+ (stochastic_depth): StochasticDepth(p=0.04050632911392405, mode=row)
234
+ )
235
+ (6): FusedMBConv(
236
+ (block): Sequential(
237
+ (0): Conv2dNormActivation(
238
+ (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
239
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
240
+ (2): SiLU(inplace=True)
241
+ )
242
+ (1): Conv2dNormActivation(
243
+ (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
244
+ (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
245
+ )
246
+ )
247
+ (stochastic_depth): StochasticDepth(p=0.04303797468354431, mode=row)
248
+ )
249
+ )
250
+ (4): Sequential(
251
+ (0): MBConv(
252
+ (block): Sequential(
253
+ (0): Conv2dNormActivation(
254
+ (0): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
255
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
256
+ (2): SiLU(inplace=True)
257
+ )
258
+ (1): Conv2dNormActivation(
259
+ (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
260
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
261
+ (2): SiLU(inplace=True)
262
+ )
263
+ (2): SqueezeExcitation(
264
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
265
+ (fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
266
+ (fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
267
+ (activation): SiLU(inplace=True)
268
+ (scale_activation): Sigmoid()
269
+ )
270
+ (3): Conv2dNormActivation(
271
+ (0): Conv2d(384, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
272
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
273
+ )
274
+ )
275
+ (stochastic_depth): StochasticDepth(p=0.04556962025316456, mode=row)
276
+ )
277
+ (1): MBConv(
278
+ (block): Sequential(
279
+ (0): Conv2dNormActivation(
280
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
281
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
282
+ (2): SiLU(inplace=True)
283
+ )
284
+ (1): Conv2dNormActivation(
285
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
286
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
287
+ (2): SiLU(inplace=True)
288
+ )
289
+ (2): SqueezeExcitation(
290
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
291
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
292
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
293
+ (activation): SiLU(inplace=True)
294
+ (scale_activation): Sigmoid()
295
+ )
296
+ (3): Conv2dNormActivation(
297
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
298
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
299
+ )
300
+ )
301
+ (stochastic_depth): StochasticDepth(p=0.04810126582278482, mode=row)
302
+ )
303
+ (2): MBConv(
304
+ (block): Sequential(
305
+ (0): Conv2dNormActivation(
306
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
307
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
308
+ (2): SiLU(inplace=True)
309
+ )
310
+ (1): Conv2dNormActivation(
311
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
312
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
313
+ (2): SiLU(inplace=True)
314
+ )
315
+ (2): SqueezeExcitation(
316
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
317
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
318
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
319
+ (activation): SiLU(inplace=True)
320
+ (scale_activation): Sigmoid()
321
+ )
322
+ (3): Conv2dNormActivation(
323
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
324
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
325
+ )
326
+ )
327
+ (stochastic_depth): StochasticDepth(p=0.05063291139240506, mode=row)
328
+ )
329
+ (3): MBConv(
330
+ (block): Sequential(
331
+ (0): Conv2dNormActivation(
332
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
333
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
334
+ (2): SiLU(inplace=True)
335
+ )
336
+ (1): Conv2dNormActivation(
337
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
338
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
339
+ (2): SiLU(inplace=True)
340
+ )
341
+ (2): SqueezeExcitation(
342
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
343
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
344
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
345
+ (activation): SiLU(inplace=True)
346
+ (scale_activation): Sigmoid()
347
+ )
348
+ (3): Conv2dNormActivation(
349
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
350
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
351
+ )
352
+ )
353
+ (stochastic_depth): StochasticDepth(p=0.053164556962025315, mode=row)
354
+ )
355
+ (4): MBConv(
356
+ (block): Sequential(
357
+ (0): Conv2dNormActivation(
358
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
359
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
360
+ (2): SiLU(inplace=True)
361
+ )
362
+ (1): Conv2dNormActivation(
363
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
364
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
365
+ (2): SiLU(inplace=True)
366
+ )
367
+ (2): SqueezeExcitation(
368
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
369
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
370
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
371
+ (activation): SiLU(inplace=True)
372
+ (scale_activation): Sigmoid()
373
+ )
374
+ (3): Conv2dNormActivation(
375
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
376
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
377
+ )
378
+ )
379
+ (stochastic_depth): StochasticDepth(p=0.055696202531645575, mode=row)
380
+ )
381
+ (5): MBConv(
382
+ (block): Sequential(
383
+ (0): Conv2dNormActivation(
384
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
385
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
386
+ (2): SiLU(inplace=True)
387
+ )
388
+ (1): Conv2dNormActivation(
389
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
390
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
391
+ (2): SiLU(inplace=True)
392
+ )
393
+ (2): SqueezeExcitation(
394
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
395
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
396
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
397
+ (activation): SiLU(inplace=True)
398
+ (scale_activation): Sigmoid()
399
+ )
400
+ (3): Conv2dNormActivation(
401
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
402
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
403
+ )
404
+ )
405
+ (stochastic_depth): StochasticDepth(p=0.05822784810126583, mode=row)
406
+ )
407
+ (6): MBConv(
408
+ (block): Sequential(
409
+ (0): Conv2dNormActivation(
410
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
411
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
412
+ (2): SiLU(inplace=True)
413
+ )
414
+ (1): Conv2dNormActivation(
415
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
416
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
417
+ (2): SiLU(inplace=True)
418
+ )
419
+ (2): SqueezeExcitation(
420
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
421
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
422
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
423
+ (activation): SiLU(inplace=True)
424
+ (scale_activation): Sigmoid()
425
+ )
426
+ (3): Conv2dNormActivation(
427
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
428
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
429
+ )
430
+ )
431
+ (stochastic_depth): StochasticDepth(p=0.06075949367088609, mode=row)
432
+ )
433
+ (7): MBConv(
434
+ (block): Sequential(
435
+ (0): Conv2dNormActivation(
436
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
437
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
438
+ (2): SiLU(inplace=True)
439
+ )
440
+ (1): Conv2dNormActivation(
441
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
442
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
443
+ (2): SiLU(inplace=True)
444
+ )
445
+ (2): SqueezeExcitation(
446
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
447
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
448
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
449
+ (activation): SiLU(inplace=True)
450
+ (scale_activation): Sigmoid()
451
+ )
452
+ (3): Conv2dNormActivation(
453
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
454
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
455
+ )
456
+ )
457
+ (stochastic_depth): StochasticDepth(p=0.06329113924050633, mode=row)
458
+ )
459
+ (8): MBConv(
460
+ (block): Sequential(
461
+ (0): Conv2dNormActivation(
462
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
463
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
464
+ (2): SiLU(inplace=True)
465
+ )
466
+ (1): Conv2dNormActivation(
467
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
468
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
469
+ (2): SiLU(inplace=True)
470
+ )
471
+ (2): SqueezeExcitation(
472
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
473
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
474
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
475
+ (activation): SiLU(inplace=True)
476
+ (scale_activation): Sigmoid()
477
+ )
478
+ (3): Conv2dNormActivation(
479
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
480
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
481
+ )
482
+ )
483
+ (stochastic_depth): StochasticDepth(p=0.06582278481012659, mode=row)
484
+ )
485
+ (9): MBConv(
486
+ (block): Sequential(
487
+ (0): Conv2dNormActivation(
488
+ (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
489
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
490
+ (2): SiLU(inplace=True)
491
+ )
492
+ (1): Conv2dNormActivation(
493
+ (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
494
+ (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
495
+ (2): SiLU(inplace=True)
496
+ )
497
+ (2): SqueezeExcitation(
498
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
499
+ (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
500
+ (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
501
+ (activation): SiLU(inplace=True)
502
+ (scale_activation): Sigmoid()
503
+ )
504
+ (3): Conv2dNormActivation(
505
+ (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
506
+ (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
507
+ )
508
+ )
509
+ (stochastic_depth): StochasticDepth(p=0.06835443037974684, mode=row)
510
+ )
511
+ )
512
+ (5): Sequential(
513
+ (0): MBConv(
514
+ (block): Sequential(
515
+ (0): Conv2dNormActivation(
516
+ (0): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
517
+ (1): BatchNorm2d(1152, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
518
+ (2): SiLU(inplace=True)
519
+ )
520
+ (1): Conv2dNormActivation(
521
+ (0): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False)
522
+ (1): BatchNorm2d(1152, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
523
+ (2): SiLU(inplace=True)
524
+ )
525
+ (2): SqueezeExcitation(
526
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
527
+ (fc1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
528
+ (fc2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
529
+ (activation): SiLU(inplace=True)
530
+ (scale_activation): Sigmoid()
531
+ )
532
+ (3): Conv2dNormActivation(
533
+ (0): Conv2d(1152, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
534
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
535
+ )
536
+ )
537
+ (stochastic_depth): StochasticDepth(p=0.07088607594936709, mode=row)
538
+ )
539
+ (1): MBConv(
540
+ (block): Sequential(
541
+ (0): Conv2dNormActivation(
542
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
543
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
544
+ (2): SiLU(inplace=True)
545
+ )
546
+ (1): Conv2dNormActivation(
547
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
548
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
549
+ (2): SiLU(inplace=True)
550
+ )
551
+ (2): SqueezeExcitation(
552
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
553
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
554
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
555
+ (activation): SiLU(inplace=True)
556
+ (scale_activation): Sigmoid()
557
+ )
558
+ (3): Conv2dNormActivation(
559
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
560
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
561
+ )
562
+ )
563
+ (stochastic_depth): StochasticDepth(p=0.07341772151898734, mode=row)
564
+ )
565
+ (2): MBConv(
566
+ (block): Sequential(
567
+ (0): Conv2dNormActivation(
568
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
569
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
570
+ (2): SiLU(inplace=True)
571
+ )
572
+ (1): Conv2dNormActivation(
573
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
574
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
575
+ (2): SiLU(inplace=True)
576
+ )
577
+ (2): SqueezeExcitation(
578
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
579
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
580
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
581
+ (activation): SiLU(inplace=True)
582
+ (scale_activation): Sigmoid()
583
+ )
584
+ (3): Conv2dNormActivation(
585
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
586
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
587
+ )
588
+ )
589
+ (stochastic_depth): StochasticDepth(p=0.0759493670886076, mode=row)
590
+ )
591
+ (3): MBConv(
592
+ (block): Sequential(
593
+ (0): Conv2dNormActivation(
594
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
595
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
596
+ (2): SiLU(inplace=True)
597
+ )
598
+ (1): Conv2dNormActivation(
599
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
600
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
601
+ (2): SiLU(inplace=True)
602
+ )
603
+ (2): SqueezeExcitation(
604
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
605
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
606
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
607
+ (activation): SiLU(inplace=True)
608
+ (scale_activation): Sigmoid()
609
+ )
610
+ (3): Conv2dNormActivation(
611
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
612
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
613
+ )
614
+ )
615
+ (stochastic_depth): StochasticDepth(p=0.07848101265822785, mode=row)
616
+ )
617
+ (4): MBConv(
618
+ (block): Sequential(
619
+ (0): Conv2dNormActivation(
620
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
621
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
622
+ (2): SiLU(inplace=True)
623
+ )
624
+ (1): Conv2dNormActivation(
625
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
626
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
627
+ (2): SiLU(inplace=True)
628
+ )
629
+ (2): SqueezeExcitation(
630
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
631
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
632
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
633
+ (activation): SiLU(inplace=True)
634
+ (scale_activation): Sigmoid()
635
+ )
636
+ (3): Conv2dNormActivation(
637
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
638
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
639
+ )
640
+ )
641
+ (stochastic_depth): StochasticDepth(p=0.0810126582278481, mode=row)
642
+ )
643
+ (5): MBConv(
644
+ (block): Sequential(
645
+ (0): Conv2dNormActivation(
646
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
647
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
648
+ (2): SiLU(inplace=True)
649
+ )
650
+ (1): Conv2dNormActivation(
651
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
652
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
653
+ (2): SiLU(inplace=True)
654
+ )
655
+ (2): SqueezeExcitation(
656
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
657
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
658
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
659
+ (activation): SiLU(inplace=True)
660
+ (scale_activation): Sigmoid()
661
+ )
662
+ (3): Conv2dNormActivation(
663
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
664
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
665
+ )
666
+ )
667
+ (stochastic_depth): StochasticDepth(p=0.08354430379746836, mode=row)
668
+ )
669
+ (6): MBConv(
670
+ (block): Sequential(
671
+ (0): Conv2dNormActivation(
672
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
673
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
674
+ (2): SiLU(inplace=True)
675
+ )
676
+ (1): Conv2dNormActivation(
677
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
678
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
679
+ (2): SiLU(inplace=True)
680
+ )
681
+ (2): SqueezeExcitation(
682
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
683
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
684
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
685
+ (activation): SiLU(inplace=True)
686
+ (scale_activation): Sigmoid()
687
+ )
688
+ (3): Conv2dNormActivation(
689
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
690
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
691
+ )
692
+ )
693
+ (stochastic_depth): StochasticDepth(p=0.08607594936708862, mode=row)
694
+ )
695
+ (7): MBConv(
696
+ (block): Sequential(
697
+ (0): Conv2dNormActivation(
698
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
700
+ (2): SiLU(inplace=True)
701
+ )
702
+ (1): Conv2dNormActivation(
703
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
704
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
705
+ (2): SiLU(inplace=True)
706
+ )
707
+ (2): SqueezeExcitation(
708
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
709
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
710
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
711
+ (activation): SiLU(inplace=True)
712
+ (scale_activation): Sigmoid()
713
+ )
714
+ (3): Conv2dNormActivation(
715
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
716
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
717
+ )
718
+ )
719
+ (stochastic_depth): StochasticDepth(p=0.08860759493670886, mode=row)
720
+ )
721
+ (8): MBConv(
722
+ (block): Sequential(
723
+ (0): Conv2dNormActivation(
724
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
725
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
726
+ (2): SiLU(inplace=True)
727
+ )
728
+ (1): Conv2dNormActivation(
729
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
730
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
731
+ (2): SiLU(inplace=True)
732
+ )
733
+ (2): SqueezeExcitation(
734
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
735
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
736
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
737
+ (activation): SiLU(inplace=True)
738
+ (scale_activation): Sigmoid()
739
+ )
740
+ (3): Conv2dNormActivation(
741
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
742
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
743
+ )
744
+ )
745
+ (stochastic_depth): StochasticDepth(p=0.09113924050632911, mode=row)
746
+ )
747
+ (9): MBConv(
748
+ (block): Sequential(
749
+ (0): Conv2dNormActivation(
750
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
751
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
752
+ (2): SiLU(inplace=True)
753
+ )
754
+ (1): Conv2dNormActivation(
755
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
756
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
757
+ (2): SiLU(inplace=True)
758
+ )
759
+ (2): SqueezeExcitation(
760
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
761
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
762
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
763
+ (activation): SiLU(inplace=True)
764
+ (scale_activation): Sigmoid()
765
+ )
766
+ (3): Conv2dNormActivation(
767
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
768
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
769
+ )
770
+ )
771
+ (stochastic_depth): StochasticDepth(p=0.09367088607594937, mode=row)
772
+ )
773
+ (10): MBConv(
774
+ (block): Sequential(
775
+ (0): Conv2dNormActivation(
776
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
777
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
778
+ (2): SiLU(inplace=True)
779
+ )
780
+ (1): Conv2dNormActivation(
781
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
782
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
783
+ (2): SiLU(inplace=True)
784
+ )
785
+ (2): SqueezeExcitation(
786
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
787
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
788
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
789
+ (activation): SiLU(inplace=True)
790
+ (scale_activation): Sigmoid()
791
+ )
792
+ (3): Conv2dNormActivation(
793
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
794
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
795
+ )
796
+ )
797
+ (stochastic_depth): StochasticDepth(p=0.09620253164556963, mode=row)
798
+ )
799
+ (11): MBConv(
800
+ (block): Sequential(
801
+ (0): Conv2dNormActivation(
802
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
803
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
804
+ (2): SiLU(inplace=True)
805
+ )
806
+ (1): Conv2dNormActivation(
807
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
808
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
809
+ (2): SiLU(inplace=True)
810
+ )
811
+ (2): SqueezeExcitation(
812
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
813
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
814
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
815
+ (activation): SiLU(inplace=True)
816
+ (scale_activation): Sigmoid()
817
+ )
818
+ (3): Conv2dNormActivation(
819
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
820
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
821
+ )
822
+ )
823
+ (stochastic_depth): StochasticDepth(p=0.09873417721518989, mode=row)
824
+ )
825
+ (12): MBConv(
826
+ (block): Sequential(
827
+ (0): Conv2dNormActivation(
828
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
829
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
830
+ (2): SiLU(inplace=True)
831
+ )
832
+ (1): Conv2dNormActivation(
833
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
834
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
835
+ (2): SiLU(inplace=True)
836
+ )
837
+ (2): SqueezeExcitation(
838
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
839
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
840
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
841
+ (activation): SiLU(inplace=True)
842
+ (scale_activation): Sigmoid()
843
+ )
844
+ (3): Conv2dNormActivation(
845
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
846
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
847
+ )
848
+ )
849
+ (stochastic_depth): StochasticDepth(p=0.10126582278481013, mode=row)
850
+ )
851
+ (13): MBConv(
852
+ (block): Sequential(
853
+ (0): Conv2dNormActivation(
854
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
855
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
856
+ (2): SiLU(inplace=True)
857
+ )
858
+ (1): Conv2dNormActivation(
859
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
860
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
861
+ (2): SiLU(inplace=True)
862
+ )
863
+ (2): SqueezeExcitation(
864
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
865
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
866
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
867
+ (activation): SiLU(inplace=True)
868
+ (scale_activation): Sigmoid()
869
+ )
870
+ (3): Conv2dNormActivation(
871
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
872
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
873
+ )
874
+ )
875
+ (stochastic_depth): StochasticDepth(p=0.10379746835443039, mode=row)
876
+ )
877
+ (14): MBConv(
878
+ (block): Sequential(
879
+ (0): Conv2dNormActivation(
880
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
881
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
882
+ (2): SiLU(inplace=True)
883
+ )
884
+ (1): Conv2dNormActivation(
885
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
886
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
887
+ (2): SiLU(inplace=True)
888
+ )
889
+ (2): SqueezeExcitation(
890
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
891
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
892
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
893
+ (activation): SiLU(inplace=True)
894
+ (scale_activation): Sigmoid()
895
+ )
896
+ (3): Conv2dNormActivation(
897
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
898
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
899
+ )
900
+ )
901
+ (stochastic_depth): StochasticDepth(p=0.10632911392405063, mode=row)
902
+ )
903
+ (15): MBConv(
904
+ (block): Sequential(
905
+ (0): Conv2dNormActivation(
906
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
907
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
908
+ (2): SiLU(inplace=True)
909
+ )
910
+ (1): Conv2dNormActivation(
911
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
912
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
913
+ (2): SiLU(inplace=True)
914
+ )
915
+ (2): SqueezeExcitation(
916
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
917
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
918
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
919
+ (activation): SiLU(inplace=True)
920
+ (scale_activation): Sigmoid()
921
+ )
922
+ (3): Conv2dNormActivation(
923
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
924
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
925
+ )
926
+ )
927
+ (stochastic_depth): StochasticDepth(p=0.10886075949367088, mode=row)
928
+ )
929
+ (16): MBConv(
930
+ (block): Sequential(
931
+ (0): Conv2dNormActivation(
932
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
933
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
934
+ (2): SiLU(inplace=True)
935
+ )
936
+ (1): Conv2dNormActivation(
937
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
938
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
939
+ (2): SiLU(inplace=True)
940
+ )
941
+ (2): SqueezeExcitation(
942
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
943
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
944
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
945
+ (activation): SiLU(inplace=True)
946
+ (scale_activation): Sigmoid()
947
+ )
948
+ (3): Conv2dNormActivation(
949
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
950
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
951
+ )
952
+ )
953
+ (stochastic_depth): StochasticDepth(p=0.11139240506329115, mode=row)
954
+ )
955
+ (17): MBConv(
956
+ (block): Sequential(
957
+ (0): Conv2dNormActivation(
958
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
959
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
960
+ (2): SiLU(inplace=True)
961
+ )
962
+ (1): Conv2dNormActivation(
963
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
964
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
965
+ (2): SiLU(inplace=True)
966
+ )
967
+ (2): SqueezeExcitation(
968
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
969
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
970
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
971
+ (activation): SiLU(inplace=True)
972
+ (scale_activation): Sigmoid()
973
+ )
974
+ (3): Conv2dNormActivation(
975
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
976
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
977
+ )
978
+ )
979
+ (stochastic_depth): StochasticDepth(p=0.11392405063291139, mode=row)
980
+ )
981
+ (18): MBConv(
982
+ (block): Sequential(
983
+ (0): Conv2dNormActivation(
984
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
985
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
986
+ (2): SiLU(inplace=True)
987
+ )
988
+ (1): Conv2dNormActivation(
989
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False)
990
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
991
+ (2): SiLU(inplace=True)
992
+ )
993
+ (2): SqueezeExcitation(
994
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
995
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
996
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
997
+ (activation): SiLU(inplace=True)
998
+ (scale_activation): Sigmoid()
999
+ )
1000
+ (3): Conv2dNormActivation(
1001
+ (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
1002
+ (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1003
+ )
1004
+ )
1005
+ (stochastic_depth): StochasticDepth(p=0.11645569620253166, mode=row)
1006
+ )
1007
+ )
1008
+ (6): Sequential(
1009
+ (0): MBConv(
1010
+ (block): Sequential(
1011
+ (0): Conv2dNormActivation(
1012
+ (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False)
1013
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1014
+ (2): SiLU(inplace=True)
1015
+ )
1016
+ (1): Conv2dNormActivation(
1017
+ (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=1344, bias=False)
1018
+ (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1019
+ (2): SiLU(inplace=True)
1020
+ )
1021
+ (2): SqueezeExcitation(
1022
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1023
+ (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1))
1024
+ (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1))
1025
+ (activation): SiLU(inplace=True)
1026
+ (scale_activation): Sigmoid()
1027
+ )
1028
+ (3): Conv2dNormActivation(
1029
+ (0): Conv2d(1344, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1030
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1031
+ )
1032
+ )
1033
+ (stochastic_depth): StochasticDepth(p=0.11898734177215191, mode=row)
1034
+ )
1035
+ (1): MBConv(
1036
+ (block): Sequential(
1037
+ (0): Conv2dNormActivation(
1038
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1039
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1040
+ (2): SiLU(inplace=True)
1041
+ )
1042
+ (1): Conv2dNormActivation(
1043
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1044
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1045
+ (2): SiLU(inplace=True)
1046
+ )
1047
+ (2): SqueezeExcitation(
1048
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1049
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1050
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1051
+ (activation): SiLU(inplace=True)
1052
+ (scale_activation): Sigmoid()
1053
+ )
1054
+ (3): Conv2dNormActivation(
1055
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1056
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1057
+ )
1058
+ )
1059
+ (stochastic_depth): StochasticDepth(p=0.12151898734177217, mode=row)
1060
+ )
1061
+ (2): MBConv(
1062
+ (block): Sequential(
1063
+ (0): Conv2dNormActivation(
1064
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1065
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1066
+ (2): SiLU(inplace=True)
1067
+ )
1068
+ (1): Conv2dNormActivation(
1069
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1070
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1071
+ (2): SiLU(inplace=True)
1072
+ )
1073
+ (2): SqueezeExcitation(
1074
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1075
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1076
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1077
+ (activation): SiLU(inplace=True)
1078
+ (scale_activation): Sigmoid()
1079
+ )
1080
+ (3): Conv2dNormActivation(
1081
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1082
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1083
+ )
1084
+ )
1085
+ (stochastic_depth): StochasticDepth(p=0.12405063291139241, mode=row)
1086
+ )
1087
+ (3): MBConv(
1088
+ (block): Sequential(
1089
+ (0): Conv2dNormActivation(
1090
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1091
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1092
+ (2): SiLU(inplace=True)
1093
+ )
1094
+ (1): Conv2dNormActivation(
1095
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1096
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1097
+ (2): SiLU(inplace=True)
1098
+ )
1099
+ (2): SqueezeExcitation(
1100
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1101
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1102
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1103
+ (activation): SiLU(inplace=True)
1104
+ (scale_activation): Sigmoid()
1105
+ )
1106
+ (3): Conv2dNormActivation(
1107
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1108
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1109
+ )
1110
+ )
1111
+ (stochastic_depth): StochasticDepth(p=0.12658227848101267, mode=row)
1112
+ )
1113
+ (4): MBConv(
1114
+ (block): Sequential(
1115
+ (0): Conv2dNormActivation(
1116
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1117
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1118
+ (2): SiLU(inplace=True)
1119
+ )
1120
+ (1): Conv2dNormActivation(
1121
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1122
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1123
+ (2): SiLU(inplace=True)
1124
+ )
1125
+ (2): SqueezeExcitation(
1126
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1127
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1128
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1129
+ (activation): SiLU(inplace=True)
1130
+ (scale_activation): Sigmoid()
1131
+ )
1132
+ (3): Conv2dNormActivation(
1133
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1134
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1135
+ )
1136
+ )
1137
+ (stochastic_depth): StochasticDepth(p=0.12911392405063293, mode=row)
1138
+ )
1139
+ (5): MBConv(
1140
+ (block): Sequential(
1141
+ (0): Conv2dNormActivation(
1142
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1143
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1144
+ (2): SiLU(inplace=True)
1145
+ )
1146
+ (1): Conv2dNormActivation(
1147
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1148
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1149
+ (2): SiLU(inplace=True)
1150
+ )
1151
+ (2): SqueezeExcitation(
1152
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1153
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1154
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1155
+ (activation): SiLU(inplace=True)
1156
+ (scale_activation): Sigmoid()
1157
+ )
1158
+ (3): Conv2dNormActivation(
1159
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1160
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1161
+ )
1162
+ )
1163
+ (stochastic_depth): StochasticDepth(p=0.13164556962025317, mode=row)
1164
+ )
1165
+ (6): MBConv(
1166
+ (block): Sequential(
1167
+ (0): Conv2dNormActivation(
1168
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1169
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1170
+ (2): SiLU(inplace=True)
1171
+ )
1172
+ (1): Conv2dNormActivation(
1173
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1174
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1175
+ (2): SiLU(inplace=True)
1176
+ )
1177
+ (2): SqueezeExcitation(
1178
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1179
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1180
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1181
+ (activation): SiLU(inplace=True)
1182
+ (scale_activation): Sigmoid()
1183
+ )
1184
+ (3): Conv2dNormActivation(
1185
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1186
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1187
+ )
1188
+ )
1189
+ (stochastic_depth): StochasticDepth(p=0.13417721518987344, mode=row)
1190
+ )
1191
+ (7): MBConv(
1192
+ (block): Sequential(
1193
+ (0): Conv2dNormActivation(
1194
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1195
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1196
+ (2): SiLU(inplace=True)
1197
+ )
1198
+ (1): Conv2dNormActivation(
1199
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1200
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1201
+ (2): SiLU(inplace=True)
1202
+ )
1203
+ (2): SqueezeExcitation(
1204
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1205
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1206
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1207
+ (activation): SiLU(inplace=True)
1208
+ (scale_activation): Sigmoid()
1209
+ )
1210
+ (3): Conv2dNormActivation(
1211
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1212
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1213
+ )
1214
+ )
1215
+ (stochastic_depth): StochasticDepth(p=0.13670886075949368, mode=row)
1216
+ )
1217
+ (8): MBConv(
1218
+ (block): Sequential(
1219
+ (0): Conv2dNormActivation(
1220
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1221
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1222
+ (2): SiLU(inplace=True)
1223
+ )
1224
+ (1): Conv2dNormActivation(
1225
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1226
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1227
+ (2): SiLU(inplace=True)
1228
+ )
1229
+ (2): SqueezeExcitation(
1230
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1231
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1232
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1233
+ (activation): SiLU(inplace=True)
1234
+ (scale_activation): Sigmoid()
1235
+ )
1236
+ (3): Conv2dNormActivation(
1237
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1238
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1239
+ )
1240
+ )
1241
+ (stochastic_depth): StochasticDepth(p=0.13924050632911392, mode=row)
1242
+ )
1243
+ (9): MBConv(
1244
+ (block): Sequential(
1245
+ (0): Conv2dNormActivation(
1246
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1247
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1248
+ (2): SiLU(inplace=True)
1249
+ )
1250
+ (1): Conv2dNormActivation(
1251
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1252
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1253
+ (2): SiLU(inplace=True)
1254
+ )
1255
+ (2): SqueezeExcitation(
1256
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1257
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1258
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1259
+ (activation): SiLU(inplace=True)
1260
+ (scale_activation): Sigmoid()
1261
+ )
1262
+ (3): Conv2dNormActivation(
1263
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1264
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1265
+ )
1266
+ )
1267
+ (stochastic_depth): StochasticDepth(p=0.14177215189873418, mode=row)
1268
+ )
1269
+ (10): MBConv(
1270
+ (block): Sequential(
1271
+ (0): Conv2dNormActivation(
1272
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1273
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1274
+ (2): SiLU(inplace=True)
1275
+ )
1276
+ (1): Conv2dNormActivation(
1277
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1278
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1279
+ (2): SiLU(inplace=True)
1280
+ )
1281
+ (2): SqueezeExcitation(
1282
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1283
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1284
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1285
+ (activation): SiLU(inplace=True)
1286
+ (scale_activation): Sigmoid()
1287
+ )
1288
+ (3): Conv2dNormActivation(
1289
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1290
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1291
+ )
1292
+ )
1293
+ (stochastic_depth): StochasticDepth(p=0.14430379746835442, mode=row)
1294
+ )
1295
+ (11): MBConv(
1296
+ (block): Sequential(
1297
+ (0): Conv2dNormActivation(
1298
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1299
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1300
+ (2): SiLU(inplace=True)
1301
+ )
1302
+ (1): Conv2dNormActivation(
1303
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1304
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1305
+ (2): SiLU(inplace=True)
1306
+ )
1307
+ (2): SqueezeExcitation(
1308
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1309
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1310
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1311
+ (activation): SiLU(inplace=True)
1312
+ (scale_activation): Sigmoid()
1313
+ )
1314
+ (3): Conv2dNormActivation(
1315
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1316
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1317
+ )
1318
+ )
1319
+ (stochastic_depth): StochasticDepth(p=0.1468354430379747, mode=row)
1320
+ )
1321
+ (12): MBConv(
1322
+ (block): Sequential(
1323
+ (0): Conv2dNormActivation(
1324
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1325
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1326
+ (2): SiLU(inplace=True)
1327
+ )
1328
+ (1): Conv2dNormActivation(
1329
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1330
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1331
+ (2): SiLU(inplace=True)
1332
+ )
1333
+ (2): SqueezeExcitation(
1334
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1335
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1336
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1337
+ (activation): SiLU(inplace=True)
1338
+ (scale_activation): Sigmoid()
1339
+ )
1340
+ (3): Conv2dNormActivation(
1341
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1342
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1343
+ )
1344
+ )
1345
+ (stochastic_depth): StochasticDepth(p=0.14936708860759496, mode=row)
1346
+ )
1347
+ (13): MBConv(
1348
+ (block): Sequential(
1349
+ (0): Conv2dNormActivation(
1350
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1351
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1352
+ (2): SiLU(inplace=True)
1353
+ )
1354
+ (1): Conv2dNormActivation(
1355
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1356
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1357
+ (2): SiLU(inplace=True)
1358
+ )
1359
+ (2): SqueezeExcitation(
1360
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1361
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1362
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1363
+ (activation): SiLU(inplace=True)
1364
+ (scale_activation): Sigmoid()
1365
+ )
1366
+ (3): Conv2dNormActivation(
1367
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1368
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1369
+ )
1370
+ )
1371
+ (stochastic_depth): StochasticDepth(p=0.1518987341772152, mode=row)
1372
+ )
1373
+ (14): MBConv(
1374
+ (block): Sequential(
1375
+ (0): Conv2dNormActivation(
1376
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1377
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1378
+ (2): SiLU(inplace=True)
1379
+ )
1380
+ (1): Conv2dNormActivation(
1381
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1382
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1383
+ (2): SiLU(inplace=True)
1384
+ )
1385
+ (2): SqueezeExcitation(
1386
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1387
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1388
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1389
+ (activation): SiLU(inplace=True)
1390
+ (scale_activation): Sigmoid()
1391
+ )
1392
+ (3): Conv2dNormActivation(
1393
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1394
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1395
+ )
1396
+ )
1397
+ (stochastic_depth): StochasticDepth(p=0.15443037974683546, mode=row)
1398
+ )
1399
+ (15): MBConv(
1400
+ (block): Sequential(
1401
+ (0): Conv2dNormActivation(
1402
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1403
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1404
+ (2): SiLU(inplace=True)
1405
+ )
1406
+ (1): Conv2dNormActivation(
1407
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1408
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1409
+ (2): SiLU(inplace=True)
1410
+ )
1411
+ (2): SqueezeExcitation(
1412
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1413
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1414
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1415
+ (activation): SiLU(inplace=True)
1416
+ (scale_activation): Sigmoid()
1417
+ )
1418
+ (3): Conv2dNormActivation(
1419
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1420
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1421
+ )
1422
+ )
1423
+ (stochastic_depth): StochasticDepth(p=0.1569620253164557, mode=row)
1424
+ )
1425
+ (16): MBConv(
1426
+ (block): Sequential(
1427
+ (0): Conv2dNormActivation(
1428
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1429
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1430
+ (2): SiLU(inplace=True)
1431
+ )
1432
+ (1): Conv2dNormActivation(
1433
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1434
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1435
+ (2): SiLU(inplace=True)
1436
+ )
1437
+ (2): SqueezeExcitation(
1438
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1439
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1440
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1441
+ (activation): SiLU(inplace=True)
1442
+ (scale_activation): Sigmoid()
1443
+ )
1444
+ (3): Conv2dNormActivation(
1445
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1446
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1447
+ )
1448
+ )
1449
+ (stochastic_depth): StochasticDepth(p=0.15949367088607597, mode=row)
1450
+ )
1451
+ (17): MBConv(
1452
+ (block): Sequential(
1453
+ (0): Conv2dNormActivation(
1454
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1455
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1456
+ (2): SiLU(inplace=True)
1457
+ )
1458
+ (1): Conv2dNormActivation(
1459
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1460
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1461
+ (2): SiLU(inplace=True)
1462
+ )
1463
+ (2): SqueezeExcitation(
1464
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1465
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1466
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1467
+ (activation): SiLU(inplace=True)
1468
+ (scale_activation): Sigmoid()
1469
+ )
1470
+ (3): Conv2dNormActivation(
1471
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1472
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1473
+ )
1474
+ )
1475
+ (stochastic_depth): StochasticDepth(p=0.1620253164556962, mode=row)
1476
+ )
1477
+ (18): MBConv(
1478
+ (block): Sequential(
1479
+ (0): Conv2dNormActivation(
1480
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1481
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1482
+ (2): SiLU(inplace=True)
1483
+ )
1484
+ (1): Conv2dNormActivation(
1485
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1486
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1487
+ (2): SiLU(inplace=True)
1488
+ )
1489
+ (2): SqueezeExcitation(
1490
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1491
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1492
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1493
+ (activation): SiLU(inplace=True)
1494
+ (scale_activation): Sigmoid()
1495
+ )
1496
+ (3): Conv2dNormActivation(
1497
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1498
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1499
+ )
1500
+ )
1501
+ (stochastic_depth): StochasticDepth(p=0.16455696202531644, mode=row)
1502
+ )
1503
+ (19): MBConv(
1504
+ (block): Sequential(
1505
+ (0): Conv2dNormActivation(
1506
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1507
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1508
+ (2): SiLU(inplace=True)
1509
+ )
1510
+ (1): Conv2dNormActivation(
1511
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1512
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1513
+ (2): SiLU(inplace=True)
1514
+ )
1515
+ (2): SqueezeExcitation(
1516
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1517
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1518
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1519
+ (activation): SiLU(inplace=True)
1520
+ (scale_activation): Sigmoid()
1521
+ )
1522
+ (3): Conv2dNormActivation(
1523
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1524
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1525
+ )
1526
+ )
1527
+ (stochastic_depth): StochasticDepth(p=0.1670886075949367, mode=row)
1528
+ )
1529
+ (20): MBConv(
1530
+ (block): Sequential(
1531
+ (0): Conv2dNormActivation(
1532
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1533
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1534
+ (2): SiLU(inplace=True)
1535
+ )
1536
+ (1): Conv2dNormActivation(
1537
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1538
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1539
+ (2): SiLU(inplace=True)
1540
+ )
1541
+ (2): SqueezeExcitation(
1542
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1543
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1544
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1545
+ (activation): SiLU(inplace=True)
1546
+ (scale_activation): Sigmoid()
1547
+ )
1548
+ (3): Conv2dNormActivation(
1549
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1550
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1551
+ )
1552
+ )
1553
+ (stochastic_depth): StochasticDepth(p=0.16962025316455698, mode=row)
1554
+ )
1555
+ (21): MBConv(
1556
+ (block): Sequential(
1557
+ (0): Conv2dNormActivation(
1558
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1559
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1560
+ (2): SiLU(inplace=True)
1561
+ )
1562
+ (1): Conv2dNormActivation(
1563
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1564
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1565
+ (2): SiLU(inplace=True)
1566
+ )
1567
+ (2): SqueezeExcitation(
1568
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1569
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1570
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1571
+ (activation): SiLU(inplace=True)
1572
+ (scale_activation): Sigmoid()
1573
+ )
1574
+ (3): Conv2dNormActivation(
1575
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1576
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1577
+ )
1578
+ )
1579
+ (stochastic_depth): StochasticDepth(p=0.17215189873417724, mode=row)
1580
+ )
1581
+ (22): MBConv(
1582
+ (block): Sequential(
1583
+ (0): Conv2dNormActivation(
1584
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1585
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1586
+ (2): SiLU(inplace=True)
1587
+ )
1588
+ (1): Conv2dNormActivation(
1589
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1590
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1591
+ (2): SiLU(inplace=True)
1592
+ )
1593
+ (2): SqueezeExcitation(
1594
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1595
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1596
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1597
+ (activation): SiLU(inplace=True)
1598
+ (scale_activation): Sigmoid()
1599
+ )
1600
+ (3): Conv2dNormActivation(
1601
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1602
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1603
+ )
1604
+ )
1605
+ (stochastic_depth): StochasticDepth(p=0.17468354430379748, mode=row)
1606
+ )
1607
+ (23): MBConv(
1608
+ (block): Sequential(
1609
+ (0): Conv2dNormActivation(
1610
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1611
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1612
+ (2): SiLU(inplace=True)
1613
+ )
1614
+ (1): Conv2dNormActivation(
1615
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1616
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1617
+ (2): SiLU(inplace=True)
1618
+ )
1619
+ (2): SqueezeExcitation(
1620
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1621
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1622
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1623
+ (activation): SiLU(inplace=True)
1624
+ (scale_activation): Sigmoid()
1625
+ )
1626
+ (3): Conv2dNormActivation(
1627
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1628
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1629
+ )
1630
+ )
1631
+ (stochastic_depth): StochasticDepth(p=0.17721518987341772, mode=row)
1632
+ )
1633
+ (24): MBConv(
1634
+ (block): Sequential(
1635
+ (0): Conv2dNormActivation(
1636
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1637
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1638
+ (2): SiLU(inplace=True)
1639
+ )
1640
+ (1): Conv2dNormActivation(
1641
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1642
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1643
+ (2): SiLU(inplace=True)
1644
+ )
1645
+ (2): SqueezeExcitation(
1646
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1647
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1648
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1649
+ (activation): SiLU(inplace=True)
1650
+ (scale_activation): Sigmoid()
1651
+ )
1652
+ (3): Conv2dNormActivation(
1653
+ (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1654
+ (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1655
+ )
1656
+ )
1657
+ (stochastic_depth): StochasticDepth(p=0.179746835443038, mode=row)
1658
+ )
1659
+ )
1660
+ (7): Sequential(
1661
+ (0): MBConv(
1662
+ (block): Sequential(
1663
+ (0): Conv2dNormActivation(
1664
+ (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
1665
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1666
+ (2): SiLU(inplace=True)
1667
+ )
1668
+ (1): Conv2dNormActivation(
1669
+ (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
1670
+ (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1671
+ (2): SiLU(inplace=True)
1672
+ )
1673
+ (2): SqueezeExcitation(
1674
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1675
+ (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
1676
+ (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
1677
+ (activation): SiLU(inplace=True)
1678
+ (scale_activation): Sigmoid()
1679
+ )
1680
+ (3): Conv2dNormActivation(
1681
+ (0): Conv2d(2304, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1682
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1683
+ )
1684
+ )
1685
+ (stochastic_depth): StochasticDepth(p=0.18227848101265823, mode=row)
1686
+ )
1687
+ (1): MBConv(
1688
+ (block): Sequential(
1689
+ (0): Conv2dNormActivation(
1690
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1691
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1692
+ (2): SiLU(inplace=True)
1693
+ )
1694
+ (1): Conv2dNormActivation(
1695
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1696
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1697
+ (2): SiLU(inplace=True)
1698
+ )
1699
+ (2): SqueezeExcitation(
1700
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1701
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1702
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1703
+ (activation): SiLU(inplace=True)
1704
+ (scale_activation): Sigmoid()
1705
+ )
1706
+ (3): Conv2dNormActivation(
1707
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1708
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1709
+ )
1710
+ )
1711
+ (stochastic_depth): StochasticDepth(p=0.1848101265822785, mode=row)
1712
+ )
1713
+ (2): MBConv(
1714
+ (block): Sequential(
1715
+ (0): Conv2dNormActivation(
1716
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1717
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1718
+ (2): SiLU(inplace=True)
1719
+ )
1720
+ (1): Conv2dNormActivation(
1721
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1722
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1723
+ (2): SiLU(inplace=True)
1724
+ )
1725
+ (2): SqueezeExcitation(
1726
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1727
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1728
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1729
+ (activation): SiLU(inplace=True)
1730
+ (scale_activation): Sigmoid()
1731
+ )
1732
+ (3): Conv2dNormActivation(
1733
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1734
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1735
+ )
1736
+ )
1737
+ (stochastic_depth): StochasticDepth(p=0.18734177215189873, mode=row)
1738
+ )
1739
+ (3): MBConv(
1740
+ (block): Sequential(
1741
+ (0): Conv2dNormActivation(
1742
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1743
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1744
+ (2): SiLU(inplace=True)
1745
+ )
1746
+ (1): Conv2dNormActivation(
1747
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1748
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1749
+ (2): SiLU(inplace=True)
1750
+ )
1751
+ (2): SqueezeExcitation(
1752
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1753
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1754
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1755
+ (activation): SiLU(inplace=True)
1756
+ (scale_activation): Sigmoid()
1757
+ )
1758
+ (3): Conv2dNormActivation(
1759
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1760
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1761
+ )
1762
+ )
1763
+ (stochastic_depth): StochasticDepth(p=0.189873417721519, mode=row)
1764
+ )
1765
+ (4): MBConv(
1766
+ (block): Sequential(
1767
+ (0): Conv2dNormActivation(
1768
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1769
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1770
+ (2): SiLU(inplace=True)
1771
+ )
1772
+ (1): Conv2dNormActivation(
1773
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1774
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1775
+ (2): SiLU(inplace=True)
1776
+ )
1777
+ (2): SqueezeExcitation(
1778
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1779
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1780
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1781
+ (activation): SiLU(inplace=True)
1782
+ (scale_activation): Sigmoid()
1783
+ )
1784
+ (3): Conv2dNormActivation(
1785
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1786
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1787
+ )
1788
+ )
1789
+ (stochastic_depth): StochasticDepth(p=0.19240506329113927, mode=row)
1790
+ )
1791
+ (5): MBConv(
1792
+ (block): Sequential(
1793
+ (0): Conv2dNormActivation(
1794
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1795
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1796
+ (2): SiLU(inplace=True)
1797
+ )
1798
+ (1): Conv2dNormActivation(
1799
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1800
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1801
+ (2): SiLU(inplace=True)
1802
+ )
1803
+ (2): SqueezeExcitation(
1804
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1805
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1806
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1807
+ (activation): SiLU(inplace=True)
1808
+ (scale_activation): Sigmoid()
1809
+ )
1810
+ (3): Conv2dNormActivation(
1811
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1812
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1813
+ )
1814
+ )
1815
+ (stochastic_depth): StochasticDepth(p=0.1949367088607595, mode=row)
1816
+ )
1817
+ (6): MBConv(
1818
+ (block): Sequential(
1819
+ (0): Conv2dNormActivation(
1820
+ (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False)
1821
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1822
+ (2): SiLU(inplace=True)
1823
+ )
1824
+ (1): Conv2dNormActivation(
1825
+ (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False)
1826
+ (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1827
+ (2): SiLU(inplace=True)
1828
+ )
1829
+ (2): SqueezeExcitation(
1830
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1831
+ (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1))
1832
+ (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1))
1833
+ (activation): SiLU(inplace=True)
1834
+ (scale_activation): Sigmoid()
1835
+ )
1836
+ (3): Conv2dNormActivation(
1837
+ (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
1838
+ (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1839
+ )
1840
+ )
1841
+ (stochastic_depth): StochasticDepth(p=0.19746835443037977, mode=row)
1842
+ )
1843
+ )
1844
+ (8): Conv2dNormActivation(
1845
+ (0): Conv2d(640, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
1846
+ (1): BatchNorm2d(1280, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
1847
+ (2): SiLU(inplace=True)
1848
+ )
1849
+ )
1850
+ (avgpool): AdaptiveAvgPool2d(output_size=1)
1851
+ (classifier): Sequential(
1852
+ (0): Dropout(p=0.4, inplace=True)
1853
+ (1): Linear(in_features=1280, out_features=25, bias=True)
1854
+ )
1855
+ )
requirements.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.1
2
+ aiosignal==1.2.0
3
+ analytics-python==1.4.0
4
+ anyio==3.6.1
5
+ async-timeout==4.0.2
6
+ attrs==22.1.0
7
+ autopep8==1.6.0
8
+ backoff==1.10.0
9
+ bcrypt==3.2.2
10
+ certifi==2022.6.15
11
+ cffi==1.15.1
12
+ charset-normalizer==2.1.0
13
+ click==8.1.3
14
+ colorama==0.4.5
15
+ cryptography==37.0.4
16
+ cycler==0.11.0
17
+ fastapi==0.79.0
18
+ ffmpy==0.3.0
19
+ fonttools==4.34.4
20
+ frozenlist==1.3.1
21
+ fsspec==2022.7.1
22
+ grad-cam==1.4.2
23
+ gradio==3.1.4
24
+ h11==0.12.0
25
+ httpcore==0.15.0
26
+ httpx==0.23.0
27
+ idna==3.3
28
+ Jinja2==3.1.2
29
+ joblib==1.1.0
30
+ kiwisolver==1.4.4
31
+ linkify-it-py==1.0.3
32
+ markdown-it-py==2.1.0
33
+ MarkupSafe==2.1.1
34
+ matplotlib==3.5.2
35
+ mdit-py-plugins==0.3.0
36
+ mdurl==0.1.1
37
+ monotonic==1.6
38
+ multidict==6.0.2
39
+ numpy==1.23.1
40
+ opencv-python==4.6.0.66
41
+ orjson==3.7.11
42
+ packaging==21.3
43
+ pandas==1.4.3
44
+ paramiko==2.11.0
45
+ Pillow==9.2.0
46
+ pycodestyle==2.9.1
47
+ pycparser==2.21
48
+ pycryptodome==3.15.0
49
+ pydantic==1.9.1
50
+ pydub==0.25.1
51
+ PyNaCl==1.5.0
52
+ pyparsing==3.0.9
53
+ python-dateutil==2.8.2
54
+ python-multipart==0.0.5
55
+ pytz==2022.1
56
+ requests==2.28.1
57
+ rfc3986==1.5.0
58
+ scikit-learn==1.1.2
59
+ scipy==1.9.0
60
+ six==1.16.0
61
+ sniffio==1.2.0
62
+ starlette==0.19.1
63
+ threadpoolctl==3.1.0
64
+ toml==0.10.2
65
+ torch==1.12.1
66
+ torchaudio==0.12.1
67
+ torchvision==0.13.1
68
+ tqdm==4.64.0
69
+ ttach==0.0.3
70
+ typing_extensions==4.3.0
71
+ uc-micro-py==1.0.1
72
+ urllib3==1.26.11
73
+ uvicorn==0.18.2
74
+ yarl==1.8.1
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (148 Bytes). View file
utils/__pycache__/imshow.cpython-310.pyc ADDED
Binary file (752 Bytes). View file
utils/__pycache__/save_load.cpython-310.pyc ADDED
Binary file (549 Bytes). View file
utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (411 Bytes). View file
utils/imshow.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from matplotlib import pyplot as plt
2
+ import numpy as np
3
+ import torchvision
4
+
5
+
6
def imshow(dataloader, title=None):
    """Show the first batch yielded by ``dataloader`` as one image grid.

    The batch is tiled with ``torchvision.utils.make_grid``, rescaled with
    the per-channel constants below, clipped to ``[0, 1]`` and drawn with
    matplotlib.

    Args:
        dataloader: Iterable whose first item unpacks into ``(inputs, _)``;
            the second element is ignored.
        title: Optional figure title. Nothing is set when it is ``None``.
    """
    batch, _ = next(iter(dataloader))
    grid = torchvision.utils.make_grid(batch)
    # Reorder axes (0, 1, 2) -> (1, 2, 0) before handing the array to imshow.
    image = grid.numpy().transpose((1, 2, 0))
    # NOTE(review): these look like the usual ImageNet mean/std; confirm they
    # match the normalisation applied when the dataloader was built.
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    # Undo the normalisation (x * std + mean) and clamp into [0, 1].
    image = np.clip(channel_std * image + channel_mean, 0, 1)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.show()
    plt.pause(0.001)  # pause a bit so that plots are updated
utils/save_load.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+
4
+
5
def save_model(model, path='model_weights.pth'):
    """Write ``model.state_dict()`` to ``path`` using ``torch.save``.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        path: Destination file. Defaults to ``'model_weights.pth'``, the
            location this function always wrote to before the parameter
            was introduced, so existing callers are unaffected.

    Note:
        ``load_model`` in this module reads
        ``'./models/model_weights_27_styles.pth'`` by default, so a file
        written to the default path here is not picked up by it without
        being moved/renamed or passing matching paths.
    """
    torch.save(model.state_dict(), path)
7
+
8
+
9
def load_model(model, path='./models/model_weights_27_styles.pth'):
    """Load a saved ``state_dict`` from ``path`` into ``model`` in place.

    The checkpoint is read with ``map_location`` set to the CPU device.

    Args:
        model: Object exposing ``load_state_dict()`` (e.g. a
            ``torch.nn.Module``) whose parameters match the checkpoint.
        path: Checkpoint file to read. Defaults to
            ``'./models/model_weights_27_styles.pth'``, the location this
            function always read before the parameter was introduced.

    Returns:
        The value returned by ``model.load_state_dict(...)`` -- NOT the
        model itself. The model is updated in place.
    """
    # torch.load unpickles the file: only load checkpoints from trusted sources.
    state_dict = torch.load(path, map_location=torch.device('cpu'))
    return model.load_state_dict(state_dict)