EfficientNet( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Sequential( (0): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (stochastic_depth): StochasticDepth(p=0.002531645569620253, mode=row) ) (2): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (stochastic_depth): StochasticDepth(p=0.005063291139240506, mode=row) ) (3): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (stochastic_depth): StochasticDepth(p=0.007594936708860761, mode=row) ) ) (2): Sequential( (0): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.010126582278481013, mode=row) ) (1): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.012658227848101266, mode=row) ) (2): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.015189873417721522, mode=row) ) (3): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.017721518987341773, mode=row) ) (4): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.020253164556962026, mode=row) ) (5): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.02278481012658228, mode=row) ) (6): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.02531645569620253, mode=row) ) ) (3): Sequential( (0): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(64, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(256, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.027848101265822787, mode=row) ) (1): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.030379746835443044, mode=row) ) (2): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.03291139240506329, mode=row) ) (3): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.035443037974683546, mode=row) ) (4): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0379746835443038, mode=row) ) (5): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.04050632911392405, mode=row) ) (6): FusedMBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.04303797468354431, mode=row) ) ) (4): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(384, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.04556962025316456, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.04810126582278482, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05063291139240506, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.053164556962025315, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.055696202531645575, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05822784810126583, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06075949367088609, mode=row) ) (7): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06329113924050633, mode=row) ) (8): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06582278481012659, mode=row) ) (9): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False) (1): BatchNorm2d(768, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06835443037974684, mode=row) ) ) (5): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1152, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False) (1): BatchNorm2d(1152, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1152, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.07088607594936709, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.07341772151898734, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0759493670886076, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.07848101265822785, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0810126582278481, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08354430379746836, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08607594936708862, mode=row) ) (7): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08860759493670886, mode=row) ) (8): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09113924050632911, mode=row) ) (9): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09367088607594937, mode=row) ) (10): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09620253164556963, mode=row) ) (11): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09873417721518989, mode=row) ) (12): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10126582278481013, mode=row) ) (13): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10379746835443039, mode=row) ) (14): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10632911392405063, mode=row) ) (15): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10886075949367088, mode=row) ) (16): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11139240506329115, mode=row) ) (17): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11392405063291139, mode=row) ) (18): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 224, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11645569620253166, mode=row) ) ) (6): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(224, 1344, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1344, 1344, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=1344, bias=False) (1): BatchNorm2d(1344, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1344, 56, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(56, 1344, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1344, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11898734177215191, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.12151898734177217, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.12405063291139241, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.12658227848101267, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.12911392405063293, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.13164556962025317, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.13417721518987344, mode=row) ) (7): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.13670886075949368, mode=row) ) (8): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.13924050632911392, mode=row) ) (9): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.14177215189873418, mode=row) ) (10): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.14430379746835442, mode=row) ) (11): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1468354430379747, mode=row) ) (12): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.14936708860759496, mode=row) ) (13): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1518987341772152, mode=row) ) (14): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15443037974683546, mode=row) ) (15): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1569620253164557, mode=row) ) (16): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15949367088607597, mode=row) ) (17): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1620253164556962, mode=row) ) (18): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.16455696202531644, mode=row) ) (19): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1670886075949367, mode=row) ) (20): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.16962025316455698, mode=row) ) (21): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.17215189873417724, mode=row) ) (22): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.17468354430379748, mode=row) ) (23): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.17721518987341772, mode=row) ) (24): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.179746835443038, mode=row) ) ) (7): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False) (1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2304, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.18227848101265823, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1848101265822785, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.18734177215189873, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.189873417721519, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.19240506329113927, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1949367088607595, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(640, 3840, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(3840, 3840, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3840, bias=False) (1): BatchNorm2d(3840, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(3840, 160, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(160, 3840, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(3840, 640, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(640, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.19746835443037977, mode=row) ) ) (8): Conv2dNormActivation( (0): Conv2d(640, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1280, eps=0.001, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Sequential( (0): Dropout(p=0.4, inplace=True) (1): Linear(in_features=1280, out_features=25, bias=True) ) )