# Mask R-CNN configuration whose backbone is a ViT wrapped in a
# SimpleFeaturePyramid. Starts from the `model` defined in `.mask_rcnn_fpn`
# and overrides selected fields in place.
from functools import partial

import torch.nn as nn

from detectron2.config import LazyCall as L
from detectron2.modeling import ViT, SimpleFeaturePyramid
from detectron2.modeling.backbone.fpn import LastLevelMaxPool

from .mask_rcnn_fpn import model
from ..data.constants import constants

# Input normalization: RGB channel order with the ImageNet mean/std constants
# (the `rgb256` names suggest a 0-255 pixel scale -- confirm in `constants`).
model.pixel_mean = constants.imagenet_rgb256_mean
model.pixel_std = constants.imagenet_rgb256_std
model.input_format = "RGB"

# ViT hyper-parameters: embedding width, number of blocks, attention heads,
# and drop-path rate. Kept as module-level names, as in the original.
embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1

# Replace the backbone with a feature pyramid built on top of a ViT.
model.backbone = L(SimpleFeaturePyramid)(
    net=L(ViT)(
        img_size=1024,
        patch_size=16,
        embed_dim=embed_dim,
        depth=depth,
        num_heads=num_heads,
        drop_path_rate=dp,
        window_size=14,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        # Of the `depth` = 12 blocks, indexes 2, 5, 8 and 11 are left out of
        # this list. NOTE(review): presumably the omitted blocks use global
        # (non-windowed) attention -- confirm against the ViT documentation.
        window_block_indexes=[
            0,
            1,
            3,
            4,
            6,
            7,
            9,
            10,
        ],
        residual_block_indexes=[],
        use_rel_pos=True,
        out_feature="last_feat",
    ),
    # Relative interpolation: reuses the `out_feature` value set on `net` above
    # so the two settings cannot drift apart.
    in_feature="${.net.out_feature}",
    out_channels=256,
    scale_factors=(4.0, 2.0, 1.0, 0.5),
    top_block=L(LastLevelMaxPool)(),
    norm="LN",
    square_pad=1024,
)

# Use "LN" as the conv norm in both the box head and the mask head.
model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN"

# Two conv entries in the proposal generator head.
# NOTE(review): -1 presumably means "match the input channel count" -- confirm
# against the RPN head implementation.
model.proposal_generator.head.conv_dims = [-1, -1]

# Box head: four conv entries of 256 followed by a single fc entry of 1024.
model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
model.roi_heads.box_head.fc_dims = [1024]