virtex-redcaps / virtex /hubconf.py
zamborg's picture
added datasets and virtex
a5f8a35
raw
history blame
1.23 kB
# Package names torch.hub checks for before exposing the entry points defined
# in this file. ``torchvision`` is listed because this module imports it
# directly and ``resnet50`` is built from ``torchvision.models``.
dependencies = ["torch", "torchvision"]
import torch
import torchvision
def resnet50(pretrained: bool = False, **kwargs):
    r"""
    ResNet-50 visual backbone from the best performing VirTex model: pretrained
    for bicaptioning on COCO Captions, with textual head ``L = 1, H = 2048``.

    This is a torchvision-like model, with the last ``avgpool`` and ``fc``
    modules replaced with ``nn.Identity()`` modules. Given a batch of image
    tensors with size ``(B, 3, 224, 224)``, this model is intended to compute
    spatial image features of size ``(B, 7, 7, 2048)``, where B = batch size.

    NOTE(review): the output layout depends on the ``forward`` implementation
    of the installed torchvision ResNet; if it flattens after ``avgpool`` the
    result is a ``(B, 7 * 7 * 2048)`` tensor instead -- confirm before relying
    on the 4-D shape.

    Args:
        pretrained (bool): Whether to load model with pretrained weights.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``torchvision.models.resnet50``.

    Returns:
        The torchvision ResNet-50 module with ``avgpool`` and ``fc`` replaced
        by identity modules, optionally initialized from the VirTex checkpoint.
    """
    # Create a torchvision resnet50 with randomly initialized weights; the
    # VirTex checkpoint, if requested, is loaded explicitly below.
    model = torchvision.models.resnet50(pretrained=False, **kwargs)

    # Replace global average pooling and fully connected layers with identity
    # modules. This must happen before loading the checkpoint so the model's
    # expected state-dict keys no longer include the ``fc`` parameters.
    model.avgpool = torch.nn.Identity()
    model.fc = torch.nn.Identity()

    if pretrained:
        checkpoint = torch.hub.load_state_dict_from_url(
            "https://umich.box.com/shared/static/gsjqm4i4fm1wpzi947h27wweljd8gcpy.pth",
            progress=False,
            # Deserialize tensors onto CPU so loading does not require the
            # device the checkpoint was saved from (e.g. a CUDA GPU).
            # ``load_state_dict`` copies values into the model's own
            # parameters, so the resulting weights are unaffected.
            map_location="cpu",
        )
        # The checkpoint file wraps the backbone weights under the "model" key.
        model.load_state_dict(checkpoint["model"])

    return model