Spaces:
Running
Running
from .clip import clip | |
from PIL import Image | |
import torch.nn as nn | |
# Maps each supported CLIP backbone name to the input width used for the
# linear head in CLIPModel (i.e. the size passed as nn.Linear's in_features).
CHANNELS = {
    "RN50": 1024,
    "ViT-L/14": 768,
}
class CLIPModel(nn.Module):
    """CLIP image encoder followed by a single linear head.

    The backbone is obtained from ``clip.load(name, device="cpu")`` and the
    head is an ``nn.Linear`` whose input width is ``CHANNELS[name]``.
    """

    def __init__(self, name, num_classes=1):
        """Build the backbone and the linear head.

        Args:
            name: Backbone identifier; must be a key of ``CHANNELS``.
            num_classes: Output width of the linear head.

        Raises:
            KeyError: If ``name`` is not a key of ``CHANNELS``. The check
                runs before the backbone is loaded, so an unsupported name
                fails immediately instead of after the model load.
        """
        # Resolve the head width first: it is a cheap dict lookup, whereas
        # clip.load() builds the whole backbone.
        try:
            feature_dim = CHANNELS[name]
        except KeyError:
            supported = ", ".join(sorted(CHANNELS))
            raise KeyError(
                f"Unsupported CLIP backbone {name!r}; expected one of: {supported}"
            ) from None

        super().__init__()
        # self.preprocess will not be used during training, which is handled
        # in the Dataset class.
        self.model, self.preprocess = clip.load(name, device="cpu")
        self.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, x, return_feature=False):
        """Encode ``x`` with the backbone and optionally apply the head.

        Args:
            x: Input forwarded unchanged to ``self.model.encode_image``
                (presumably a batch of preprocessed images -- confirm
                against the Dataset class).
            return_feature: When true, return the encoder output directly
                and skip the linear head.

        Returns:
            The encoder features if ``return_feature`` is true, otherwise
            ``self.fc`` applied to those features.
        """
        features = self.model.encode_image(x)
        if return_feature:
            return features
        return self.fc(features)