File size: 3,800 Bytes
49bceed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
import timm
import torch
import torch.nn as nn
from transformers import CLIPModel as CLIPTransformersModel
from utils import configs
from utils.functional import check_data_type_variable, get_device
class CLIPModel(nn.Module):
    """Image classifier built on the vision branch of a Hugging Face CLIP model.

    The forward pass runs the CLIP vision model, feeds its pooled output
    through CLIP's visual projection, and then through a linear
    classification head.

    Args:
        model_clip_name: Checkpoint name passed to ``from_pretrained``; it
            must equal ``configs.CLIP_NAME_MODEL``.
        freeze_model: When True, the CLIP parameters are excluded from
            gradient computation; the classification head stays trainable.
        pretrained_model: When False, layers exposing ``reset_parameters``
            are re-initialised after loading.
        num_classes: Number of target classes. Values 1 and 2 both produce
            a single output logit.

    Raises:
        ValueError: If ``model_clip_name`` is not ``configs.CLIP_NAME_MODEL``.
    """

    def __init__(
        self,
        model_clip_name: str,
        freeze_model: bool,
        pretrained_model: bool,
        num_classes: int,
    ):
        super().__init__()
        self.model_clip_name = model_clip_name
        self.freeze_model = freeze_model
        self.pretrained_model = pretrained_model
        self.num_classes = num_classes
        # Device selection is delegated to the project helper.
        self.device = get_device()
        self.check_arguments()
        self.init_model()

    @staticmethod
    def _set_frozen(module: nn.Module, freeze: bool) -> None:
        """Enable or disable gradient tracking for every parameter of *module*."""
        for param in module.parameters():
            param.requires_grad = not freeze

    @staticmethod
    def _reset_parameters(module: nn.Module) -> None:
        """Re-initialise every nested layer that exposes ``reset_parameters``."""
        # modules() walks the whole tree; children() would stop at the
        # top-level sub-models and leave their inner layers untouched.
        for layer in module.modules():
            if hasattr(layer, "reset_parameters"):
                layer.reset_parameters()

    def check_arguments(self):
        """Validate constructor arguments.

        Raises:
            ValueError: If the model name differs from
                ``configs.CLIP_NAME_MODEL``.
        """
        # NOTE(review): check_data_type_variable is a project helper;
        # presumably it raises on a type mismatch - confirm in utils.functional.
        check_data_type_variable(self.model_clip_name, str)
        check_data_type_variable(self.freeze_model, bool)
        check_data_type_variable(self.pretrained_model, bool)
        check_data_type_variable(self.num_classes, int)
        if self.model_clip_name != configs.CLIP_NAME_MODEL:
            raise ValueError(
                f"Model clip name must be {configs.CLIP_NAME_MODEL}, but it is {self.model_clip_name}"
            )

    def init_model(self):
        """Load CLIP, apply the reset/freeze options and build the head."""
        clip_model = CLIPTransformersModel.from_pretrained(self.model_clip_name)
        if not self.pretrained_model:
            # Parameters owned directly by a module without reset_parameters
            # are not touched by this step.
            self._reset_parameters(clip_model)
        self._set_frozen(clip_model, self.freeze_model)

        # Only the vision branch and its projection are kept on the module.
        self.vision_model = clip_model.vision_model.to(self.device)
        self.visual_projection = clip_model.visual_projection.to(self.device)

        # The head is created after freezing, so it is always trainable. Its
        # input width follows the projection layer instead of a fixed 512.
        self.classifier = nn.Linear(
            self.visual_projection.out_features,
            1 if self.num_classes in (1, 2) else self.num_classes,
        ).to(self.device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the classifier output for the image batch *x*."""
        x = self.vision_model(x)
        x = self.visual_projection(x.pooler_output)
        x = self.classifier(x)
        return x
class TorchModel(nn.Module):
    """Image classifier made of a timm backbone and a linear head.

    The backbone is created with ``num_classes=0`` and its output is fed to
    a separate linear classification head.

    Args:
        name_model: timm model name; it must be a key of
            ``configs.NAME_MODELS``.
        freeze_model: When True, the backbone parameters are excluded from
            gradient computation; the classification head stays trainable.
        pretrained_model: Forwarded to ``timm.create_model`` as
            ``pretrained``.
        num_classes: Number of target classes. Values 1 and 2 both produce
            a single output logit.

    Raises:
        ValueError: If ``name_model`` is not a key of ``configs.NAME_MODELS``.
    """

    def __init__(
        self,
        name_model: str,
        freeze_model: bool,
        pretrained_model: bool,
        num_classes: int,
    ):
        super().__init__()
        self.name_model = name_model
        self.freeze_model = freeze_model
        self.pretrained_model = pretrained_model
        self.num_classes = num_classes
        # Device selection is delegated to the project helper.
        self.device = get_device()
        self.check_arguments()
        self.init_model()

    @staticmethod
    def _set_frozen(module: nn.Module, freeze: bool) -> None:
        """Enable or disable gradient tracking for every parameter of *module*."""
        for param in module.parameters():
            param.requires_grad = not freeze

    def check_arguments(self):
        """Validate constructor arguments.

        Raises:
            ValueError: If the model name is not a key of
                ``configs.NAME_MODELS``.
        """
        # NOTE(review): check_data_type_variable is a project helper;
        # presumably it raises on a type mismatch - confirm in utils.functional.
        check_data_type_variable(self.name_model, str)
        check_data_type_variable(self.freeze_model, bool)
        check_data_type_variable(self.pretrained_model, bool)
        check_data_type_variable(self.num_classes, int)
        if self.name_model not in tuple(configs.NAME_MODELS.keys()):
            raise ValueError(
                f"Name model must be in {tuple(configs.NAME_MODELS.keys())}, but it is {self.name_model}"
            )

    def init_model(self):
        """Create the backbone, apply the freeze option and build the head."""
        self.model = timm.create_model(
            self.name_model, pretrained=self.pretrained_model, num_classes=0
        ).to(self.device)
        self._set_frozen(self.model, self.freeze_model)

        # The head is created after freezing, so it is always trainable.
        self.classifier = nn.Linear(
            self.model.num_features,
            1 if self.num_classes in (1, 2) else self.num_classes,
        ).to(self.device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the classifier output for the image batch *x*."""
        x = self.model(x)
        x = self.classifier(x)
        return x
|