Spaces:
Sleeping
Sleeping
File size: 2,102 Bytes
cefcefa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import torch
import clip
from PIL import Image
from typing import List, Union
from . import mlp
# Numeric rating assigned to each textual quality label.
# 'worst' and 'horrible' both map to the lowest rating.
QUALITY_TO_RATING = {
    'amazing': 10,
    'best': 8.5,
    'high': 7,
    'normal': 5,
    'low': 2.5,
    'worst': 0,
    'horrible': 0,
}
# Registry mapping a model-type name to the class that implements it.
MODEL_TYPE = {
    'mlp': mlp.MLP,
    'res_mlp': mlp.ResMLP,
}
def quality_rating(img_info):
    """Return the numeric rating for the quality label of ``img_info``.

    The label is read from ``img_info.caption.quality``. When the caption is
    ``None`` or its quality is empty/falsy, the label ``'normal'`` is used.

    Raises:
        KeyError: if the label is not a key of ``QUALITY_TO_RATING``.
    """
    caption = img_info.caption
    if caption is None:
        return QUALITY_TO_RATING['normal']
    label = caption.quality or 'normal'
    return QUALITY_TO_RATING[label]
def get_model_cls(model_type) -> type:
    """Return the model class registered under ``model_type``.

    The lookup is done in ``MODEL_TYPE``. Any name that is not registered
    (including ``None``) falls back to ``mlp.MLP``, so this function always
    returns a class object and never ``None``.
    """
    return MODEL_TYPE.get(model_type, mlp.MLP)
def load_clip_models(name: str = "ViT-L/14", device='cuda'):
    """Load a CLIP model and its preprocessing callable via ``clip.load``.

    Args:
        name: model name forwarded to ``clip.load`` (the original code
            mentions ``RN50x64`` as another candidate name).
        device: device forwarded to ``clip.load``.

    Returns:
        The ``(model, preprocess)`` pair produced by ``clip.load``.
    """
    clip_model, clip_preprocess = clip.load(name, device=device)
    return clip_model, clip_preprocess
def load_model(model_path: str = None, model_type=None, input_size=768, batch_norm: bool = True, device: str = 'cuda', dtype=None):
    """Build a scoring model and optionally load its weights.

    Args:
        model_path: path to a saved state dict; when empty or ``None`` the
            model keeps its freshly initialised weights.
        model_type: name used to pick the model class via ``get_model_cls``.
        input_size: first positional argument passed to the model class.
        batch_norm: forwarded to the model class as ``batch_norm`` for the
            ``'large'`` / ``'res_large'`` model types.
        device: device the weights are mapped to and the model is moved to.
        dtype: optional dtype to cast the model to after moving it.

    Returns:
        The constructed model on ``device`` (and in ``dtype`` if given).

    Raises:
        RuntimeError: re-raised from ``load_state_dict`` when the checkpoint
            does not match the selected model class.
    """
    model_cls = get_model_cls(model_type)
    print(f"Loading model from class `{model_cls}`...")

    model_kwargs = {}
    # NOTE(review): 'large' / 'res_large' are not keys of MODEL_TYPE as
    # visible in this file, so they resolve to the default class — confirm
    # that this is the intended set of names.
    if model_type in ('large', 'res_large'):
        # Honour the caller's `batch_norm` argument instead of a hard-coded True.
        model_kwargs['batch_norm'] = batch_norm
    model = model_cls(input_size, **model_kwargs)

    if model_path:
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = torch.load(model_path, map_location=device)
        # Only the state-dict load is guarded, so file/IO errors from
        # torch.load are not misreported as a model type mismatch.
        try:
            model.load_state_dict(state_dict)
        except RuntimeError:
            print(f"Model type mismatch. Desired model type: `{model_type}` (model class: `{model_cls}`).")
            raise

    model = model.to(device)
    if dtype is not None:
        model = model.to(dtype=dtype)
    return model
def normalized(a: torch.Tensor, order=2, dim=-1):
    """Return ``a`` scaled to unit ``order``-norm along ``dim``.

    Slices whose norm is exactly zero are divided by 1 instead, so they are
    returned unchanged rather than producing NaN/inf.

    Args:
        a: input tensor.
        order: norm order passed to ``Tensor.norm`` (2 by default).
        dim: dimension along which the norm is computed.

    Returns:
        A new tensor with the same shape as ``a``; ``a`` is not modified.
    """
    norms = a.norm(order, dim, keepdim=True)
    # Out-of-place replacement keeps the function usable under autograd:
    # an in-place write to the norm result breaks its backward pass.
    safe_norms = norms.masked_fill(norms == 0, 1)
    return a / safe_norms
@torch.no_grad()
def encode_images(images: List[Image.Image], model2, preprocess, device='cuda') -> torch.Tensor:
    """Encode images into normalized feature vectors.

    Runs with gradient tracking disabled.

    Args:
        images: a list of PIL images; a single PIL image is also accepted
            and is treated as a one-element list.
        model2: object exposing ``encode_image(batch)`` (presumably the CLIP
            model returned by ``load_clip_models`` — confirm against callers).
        preprocess: callable applied to each image; its result must support
            ``unsqueeze(0)``.
        device: device the stacked batch is moved to before encoding.

    Returns:
        The encoded features, normalized with ``normalized``, moved to the
        CPU and converted to float32.
    """
    if isinstance(images, Image.Image):
        images = [images]

    # Give every preprocessed image a leading batch axis, then concatenate.
    per_image = []
    for picture in images:
        per_image.append(preprocess(picture).unsqueeze(0))
    batch = torch.cat(per_image).to(device)

    features = model2.encode_image(batch)
    return normalized(features).cpu().float()
|