# model_part5_deploy.py # 01. Import Packages {{{ import sys sys.path.append('.') import os import time import torch import torch.nn as nn from torch.utils.data import DataLoader, Dataset import timm from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform import pandas as pd import numpy as np from PIL import Image from glob import glob import gc import albumentations as A import gradio as gr #}}} # Gradio wrap {{{ title = "🐱用AI给小可爱们打个分🐶" description = """

这是一个用AI来判断你小可爱照片有多受网友欢迎的小工具。
作者希望能帮助动物救助组织给流浪的猫猫狗狗们更快找到一个温暖的家🤗
因为输入图片总共要跑4个算法,推导出综合分数,所以有点慢😥,大家请担待👐🏼
此模型使用已被打分的喵/汪照片进行训练.所以目前只支持喵星人和汪星人打分😸🐶
其他小动物不能保证分数准确度😓

""" article = "了解更多,请联系作者.微信:Roy_Ma_US." def score(input_img): thefile = input_img # 02. Model constants {{{ class Config: im_size = 224 batch_size = 1 num_workers = 0 # }}} # 03. Define Dataset {{{ class PetDataset(Dataset): def __init__(self, image_filepaths, targets, transform=None): self.image_filepaths = image_filepaths self.targets = targets self.transform = transform def __len__(self): return len(self.image_filepaths) def __getitem__(self, idx): image_filepath = self.image_filepaths with open(image_filepath, 'rb') as f: image = Image.open(f) image_rgb = image.convert('RGB') # convert image to RGB image = np.array(image_rgb) # convert image to ndarray; current shape (H, W, C) if self.transform is not None: image = self.transform(image = image)["image"] image = image / 255 # normalize to [0, 1] image = np.transpose(image, (2, 0, 1)).astype(np.float32) # from HWC to CHW target = self.targets image = torch.tensor(image, dtype = torch.float) target = torch.tensor(target, dtype = torch.float) return image, target def inference_fixed_transforms(mode=0, dim = 224): if mode == 0: # keep original aspects, colors and angles return A.Compose([ A.SmallestMaxSize(max_size=dim, p=1.0), # scale image with small edge = dim(224), with 100% probability A.CenterCrop(height=dim, width=dim, p=1.0), # crop out a 224*224 area of the center, with 100% probability ], p=1.0) elif mode == 1: # enlarge and flip return A.Compose([ A.SmallestMaxSize(max_size=dim+16, p=1.0), A.CenterCrop(height=dim, width=dim, p=1.0), A.HorizontalFlip(p = 1.0) ], p=1.0) # }}} # 04. Define model class {{{ class PetNet(nn.Module): def __init__(self, model_name, out_features = 1, inp_channels = 3, pretrained = False): # able to load weights into the layers super().__init__() # inherit from nn.Module self.model = timm.create_model(model_name, pretrained=False, in_chans=3, num_classes = 1) # create timm model with same init parameters def forward(self, image): output = self.model(image) # forward pass return output # we have 1 out_feature here so the output is (batch_size, 1) def tta_fn(filepaths, model, im_size, ttas=[0, 1]): # tta = Test Time Augmentation. Apply augs to testsets, inference and return an emsemble result print('Image Size:', im_size) model.eval() # set to eval mode tta_preds = [] # create a list to store predictions for tta_mode in ttas: # switch between 0 and 1 tta mode. current only mode 1 is passed into func print(f'tta mode:{tta_mode}') test_dataset = PetDataset(image_filepaths = filepaths, # full file path to 40 test images targets = np.zeros(1), # create targets for testset, which is all 0 transform = inference_fixed_transforms(tta_mode, dim = im_size) # mode 1 transf is applied ) test_loader = DataLoader(test_dataset, # just using torch's default dataloader batch_size = Config.batch_size, shuffle = False, num_workers = Config.num_workers, pin_memory = True ) #stream = tqdm(test_loader) tta_pred = [] for images, target in test_loader: # DataLoader loads batch_size # of images and targets at a time images = images.float() # non_blocking may help with bottle neck if training include asynchronous data transfer target = target.float().view(-1, 1) # -1 in view() is a placeholder. view(unkownn, 1), will always give the dims args passed to view() with torch.no_grad(): # disable gradients. Note above model.eval() output = model(images) # make raw prediction in inference mode pred = (torch.sigmoid(output).detach().numpy() * 100).ravel().tolist() # detach creates a copy w/ no_grad; ravel = flatten; pred=16 values numpy list tta_pred.extend(pred) # simliar to append, but also works with iterables, adds all element of the iterable break tta_preds.append(np.array(tta_pred)) # shape(40, 1) fold_preds = tta_preds[0] # take only the 40 values, like flattening the list for n in range(1, len(tta_preds)): fold_preds += tta_preds[n] fold_preds /= len(tta_preds) del test_loader, test_dataset gc.collect() return fold_preds # }}} # 05. Inference 1 {{{ test_preds = [] test_preds_model = [] test_preds_fold = [] model = PetNet(model_name = 'swin_large_patch4_window7_224', out_features = 1, inp_channels = 3, pretrained=False) model.load_state_dict(torch.load('swin_large_patch4_window7_224_fold0_half.pth', map_location=torch.device('cpu'))) model = model.float() model.eval() test_preds_fold = tta_fn(thefile, model, Config.im_size, [1]) test_preds_model.append(test_preds_fold) final_predictions53 = np.mean(np.array(test_preds_model), axis=0) # }}} # 06. Inference 2 {{{ test_preds = [] test_preds_model = [] test_preds_fold = [] model = PetNet(model_name = 'beit_large_patch16_224', out_features = 1, inp_channels = 3, pretrained=False) model.load_state_dict(torch.load('beit_large_patch16_224_fold0_half.pth', map_location=torch.device('cpu'))) model = model.float() model.eval() test_preds_fold = tta_fn(thefile, model, Config.im_size, [0]) test_preds_model.append(test_preds_fold) final_predictions55 = np.mean(np.array(test_preds_model), axis=0) # }}} # 07. Inference 3 {{{ test_preds = [] test_preds_model = [] test_preds_fold = [] model = PetNet(model_name = 'swin_large_patch4_window12_384_in22k', out_features = 1, inp_channels = 3, pretrained=False) model.load_state_dict(torch.load('swin_large_patch4_window12_384_in22k_fold0_half.pth', map_location=torch.device('cpu'))) model = model.float() model.eval() test_preds_fold = tta_fn(thefile, model, 384, [0]) test_preds_model.append(test_preds_fold) final_predictions66 = np.mean(np.array(test_preds_model), axis=0) #}}} # 08. Inference 4 {{{ class PetNet_exp77(nn.Module): def __init__(self, model_name, out_features = 1, inp_channels = 3, pretrained = False): super().__init__() NC = 1000 self.model = timm.create_model(model_name, pretrained=False) self.dropout = nn.Dropout(0.05) self.head = nn.Linear(NC, 1) def forward(self, image): output = self.model(image) output = self.dropout(output) output = self.head(output) return output test_preds = [] test_preds_model = [] test_preds_fold = [] model = PetNet_exp77(model_name = 'beit_large_patch16_224', out_features = 1, inp_channels = 3, pretrained=False) model.load_state_dict(torch.load('beit_large_patch16_224_fold1_half.pth', map_location=torch.device('cpu'))) model = model.float() model.eval() test_preds_fold = tta_fn(thefile, model, Config.im_size, [0]) test_preds_model.append(test_preds_fold) final_predictions77 = np.mean(np.array(test_preds_model), axis=0) #}}} # 09. Final predicted scores {{{ final_predictions = (3*final_predictions53 + 4*final_predictions55 + 3*final_predictions66 + 4*final_predictions77 ) / (3+4+3+4) # take the mean of all predictions boosted = (final_predictions + 20).round(2) boosted = boosted[0] #}}} if boosted > 80: boosted = str(boosted) return boosted+"分! 美照!🥰" else: boosted = str(boosted) return "大数据说"+boosted+"分. 多拍几张试试?🤗" iface = gr.Interface(fn=score, inputs=gr.inputs.Image(label="给哪位小可爱打分?😉", type='filepath'), # input_img.shape: h, w, c, if type=numpy outputs=gr.outputs.Textbox(label="得分是...✨", type='str'), allow_flagging="never", title = title, description = description, article = article, ) iface.launch() #}}}