# pet_score / app.py
# Roll20's picture
# app.py
# b258ef2
# model_part5_deploy.py
# 01. Import Packages {{{
import sys
sys.path.append('.')
import os
import time
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import pandas as pd
import numpy as np
from PIL import Image
from glob import glob
import gc
import albumentations as A
import gradio as gr
#}}}
# Gradio wrap {{{
# Static UI text handed to gr.Interface further down in this file.
# The strings are user-facing (Chinese) and are emitted verbatim at runtime.

# Page title; roughly "Let AI score your little cuties".
title = "🐱用AI给小可爱们打个分🐶"
# HTML shown under the title. In English, approximately:
#   - this is a small tool that uses AI to judge how popular a pet photo
#     would be with people online;
#   - the author hopes it helps animal-rescue organisations find homes for
#     stray cats and dogs faster;
#   - each input image is run through 4 algorithms to derive a combined
#     score, so it is a bit slow;
#   - the model was trained on already-scored cat/dog photos, so only cats
#     and dogs are supported; accuracy for other animals is not guaranteed.
description = """
<center>
<p style="text-align:center">这是一个用AI来判断你小可爱照片有多受网友欢迎的小工具。<br>
作者希望能帮助动物救助组织给流浪的猫猫狗狗们更快找到一个温暖的家🤗<br>
因为输入图片总共要跑4个算法,推导出综合分数,所以有点慢😥,大家请担待👐🏼<br>
此模型使用已被打分的喵/汪照片进行训练.所以目前只支持喵星人和汪星人打分😸🐶<br>
其他小动物不能保证分数准确度😓
</center>
"""
# Footer text; roughly "To learn more, contact the author. WeChat: Roy_Ma_US."
article = "了解更多,请联系作者.微信:Roy_Ma_US."
# 02. Model constants {{{
class Config:
    """Inference-time constants shared by the data loader and the ensemble."""

    im_size = 224    # default square input resolution passed to the transforms
    batch_size = 1   # images per forward pass
    num_workers = 0  # 0 => DataLoader loads images in the calling process
# }}}


# 03. Define Dataset {{{
def _as_path_list(filepaths):
    """Return *filepaths* as a list, wrapping a single path in a one-item list."""
    if isinstance(filepaths, (str, bytes, os.PathLike)):
        return [filepaths]
    return list(filepaths)


class PetDataset(Dataset):
    """Load image files as float32 CHW tensors scaled to [0, 1].

    Args:
        image_filepaths: One path (str / bytes / os.PathLike) or a sequence of
            paths. A single path is treated as a one-image dataset, so the
            dataset length is the number of images, not the number of
            characters in the path.
        targets: Indexable holding one target per image, aligned with the
            paths.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=array)["image"]`` on the HWC uint8 array.
    """

    def __init__(self, image_filepaths, targets, transform=None):
        self.image_filepaths = _as_path_list(image_filepaths)
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for image number *idx*."""
        with open(self.image_filepaths[idx], 'rb') as f:
            # Decode while the file is still open: PIL reads pixel data lazily.
            image = np.array(Image.open(f).convert('RGB'))  # (H, W, C)
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        image = image / 255  # scale pixel values to [0, 1]
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)  # HWC -> CHW
        image = torch.tensor(image, dtype=torch.float)
        target = torch.tensor(self.targets[idx], dtype=torch.float)
        return image, target


def inference_fixed_transforms(mode=0, dim=224):
    """Build the deterministic albumentations pipeline for one TTA mode.

    Args:
        mode: 0 resizes the shorter edge to ``dim`` and center-crops
            ``dim x dim``; 1 resizes the shorter edge to ``dim + 16``,
            center-crops ``dim x dim`` and flips horizontally.
        dim: Side length of the square crop.

    Returns:
        An ``A.Compose`` whose steps are all applied with probability 1.

    Raises:
        ValueError: If ``mode`` is neither 0 nor 1 (previously this fell
            through and returned ``None``, silently disabling the resize).
    """
    if mode not in (0, 1):
        raise ValueError(f"unsupported TTA mode: {mode!r} (expected 0 or 1)")
    enlarge = 16 if mode == 1 else 0
    steps = [
        A.SmallestMaxSize(max_size=dim + enlarge, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    if mode == 1:
        steps.append(A.HorizontalFlip(p=1.0))
    return A.Compose(steps, p=1.0)
# }}}


# 04. Define model classes {{{
class PetNet(nn.Module):
    """timm backbone whose classifier directly emits ``out_features`` logits.

    The backbone is stored as ``self.model``; checkpoint keys depend on that
    attribute name, so it must not be renamed.
    """

    def __init__(self, model_name, out_features=1, inp_channels=3, pretrained=False):
        super().__init__()
        # The constructor arguments are forwarded instead of being hard-coded;
        # with the defaults this builds exactly the same network as before.
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )

    def forward(self, image):
        return self.model(image)  # (batch_size, out_features)


class PetNet_exp77(nn.Module):
    """timm backbone with its stock classifier, followed by dropout and a linear head.

    Attribute names ``model`` / ``dropout`` / ``head`` are part of the
    checkpoint layout and must not be renamed.
    """

    def __init__(self, model_name, out_features=1, inp_channels=3, pretrained=False):
        super().__init__()
        # Width of the backbone output that feeds the head; it has to match
        # the head weights stored in the checkpoint.
        NC = 1000
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        self.dropout = nn.Dropout(0.05)
        self.head = nn.Linear(NC, out_features)

    def forward(self, image):
        features = self.model(image)
        return self.head(self.dropout(features))


def tta_fn(filepaths, model, im_size, ttas=(0, 1)):
    """Predict scores with test-time augmentation (TTA) and average them.

    Args:
        filepaths: One image path or a sequence of image paths.
        model: Module mapping a float image batch to one logit per image.
        im_size: Side length of the square model input.
        ttas: Iterable of TTA modes understood by
            ``inference_fixed_transforms``.

    Returns:
        1-D float ndarray with one value per image:
        ``sigmoid(logit) * 100`` averaged over the requested TTA modes.

    Raises:
        ValueError: If ``ttas`` is empty.
    """
    ttas = list(ttas)
    if not ttas:
        raise ValueError("ttas must contain at least one TTA mode")
    paths = _as_path_list(filepaths)

    print('Image Size:', im_size)
    model.eval()
    tta_preds = []
    for tta_mode in ttas:
        print(f'tta mode:{tta_mode}')
        test_dataset = PetDataset(
            image_filepaths=paths,
            targets=np.zeros(len(paths)),  # dummy targets; inference only
            transform=inference_fixed_transforms(tta_mode, dim=im_size),
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=Config.batch_size,
            shuffle=False,
            num_workers=Config.num_workers,
            # Batches are never moved to another device here, so pinned
            # host memory would bring no benefit.
            pin_memory=False,
        )
        tta_pred = []
        for images, _ in test_loader:
            with torch.no_grad():
                output = model(images.float())
            tta_pred.extend((torch.sigmoid(output).detach().numpy() * 100).ravel().tolist())
        tta_preds.append(np.array(tta_pred))
        del test_loader, test_dataset

    fold_preds = np.mean(tta_preds, axis=0)
    gc.collect()
    return fold_preds
# }}}


# 05-08. Ensemble members {{{
# Each entry: (model class, timm architecture, checkpoint file, input size,
#              TTA modes, ensemble weight). Checkpoint paths are relative to
#              the current working directory.
_ENSEMBLE = (
    (PetNet, 'swin_large_patch4_window7_224',
     'swin_large_patch4_window7_224_fold0_half.pth', Config.im_size, (1,), 3),
    (PetNet, 'beit_large_patch16_224',
     'beit_large_patch16_224_fold0_half.pth', Config.im_size, (0,), 4),
    (PetNet, 'swin_large_patch4_window12_384_in22k',
     'swin_large_patch4_window12_384_in22k_fold0_half.pth', 384, (0,), 3),
    (PetNet_exp77, 'beit_large_patch16_224',
     'beit_large_patch16_224_fold1_half.pth', Config.im_size, (0,), 4),
)


def _load_model(model_cls, model_name, weights_path):
    """Instantiate *model_cls*, load its CPU checkpoint and return it in eval mode."""
    model = model_cls(model_name=model_name, out_features=1, inp_channels=3, pretrained=False)
    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model = model.float()
    model.eval()
    return model
# }}}


# 09. Final predicted scores {{{
def score(input_img):
    """Score one pet photo with the weighted model ensemble.

    Args:
        input_img: Path of the image file to score.

    Returns:
        A user-facing (Chinese) message containing the score rounded to two
        decimals; the wording depends on whether the score exceeds 80.

    Raises:
        ValueError: If ``input_img`` is ``None``; raised before any model is
            loaded.
    """
    if input_img is None:
        raise ValueError("no image was provided")

    weighted_sum = 0.0
    total_weight = 0
    for model_cls, model_name, weights_path, im_size, ttas, weight in _ENSEMBLE:
        model = _load_model(model_cls, model_name, weights_path)
        preds = tta_fn(input_img, model, im_size, ttas)
        weighted_sum = weighted_sum + weight * preds
        total_weight += weight
        # Drop this model before the next checkpoint is read so that two
        # large models are never resident at the same time.
        del model
        gc.collect()

    final_predictions = weighted_sum / total_weight  # weighted mean of all members
    # A fixed 20-point offset is added to the ensemble score before display.
    boosted = (final_predictions + 20).round(2)[0]
    label = str(boosted)
    if boosted > 80:
        return label+"分! 美照!🥰"
    return "大数据说"+label+"分. 多拍几张试试?🤗"
# }}}
# Build the web UI around `score` and start serving it.
iface = gr.Interface(
    fn=score,
    title=title,
    description=description,
    article=article,
    allow_flagging="never",
    # type='filepath': the callback is given a path to the uploaded image
    # (with type=numpy it would instead receive an (h, w, c) array).
    inputs=gr.inputs.Image(type='filepath', label="给哪位小可爱打分?😉"),
    outputs=gr.outputs.Textbox(type='str', label="得分是...✨"),
)
iface.launch()
#}}}