# model_part5_deploy.py
# 01. Import Packages {{{
import sys
import os
import time
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import pandas as pd
import numpy as np
from PIL import Image
from glob import glob
import gc
import albumentations as A
import gradio as gr
# Gradio wrap {{{
# Static text shown on the Gradio page (passed to gr.Interface as title/description/article).
title = "🐱用AI给小可爱们打个分🐶"
# NOTE(review): the original `description = """` literal was never closed, so it
# swallowed every following line of the file. Its intended text is not present
# in this file; an empty description keeps the module importable until the
# original copy is restored.
description = ""
article = "此模型使用已被打分的喵/汪照片进行训练.所以目前只支持喵星人和汪星人打分。了解更多,请联系作者.微信:Roy_Ma_US."
# Gradio callback: registered below via `gr.Interface(fn=score, ...)`.
# The Image input is declared with type='filepath', so `input_img` is expected
# to be the path of the uploaded picture; the function ends by returning the
# score message string.
def score(input_img):
# Alias used by every inference section further down.
thefile = input_img
# 02. Model constants {{{
class Config:
    """Inference-time constants shared by the data loader and the 224px models."""

    # Side length (pixels) handed to tta_fn for the models trained at 224x224.
    im_size: int = 224
    # DataLoader batch size; a single uploaded image is scored per request.
    batch_size: int = 1
    # DataLoader worker processes; 0 keeps image loading in the main process.
    num_workers: int = 0
# }}}
# 03. Define Dataset {{{
class PetDataset(Dataset):
    """Dataset yielding ``(image, target)`` tensor pairs read from image files.

    Args:
        image_filepaths: a single path (str / bytes / os.PathLike) or a
            sequence of paths. A single path is wrapped in a one-element list
            so the dataset has exactly one sample; previously ``len()`` of a
            path string was its character count and the same image was
            re-scored once per character.
        targets: one target value per image (a scalar is accepted for a
            single image).
        transform: optional callable invoked as ``transform(image=ndarray)``
            and expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, image_filepaths, targets, transform=None):
        if isinstance(image_filepaths, (str, bytes, os.PathLike)):
            image_filepaths = [image_filepaths]
        self.image_filepaths = image_filepaths
        # atleast_1d lets a scalar target be indexed like a sequence.
        self.targets = np.atleast_1d(targets)
        self.transform = transform

    def __len__(self):
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        image_filepath = self.image_filepaths[idx]
        with open(image_filepath, 'rb') as f:
            # PIL decodes lazily: convert while the file is still open so the
            # pixel data is actually read before the handle is closed.
            image_rgb = Image.open(f).convert('RGB')
        image = np.array(image_rgb)  # ndarray laid out as (H, W, C)
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        image = image / 255  # scale pixel values to [0, 1]
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)  # HWC -> CHW
        target = self.targets[idx]
        image = torch.tensor(image, dtype=torch.float)
        target = torch.tensor(target, dtype=torch.float)
        return image, target
def inference_fixed_transforms(mode=0, dim=224):
    """Build the deterministic albumentations pipeline for one TTA mode.

    Args:
        mode: 0 resizes the shorter edge to ``dim`` and centre-crops
            ``dim x dim``; 1 resizes the shorter edge to ``dim + 16``,
            centre-crops ``dim x dim`` and always flips horizontally.
        dim: side length in pixels of the square crop.

    Returns:
        An ``A.Compose`` pipeline; every step is applied with probability 1.

    Raises:
        ValueError: if ``mode`` is not 0 or 1 (previously ``None`` was
            returned silently, which disabled resizing altogether).
    """
    if mode == 0:
        resize_to, flip = dim, False
    elif mode == 1:
        # Slight zoom-in (16 px margin) before cropping, then mirror.
        resize_to, flip = dim + 16, True
    else:
        raise ValueError(f"unsupported TTA mode: {mode!r} (expected 0 or 1)")

    steps = [
        A.SmallestMaxSize(max_size=resize_to, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    if flip:
        steps.append(A.HorizontalFlip(p=1.0))
    return A.Compose(steps, p=1.0)
# }}}
# 04. Define model class {{{
class PetNet(nn.Module):
    """Thin wrapper around a timm model used as a regressor.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        out_features: size of the model's output layer (``num_classes``).
        inp_channels: number of input image channels (``in_chans``).
        pretrained: whether timm should load its pretrained weights.

    The constructor arguments were previously ignored in favour of hard-coded
    values (3 channels, 1 output, no pretrained weights). They are now
    forwarded; the defaults reproduce the old behaviour.
    """

    def __init__(self, model_name, out_features=1, inp_channels=3, pretrained=False):
        super().__init__()
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )

    def forward(self, image):
        """Return the raw model output, shaped (batch_size, out_features)."""
        return self.model(image)
def tta_fn(filepaths, model, im_size, ttas=(0, 1)):
    """Score images with test-time augmentation (TTA) and average the passes.

    Args:
        filepaths: path of the image to score, or a sequence of paths.
        model: module whose forward pass returns one raw logit per image.
        im_size: side length in pixels of the square crop fed to the model.
        ttas: iterable of transform modes accepted by
            ``inference_fixed_transforms``; one inference pass is run per mode.

    Returns:
        1-D numpy array of scores in [0, 100] (sigmoid of the logit times
        100), one per sample, averaged over all TTA passes.

    Raises:
        ValueError: if ``ttas`` is empty.
    """
    ttas = list(ttas)
    if not ttas:
        raise ValueError("ttas must contain at least one TTA mode")

    print('Image Size:', im_size)
    model.eval()  # inference mode: disables dropout etc.

    # One dummy target per image; the dataset requires targets but they are
    # not used for inference.
    if isinstance(filepaths, (str, bytes, os.PathLike)):
        n_images = 1
    else:
        n_images = len(filepaths)

    tta_preds = []
    for tta_mode in ttas:
        print(f'tta mode:{tta_mode}')
        test_dataset = PetDataset(
            image_filepaths=filepaths,
            targets=np.zeros(n_images),
            transform=inference_fixed_transforms(tta_mode, dim=im_size),
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=Config.batch_size,
            shuffle=False,
            num_workers=Config.num_workers,
            # Predictions are converted with .numpy(), i.e. inference runs on
            # CPU, so pinning host memory brings no benefit.
            pin_memory=False,
        )
        tta_pred = []
        with torch.no_grad():  # no gradients needed for inference
            for images, _ in test_loader:
                output = model(images.float())
                scores = torch.sigmoid(output).detach().numpy() * 100
                tta_pred.extend(scores.ravel().tolist())
        tta_preds.append(np.array(tta_pred))

    # Element-wise mean across the TTA passes.
    return np.mean(tta_preds, axis=0)
# }}}
# 05. Inference 1 {{{
test_preds = []
test_preds_model = []
# Swin-Large (patch4, window7, 224px), fold 0 weights, evaluated on CPU.
model = PetNet(model_name='swin_large_patch4_window7_224', out_features=1, inp_channels=3, pretrained=False)
model.load_state_dict(torch.load('swin_large_patch4_window7_224_fold0_half.pth', map_location=torch.device('cpu')))
model = model.float()
test_preds_fold = tta_fn(thefile, model, Config.im_size, [1])
# Collect the fold prediction; without this the list stays empty and the mean
# below evaluates to nan.
test_preds_model.append(test_preds_fold)
final_predictions53 = np.mean(np.array(test_preds_model), axis=0)
# }}}
# 06. Inference 2 {{{
test_preds = []
test_preds_model = []
# BEiT-Large (patch16, 224px), fold 0 weights, evaluated on CPU.
model = PetNet(model_name='beit_large_patch16_224', out_features=1, inp_channels=3, pretrained=False)
model.load_state_dict(torch.load('beit_large_patch16_224_fold0_half.pth', map_location=torch.device('cpu')))
model = model.float()
test_preds_fold = tta_fn(thefile, model, Config.im_size, [0])
# Collect the fold prediction; without this the list stays empty and the mean
# below evaluates to nan.
test_preds_model.append(test_preds_fold)
final_predictions55 = np.mean(np.array(test_preds_model), axis=0)
# }}}
# 07. Inference 3 {{{
test_preds = []
test_preds_model = []
# Swin-Large (patch4, window12, 384px, in22k), fold 0 weights, evaluated on CPU.
model = PetNet(model_name='swin_large_patch4_window12_384_in22k', out_features=1, inp_channels=3, pretrained=False)
model.load_state_dict(torch.load('swin_large_patch4_window12_384_in22k_fold0_half.pth', map_location=torch.device('cpu')))
model = model.float()
# This model takes 384px crops, so the size is passed explicitly instead of Config.im_size.
test_preds_fold = tta_fn(thefile, model, 384, [0])
# Collect the fold prediction; without this the list stays empty and the mean
# below evaluates to nan.
test_preds_model.append(test_preds_fold)
final_predictions66 = np.mean(np.array(test_preds_model), axis=0)
# 08. Inference 4 {{{
class PetNet_exp77(nn.Module):
    """timm backbone followed by dropout and a linear regression head.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        out_features: output size of the final linear head.
        inp_channels: number of input image channels (``in_chans``).
        pretrained: whether timm should load its pretrained weights.

    The defaults reproduce the previously hard-coded configuration.
    """

    def __init__(self, model_name, out_features=1, inp_channels=3, pretrained=False):
        # nn.Module must be initialised before any submodule is assigned;
        # without this call the assignments below raise AttributeError.
        super().__init__()
        # Width of the backbone output fed to the head — presumably the
        # backbone's default 1000-way classifier output; confirm against timm.
        NC = 1000
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        self.dropout = nn.Dropout(0.05)
        self.head = nn.Linear(NC, out_features)

    def forward(self, image):
        """Return the head output, shaped (batch_size, out_features)."""
        features = self.model(image)
        features = self.dropout(features)
        return self.head(features)
test_preds = []
test_preds_model = []
# BEiT-Large (patch16, 224px) with the dropout + linear head, fold 1 weights, evaluated on CPU.
model = PetNet_exp77(model_name='beit_large_patch16_224', out_features=1, inp_channels=3, pretrained=False)
model.load_state_dict(torch.load('beit_large_patch16_224_fold1_half.pth', map_location=torch.device('cpu')))
model = model.float()
test_preds_fold = tta_fn(thefile, model, Config.im_size, [0])
# Collect the fold prediction; without this the list stays empty and the mean
# below evaluates to nan.
test_preds_model.append(test_preds_fold)
final_predictions77 = np.mean(np.array(test_preds_model), axis=0)
# 09. Final predicted scores {{{
# Weighted mean of the four model predictions. The divisor lists four weights
# (3, 4, 3, 4), so the fourth model (final_predictions77) takes the remaining
# weight of 4; it was missing from the sum, leaving a dangling `+`.
final_predictions = (
    3 * final_predictions53
    + 4 * final_predictions55
    + 3 * final_predictions66
    + 4 * final_predictions77
) / (3 + 4 + 3 + 4)
# Add a flat 20-point boost to the ensemble score and round to two decimals.
boosted = (final_predictions + 20).round(2)
# Keep the first entry only — presumably the array holds one score because a
# single image is scored per call; confirm against tta_fn's output.
boosted = boosted[0]
# Scores above 80 return the "great photo" message; everything else returns
# the "try a few more shots" message. Both embed the score as text.
if boosted > 80:
boosted = str(boosted)
return boosted+"分! 美照!🥰"
boosted = str(boosted)
return "大数据说"+boosted+"分. 多拍几张试试?🤗"
iface = gr.Interface(fn=score,
inputs=gr.inputs.Image(label="给哪位小可爱打分?😉", type='filepath'), # input_img.shape: h, w, c, if type=numpy
outputs=gr.outputs.Textbox(label="得分是...✨", type='str'),
title = title, description = description, article = article,