# general setup
import os

os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
os.system('pip install opencv-python')

# set up the detectron2 logger
import torch, detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import json, cv2
import pandas as pd
from PIL import Image
from torchvision import transforms
from torchvision import models
from torch import nn

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# import gradio
import gradio as gr

# set device
DEVICE = 'cpu'

# load a ResNet-18 and replace its head with a 5-class output layer
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 5)

# load the trained parameters
model.load_state_dict(torch.load('model_modernity.pth', map_location=torch.device('cpu')))

# switch the model to evaluation mode
model.eval()

# mean and std of the ResNet (ImageNet) training data
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# define transforms
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# define the input and outputs
i1 = gr.inputs.Image(type="numpy", label="Input image")
o1 = gr.outputs.Image(type="pil", label="Cropped image")
o2 = gr.outputs.Textbox(label="Modernity score")

# function called by the Gradio interface
def modernity(im):
    # create a detectron2 config and a DefaultPredictor to run inference on the image
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set detection threshold for this model
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.DEVICE = 'cpu'
    predictor = DefaultPredictor(cfg)
    outputs = predictor(im)

    # get all instance masks of the input image
    masks = outputs['instances'].pred_masks.to('cpu').numpy()

    # iterate over all detected objects to collect their class names and mask sizes (in pixels)
    obj = []
    obj_size = []
    for idx, data in enumerate(outputs['instances'].pred_classes):
        num = data.item()
        obj.append(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes[num])
        obj_size.append(masks[idx].sum())

    # if no automobile was detected, return the image with all detections highlighted
    if 'car' not in obj:
        v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
        out = v.draw_instance_predictions(outputs["instances"].to('cpu'))
        # convert to PIL so the result matches the "pil" image output defined above
        img = Image.fromarray(out.get_image()[:, :, ::-1])
        # return a message instead of a score
        out = 'No automobiles were found in the image.'
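    # If at least one car was detected, the else branch below isolates the
    # largest detected car, composites it onto a plain white background to
    # strip away the surrounding scene, and only then scores it, so the
    # modernity score reflects the vehicle itself rather than its context.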
    else:
        # create a data frame containing all object names and sizes
        objects = pd.DataFrame({'obj': obj, 'obj_size': obj_size})
        # get the mask of the largest object labeled as a car
        item_mask = masks[objects[objects['obj'] == 'car']['obj_size'].idxmax()]
        # get the pixel coordinates covered by the mask
        segmentation = np.where(item_mask == True)
        # get the x and y boundaries
        x_min = int(np.min(segmentation[1]))
        x_max = int(np.max(segmentation[1]))
        y_min = int(np.min(segmentation[0]))
        y_max = int(np.max(segmentation[0]))
        # crop the input image to the bounding box of the car
        cropped = Image.fromarray(im[y_min:y_max, x_min:x_max, :], mode='RGB')
        # create a mask image
        mask = Image.fromarray((item_mask * 255).astype('uint8'))
        # crop the mask to the same bounding box
        cropped_mask = mask.crop((x_min, y_min, x_max, y_max))
        # create a white background of the same size
        background = Image.new(mode='RGB', size=cropped_mask.size, color='white')
        # define the paste position
        paste_position = (0, 0)
        # create the foreground image
        new_fg_image = Image.new('RGB', background.size)
        new_fg_image.paste(cropped, paste_position)
        # composite the final image: the car on a white background
        img = Image.composite(new_fg_image, background, cropped_mask)

        # apply the previously defined transformations
        img_t = test_transform(img).to(DEVICE)
        # feed the transformed image to the model (no gradients needed at inference)
        with torch.no_grad():
            out = model(img_t[None, :])
        # apply softmax to obtain class probabilities
        softmax = nn.Softmax(dim=1)
        out = softmax(out)
        # get the label classes
        label_classes = torch.tensor([0, 1, 2, 3, 4]).to(DEVICE)
        # compute the modernity score as the probability-weighted mean class
        out = round((label_classes * out).sum(axis=1).item(), 1)

    return img, out

# set the interface title
title = 'Design Modernity of Automobiles'

# set the interface description
description = "Demo for the design modernity of automobiles. To use it, simply upload an image, or click one of the examples to load them."

# include example images
examples = [['input.jpg'], ['input1.jpg']]

# define the interface
interface = gr.Interface(modernity, inputs=i1, outputs=[o1, o2], title=title,
                         description=description, examples=examples, cache_examples=False)

# launch the interface
interface.launch()
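# ---------------------------------------------------------------------------
# Worked example of the scoring step (illustration only; the probabilities
# below are made up): the model outputs logits for five classes, presumably
# ordered from least to most modern, and the score is the expected class
# index under the softmax distribution. For
#     p = [0.05, 0.10, 0.20, 0.40, 0.25]
# the score is
#     0*0.05 + 1*0.10 + 2*0.20 + 3*0.40 + 4*0.25 = 2.7
# which is exactly what (label_classes * out).sum(axis=1) computes above,
# before rounding to one decimal place.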