import cv2
import numpy as np
import torch
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
import gradio as gr
from PIL import Image
from torchvision import transforms, models
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask
from torchvision.transforms.functional import to_pil_image


# Settings shared by both rating classifiers.
layer = 'layer4.1'  # first parameter group whose gradients stay enabled
num_classes = 5     # width of each classifier's output layer


def _load_rating_model(checkpoint_path):
    """Build a ResNet-18 rating classifier and load its weights from disk.

    The network is a torchvision ResNet-18 whose ``fc`` head is replaced by
    ``Linear(in_features, 4096) -> ReLU -> Linear(4096, num_classes)``.

    Args:
        checkpoint_path: Path of the state dict to load; it must match this
            architecture exactly because ``load_state_dict`` is strict.

    Returns:
        The model on the CPU, in eval mode.
    """
    # The checkpoint replaces every parameter and buffer (strict loading), so
    # downloading the pretrained ImageNet weights first would be wasted work
    # and would make start-up depend on network access.
    model = models.resnet18(weights=None)

    # Gradients are disabled for every parameter that precedes `layer` and
    # enabled from `layer` onwards. crop() runs Grad-CAM on 'layer4', which
    # back-propagates through that stage, so these flags still matter at
    # inference time and must not be dropped.
    trainable = False
    for name, param in model.named_parameters():
        if layer in name:
            trainable = True
        param.requires_grad = trainable

    # Replacement head; its freshly created parameters require gradients.
    model.fc = torch.nn.Sequential(
        torch.nn.Linear(model.fc.in_features, 4096),
        torch.nn.ReLU(),
        torch.nn.Linear(4096, num_classes),
    )

    model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
    model.to('cpu')
    model.eval()
    return model


# model modernity
model_mod = _load_rating_model('model_11.pth')

# model typicality
model_typ = _load_rating_model('model_task2.pth')

################

# Per-channel statistics used to normalise the classifier input (these are the
# values torchvision documents for its ImageNet-pretrained models).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to a PIL image before it is fed to the classifiers:
# resize to 224x224, convert to a tensor, then normalise each channel.
_resize_step = transforms.Resize((224,224))
_tensor_step = transforms.ToTensor()
_normalize_step = transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD)
val_test_transform = transforms.Compose([_resize_step, _tensor_step, _normalize_step])
#################

# Side length (pixels) of the square canvas the cropped car is pasted onto.
_CANVAS_SIZE = 224

# Mask R-CNN predictor shared by all requests; built lazily by
# _get_car_predictor() because constructing it loads the model checkpoint.
_car_predictor = None


def _get_car_predictor():
    """Return the shared detectron2 predictor, creating it on first use."""
    global _car_predictor
    if _car_predictor is None:
        config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(config_name))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
        cfg.MODEL.DEVICE = "cpu"
        _car_predictor = DefaultPredictor(cfg)
    return _car_predictor


def _scale_to_fit(width, height, target=_CANVAS_SIZE):
    """Scale (width, height) so the longer side equals ``target``.

    The aspect ratio is kept; the shorter side is truncated to an int but
    never drops below 1 pixel, so the result is always a valid image size.
    """
    if height > width:
        return max(1, int(width * target / height)), target
    return target, max(1, int(height * target / width))


def _rate_with_cam(model, batch, base_image):
    """Classify ``batch`` with ``model`` and render its Grad-CAM overlay.

    Returns:
        ``(class_index, overlay)`` where ``class_index`` is the arg-max class
        as a Python int and ``overlay`` is ``base_image`` blended with the
        Grad-CAM heatmap of 'layer4' for that class.
    """
    with GradCAM(model, target_layer='layer4') as cam_extractor:
        scores = model(batch)
        class_index = scores.squeeze(0).argmax().item()
        activation_maps = cam_extractor(class_index, scores)
    heatmap = to_pil_image(activation_maps[0].squeeze(0), mode='F')
    return class_index, overlay_mask(base_image, heatmap, alpha=0.5)


def crop(image):
    """Cut the largest car out of ``image`` and rate it with both classifiers.

    Args:
        image: RGB image as an ``(H, W, 3)`` numpy array (what the Gradio
            'image' input delivers), or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(car_image, message, heatmap_mod, heatmap_typ)``:
        the car on a white 224x224 canvas, a status text containing both
        predicted ratings, and the Grad-CAM overlays of the modernity and
        typicality models. If no car is found, the three images are plain
        white arrays shaped like the input; if ``image`` is ``None`` they
        are ``None``.
    """
    if image is None:
        return None, "No image provided.", None, None

    inp = Image.fromarray(image)

    # DefaultPredictor takes its input in BGR channel order, whereas the
    # array received here is RGB, so the channels are flipped for detection.
    bgr_image = np.ascontiguousarray(image[:, :, ::-1])
    instances = _get_car_predictor()(bgr_image)["instances"]

    # COCO class 2 corresponds to 'car'
    car_indices = (instances.pred_classes == 2).nonzero(as_tuple=True)[0]
    if len(car_indices) > 0:
        # Largest car = detection whose mask covers the most pixels.
        mask_areas = instances.pred_masks[car_indices].sum(dim=(1, 2))
        largest_car_idx = car_indices[mask_areas.argmax()]
        car_mask = instances.pred_masks[largest_car_idx].cpu().numpy()
    else:
        car_mask = None

    # No car detected, or the best detection has an empty mask.
    if car_mask is None or not car_mask.any():
        blank = np.full_like(image, 255)
        return blank, "No cars were found.", blank, blank

    # Keep only the car's pixels; everything else stays fully transparent.
    car_mask_pil = Image.fromarray((car_mask * 255).astype(np.uint8))
    transparent_image = Image.new("RGBA", inp.size, (0, 0, 0, 0))
    transparent_image.paste(inp, (0, 0), car_mask_pil)

    # Crop to the mask's bounding box (crop's right/bottom are exclusive).
    rows, cols = np.where(car_mask)
    y1, y2 = int(rows.min()), int(rows.max())
    x1, x2 = int(cols.min()), int(cols.max())
    car_region = transparent_image.crop((x1, y1, x2 + 1, y2 + 1))

    # Scale the car to fit the canvas and centre it on a white background,
    # using the car's own alpha channel as the paste mask.
    new_width, new_height = _scale_to_fit(*car_region.size)
    resized_car = car_region.resize((new_width, new_height))
    new_image = Image.new("RGB", (_CANVAS_SIZE, _CANVAS_SIZE), (255, 255, 255))
    paste_x = (_CANVAS_SIZE - new_width) // 2
    paste_y = (_CANVAS_SIZE - new_height) // 2
    new_image.paste(resized_car, (paste_x, paste_y), resized_car)

    # Predictions and heatmaps for modernity and typicality.
    transformed_image = val_test_transform(new_image).unsqueeze(0)
    predicted_mod, heatmap_mod = _rate_with_cam(model_mod, transformed_image, new_image)
    predicted_typ, heatmap_typ = _rate_with_cam(model_typ, transformed_image, new_image)

    message = f"Car detected! Typicality rating: {predicted_typ}, Modernity rating: {predicted_mod}"
    return new_image, message, heatmap_mod, heatmap_typ


# Example inputs offered in the UI; each row holds the single image argument
# that crop() takes.
myexamples = [[example_file] for example_file in ('BMW.jpg', 'Pferd.jpeg')]

# Output widgets, in the order crop() returns its values: cropped car image,
# status text, modernity heatmap, typicality heatmap.
_output_components = [gr.Image(), gr.Textbox(), gr.Image(), gr.Image()]

interface = gr.Interface(
    crop,
    inputs='image',
    outputs=_output_components,
    title='Cars',
    description='Schaun ma ma was wird',
    examples=myexamples,
)
interface.launch()