"""Gradio demo that rates the modernity and typicality of a car in a photo.

Pipeline: a detectron2 Mask R-CNN segments the image, the largest detected
car is cut out and centred on a white 224x224 canvas, and two fine-tuned
ResNet-18 classifiers predict a modernity class and a typicality class.
A Grad-CAM heatmap is produced for each prediction.
"""
import functools

import cv2
import gradio as gr
import numpy as np
import torch
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from PIL import Image
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image

# Parameters whose name comes at or after this one (in named_parameters()
# order) keep requires_grad=True; everything before it is frozen.
layer = 'layer4.1'
# Number of output classes of each rating head.
num_classes = 5

_SEGMENTATION_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
_COCO_CAR_CLASS = 2  # COCO class 2 corresponds to 'car'
_CANVAS_SIZE = 224  # side length of the square classifier input
_NO_CAR_MESSAGE = "No cars were found."


def _load_rating_model(weights_path, first_trainable_layer=layer, n_classes=num_classes):
    """Build a ResNet-18 rating classifier and load its checkpoint on the CPU.

    Args:
        weights_path: Path of the state-dict checkpoint to load.
        first_trainable_layer: Substring of the first parameter name that
            stays trainable; all later parameters stay trainable as well.
        n_classes: Size of the final linear layer.

    Returns:
        The model on the CPU, in eval mode.
    """
    model = models.resnet18(weights='DEFAULT')

    # Freeze everything up to `first_trainable_layer`. The loop assigns every
    # parameter, so no separate "freeze all" pass is needed.
    # NOTE(review): Grad-CAM presumably relies on layer4's output requiring
    # grad, which this requires_grad state provides -- confirm before
    # simplifying it away for inference.
    trainable = False
    for param_name, param in model.named_parameters():
        if first_trainable_layer in param_name:
            trainable = True
        param.requires_grad = trainable

    # Replace the classification head with a hidden layer plus output layer.
    model.fc = torch.nn.Sequential(
        torch.nn.Linear(model.fc.in_features, 4096),
        torch.nn.ReLU(),
        torch.nn.Linear(4096, n_classes),
    )

    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model.to('cpu')
    model.eval()
    return model


# model modernity
model_mod = _load_rating_model('model_11.pth')

# model typicality
model_typ = _load_rating_model('model_task2.pth')

# Preprocessing applied to the 224x224 car canvas before classification.
val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


@functools.lru_cache(maxsize=1)
def _get_car_predictor():
    """Return the detectron2 Mask R-CNN predictor, built once on first use.

    Building the predictor loads the checkpoint, so it is cached instead of
    being reconstructed for every request.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(_SEGMENTATION_CONFIG))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(_SEGMENTATION_CONFIG)
    cfg.MODEL.DEVICE = "cpu"
    return DefaultPredictor(cfg)


def _center_car_on_canvas(source_image, car_mask):
    """Cut the masked car out of `source_image` and centre it on a white canvas.

    Args:
        source_image: PIL image the mask was predicted on.
        car_mask: Non-empty boolean (H, W) array marking the car pixels.

    Returns:
        A 224x224 RGB PIL image with the car scaled to fit, aspect ratio kept.
    """
    mask_image = Image.fromarray((car_mask * 255).astype(np.uint8))

    # Paste onto a fully transparent image of the original size so that only
    # the car pixels are opaque.
    cutout = Image.new("RGBA", source_image.size, (0, 0, 0, 0))
    cutout.paste(source_image, (0, 0), mask_image)

    # Crop to the bounding box of the mask.
    rows, cols = np.where(car_mask)
    y1, y2 = np.min(rows), np.max(rows)
    x1, x2 = np.min(cols), np.max(cols)
    car_region = cutout.crop((x1, y1, x2 + 1, y2 + 1))

    # Scale the longer side to the canvas size; clamp the shorter side to at
    # least one pixel so extremely thin masks cannot produce a zero-size resize.
    car_width, car_height = car_region.size
    if car_height > car_width:
        new_height = _CANVAS_SIZE
        new_width = max(1, int(car_width * _CANVAS_SIZE / car_height))
    else:
        new_width = _CANVAS_SIZE
        new_height = max(1, int(car_height * _CANVAS_SIZE / car_width))
    resized_car = car_region.resize((new_width, new_height))

    # Centre the car on a white canvas, using its alpha channel as paste mask.
    canvas = Image.new("RGB", (_CANVAS_SIZE, _CANVAS_SIZE), (255, 255, 255))
    paste_x = (_CANVAS_SIZE - new_width) // 2
    paste_y = (_CANVAS_SIZE - new_height) // 2
    canvas.paste(resized_car, (paste_x, paste_y), resized_car)
    return canvas


def _rate_with_heatmap(model, batch, base_image):
    """Classify `batch` with `model` and overlay its Grad-CAM map on `base_image`.

    Args:
        model: One of the rating classifiers.
        batch: Preprocessed input tensor with a leading batch dimension of 1.
        base_image: PIL image the heatmap is blended onto.

    Returns:
        Tuple of (predicted class index as int, heatmap overlay image).
    """
    with GradCAM(model, target_layer='layer4') as cam_extractor:
        scores = model(batch)
        predicted_class = scores.squeeze(0).argmax().item()
        activation_map = cam_extractor(predicted_class, scores)

    heatmap = overlay_mask(
        base_image,
        to_pil_image(activation_map[0].squeeze(0), mode='F'),
        alpha=0.5,
    )
    return predicted_class, heatmap


def crop(image):
    """Find the largest car in `image` and rate its typicality and modernity.

    Args:
        image: Image as a NumPy array in RGB channel order, as delivered by
            the Gradio 'image' input.

    Returns:
        A 4-tuple (car image, message, modernity heatmap, typicality heatmap).
        When no car is detected, the three images are all-255 arrays shaped
        like `image` and the message says so.
    """
    inp = Image.fromarray(image)
    blank = np.full_like(image, 255)

    # DefaultPredictor expects BGR input, while Gradio supplies RGB.
    outputs = _get_car_predictor()(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    instances = outputs["instances"]

    # handle no cars found
    car_indices = (instances.pred_classes == _COCO_CAR_CLASS).nonzero(as_tuple=True)[0]
    if len(car_indices) == 0:
        return blank, _NO_CAR_MESSAGE, blank, blank

    # get largest car (by mask area)
    mask_areas = instances.pred_masks[car_indices].sum(dim=(1, 2))
    largest_car_idx = car_indices[mask_areas.argmax()]
    car_mask = instances.pred_masks[largest_car_idx].cpu().numpy()
    if not car_mask.any():
        # A detection with an empty mask leaves nothing to cut out.
        return blank, _NO_CAR_MESSAGE, blank, blank

    new_image = _center_car_on_canvas(inp, car_mask)

    # make predictions
    transformed_image = val_test_transform(new_image).unsqueeze(0)

    # prediction and heatmap for modernity
    predicted_mod, heatmap_mod = _rate_with_heatmap(model_mod, transformed_image, new_image)

    # prediction and heatmap for typicality
    predicted_typ, heatmap_typ = _rate_with_heatmap(model_typ, transformed_image, new_image)

    message = f"Car detected! Typicality rating: {predicted_typ}, Modernity rating: {predicted_mod}"
    return new_image, message, heatmap_mod, heatmap_typ


myexamples = [['BMW.jpg'], ['Pferd.jpeg']]

interface = gr.Interface(
    crop,
    inputs='image',
    outputs=[gr.Image(), gr.Textbox(), gr.Image(), gr.Image()],
    title='Cars',
    description='Schaun ma ma was wird',
    examples=myexamples,
)

# Launched at import time, as in the original script.
interface.launch()