Spaces:

Aurel-test
/

SegFormer-Model

Runtime error

App Files Files Community

SegFormer-Model / app.py

Aurel-test

Change theme

4436966 11 months ago

raw

history blame contribute delete

21.3 kB

	import gradio as gr
	import torch
	from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
	from PIL import Image
	import plotly.graph_objects as go
	import numpy as np
	import os
	import torch.nn as nn
	from sklearn.metrics import jaccard_score, accuracy_score
	from collections import Counter
	import matplotlib.pyplot as plt
	import seaborn as sns
	import torch.nn.functional as F
	import seaborn as sns
	from functools import partial
	from pytorch_grad_cam.utils.image import (
	show_cam_on_image,
	preprocess_image as grad_preprocess,
	)
	from pytorch_grad_cam import GradCAM
	import cv2
	import transformers
	from torchvision import transforms
	import albumentations as A

	# Run on the GPU when torch can see one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	# Root folder of the sample data; sub-folders are joined onto it by the loaders below.
	data_folder = "data_sample"
	# Class index -> class name for the 8 labels passed to the model.
	id2label = {
	    0: "void",
	    1: "flat",
	    2: "construction",
	    3: "object",
	    4: "nature",
	    5: "sky",
	    6: "human",
	    7: "vehicle",
	}
	# Reverse lookup: class name -> class index.
	label2id = {v: k for k, v in id2label.items()}
	num_labels = len(id2label)
	# Checkpoint identifier handed to from_pretrained; also reused in the weights path below.
	checkpoint = "nvidia/segformer-b3-finetuned-cityscapes-1024-1024"
	# Resizing is switched off in the processor, so callers control the input size.
	image_processor = SegformerImageProcessor(do_resize=False)
	# Weights file whose path embeds the checkpoint name.
	state_dict_path = f"runs/{checkpoint}/best_model.pt"
	# NOTE(review): ignore_mismatched_sizes=True is presumably needed because the
	# checkpoint head was trained with a different number of classes - confirm.
	model = SegformerForSemanticSegmentation.from_pretrained(
	    checkpoint,
	    num_labels=num_labels,
	    id2label=id2label,
	    label2id=label2id,
	    ignore_mismatched_sizes=True,
	)
	# The state dict is mapped to CPU on load (tensors only, weights_only=True);
	# the model is moved to `device` afterwards.
	loaded_state_dict = torch.load(
	    state_dict_path, map_location=torch.device("cpu"), weights_only=True
	)
	model.load_state_dict(loaded_state_dict)
	model = model.to(device)
	# Switch to evaluation mode before serving predictions.
	model.eval()

	# ---- Partie Segmentation


	def load_and_prepare_images(image_name, segformer=False):
	    """
	    Load an image together with its ground-truth mask and stored FPN prediction.

	    All three files are looked up under ``data_folder``: the image in
	    ``images/``, the mask in ``masks/`` (same name with ``_leftImg8bit.png``
	    replaced by ``_gtFine_labelIds.png``) and the FPN prediction in
	    ``resnet101_mask/`` (same name as the image).

	    Args:
	        image_name (str): File name of the image to load.
	        segformer (bool, optional): When True, also run ``predict_segmentation``
	            on the resized image. Defaults to False.

	    Returns:
	        tuple: ``(original, true_mask, fpn_pred)`` where ``original`` is the RGB
	        image resized with ``resize((1024, 512))`` and the other two are the
	        files' contents as numpy arrays. When ``segformer`` is True the
	        SegFormer prediction is appended as a fourth element.

	    Raises:
	        FileNotFoundError: If the image, the mask or the FPN prediction is missing.
	    """
	    image_path = os.path.join(data_folder, "images", image_name)
	    mask_name = image_name.replace("_leftImg8bit.png", "_gtFine_labelIds.png")
	    mask_path = os.path.join(data_folder, "masks", mask_name)
	    fpn_pred_path = os.path.join(data_folder, "resnet101_mask", image_name)

	    required_files = (
	        ("Image", image_path),
	        ("Mask", mask_path),
	        ("FPN prediction", fpn_pred_path),
	    )
	    for description, path in required_files:
	        if not os.path.exists(path):
	            raise FileNotFoundError(f"{description} not found: {path}")

	    # Image.open keeps the file handle open until the image is closed; the
	    # context managers release each handle as soon as the pixels are copied.
	    with Image.open(image_path) as source_image:
	        original = source_image.convert("RGB").resize((1024, 512))
	    with Image.open(mask_path) as mask_image:
	        true_mask = np.array(mask_image)
	    with Image.open(fpn_pred_path) as fpn_image:
	        fpn_pred = np.array(fpn_image)

	    if segformer:
	        segformer_pred = predict_segmentation(original)
	        return original, true_mask, fpn_pred, segformer_pred

	    return original, true_mask, fpn_pred


	def predict_segmentation(image):
	    """
	    Run the module-level SegFormer model on one image and return its label map.

	    Args:
	        image (PIL.Image.Image): Image to segment; its ``size`` (width, height)
	            sets the resolution of the returned map.

	    Returns:
	        numpy.ndarray: Per-pixel class indices for the first (only) batch
	        element, obtained by arg-maxing the logits after bilinear upsampling
	        to the image's height and width.
	    """
	    encoded = image_processor(images=image, return_tensors="pt")
	    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    model.to(target_device)
	    batch = encoded.pixel_values.to(target_device)

	    with torch.no_grad():
	        raw_logits = model(pixel_values=batch).logits

	    # PIL reports (width, height); interpolate expects (height, width).
	    height_width = image.size[::-1]
	    full_size_logits = nn.functional.interpolate(
	        raw_logits,
	        size=height_width,
	        mode="bilinear",
	        align_corners=False,
	    )
	    label_map = full_size_logits.argmax(dim=1)[0]
	    return label_map.cpu().numpy()


	def process_image(image_name):
	    """
	    Build the gallery entries shown in the "Prédictions" tab for one image.

	    Args:
	        image_name (str): File name of the image to process.

	    Returns:
	        list: Four ``(image, caption)`` tuples: the original image, the
	        colorized ground-truth mask, the stored FPN prediction and the
	        colorized SegFormer prediction.
	    """

	    def _to_display_image(label_map):
	        # Colorize a label map and bring it to the 1024x512 display size.
	        rgb = colorize_mask(label_map).astype("uint8")
	        return Image.fromarray(rgb).resize((1024, 512))

	    original, true_mask, fpn_pred, segformer_pred = load_and_prepare_images(
	        image_name, segformer=True
	    )

	    # The FPN prediction is forwarded exactly as loaded (no colorization).
	    gallery_items = [
	        (original, "Image originale"),
	        (_to_display_image(true_mask), "Masque réel"),
	        (fpn_pred, "Prédiction FPN"),
	        (_to_display_image(segformer_pred), "Prédiction SegFormer"),
	    ]
	    return gallery_items


	def create_cityscapes_label_colormap():
	    """
	    Build the RGB lookup table used to colorize label maps.

	    Returns:
	        numpy.ndarray: A ``(256, 3)`` ``uint8`` array. Rows 0-7 hold the RGB
	        colors of the eight classes; every other row is black.
	    """
	    palette = (
	        (78, 82, 110),
	        (128, 64, 128),
	        (154, 156, 153),
	        (168, 167, 18),
	        (80, 108, 28),
	        (112, 164, 196),
	        (168, 28, 52),
	        (16, 18, 112),
	    )
	    table = np.zeros((256, 3), dtype=np.uint8)
	    table[: len(palette)] = palette
	    return table


	# Build the colormap once, at import time.
	cityscapes_colormap = create_cityscapes_label_colormap()


	def colorize_mask(mask):
	    """
	    Turn a label map into colors by indexing the module-level colormap.

	    Args:
	        mask (numpy.ndarray): Array used directly as an index into
	            ``cityscapes_colormap`` (expected to hold integer class ids).

	    Returns:
	        numpy.ndarray: The colormap rows selected by ``mask``.
	    """
	    palette = cityscapes_colormap
	    return palette[mask]


	# ---- Fin Partie Segmentation

	# ---- Partie EDA


	def analyse_mask(real_mask, num_labels):
	    """
	    Compute the share of pixels belonging to each class in a label mask.

	    Args:
	        real_mask (numpy.ndarray): Mask of non-negative integer class ids.
	        num_labels (int): Minimum number of classes to report; ids that never
	            occur still get an entry.

	    Returns:
	        dict: Maps each class id (0 .. at least ``num_labels - 1``) to the
	        fraction of pixels carrying that id. An empty mask yields 0.0 for
	        every class.
	    """
	    counts = np.bincount(real_mask.ravel(), minlength=num_labels)
	    total_pixels = real_mask.size
	    if total_pixels == 0:
	        # Nothing to count: report zeros instead of the NaN (and the
	        # RuntimeWarning) that dividing 0 by 0 would produce.
	        return {class_id: 0.0 for class_id in range(len(counts))}
	    class_proportions = counts / total_pixels
	    return dict(enumerate(class_proportions))


	def show_eda(image_name):
	    """
	    Produce the class-distribution view for one image and its ground-truth mask.

	    Args:
	        image_name (str): File name of the image to analyse.

	    Returns:
	        tuple: The resized original image, the colorized ground-truth mask
	        (PIL image resized to 1024x512) and a Plotly bar chart of the class
	        proportions, sorted in ascending order.
	    """
	    original_image, true_mask, _ = load_and_prepare_images(image_name)
	    class_proportions = analyse_mask(true_mask, num_labels)
	    true_mask_colored = Image.fromarray(colorize_mask(true_mask).astype("uint8"))
	    true_mask_colored = true_mask_colored.resize((1024, 512))

	    # Sort classes by ascending proportion. Ids without a name in id2label
	    # (possible when the mask holds values >= num_labels) are left out of
	    # the chart instead of raising KeyError on the label lookup below.
	    sorted_classes = sorted(
	        (class_id for class_id in class_proportions if class_id in id2label),
	        key=class_proportions.get,
	    )

	    # Bar-plot data; colors come from the shared module-level colormap so
	    # the bars match the colorized mask.
	    categories = [id2label[i] for i in sorted_classes]
	    values = [class_proportions[i] for i in sorted_classes]
	    color_list = [
	        f"rgb({cityscapes_colormap[i][0]}, {cityscapes_colormap[i][1]}, {cityscapes_colormap[i][2]})"
	        for i in sorted_classes
	    ]

	    fig = go.Figure()
	    fig.add_trace(
	        go.Bar(
	            x=categories,
	            y=values,
	            marker_color=color_list,
	            text=[f"{v:.2f}" for v in values],
	            textposition="outside",
	        )
	    )

	    # Title, axis labels and font sizes.
	    fig.update_layout(
	        title={"text": "Distribution des classes", "font": {"size": 24}},
	        xaxis_title={"text": "Catégories", "font": {"size": 18}},
	        yaxis_title={"text": "Proportion", "font": {"size": 18}},
	        xaxis_tickangle=0,  # tick labels stay horizontal
	        uniformtext_minsize=12,
	        uniformtext_mode="hide",
	        font=dict(size=14),
	        autosize=True,
	        bargap=0.2,
	        height=600,
	        margin=dict(l=20, r=20, t=50, b=20),
	    )

	    return original_image, true_mask_colored, fig


	# ----Fin Partie EDA

	# ----Partie Explication GradCam


	class SegformerWrapper(nn.Module):
	    """
	    Thin adapter that makes a SegFormer model return a bare logits tensor.

	    Args:
	        model (torch.nn.Module): The wrapped model; its output must expose a
	            ``logits`` attribute.
	    """

	    def __init__(self, model):
	        """
	        Store the wrapped model.

	        Args:
	            model (torch.nn.Module): The model to delegate to.
	        """
	        super().__init__()
	        self.model = model

	    def forward(self, x):
	        """
	        Call the wrapped model and unwrap its output.

	        Args:
	            x (torch.Tensor): Model input.

	        Returns:
	            torch.Tensor: The ``logits`` attribute of the wrapped model's output.
	        """
	        return self.model(x).logits


	class SemanticSegmentationTarget:
	    """
	    GradCAM target that scores one class over a region of interest.

	    Calling the instance on a model output returns the sum of the logits of
	    ``category`` weighted by ``mask``.

	    Args:
	        category (int): Index of the target class.
	        mask (numpy.ndarray): Weight map multiplied with the class logits
	            (a binary mask of the pixels of interest in this file).
	    """

	    def __init__(self, category, mask):
	        """
	        Store the class index and convert the mask to a tensor.

	        Args:
	            category (int): Index of the target class.
	            mask (numpy.ndarray): Weight map for the pixels of interest.
	        """
	        self.category = category
	        self.mask = torch.from_numpy(mask)
	        if torch.cuda.is_available():
	            self.mask = self.mask.cuda()

	    def __call__(self, model_output):
	        """
	        Compute the masked score of the target class.

	        Args:
	            model_output: A logits tensor, a dict with a ``"logits"`` key, or
	                a ``SemanticSegmenterOutput``.

	        Returns:
	            torch.Tensor: Scalar sum of ``logits[category] * mask``.

	        Raises:
	            ValueError: If the output type is unsupported or the logits are
	                neither 3-D nor 4-D.
	        """
	        logits = self._extract_logits(model_output)
	        # The mask was placed on CUDA whenever CUDA exists; align it with the
	        # logits so a model running on another device does not trigger a
	        # device-mismatch error. This is a no-op when both already agree.
	        mask = self.mask.to(logits.device)

	        if logits.dim() == 4:  # [batch, classes, height, width]
	            class_logits = logits[0, self.category, :, :]
	        elif logits.dim() == 3:  # [classes, height, width]
	            class_logits = logits[self.category, :, :]
	        else:
	            raise ValueError(f"Unexpected logits shape: {logits.shape}")
	        return (class_logits * mask).sum()

	    @staticmethod
	    def _extract_logits(model_output):
	        # Return the logits tensor carried by any supported output type.
	        if isinstance(model_output, torch.Tensor):
	            return model_output
	        if isinstance(model_output, dict):
	            return model_output["logits"]
	        if isinstance(
	            model_output, transformers.modeling_outputs.SemanticSegmenterOutput
	        ):
	            return model_output.logits
	        raise ValueError(f"Unexpected model_output type: {type(model_output)}")


	def segformer_reshape_transform_huggingface(tensor, width, height):
	    """
	    Rearrange a token sequence into a channels-first feature map for GradCAM.

	    Args:
	        tensor (torch.Tensor): 3-D tensor whose middle axis has
	            ``height * width`` entries and whose last axis holds the channels.
	        width (int): Width of the resulting map.
	        height (int): Height of the resulting map.

	    Returns:
	        torch.Tensor: The same values laid out as
	        ``(batch, channels, height, width)``.
	    """
	    batch_size = tensor.size(0)
	    channels = tensor.size(2)
	    # (batch, height * width, channels) -> (batch, height, width, channels)
	    grid = tensor.reshape(batch_size, height, width, channels)
	    # Move the channel axis right after the batch axis.
	    return grid.permute(0, 3, 1, 2)


	def explain_model(image_name, category_name):
	    """
	    Explain the SegFormer prediction for one class of one image with GradCAM.

	    The model's own prediction selects the pixels assigned to the class; the
	    GradCAM heat map is restricted to those pixels and overlaid on the image.
	    When the heat map is empty on that region the plain image is shown.

	    Args:
	        image_name (str): File name of the image to explain.
	        category_name (str): Name of the target class (a key of ``label2id``).

	    Returns:
	        matplotlib.figure.Figure: Figure showing the GradCAM overlay (or the
	        original image when nothing is highlighted).
	    """
	    original_image, _, _ = load_and_prepare_images(image_name)
	    rgb_img = np.float32(original_image) / 255
	    img_tensor = transforms.ToTensor()(rgb_img)
	    input_tensor = transforms.Normalize(
	        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
	    )(img_tensor)
	    input_tensor = input_tensor.unsqueeze(0).to(device)
	    wrapped_model = SegformerWrapper(model).to(device)

	    # Plain forward pass, used only to find where the class is predicted.
	    with torch.no_grad():
	        output = wrapped_model(input_tensor)
	        upsampled_logits = nn.functional.interpolate(
	            output, size=input_tensor.shape[-2:], mode="bilinear", align_corners=False
	        )

	    normalized_masks = torch.nn.functional.softmax(upsampled_logits, dim=1).cpu()
	    category = label2id[category_name]
	    mask = normalized_masks[0].argmax(dim=0).numpy()
	    # 1.0 where the model predicts the requested class, 0.0 elsewhere.
	    mask_float = np.float32(mask == category)

	    # NOTE(review): the // 32 assumes the hooked layer's token grid is 1/32
	    # of the input resolution - confirm against the encoder configuration.
	    reshape_transform = partial(
	        segformer_reshape_transform_huggingface,
	        width=img_tensor.shape[2] // 32,
	        height=img_tensor.shape[1] // 32,
	    )
	    target_layers = [wrapped_model.model.segformer.encoder.layer_norm[-1]]
	    # The target multiplies the mask with the raw logits, so the mask is
	    # resized to the logits' (width, height).
	    mask_float_resized = cv2.resize(mask_float, (output.shape[3], output.shape[2]))
	    targets = [SemanticSegmentationTarget(category, mask_float_resized)]
	    cam = GradCAM(
	        model=wrapped_model,
	        target_layers=target_layers,
	        reshape_transform=reshape_transform,
	    )

	    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
	    # Work on the 2-D map of the single image in the batch from here on, so
	    # both branches below hand a 2-D array to cv2.resize. Previously the
	    # fallback branch dropped the batch axis and the later [0] index then
	    # selected a single row of the map.
	    cam_map = grayscale_cam[0]

	    threshold = 0.01  # activations below this value are zeroed
	    thresholded_cam = cam_map.copy()
	    thresholded_cam[cam_map < threshold] = 0
	    peak = np.max(thresholded_cam)
	    if peak > 0:
	        thresholded_cam = thresholded_cam / peak
	    else:
	        # Nothing passes the threshold: fall back to the raw map.
	        thresholded_cam = cam_map

	    resized_cam = cv2.resize(
	        thresholded_cam, (input_tensor.shape[3], input_tensor.shape[2])
	    )
	    # Keep the heat map only where the class is actually predicted.
	    masked_cam = resized_cam * mask_float
	    if np.max(masked_cam) > 0:
	        cam_image = show_cam_on_image(rgb_img, masked_cam, use_rgb=True)
	    else:
	        cam_image = original_image

	    fig, ax = plt.subplots(figsize=(15, 10))
	    ax.imshow(cam_image)
	    ax.axis("off")
	    ax.set_title(f"Masque de chaleur GradCam pour {category_name}", color="white")
	    margin = 0.02  # relative size of the border around the image
	    margin_color = "#0a0f1e"
	    fig.subplots_adjust(left=margin, right=1 - margin, top=1 - margin, bottom=margin)
	    fig.patch.set_facecolor(margin_color)
	    # Close this figure explicitly rather than whichever figure is current.
	    plt.close(fig)

	    return fig


	# ----Fin Partie Explication GradCam

	# ----Partie Data augmentation
	import random


	def change_image():
	    """
	    Pick a random PNG from the sample image folder and open it.

	    The folder is ``<data_folder>/images``, the same location the other
	    loaders in this file read from.

	    Returns:
	        PIL.Image.Image: The randomly selected image.

	    Raises:
	        FileNotFoundError: If the folder contains no ``.png`` file.
	    """
	    image_dir = os.path.join(data_folder, "images")
	    candidates = [f for f in os.listdir(image_dir) if f.endswith(".png")]
	    if not candidates:
	        # random.choice would raise a bare IndexError on an empty list.
	        raise FileNotFoundError(f"No PNG image found in: {image_dir}")
	    return Image.open(os.path.join(image_dir, random.choice(candidates)))


	def apply_augmentation(image, augmentation_names):
	    """
	    Apply the selected augmentations, in the order given, to an image.

	    Args:
	        image: Image to augment; it is converted with ``np.array`` before
	            being passed to the augmentation pipeline.
	        augmentation_names (list of str | None): Names of the augmentations
	            to apply. Unknown names are ignored.

	    Returns:
	        PIL.Image.Image: The augmented image, or ``image`` itself, unchanged,
	        when ``augmentation_names`` is None.
	    """
	    available = {
	        "Horizontal Flip": A.HorizontalFlip(p=1),
	        "Shift Scale Rotate": A.ShiftScaleRotate(p=1),
	        "Random Brightness Contrast": A.RandomBrightnessContrast(p=1),
	        "RGB Shift": A.RGBShift(p=1),
	        "Blur": A.Blur(blur_limit=(5, 7), p=1),
	        "Gaussian Noise": A.GaussNoise(p=1),
	        "Grid Distortion": A.GridDistortion(p=1),
	        "Random Sun": A.RandomSunFlare(p=1),
	    }

	    pixels = np.array(image)

	    if augmentation_names is None:
	        return image

	    # Chain the requested transforms into a single pipeline and run it.
	    chosen = [available[name] for name in augmentation_names if name in available]
	    pipeline = A.Compose(chosen)
	    result = pipeline(image=pixels)
	    return Image.fromarray(result["image"])


	# ---- Fin Partie Data augmentation

	# PNG files offered in the dropdowns. os.listdir returns entries in
	# arbitrary order, so the names are sorted to keep the menus stable.
	image_list = sorted(
	    f for f in os.listdir(os.path.join(data_folder, "images")) if f.endswith(".png")
	)
	# Class names offered in the explanation tab.
	category_list = list(id2label.values())
	# Image shown by default in the data-augmentation tab.
	image_name = "dusseldorf_000012_000019_leftImg8bit.png"
	default_image = os.path.join(data_folder, "images", image_name)

	# Theme fetched from the hub by name.
	my_theme = gr.Theme.from_hub("gstaff/whiteboard")

	# Gradio interface: four tabs, each wired to one of the functions above.
	# NOTE(review): the nesting below was reconstructed from a paste that had
	# lost its indentation; which components sit inside each gr.Row (in
	# particular the distribution plot) should be confirmed against the
	# original file.
	with gr.Blocks(title="Preuve de concept", theme=my_theme) as demo:
	    gr.Markdown("# Projet 10 - Développer une preuve de concept")
	    # Tab 1: class distribution of the selected image (show_eda).
	    with gr.Tab("Distribution"):
	        gr.Markdown("## Distribution des classes Cityscapes")
	        gr.Markdown(
	            "### Visualisation de la distribution de chaque classe selon l'image choisie."
	        )
	        eda_image_input = gr.Dropdown(
	            choices=image_list,
	            label="Sélectionnez une image",
	        )

	        with gr.Row():
	            original_image_output = gr.Image(type="pil", label="Image originale")
	            original_mask_output = gr.Image(type="pil", label="Masque original")
	            class_distribution_plot = gr.Plot(label="Distribution des classes")
	        # Selecting an image refreshes the image, the mask and the plot.
	        eda_image_input.change(
	            fn=show_eda,
	            inputs=eda_image_input,
	            outputs=[
	                original_image_output,
	                original_mask_output,
	                class_distribution_plot,
	            ],
	        )

	    # Tab 2: preview of the data augmentations (change_image / apply_augmentation).
	    with gr.Tab("Data Augmentation"):
	        gr.Markdown("## Visualisation de l'augmentation des données")
	        gr.Markdown(
	            "### Sélectionnez une ou plusieurs augmentations pour l'appliquer à l'image."
	        )
	        gr.Markdown("### Vous pouvez également changer d'image.")

	        with gr.Row():
	            image_display = gr.Image(
	                value=default_image,
	                label="Image",
	                show_download_button=False,
	                interactive=False,
	            )
	            augmented_image = gr.Image(label="Image Augmentée")

	        with gr.Row():
	            change_image_button = gr.Button("Changer image")
	            # These names must match the keys used in apply_augmentation.
	            augmentation_dropdown = gr.Dropdown(
	                choices=[
	                    "Horizontal Flip",
	                    "Shift Scale Rotate",
	                    "Random Brightness Contrast",
	                    "RGB Shift",
	                    "Blur",
	                    "Gaussian Noise",
	                    "Grid Distortion",
	                    "Random Sun",
	                ],
	                label="Sélectionnez une augmentation",
	                multiselect=True,
	            )
	            apply_button = gr.Button("Appliquer l'augmentation")

	        change_image_button.click(fn=change_image, outputs=image_display)

	        apply_button.click(
	            fn=apply_augmentation,
	            inputs=[image_display, augmentation_dropdown],
	            outputs=augmented_image,
	        )

	    # Tab 3: ground truth vs. stored FPN prediction vs. SegFormer (process_image).
	    with gr.Tab("Prédictions"):
	        gr.Markdown("## Comparaison de segmentations d'images Cityscapes")
	        gr.Markdown(
	            "### Sélectionnez une image pour voir la comparaison entre le masque réel, la prédiction FPN (pré-enregistré) et la prédiction du modèle SegFormer."
	        )

	        image_input = gr.Dropdown(choices=image_list, label="Sélectionnez une image")

	        gallery_output = gr.Gallery(
	            label="Résultats de segmentation",
	            show_label=True,
	            elem_id="gallery",
	            columns=[2],
	            rows=[2],
	            object_fit="contain",
	            height="512px",
	            min_width="1024px",
	        )

	        image_input.change(fn=process_image, inputs=image_input, outputs=gallery_output)

	    # Tab 4: GradCAM explanation for a chosen image and class (explain_model).
	    with gr.Tab("Explication SegFormer"):
	        gr.Markdown("## Explication du modèle SegFormer")
	        gr.Markdown(
	            "### La méthode Grad-CAM est une technique populaire de visualisation qui est utile pour comprendre comment un réseau neuronal convolutif a été conduit à prendre une décision de classification. Elle est spécifique à chaque classe, ce qui signifie qu’elle peut produire une visualisation distincte pour chaque classe présente dans l’image."
	        )
	        gr.Markdown(
	            "### NB: Si l'image s'affiche sans masque, c'est que le modèle ne trouve pas de zones significatives pour une catégorie donnée."
	        )

	        with gr.Row():
	            explain_image_input = gr.Dropdown(
	                choices=image_list, label="Sélectionnez une image"
	            )
	            explain_category_input = gr.Dropdown(
	                choices=category_list, label="Sélectionnez une catégorie"
	            )

	        explain_button = gr.Button("Expliquer")
	        explain_output = gr.Plot(label="Explication SegFormer", min_width=200)
	        explain_button.click(
	            fn=explain_model,
	            inputs=[explain_image_input, explain_category_input],
	            outputs=explain_output,
	        )


	# Launch the application
	demo.launch(favicon_path="favicon.ico")