Spaces:

martintomov
/

InsectSAM

Running on Zero

InsectSAM / app.py

Martin Tomov

import app.py

ea49ded verified 6 months ago

9.05 kB

	import random
	from dataclasses import dataclass
	from typing import Any, List, Dict, Optional, Union, Tuple

	import cv2
	import torch
	import requests
	import numpy as np
	from PIL import Image
	import matplotlib.pyplot as plt
	from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
	import gradio as gr

	@dataclass
	class BoundingBox:
	    """Axis-aligned rectangle described by its two opposite corners."""

	    # Top-left corner.
	    xmin: int
	    ymin: int
	    # Bottom-right corner.
	    xmax: int
	    ymax: int

	    @property
	    def xyxy(self) -> List[float]:
	        """Return the corners as a flat ``[xmin, ymin, xmax, ymax]`` list."""
	        corners = (self.xmin, self.ymin, self.xmax, self.ymax)
	        return list(corners)

	@dataclass
	class DetectionResult:
	    """One detected object: confidence score, label, box and optional mask."""

	    score: float
	    label: str
	    box: BoundingBox
	    # np.ndarray is the array type; np.array is only the factory function
	    # and is not a valid annotation.
	    mask: Optional[np.ndarray] = None

	    @classmethod
	    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
	        """Build a result from a mapping with 'score', 'label' and 'box' keys.

	        ``detection_dict['box']`` must itself map 'xmin', 'ymin', 'xmax' and
	        'ymax' to the corner coordinates. The mask is left unset (``None``).

	        Raises:
	            KeyError: if any of the required keys is missing.
	        """
	        return cls(
	            score=detection_dict['score'],
	            label=detection_dict['label'],
	            box=BoundingBox(
	                xmin=detection_dict['box']['xmin'],
	                ymin=detection_dict['box']['ymin'],
	                xmax=detection_dict['box']['xmax'],
	                ymax=detection_dict['box']['ymax'],
	            ),
	        )

	def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
	    """Draw every detection's box, caption and mask outline on the image.

	    The input is interpreted as RGB. Drawing is done on a BGR conversion of
	    it, and the result is converted back to RGB before being returned.
	    """
	    rgb_pixels = np.array(image) if isinstance(image, Image.Image) else image
	    canvas = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

	    for det in detection_results:
	        # One random colour per detection, shared by box, caption and outline.
	        colour = np.random.randint(0, 256, size=3).tolist()
	        left, top = det.box.xmin, det.box.ymin

	        cv2.rectangle(canvas, (left, top), (det.box.xmax, det.box.ymax), colour, 2)
	        cv2.putText(canvas, f'{det.label}: {det.score:.2f}', (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)

	        if det.mask is None:
	            continue

	        # Outline only the outer boundary of each masked region.
	        scaled_mask = (det.mask * 255).astype(np.uint8)
	        outlines, _ = cv2.findContours(scaled_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        cv2.drawContours(canvas, outlines, -1, colour, 2)

	    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)

	def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
	    """Return the image with all detections drawn on it (delegates to ``annotate``)."""
	    return annotate(image, detections)

	def load_image(image: Union[str, Image.Image]) -> Image.Image:
	    """Return ``image`` as an RGB PIL image.

	    Args:
	        image: an ``http://`` / ``https://`` URL, a local file path, or an
	            already-loaded PIL image.

	    Returns:
	        The image converted to RGB mode.

	    Raises:
	        requests.RequestException: if a URL cannot be fetched, the request
	            times out, or the server answers with an error status.
	    """
	    from io import BytesIO

	    if not isinstance(image, str):
	        return image.convert("RGB")

	    # Match real URL schemes only, so a local path that merely starts with
	    # "http" (e.g. "http_cache/a.png") is still opened from disk.
	    if image.startswith(("http://", "https://")):
	        # A timeout keeps a stalled server from hanging the request forever.
	        response = requests.get(image, timeout=30)
	        # Fail with the HTTP error instead of handing an error page to PIL.
	        response.raise_for_status()
	        # response.content is transfer-decoded; response.raw is not.
	        return Image.open(BytesIO(response.content)).convert("RGB")

	    return Image.open(image).convert("RGB")

	def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
	    """Collect every detection's ``xyxy`` box, wrapped in one outer list."""
	    return [[result.box.xyxy for result in detection_results]]

	def _mask_to_polygon(mask: np.ndarray) -> List[List[int]]:
	    """Return the vertices of the largest external contour of a binary mask."""
	    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if not contours:
	        # No foreground at all: there is nothing to trace.
	        return []
	    largest = max(contours, key=cv2.contourArea)
	    return largest.reshape(-1, 2).tolist()


	def _polygon_to_mask(polygon: List[List[int]], image_shape: Tuple[int, ...]) -> np.ndarray:
	    """Rasterise a polygon into a uint8 mask: 255 inside, 0 outside."""
	    mask = np.zeros(image_shape, dtype=np.uint8)
	    if polygon:
	        cv2.fillPoly(mask, [np.array(polygon, dtype=np.int32)], color=(255,))
	    return mask


	def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
	    """Collapse a 4-D mask tensor into one 2-D uint8 mask per leading index.

	    Args:
	        masks: 4-D tensor; axis 1 is averaged away, axes 2 and 3 become the
	            rows and columns of each returned mask.
	        polygon_refinement: when true, every mask is replaced by the filled
	            polygon of its largest external contour.

	    Returns:
	        A list of 2-D uint8 arrays. Without refinement the values are 0/1;
	        with refinement they are 0/255.
	    """
	    # NOTE(review): the uint8 cast truncates fractional means, so for boolean
	    # input a pixel survives only when it is set in every entry along axis 1.
	    # Threshold before casting if "set in any entry" was intended.
	    masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
	    masks = (masks > 0).astype(np.uint8)

	    if polygon_refinement:
	        # The helpers live in this block because the names the original code
	        # called (mask_to_polygon / polygon_to_mask) are not defined or
	        # imported anywhere in the visible file.
	        for idx, mask in enumerate(masks):
	            polygon = _mask_to_polygon(mask)
	            masks[idx] = _polygon_to_mask(polygon, mask.shape)

	    return list(masks)

	def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
	    """Run zero-shot object detection for ``labels`` on ``image``.

	    Args:
	        image: the image to search.
	        labels: candidate class names; a trailing "." is appended to any
	            label that lacks one.
	        threshold: score threshold forwarded to the detection pipeline.
	        detector_id: model id for the pipeline; defaults to
	            "IDEA-Research/grounding-dino-base".

	    Returns:
	        One ``DetectionResult`` per detection reported by the pipeline
	        (masks are not filled in here).
	    """
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"
	    # NOTE(review): the pipeline is rebuilt on every call; consider caching
	    # it per detector_id if repeated model loading becomes a bottleneck.
	    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)

	    # Presumably the detector's prompt format wants "."-terminated phrases.
	    labels = [label if label.endswith(".") else f"{label}." for label in labels]

	    results = object_detector(image, candidate_labels=labels, threshold=threshold)
	    return [DetectionResult.from_dict(result) for result in results]

	def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
	    """Attach a segmentation mask to each detection, prompted by its box.

	    Args:
	        image: the image the detections were computed on.
	        detection_results: detections whose boxes prompt the segmenter;
	            their ``mask`` attribute is overwritten in place.
	        polygon_refinement: forwarded to ``refine_masks``.
	        segmenter_id: model id to load; defaults to "martintmv/InsectSAM".

	    Returns:
	        The same ``detection_results`` list, with masks filled in.
	    """
	    # With no boxes there is nothing to prompt the model with; skip the
	    # model load and avoid sending an empty box list to the processor.
	    if not detection_results:
	        return detection_results

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
	    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
	    processor = AutoProcessor.from_pretrained(segmenter_id)

	    boxes = get_boxes(detection_results)
	    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)

	    # Inference only: do not record an autograd graph for the forward pass.
	    with torch.no_grad():
	        outputs = segmentator(**inputs)

	    # [0] selects the masks of the single image in the batch.
	    masks = processor.post_process_masks(
	        masks=outputs.pred_masks,
	        original_sizes=inputs.original_sizes,
	        reshaped_input_sizes=inputs.reshaped_input_sizes,
	    )[0]
	    masks = refine_masks(masks, polygon_refinement)

	    for detection_result, mask in zip(detection_results, masks):
	        detection_result.mask = mask

	    return detection_results

	def grounded_segmentation(
	    image: Union[Image.Image, str],
	    labels: List[str],
	    threshold: float = 0.3,
	    polygon_refinement: bool = False,
	    detector_id: Optional[str] = None,
	    segmenter_id: Optional[str] = None,
	) -> Tuple[np.ndarray, List[DetectionResult]]:
	    """Load the image, detect the labelled objects, then segment each one.

	    Returns:
	        The loaded image as an array, and the detections with masks attached.
	    """
	    loaded = load_image(image)
	    found = detect(loaded, labels, threshold, detector_id)
	    found = segment(loaded, found, polygon_refinement, segmenter_id)
	    return np.array(loaded), found

	def extract_insect_masks(image: np.ndarray, detections: List[DetectionResult]) -> List[np.ndarray]:
	    """Return the masks of all detections that have one (``image`` is unused)."""
	    collected = []
	    for detection in detections:
	        if detection.mask is None:
	            continue
	        collected.append(detection.mask)
	    return collected

	def put_masks_on_yellow_background(image_shape: Tuple[int, int], masks: List[np.ndarray]) -> np.ndarray:
	    """Render the masks as white regions on a yellow RGB canvas.

	    Args:
	        image_shape: ``(height, width)`` of the canvas to create.
	        masks: 2-D masks of that same shape; every non-zero pixel counts as
	            foreground, so both 0/1 and 0/255 masks work.

	    Returns:
	        A ``(height, width, 3)`` uint8 RGB array.
	    """
	    # The rest of the pipeline is RGB, where yellow is (255, 255, 0).
	    # The previous (0, 255, 255) is yellow only in BGR and shows up as cyan.
	    yellow_background = np.full((image_shape[0], image_shape[1], 3), (255, 255, 0), dtype=np.uint8)
	    for mask in masks:
	        # Boolean indexing paints all three channels at once and does not
	        # depend on the mask's value scale.
	        yellow_background[np.asarray(mask) > 0] = 255
	    return yellow_background

	def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
	    """Return ``(xmin, ymin, xmax, ymax)`` of the non-zero pixels of ``mask``.

	    Both maxima are inclusive. A mask with no non-zero pixel raises
	    ``ValueError``, because the minimum of an empty array is undefined.
	    """
	    rows, cols = np.nonzero(mask)
	    left, top = cols.min(), rows.min()
	    right, bottom = cols.max(), rows.max()
	    return left, top, right, bottom

	def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
	    """Copy the masked pixels of one detection onto ``background`` in place.

	    Args:
	        original_image: source image the mask refers to.
	        detection: detection whose ``mask`` selects the pixels to copy; a
	            missing or all-zero mask makes the call a no-op.
	        background: destination image, modified in place. Assumed to have
	            the same height and width as ``original_image`` and the mask.
	    """
	    mask = detection.mask
	    if mask is None or not np.any(mask):
	        # Nothing to copy; an empty mask has no bounding rectangle.
	        return

	    xmin, ymin, xmax, ymax = mask_to_min_max(mask)
	    # The maxima are inclusive, so add one to keep the mask's last row and
	    # column (and to get a non-empty crop for a one-pixel-wide mask).
	    rows = slice(ymin, ymax + 1)
	    cols = slice(xmin, xmax + 1)

	    # Paste at the mask's own location, not at the detection box origin:
	    # the pixels stay where they were in the source image, and the target
	    # window can never fall outside the background.
	    inside = mask[rows, cols] > 0  # works for 0/1 and 0/255 masks alike
	    background[rows, cols][inside] = original_image[rows, cols][inside]

	def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
	    """Return a yellow canvas shaped like ``image`` with every masked insect pasted on.

	    Detections without a mask are skipped. ``image`` is expected to be a
	    3-channel RGB array.
	    """
	    # The pipeline is RGB, where yellow is (255, 255, 0); the previous
	    # (0, 255, 255) is yellow only in BGR and shows up as cyan.
	    yellow_background = np.full_like(image, (255, 255, 0), dtype=np.uint8)
	    for detection in detections:
	        if detection.mask is not None:
	            extract_and_paste_insect(image, detection, yellow_background)
	    return yellow_background

	def draw_classification_boxes(image_with_insects: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
	    """Draw each detection's box and a filled caption tag above it.

	    The image is drawn on in place, and the same array is returned.
	    """
	    for det in detections:
	        caption = f"{det.label}: {det.score:.2f}"
	        left, top = det.box.xmin, det.box.ymin
	        # One random colour per detection, used for the box and the tag.
	        colour = np.random.randint(0, 256, size=3).tolist()

	        cv2.rectangle(image_with_insects, (left, top), (det.box.xmax, det.box.ymax), colour, 2)

	        # Size the filled tag to the rendered caption, then write the
	        # caption in black on top of it.
	        (caption_w, caption_h), baseline = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
	        tag_top_left = (left, top - caption_h - baseline)
	        tag_bottom_right = (left + caption_w, top)
	        cv2.rectangle(image_with_insects, tag_top_left, tag_bottom_right, colour, thickness=cv2.FILLED)
	        cv2.putText(image_with_insects, caption, (left, top - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

	    return image_with_insects

	def process_image(image):
	    """Run detection and segmentation on ``image`` and build the three output views.

	    Returns:
	        A tuple of three arrays: the annotated input image, the masks on a
	        plain background, and the cut-out insects on a plain background with
	        labelled boxes drawn over them.
	    """
	    insect_labels = ["ant", "beetle", "butterfly", "caterpillar", "dragonfly"]
	    source, found = grounded_segmentation(image, insect_labels, threshold=0.3, polygon_refinement=True)

	    annotated_view = plot_detections(source, found)

	    height_width = source.shape[:2]
	    masks_view = put_masks_on_yellow_background(height_width, extract_insect_masks(source, found))

	    cutouts = create_yellow_background_with_insects(source, found)
	    boxed_view = draw_classification_boxes(cutouts, found)

	    return annotated_view, masks_view, boxed_view

	# Build the web UI (one PIL image in, three numpy images out) and start
	# serving it as soon as the module is executed.
	gr.Interface(
	    fn=process_image,
	    inputs=gr.Image(type="pil"),
	    outputs=[gr.Image(type="numpy") for _ in range(3)],
	    title="Insect Detection and Masking",
	).launch()