martintmv committed on
Commit
ea49ded
•
1 Parent(s): 2644f9b

import app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -0
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from dataclasses import dataclass
3
+ from typing import Any, List, Dict, Optional, Union, Tuple
4
+
5
+ import cv2
6
+ import torch
7
+ import requests
8
+ import numpy as np
9
+ from PIL import Image
10
+ import matplotlib.pyplot as plt
11
+ from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
12
+ import gradio as gr
13
+
14
@dataclass
class BoundingBox:
    """Axis-aligned rectangle described by its minimum and maximum corners."""

    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[int]:
        """Return the coordinates as ``[xmin, ymin, xmax, ymax]``."""
        # The fields are ints, so the list is annotated as ints as well.
        return [self.xmin, self.ymin, self.xmax, self.ymax]
24
+
25
@dataclass
class DetectionResult:
    """A single detection: confidence score, label, box and an optional mask."""

    score: float
    label: str
    box: BoundingBox
    # Attached after segmentation; stays None until a mask is assigned.
    # (np.ndarray is the array type; np.array is only the factory function.)
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        """Build a result from a mapping with 'score', 'label' and 'box' keys.

        ``detection_dict['box']`` must itself provide 'xmin', 'ymin', 'xmax'
        and 'ymax'. The mask is left unset.

        Raises:
            KeyError: if any of the expected keys is missing.
        """
        box = detection_dict['box']
        return cls(
            score=detection_dict['score'],
            label=detection_dict['label'],
            box=BoundingBox(
                xmin=box['xmin'],
                ymin=box['ymin'],
                xmax=box['xmax'],
                ymax=box['ymax'],
            ),
        )
40
+
41
def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
    """Draw every detection's box, caption and mask outline on the image.

    The input is treated as RGB (PIL image or array). Drawing is done on a
    BGR conversion of it, and the result is converted back to RGB before
    being returned, so the caller's array is not modified.
    """
    pixels = image if not isinstance(image, Image.Image) else np.array(image)
    canvas = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    for det in detection_results:
        # One random colour per detection, shared by box, caption and outline.
        colour = np.random.randint(0, 256, size=3).tolist()
        box = det.box

        cv2.rectangle(canvas, (box.xmin, box.ymin), (box.xmax, box.ymax), colour, 2)
        cv2.putText(
            canvas,
            f'{det.label}: {det.score:.2f}',
            (box.xmin, box.ymin - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            colour,
            2,
        )

        if det.mask is None:
            continue

        # Scale the mask before contour extraction, then trace its outer edges.
        scaled = (det.mask * 255).astype(np.uint8)
        outlines, _ = cv2.findContours(scaled, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(canvas, outlines, -1, colour, 2)

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
62
+
63
def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
    """Return the image with all detections drawn on it (delegates to ``annotate``)."""
    return annotate(image, detections)
66
+
67
def load_image(image: Union[str, Image.Image]) -> Image.Image:
    """Return ``image`` as an RGB PIL image.

    Args:
        image: an already-loaded PIL image, a local file path, or a string
            starting with "http", which is fetched over the network.

    Returns:
        A PIL image in "RGB" mode.

    Raises:
        requests.RequestException: if the download times out or the server
            answers with an error status.
    """
    if not isinstance(image, str):
        return image.convert("RGB")

    if image.startswith("http"):
        from io import BytesIO

        # A timeout stops a stalled server from hanging the request forever,
        # and the status check stops an error page being handed to PIL.
        response = requests.get(image, timeout=30)
        response.raise_for_status()
        # response.content is the decoded body, unlike the raw socket stream.
        return Image.open(BytesIO(response.content)).convert("RGB")

    # convert() returns a new image, so the file can be closed right away.
    with Image.open(image) as handle:
        return handle.convert("RGB")
75
+
76
def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
    """Collect every detection's ``xyxy`` box, wrapped in one outer list.

    The result has the shape ``[[box_0, box_1, ...]]``: a single-element list
    holding all boxes.
    """
    return [[result.box.xyxy for result in detection_results]]
82
+
83
def _mask_to_polygon(mask: np.ndarray) -> List[List[int]]:
    """Return the vertices of the largest external contour (empty list if none)."""
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return []
    largest = max(contours, key=cv2.contourArea)
    return largest.reshape(-1, 2).tolist()


def _polygon_to_mask(polygon: List[List[int]], image_shape: Tuple[int, int]) -> np.ndarray:
    """Rasterise a polygon into a uint8 mask of ``image_shape`` (255 inside, 0 outside)."""
    mask = np.zeros(image_shape, dtype=np.uint8)
    if polygon:
        cv2.fillPoly(mask, [np.array(polygon, dtype=np.int32)], color=(255,))
    return mask


def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    """Collapse per-box mask predictions into one binary uint8 mask per box.

    Args:
        masks: rank-4 tensor; axis 1 is averaged away, so the input is read
            as (num_boxes, num_predictions, height, width).
        polygon_refinement: when True, each mask is replaced by the filled
            polygon of its largest external contour.

    Returns:
        One 2-D uint8 array per box. Non-zero means foreground: plain masks
        use the value 1, polygon-refined masks use 255. Consumers should test
        ``mask > 0`` rather than a specific value.
    """
    # The mean is truncated by the uint8 cast, so a pixel survives only when
    # every prediction along axis 1 marks it as foreground.
    masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
    masks = (masks > 0).astype(np.uint8)
    if polygon_refinement:
        # The two polygon helpers were referenced but never defined in this
        # file; they are provided above as private functions.
        for idx, mask in enumerate(masks):
            masks[idx] = _polygon_to_mask(_mask_to_polygon(mask), mask.shape)
    return list(masks)
92
+
93
# Loaded detection pipelines keyed by (model id, device), so the model is
# built once per process instead of once per request.
_DETECTOR_CACHE: Dict[Tuple[str, str], Any] = {}


def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
    """Run zero-shot object detection for ``labels`` on ``image``.

    Args:
        image: the image to search.
        labels: candidate class names; a trailing "." is appended to any
            label that lacks one before it is passed to the detector.
        threshold: forwarded to the pipeline as its score threshold.
        detector_id: model id; defaults to "IDEA-Research/grounding-dino-base".

    Returns:
        One ``DetectionResult`` per pipeline result (masks unset).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"

    cache_key = (detector_id, device)
    object_detector = _DETECTOR_CACHE.get(cache_key)
    if object_detector is None:
        object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)
        _DETECTOR_CACHE[cache_key] = object_detector

    # NOTE(review): presumably the detector expects period-terminated prompts; confirm.
    labels = [label if label.endswith(".") else label + "." for label in labels]
    results = object_detector(image, candidate_labels=labels, threshold=threshold)
    return [DetectionResult.from_dict(result) for result in results]
100
+
101
# Loaded (model, processor) pairs keyed by (model id, device), so the weights
# are read once per process instead of once per request.
_SEGMENTER_CACHE: Dict[Tuple[str, str], Tuple[Any, Any]] = {}


def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
    """Attach a segmentation mask to each detection, prompted by its box.

    Args:
        image: the image the detections were found in.
        detection_results: detections whose ``mask`` is filled in place.
        polygon_refinement: forwarded to ``refine_masks``.
        segmenter_id: model id; defaults to "martintmv/InsectSAM".

    Returns:
        The same ``detection_results`` list, with masks assigned.
    """
    # With nothing detected there are no box prompts to give the model, so
    # return before building an empty box list for the processor.
    if not detection_results:
        return detection_results

    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"

    cache_key = (segmenter_id, device)
    cached = _SEGMENTER_CACHE.get(cache_key)
    if cached is None:
        cached = (
            AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device),
            AutoProcessor.from_pretrained(segmenter_id),
        )
        _SEGMENTER_CACHE[cache_key] = cached
    segmentator, processor = cached

    boxes = get_boxes(detection_results)
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = segmentator(**inputs)
    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes,
    )[0]
    masks = refine_masks(masks, polygon_refinement)

    for detection_result, mask in zip(detection_results, masks):
        detection_result.mask = mask

    return detection_results
117
+
118
def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
    """Load an image, detect the labelled objects in it, then segment each one.

    Returns the loaded image as an array together with the detections, whose
    masks are set by the segmentation step.
    """
    pil_image = load_image(image)
    found = detect(pil_image, labels, threshold, detector_id)
    segmented = segment(pil_image, found, polygon_refinement, segmenter_id)
    return np.array(pil_image), segmented
123
+
124
def extract_insect_masks(image: np.ndarray, detections: List[DetectionResult]) -> List[np.ndarray]:
    """Return the masks of all detections that have one, in detection order.

    ``image`` is accepted but not used.
    """
    collected = []
    for detection in detections:
        if detection.mask is None:
            continue
        collected.append(detection.mask)
    return collected
126
+
127
def put_masks_on_yellow_background(image_shape: Tuple[int, int], masks: List[np.ndarray]) -> np.ndarray:
    """Render all masks as white silhouettes on a yellow RGB canvas.

    Args:
        image_shape: (height, width) of the canvas to create.
        masks: 2-D masks of that same height and width; any non-zero pixel
            counts as foreground, so 0/1 and 0/255 masks both work.

    Returns:
        A (height, width, 3) uint8 RGB array.
    """
    # Images in this app are RGB, where yellow is (255, 255, 0). The previous
    # (0, 255, 255) is yellow only in BGR and rendered as cyan.
    canvas = np.full((image_shape[0], image_shape[1], 3), (255, 255, 0), dtype=np.uint8)
    for mask in masks:
        # Boolean indexing paints all three channels, whatever the mask's
        # non-zero value is.
        canvas[np.asarray(mask) > 0] = 255
    return canvas
134
+
135
def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Return the inclusive bounding box ``(xmin, ymin, xmax, ymax)`` of a 2-D mask.

    Any non-zero pixel counts as foreground. The maxima are the indices of
    the last foreground column and row, so add 1 when using them as slice
    ends.

    Raises:
        ValueError: if the mask has no non-zero pixels.
    """
    rows, cols = np.nonzero(mask)
    if rows.size == 0:
        # Without this check numpy raises an opaque zero-size reduction error.
        raise ValueError("mask has no non-zero pixels; cannot compute its bounding box")
    # Plain ints, matching the annotation.
    return int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())
138
+
139
def extract_and_paste_insect(original_image: np.ndarray, detection: "DetectionResult", background: np.ndarray) -> None:
    """Copy the masked pixels of ``original_image`` onto ``background`` in place.

    Pixels are copied at their own coordinates, so the object lands exactly
    where it sits in the original image.

    Args:
        original_image: (H, W, C) source image.
        detection: supplies the (H, W) mask; non-zero means foreground.
        background: (H, W, C) array that is modified in place.

    A detection with no mask, or with an empty mask, leaves ``background``
    untouched.
    """
    mask = detection.mask
    if mask is None:
        return

    # Comparing with 0 accepts both 0/1 and 0/255 masks. A bitwise NOT of a
    # 0/1 mask is non-zero everywhere, which previously added the background
    # colour on top of the object's pixels.
    foreground = np.asarray(mask) > 0
    if not foreground.any():
        return

    # Full-frame boolean indexing replaces the old crop-and-offset logic. That
    # logic dropped the mask's last row and column, and pasted at the
    # detection box origin, which could misplace the crop or overrun the image.
    background[foreground] = original_image[foreground]
152
+
153
def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
    """Return a yellow canvas the size of ``image`` with each masked insect pasted on.

    Args:
        image: (H, W, 3) RGB source image.
        detections: detections to paste; those without a mask are skipped.

    Returns:
        A new uint8 array shaped like ``image``.
    """
    # Images in this app are RGB, where yellow is (255, 255, 0). The previous
    # (0, 255, 255) is yellow only in BGR and rendered as cyan.
    canvas = np.full_like(image, (255, 255, 0), dtype=np.uint8)
    for detection in detections:
        if detection.mask is not None:
            extract_and_paste_insect(image, detection, canvas)
    return canvas
159
+
160
def draw_classification_boxes(image_with_insects: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
    """Draw each detection's box with a filled caption tag above it.

    The image is modified in place and also returned. Each detection gets its
    own random colour for the box and tag; the caption text is black.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for det in detections:
        caption = f"{det.label}: {det.score:.2f}"
        box = det.box
        colour = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(image_with_insects, (box.xmin, box.ymin), (box.xmax, box.ymax), colour, 2)

        # Size the filled tag to the caption so the text sits on a solid patch.
        (text_w, text_h), baseline = cv2.getTextSize(caption, font, 0.5, 2)
        tag_top_left = (box.xmin, box.ymin - text_h - baseline)
        tag_bottom_right = (box.xmin + text_w, box.ymin)
        cv2.rectangle(image_with_insects, tag_top_left, tag_bottom_right, colour, thickness=cv2.FILLED)
        cv2.putText(image_with_insects, caption, (box.xmin, box.ymin - baseline), font, 0.5, (0, 0, 0), 2)
    return image_with_insects
171
+
172
def process_image(image):
    """Run the full pipeline on one image and return the three output views.

    Returns, in order: the annotated original, the mask silhouettes on a flat
    background, and the cut-out insects on a flat background with labelled
    boxes drawn over them.
    """
    insect_labels = ["ant", "beetle", "butterfly", "caterpillar", "dragonfly"]
    rgb_image, found = grounded_segmentation(image, insect_labels, threshold=0.3, polygon_refinement=True)

    annotated_view = plot_detections(rgb_image, found)

    masks_view = put_masks_on_yellow_background(
        rgb_image.shape[:2],
        extract_insect_masks(rgb_image, found),
    )

    insects_view = draw_classification_boxes(
        create_yellow_background_with_insects(rgb_image, found),
        found,
    )

    return annotated_view, masks_view, insects_view
182
+
183
# Build the web UI (one PIL image in, three numpy images out) and start serving.
gr.Interface(
    process_image,
    gr.Image(type="pil"),
    [gr.Image(type="numpy") for _ in range(3)],
    title="Insect Detection and Masking",
).launch()