Spaces:
Runtime error
Runtime error
sfmig
committed on
Commit
•
562224f
1
Parent(s):
d14d564
added confidence of segmentation from notebook detr
Browse files
app.py
CHANGED
@@ -3,6 +3,13 @@ Using as reference:
|
|
3 |
- https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512
|
4 |
- https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
|
5 |
- https://huggingface.co/facebook/detr-resnet-50-panoptic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
"""
|
7 |
|
8 |
from transformers import DetrFeatureExtractor, DetrForSegmentation
|
@@ -168,26 +175,31 @@ def ade_palette():
|
|
168 |
[102, 255, 0],
|
169 |
[92, 0, 255],
|
170 |
]
|
|
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
# gradio components
|
176 |
-
input = gr.inputs.Image()
|
177 |
-
output = gr.outputs.Image()
|
178 |
-
|
179 |
-
def predict_animal_mask(im):
|
180 |
image = Image.fromarray(im) # im: numpy array 3d: 480, 640, 3: to PIL Image
|
181 |
image = image.resize((200,200)) # PIL image # could I upsample output instead? better?
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
186 |
bboxes = outputs.pred_boxes
|
187 |
-
masks = outputs.pred_masks # torch.Size([1, 100, 200, 200])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
# postprocess the mask (numpy arrays)
|
190 |
-
label_per_pixel = torch.argmax(masks.squeeze(),dim=0).detach().numpy()
|
191 |
color_mask = np.zeros(image.size+(3,))
|
192 |
for lbl, color in enumerate(ade_palette()):
|
193 |
color_mask[label_per_pixel==lbl,:] = color
|
@@ -198,12 +210,23 @@ def predict_animal_mask(im):
|
|
198 |
|
199 |
return pred_img
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
####################################################
|
203 |
# Create user interface and launch
|
204 |
gr.Interface(predict_animal_mask,
|
205 |
-
inputs =
|
206 |
-
outputs =
|
207 |
title = 'Animals* segmentation in images',
|
208 |
description = "An animal* segmentation image webapp using DETR (End-to-End Object Detection) model with ResNet-50 backbone").launch()
|
209 |
|
|
|
3 |
- https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512
|
4 |
- https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
|
5 |
- https://huggingface.co/facebook/detr-resnet-50-panoptic
|
6 |
+
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
|
7 |
+
|
8 |
+
https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_panoptic_segmentation_minimal_example_(with_DetrFeatureExtractor).ipynb
|
9 |
+
|
10 |
+
Additions
|
11 |
+
- add shown labels as strings
|
12 |
+
- show only animal masks (ask an nlp model?)
|
13 |
"""
|
14 |
|
15 |
from transformers import DetrFeatureExtractor, DetrForSegmentation
|
|
|
175 |
[102, 255, 0],
|
176 |
[92, 0, 255],
|
177 |
]
|
178 |
+
|
179 |
|
180 |
+
def predict_animal_mask(im,
|
181 |
+
flag_high_confidence):
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
image = Image.fromarray(im) # im: numpy array 3d: 480, 640, 3: to PIL Image
|
183 |
image = image.resize((200,200)) # PIL image # could I upsample output instead? better?
|
184 |
|
185 |
+
# encoding is a dict with pixel_values and pixel_mask
|
186 |
+
encoding = feature_extractor(images=image, return_tensors="pt") #pt=Pytorch, tf=TensorFlow
|
187 |
+
outputs = model(**encoding) # odict with keys: ['logits', 'pred_boxes', 'pred_masks', 'last_hidden_state', 'encoder_last_hidden_state']
|
188 |
+
logits = outputs.logits # torch.Size([1, 100, 251]); why 251?
|
189 |
bboxes = outputs.pred_boxes
|
190 |
+
masks = outputs.pred_masks # torch.Size([1, 100, 200, 200]); for every pixel, score in each of the 100 classes? there is a mask per class
|
191 |
+
|
192 |
+
# keep only the masks with high confidence?--------------------------------
|
193 |
+
# compute the highest class probability per query (one predicted mask per query), excluding the "no-object" class (the last one)
|
194 |
+
if flag_high_confidence:
|
195 |
+
prob_per_query = outputs.logits.softmax(-1)[..., :-1].max(-1)[0] # why logits last dim 251?
|
196 |
+
# threshold the confidence
|
197 |
+
keep = prob_per_query > 0.85
|
198 |
+
else:
|
199 |
+
keep = torch.ones(outputs.logits.shape[0:2], dtype=torch.bool)
|
200 |
|
201 |
# postprocess the mask (numpy arrays)
|
202 |
+
label_per_pixel = torch.argmax(masks[keep].squeeze(),dim=0).detach().numpy() # from the masks per class, select the highest per pixel
|
203 |
color_mask = np.zeros(image.size+(3,))
|
204 |
for lbl, color in enumerate(ade_palette()):
|
205 |
color_mask[label_per_pixel==lbl,:] = color
|
|
|
210 |
|
211 |
return pred_img
|
212 |
|
213 |
+
#######################################
|
214 |
+
# get models from hugging face
|
215 |
+
feature_extractor = DetrFeatureExtractor.from_pretrained('facebook/detr-resnet-50-panoptic')
|
216 |
+
model = DetrForSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic')
|
217 |
+
|
218 |
+
# gradio components - inputs
|
219 |
+
gr_image_input = gr.inputs.Image()
|
220 |
+
gr_checkbox_high_confidence = gr.inputs.Checkbox(False,
|
221 |
+
label='disply high confidence only?')
|
222 |
+
# gradio outputs
|
223 |
+
gr_image_output = gr.outputs.Image()
|
224 |
|
225 |
####################################################
|
226 |
# Create user interface and launch
|
227 |
gr.Interface(predict_animal_mask,
|
228 |
+
inputs = [gr_image_input,gr_checkbox_high_confidence],
|
229 |
+
outputs = gr_image_output,
|
230 |
title = 'Animals* segmentation in images',
|
231 |
description = "An animal* segmentation image webapp using DETR (End-to-End Object Detection) model with ResNet-50 backbone").launch()
|
232 |
|