feature: switch to detectron2
app.py
CHANGED
@@ -1,22 +1,6 @@
 """
-
-
-https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
-https://huggingface.co/facebook/detr-resnet-50-panoptic
-# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
-https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_panoptic_segmentation_minimal_example_(with_DetrFeatureExtractor).ipynb
-https://arxiv.org/abs/2005.12872
-https://arxiv.org/pdf/1801.00868.pdf
-Additions
-- add shown labels as strings
-- show only animal masks (ask an NLP model?)
-For next time
-- for different 'confidence' values the high-confidence masks should change
-- colors are not great and should be constant per class? add text?
-- I'm getting core dumped (segmentation fault) when loading the Hugging Face model
-  https://github.com/huggingface/transformers/issues/16939
-- cap slider to 95?
-- switch between panoptic and semantic?
+building-segmentation
+Proof of concept showing the effectiveness of a fine-tuned instance segmentation model for detecting buildings.
 """
 
 from transformers import DetrFeatureExtractor, DetrForSegmentation
@@ -25,43 +9,35 @@ import gradio as gr
 import numpy as np
 import torch
 import torchvision
+import detectron2
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import MetadataCatalog
 
 import itertools
 import seaborn as sns
 
-
-def predict_building_mask(im, gr_slider_confidence):
-    image = Image.fromarray(im)       # im: 3-D numpy array, e.g. (480, 640, 3); convert to a PIL Image
-    image = image.resize((200, 200))  # PIL Image; could the output be upsampled instead? better?
-
-    encoding = feature_extractor(images=image, return_tensors="pt")  # pt = PyTorch, tf = TensorFlow
-    outputs = model(**encoding)  # dict with keys: ['logits', 'pred_boxes', 'pred_masks', 'last_hidden_state', 'encoder_last_hidden_state']
-    logits = outputs.logits      # torch.Size([1, 100, 251]): class logits per query, but why 251?
-    bboxes = outputs.pred_boxes
-    masks = outputs.pred_masks   # torch.Size([1, 100, 200, 200]): mask logits, one mask per query
-
-    # threshold the confidence
-    keep = prob_per_query > gr_slider_confidence / 100.0
-
-    label_per_pixel = torch.argmax(masks[keep].squeeze(), dim=0).detach().numpy()  # from the kept masks, pick the highest-scoring one per pixel
-    color_mask = np.zeros(image.size + (3,))
-    palette = itertools.cycle(sns.color_palette())
-    for lbl in np.unique(label_per_pixel):
-        color_mask[label_per_pixel == lbl, :] = np.asarray(next(palette)) * 255  # one color per label
-
-    pred_img = np.array(image.convert('RGB')) * 0.25 + color_mask * 0.75
-    pred_img = pred_img.astype(np.uint8)
-
-    return pred_img
-
-# get models from Hugging Face
-feature_extractor = DetrFeatureExtractor.from_pretrained('facebook/detr-resnet-50-panoptic')
-model = DetrForSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic')
+cfg = get_cfg()
+
+def segment_buildings(input_image, confidence):
+
+    cfg.MODEL.WEIGHTS = "model_weights/chatswood_buildings_poc.pth"
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence  # use the slider value as the testing threshold
+    predictor = DefaultPredictor(cfg)
+
+    outputs = predictor(input_image)
+
+    v = Visualizer(input_image[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
+    output = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+    output_image = output.get_image()[:, :, ::-1]
+
+    return output_image
 
 # gradio components - inputs
 gr_image_input = gr.inputs.Image()
-gr_slider_confidence = gr.inputs.Slider(0,
-                                        label='Set confidence threshold for masks')
+gr_slider_confidence = gr.inputs.Slider(0, 1, .1, .7,
+                                        label='Set confidence threshold for masks')
 # gradio outputs
 gr_image_output = gr.outputs.Image()
 
@@ -79,6 +41,6 @@ gr_image_output = gr.outputs.Image()
-gr.Interface(predict_building_mask,
+gr.Interface(segment_buildings,
              inputs=[gr_image_input, gr_slider_confidence],
              outputs=gr_image_output,
-             title='
-             description="
+             title='Building Segmentation',
+             description="An instance segmentation web app using a Detectron2 Mask R-CNN model (ResNet-101 backbone)").launch()
 
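The removed DETR path thresholds on `prob_per_query`, but the line that computes it did not survive in this view, so the gap is left in the diff above. A minimal sketch of the usual recipe from the DETR examples linked in the old docstring, as an assumption rather than the original line: softmax the class logits, drop the trailing "no object" class, and keep each query's best class probability. That trailing slot also answers the old "why 251?" comment: the panoptic checkpoint has 250 labels plus one learned no-object class.

```python
import torch

# Sketch (assumed recipe, not the original line): per-query confidence
# for DETR segmentation outputs.
# logits: [1, num_queries, num_classes + 1]; the last class is "no object".
def query_confidences(logits: torch.Tensor) -> torch.Tensor:
    probs = logits.softmax(-1)[0, :, :-1]  # class probabilities, no-object dropped
    return probs.max(-1).values            # best class probability per query

# e.g. keep = query_confidences(outputs.logits) > gr_slider_confidence / 100.0
```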
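On the detectron2 side, `get_cfg()` alone yields an empty default config, and `DefaultPredictor` needs a base architecture before the fine-tuned weights can load. A sketch of a complete setup, assuming the Mask R-CNN ResNet-101 backbone named in the app description and CPU execution on Spaces; both are assumptions, since the commit shows only the weights and threshold overrides:

```python
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
# Assumed base config: Mask R-CNN with a ResNet-101 FPN backbone,
# per the app description; the commit does not show this line.
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = "model_weights/chatswood_buildings_poc.pth"  # fine-tuned weights from the commit
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.MODEL.DEVICE = "cpu"  # assumption: Spaces CPU hardware
predictor = DefaultPredictor(cfg)
```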
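Two details worth noting in the new `segment_buildings`: `DefaultPredictor` clones the config at construction time, so the predictor must be rebuilt whenever the threshold changes, and rebuilding it on every request is slow. A sketch that constructs the predictor once and instead applies the slider value by filtering the predicted `Instances` by score; the score-filtering approach is a substitution of mine, not the commit's, and it assumes the module-level `predictor` and `cfg` from the sketch above:

```python
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer

def segment_buildings(input_image, confidence):
    # Reuse the module-level predictor; detectron2 expects BGR input by default
    # (cfg.INPUT.FORMAT == "BGR"), while Gradio supplies RGB, hence the flip.
    outputs = predictor(input_image[:, :, ::-1])
    instances = outputs["instances"].to("cpu")
    instances = instances[instances.scores > confidence]  # apply the slider threshold

    v = Visualizer(input_image,  # Visualizer expects RGB
                   MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    return v.draw_instance_predictions(instances).get_image()
```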
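Finally, the Gradio wiring uses the 2.x `gr.inputs`/`gr.outputs` API. `Slider`'s positional arguments are (minimum, maximum, step, default), so the new slider spans 0 to 1 in 0.1 steps with a 0.7 default and emits fractions rather than percentages, which is why the "%" in the original label was misleading. A wiring sketch under those assumptions:

```python
import gradio as gr

# Slider positional args in Gradio 2.x: (minimum, maximum, step, default).
gr.Interface(segment_buildings,
             inputs=[gr.inputs.Image(),
                     gr.inputs.Slider(0, 1, .1, .7,
                                      label='Set confidence threshold for masks')],
             outputs=gr.outputs.Image(),
             title='Building Segmentation').launch()
```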