hlydecker committed on
Commit
a4c8bb5
1 Parent(s): d093a76

feature: switch to detectron2

Browse files
Files changed (1) hide show
  1. app.py +18 -56
app.py CHANGED
@@ -1,22 +1,6 @@
1
  """
2
- Using as reference:
3
- - https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512
4
- - https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
5
- - https://huggingface.co/facebook/detr-resnet-50-panoptic
6
- # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
7
- https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_panoptic_segmentation_minimal_example_(with_DetrFeatureExtractor).ipynb
8
- https://arxiv.org/abs/2005.12872
9
- https://arxiv.org/pdf/1801.00868.pdf
10
- Additions
11
- - add shown labels as strings
12
- - show only animal masks (ask an nlp model?)
13
- For next time
14
- - for diff 'confidence' the high conf masks should change....
15
- - colors are not great and should be constant per class? add text?
16
- - Im getting core dumped (segmentation fault) when loading hugging face model.. :()
17
- https://github.com/huggingface/transformers/issues/16939
18
- - cap slider to 95?
19
- - switch between panoptic and semantic?
20
  """
21
 
22
  from transformers import DetrFeatureExtractor, DetrForSegmentation
@@ -25,53 +9,31 @@ import gradio as gr
25
  import numpy as np
26
  import torch
27
  import torchvision
 
28
 
29
  import itertools
30
  import seaborn as sns
31
 
32
- def predict_building_mask(im,
33
- gr_slider_confidence):
34
- image = Image.fromarray(im) # im: numpy array 3d: 480, 640, 3: to PIL Image
35
- image = image.resize((200,200)) # PIL image # could I upsample output instead? better?
36
 
37
- # encoding is a dict with pixel_values and pixel_mask
38
- encoding = feature_extractor(images=image, return_tensors="pt") #pt=Pytorch, tf=TensorFlow
39
- outputs = model(**encoding) # odict with keys: ['logits', 'pred_boxes', 'pred_masks', 'last_hidden_state', 'encoder_last_hidden_state']
40
- logits = outputs.logits # torch.Size([1, 100, 251]); class logits? but why 251?
41
- bboxes = outputs.pred_boxes
42
- masks = outputs.pred_masks # torch.Size([1, 100, 200, 200]); mask logits? for every pixel, score in each of the 100 classes? there is a mask per class
43
 
44
- # keep only the masks with high confidence?--------------------------------
45
- # compute the prob per mask (i.e., class), excluding the "no-object" class (the last one)
46
- prob_per_query = outputs.logits.softmax(-1)[..., :-1].max(-1)[0] # why logits last dim 251?
47
- # threshold the confidence
48
- keep = prob_per_query > gr_slider_confidence/100.0
49
 
50
- # postprocess the mask (numpy arrays)
51
- label_per_pixel = torch.argmax(masks[keep].squeeze(),dim=0).detach().numpy() # from the masks per class, select the highest per pixel
52
- color_mask = np.zeros(image.size+(3,))
53
- palette = itertools.cycle(sns.color_palette())
54
- for lbl in np.unique(label_per_pixel): #enumerate(palette()):
55
- color_mask[label_per_pixel==lbl,:] = np.asarray(next(palette))*255 #color
56
 
57
- # color_mask = np.zeros(image.size+(3,))
58
- # for lbl, color in enumerate(ade_palette()):
59
- # color_mask[label_per_pixel==lbl,:] = color
60
-
61
- # Show image + mask
62
- pred_img = np.array(image.convert('RGB'))*0.25 + color_mask*0.75
63
- pred_img = pred_img.astype(np.uint8)
64
-
65
- return pred_img
66
-
67
- # get models from hugging face
68
- feature_extractor = DetrFeatureExtractor.from_pretrained('facebook/detr-resnet-50-panoptic')
69
- model = DetrForSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic')
70
 
71
  # gradio components -inputs
72
  gr_image_input = gr.inputs.Image()
73
- gr_slider_confidence = gr.inputs.Slider(0,100,5,85,
74
- label='Set confidence threshold for masks')
75
  # gradio outputs
76
  gr_image_output = gr.outputs.Image()
77
 
@@ -79,6 +41,6 @@ gr_image_output = gr.outputs.Image()
79
  gr.Interface(predict_building_mask,
80
  inputs = [gr_image_input,gr_slider_confidence],
81
  outputs = gr_image_output,
82
- title = 'Image segmentation with varying confidence',
83
- description = "A panoptic (semantic+instance) segmentation webapp using DETR (End-to-End Object Detection) model with ResNet-50 backbone").launch()
84
 
 
1
  """
2
+ building-segmentation
3
+ Proof of concept showing the effectiveness of a fine-tuned instance segmentation model for detecting buildings.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
6
  from transformers import DetrFeatureExtractor, DetrForSegmentation
 
9
  import numpy as np
10
  import torch
11
  import torchvision
12
+ import detectron2
13
 
14
  import itertools
15
  import seaborn as sns
16
 
17
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

# Model configuration is built once at import time instead of on every request.
cfg = get_cfg()
cfg.MODEL.WEIGHTS = "model_weights/chatswood_buildings_poc.pth"
# Keep every detection inside the predictor; the user-selected confidence is
# applied afterwards so the slider never forces the weights to be reloaded.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0
# Fall back to CPU so the app still starts on hosts without a GPU.
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): no architecture config (e.g. a model-zoo YAML) is merged here,
# so the detectron2 defaults must match the checkpoint - confirm against the
# training setup of chatswood_buildings_poc.pth.

_predictor = None


def _get_predictor():
    """Return the shared DefaultPredictor, creating it on first use."""
    global _predictor
    if _predictor is None:
        _predictor = DefaultPredictor(cfg)
    return _predictor


def segment_buildings(input_image, confidence=0.7):
    """Draw predicted building instances on top of the input image.

    Args:
        input_image: RGB image as an (H, W, 3) numpy array, as delivered by
            the Gradio image input.
        confidence: Minimum detection score (0-1) an instance needs in order
            to be drawn. Defaults to 0.7, the value that was previously
            hard-coded.

    Returns:
        The visualised image as an RGB numpy array.

    Raises:
        ValueError: If no image was supplied.
    """
    if input_image is None:
        raise ValueError("segment_buildings requires an input image")

    # DefaultPredictor takes BGR input, whereas Gradio supplies RGB.
    bgr_image = np.ascontiguousarray(input_image[:, :, ::-1])
    outputs = _get_predictor()(bgr_image)

    instances = outputs["instances"].to("cpu")
    instances = instances[instances.scores > float(confidence)]

    # cfg.DATASETS.TRAIN may be empty because no training config is merged
    # above; indexing it unconditionally would raise IndexError.
    dataset_name = cfg.DATASETS.TRAIN[0] if cfg.DATASETS.TRAIN else "__unused"
    metadata = MetadataCatalog.get(dataset_name)

    # Visualizer consumes and produces RGB, so no channel flip is needed here.
    visualizer = Visualizer(input_image, metadata, scale=1.2)
    output = visualizer.draw_instance_predictions(instances)
    return output.get_image()
 
 
 
 
 
 
 
 
32
 
33
  # gradio components -inputs
34
  gr_image_input = gr.inputs.Image()
35
+ gr_slider_confidence = gr.inputs.Slider(0,1,.1,.7,
36
+ label='Set confidence threshold % for masks')
37
  # gradio outputs
38
  gr_image_output = gr.outputs.Image()
39
 
 
41
  gr.Interface(predict_building_mask,
42
  inputs = [gr_image_input,gr_slider_confidence],
43
  outputs = gr_image_output,
44
+ title = 'Building Segmentation',
45
+ description = "An instance segmentation webapp using DETR (End-to-End Object Detection) model with MaskRCNN-101 backbone").launch()
46