mattb512 committed on
Commit
1b7e337
1 Parent(s): b2f5c8e

switch to nvidia seg model

Browse files
Files changed (3) hide show
  1. .gitignore +7 -1
  2. app.py +88 -35
  3. app.pybak +54 -0
.gitignore CHANGED
@@ -1 +1,7 @@
1
- .venv/**
 
 
 
 
 
 
 
1
+ .venv/**
2
+ datasets/**
3
+ .DS_Store
4
+ dataset.py
5
+ out.jpeg
6
+ out2.jpeg
7
+ __pycache__
app.py CHANGED
@@ -1,42 +1,95 @@
1
- import gradio as gr
2
- import fastai.vision.all as fv
3
  from PIL import Image, ImageDraw
4
- import skimage
 
 
5
  import os
6
 
7
- learn = fv.load_learner("model.pkl")
8
-
9
- def call(image, step_size:int=100, blocks:int=4):
10
- # print(image)
11
- original_image = Image.fromarray(image).resize((400,400))
12
-
13
- image = Image.new(mode='RGB', size=(step_size*blocks, step_size*blocks)) #, color=255
14
-
15
- draw = ImageDraw.Draw(image)
16
- for (x,y) in [ (x,y) for x in range(0, blocks * step_size, step_size) for y in range(0, blocks * step_size, step_size)]:
17
- cropped_image = original_image.crop((x, y, x+step_size, y+step_size))
18
- image.paste(cropped_image, (x,y))
19
- prediction = learn.predict(cropped_image)
20
- print(prediction)
21
- marker = f"{prediction[0][0].upper()} {prediction[2][prediction[1].item()].item()*100:.0f}"
22
- position = (x+10, y+10)
23
-
24
- bbox = draw.textbbox(position, marker, font=None)
25
- draw.rectangle(bbox, fill="white")
26
- draw.text(position, marker, font=None, fill="black")
27
-
28
- draw = ImageDraw.Draw(image)
29
- for x in range(0, blocks * step_size, step_size):
30
- # vertical line
31
- line = ((x, 0), (x, blocks * step_size))
32
- draw.line(line, fill=128, width=3)
33
-
34
- # horizontal line
35
- line = ((0, x), (blocks * step_size, x))
36
- draw.line(line, fill=128, width=3)
37
-
38
- return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
 
40
 
41
  title = "Traffic Light Detector"
42
  description = "Experiment traffic light detection to evaluate the value of captcha security controls"
 
1
+ from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
 
2
  from PIL import Image, ImageDraw
3
+ import numpy as np
4
+ from torch import nn
5
+ import gradio as gr
6
  import os
7
 
8
# Hugging Face Hub checkpoint id, named once so the pre-processor and the
# model can never be loaded from two different checkpoints by accident.
MODEL_CHECKPOINT = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"

# Both objects are created once, at import time, and reused by every request.
feature_extractor = SegformerFeatureExtractor.from_pretrained(MODEL_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(MODEL_CHECKPOINT)
10
+
11
def cityscapes_palette():
    """Return the Cityscapes colour palette.

    Returns:
        A new list of 19 ``[R, G, B]`` lists. Entry ``i`` is the colour used
        for class index ``i`` (index 6 is the one ``call`` paints for traffic
        lights).
    """
    rgb_rows = (
        (128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156),
        (190, 153, 153), (153, 153, 153), (250, 170, 30), (220, 220, 0),
        (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60),
        (255, 0, 0), (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100),
        (0, 0, 230), (119, 11, 32),
    )
    # Hand out fresh mutable lists so callers cannot corrupt the table.
    return [list(rgb) for rgb in rgb_rows]
18
+
19
def cityscapes_classes():
    """Return the Cityscapes class names.

    Returns:
        A new list of the 19 class-name strings, ordered by class index.
    """
    names = (
        'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
        'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
        'bicycle',
    )
    return list(names)
27
+
28
def annotation(image: "Image.Image", color_seg: np.ndarray, *,
               blocks: int = 4, step_size: int = 256,
               threshold: int = 1_000_000):
    """Outline every grid square of *image* that holds enough mask colour.

    The image is treated as a ``blocks`` x ``blocks`` grid of squares that are
    ``step_size`` pixels on each side. For every square, all channel values of
    *color_seg* inside it are added up; when that total is strictly greater
    than *threshold*, the square is reported on stdout and a rectangle is
    drawn around it on *image* (in place).

    Args:
        image: PIL image to draw on. Must measure ``blocks * step_size``
            pixels on both sides.
        color_seg: Colour mask of shape
            ``(blocks * step_size, blocks * step_size, 3)``; pixels that are
            not part of the mask are expected to be zero.
        blocks: Number of squares per grid row and column.
        step_size: Edge length of one square, in pixels.
        threshold: Minimum summed mask value (exclusive) for a square to be
            outlined.

    Returns:
        The ``(x, y)`` top-left corners of the outlined squares, in visiting
        order (x outer, y inner). Empty when nothing was outlined.

    Raises:
        AssertionError: If *image* or *color_seg* does not match the grid
            size (only when assertions are enabled).
    """
    edge = blocks * step_size
    assert image.size == (edge, edge), f"image must be {edge}x{edge}"
    assert color_seg.shape == (edge, edge, 3), \
        f"color_seg must have shape ({edge}, {edge}, 3)"

    # Collapse the colour channels once; this does not depend on the square
    # being inspected, so it must not be recomputed inside the loop.
    reduced_seg = color_seg.sum(axis=2)

    offsets = range(0, edge, step_size)
    flagged = [
        (x, y)
        for x in offsets
        for y in offsets
        # Rows are indexed by y and columns by x.
        if reduced_seg[y:y + step_size, x:x + step_size].sum() > threshold
    ]

    if flagged:
        draw = ImageDraw.Draw(image)
        for x, y in flagged:
            print("light found at square ", x, y)
            draw.rectangle([(x, y), (x + step_size, y + step_size)],
                           outline=128, width=3)
    return flagged
48
+
49
def call(image: "Image.Image"):
    """Segment *image* and return a copy with traffic lights highlighted.

    The input is scaled to 1024x1024, run through the SegFormer model, and
    every pixel classified as "traffic light" is tinted with that class's
    palette colour. Grid squares containing enough traffic-light pixels are
    then outlined by ``annotation``.

    Args:
        image: The picture to analyse, either a PIL image or a numpy array
            (the form the earlier Gradio handler received).

    Returns:
        A new 1024x1024 RGB PIL image: the input blended 50/50 with the
        traffic-light mask, with matching grid squares outlined.
    """
    # Local import: the file only pulls ``nn`` out of torch at module level.
    import torch

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # Bug fix: the original body ignored ``image`` and always processed the
    # module-level ``original_image``. Forcing RGB keeps the blend below
    # valid for RGBA or greyscale uploads.
    resized_image = image.convert("RGB").resize((1024, 1024))

    inputs = feature_extractor(images=resized_image, return_tensors="pt")
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits come out at (batch, num_labels, height/4, width/4); scale them
    # back up to the resized image before picking a class per pixel.
    interpolated_logits = nn.functional.interpolate(
        outputs.logits,
        size=resized_image.size[::-1],  # (height, width)
        mode='bilinear',
        align_corners=False,
    )
    seg = interpolated_logits.argmax(dim=1)[0].cpu().numpy()

    # Paint only the traffic-light class; everything else stays black (zero),
    # which is what the per-square sum in ``annotation`` relies on.
    traffic_light = cityscapes_classes().index('traffic light')
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
    color_seg[seg == traffic_light, :] = cityscapes_palette()[traffic_light]

    # The mask stays in RGB: PIL images are RGB, so the earlier RGB->BGR flip
    # only swapped the red and blue channels of the overlay.
    blended = np.array(resized_image) * 0.5 + color_seg * 0.5
    out_im_file = Image.fromarray(blended.astype(np.uint8))
    annotation(out_im_file, color_seg)

    return out_im_file
87
+
88
# NOTE(review): module-level side effect - the sample image is opened at
# import time, and the path is resolved against the current working
# directory, so importing this module fails when it is started elsewhere.
original_image = Image.open("./examples/1.jpg")
print(f"{np.array(original_image).shape=}") # eg 729, 1000, 3

# Manual smoke test, kept disabled:
# out = call(original_image)
# out.save("out2.jpeg")

# Title and description strings; presumably passed to the Gradio interface
# defined outside this view - confirm.
title = "Traffic Light Detector"
description = "Experiment traffic light detection to evaluate the value of captcha security controls"
app.pybak ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fastai.vision.all as fv
3
+ from PIL import Image, ImageDraw
4
+ import skimage
5
+ import os
6
+
7
+ learn = fv.load_learner("model.pkl")
8
+
9
def call(image, step_size:int=100, blocks:int=4):
    """Classify every grid tile of *image* and return an annotated copy.

    The input is scaled to a square of ``step_size * blocks`` pixels and cut
    into ``blocks`` x ``blocks`` tiles. Each tile is passed to the fastai
    learner; the first letter of the predicted label and its confidence (in
    percent) are written into the tile's top-left corner, and grid lines are
    drawn over the result.

    Args:
        image: Input picture as a numpy array (converted with
            ``Image.fromarray``).
        step_size: Edge length of one tile, in pixels.
        blocks: Number of tiles per row and column.

    Returns:
        A new RGB PIL image of ``step_size * blocks`` pixels per side.
    """
    edge = step_size * blocks
    # Bug fix: the input used to be resized to a fixed 400x400, so any
    # non-default ``step_size``/``blocks`` tiled only part of the picture
    # (or ran past its edge). With the defaults the size is still 400x400.
    original_image = Image.fromarray(image).resize((edge, edge))

    canvas = Image.new(mode='RGB', size=(edge, edge))
    draw = ImageDraw.Draw(canvas)
    offsets = range(0, edge, step_size)

    for x in offsets:
        for y in offsets:
            cropped_image = original_image.crop((x, y, x + step_size, y + step_size))
            canvas.paste(cropped_image, (x, y))

            # Indexed below as (label, class-index tensor, probabilities).
            prediction = learn.predict(cropped_image)
            print(prediction)
            marker = f"{prediction[0][0].upper()} {prediction[2][prediction[1].item()].item()*100:.0f}"
            position = (x + 10, y + 10)

            # White box behind the text keeps it readable on any background.
            bbox = draw.textbbox(position, marker, font=None)
            draw.rectangle(bbox, fill="white")
            draw.text(position, marker, font=None, fill="black")

    # Grid lines go on last so tiles pasted later cannot cover them; the same
    # draw handle is reused instead of creating a second one.
    for offset in offsets:
        draw.line(((offset, 0), (offset, edge)), fill=128, width=3)  # vertical
        draw.line(((0, offset), (edge, offset)), fill=128, width=3)  # horizontal

    return canvas
39
+
40
+
41
# Texts shown by the Gradio interface.
title = "Traffic Light Detector"
description = "Experiment traffic light detection to evaluate the value of captcha security controls"

# Example images are resolved relative to this file, not the working directory.
_app_dir = os.path.dirname(__file__)
_example_paths = [
    os.path.join(_app_dir, relative_path)
    for relative_path in ("examples/1.jpg", "examples/2.jpg")
]

# Image-in / image-out web UI around ``call``; launched as soon as the module
# is executed.
iface = gr.Interface(
    fn=call,
    inputs="image",
    outputs="image",
    title=title,
    description=description,
    examples=_example_paths,
    thumbnail="thumbnail.webp",
)
iface.launch()