Spaces:

mattb512
/

trafficlight

Sleeping

App Files Files Community

mattb512 committed on Dec 20, 2023

Commit

1b7e337

•

1 Parent(s): b2f5c8e

switch to nvidia seg model

Browse files

Files changed (3) hide show

.gitignore +7 -1
app.py +88 -35
app.pybak +54 -0

.gitignore CHANGED Viewed

	@@ -1 +1,7 @@
1	- .venv/**

+.venv/**
+datasets/**
+.DS_Store
+dataset.py
+out.jpeg
+out2.jpeg
+__pycache__

app.py CHANGED Viewed

@@ -1,42 +1,95 @@
-import gradio as gr
-import fastai.vision.all as fv
 from PIL import Image, ImageDraw
-import skimage
 import os
-learn = fv.load_learner("model.pkl")
-def call(image, step_size:int=100, blocks:int=4):
-  # print(image)
-  original_image = Image.fromarray(image).resize((400,400))
-  image = Image.new(mode='RGB', size=(step_size*blocks, step_size*blocks)) #, color=255
-  draw = ImageDraw.Draw(image)
-  for (x,y) in [ (x,y) for x in range(0, blocks * step_size, step_size) for y in range(0, blocks * step_size, step_size)]:
-    cropped_image = original_image.crop((x, y, x+step_size, y+step_size))
-    image.paste(cropped_image, (x,y))
-    prediction = learn.predict(cropped_image)
-    print(prediction)
-    marker = f"{prediction[0][0].upper()} {prediction[2][prediction[1].item()].item()*100:.0f}"
-    position = (x+10, y+10)
-    bbox = draw.textbbox(position, marker, font=None)
-    draw.rectangle(bbox, fill="white")
-    draw.text(position, marker, font=None, fill="black")
-  draw = ImageDraw.Draw(image)
-  for x in range(0, blocks * step_size, step_size):
-    # vertical line
-    line = ((x, 0), (x, blocks * step_size))
-    draw.line(line, fill=128, width=3)
-    # horizontal line
-    line = ((0, x), (blocks * step_size, x))
-    draw.line(line, fill=128, width=3)
-  return image
 title = "Traffic Light Detector"
 description = "Experiment traffic light detection to evaluate the value of captcha security controls"

+from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
 from PIL import Image, ImageDraw
+import numpy as np
+from torch import nn
+import gradio as gr
 import os
+# Load the pre-processor and the segmentation model once, at import time, from
+# the same pretrained checkpoint so that `call` can reuse them on every request.
+# NOTE(review): newer transformers releases appear to deprecate
+# SegformerFeatureExtractor in favour of SegformerImageProcessor - confirm
+# against the pinned transformers version before upgrading.
+feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
+model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
+def cityscapes_palette():
+    """Return the colour table used to paint segmentation masks.
+
+    The result is a fresh list of 19 three-integer entries. `call` looks up
+    entry 6 to colour the pixels predicted as label 6.
+
+    NOTE(review): entries are presumably RGB triples ordered like
+    `cityscapes_classes()` - confirm against the model's label mapping.
+    """
+    return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
+            [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0],
+            [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60],
+            [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100],
+            [0, 0, 230], [119, 11, 32]]
+def cityscapes_classes():
+    """Return the 19 Cityscapes class names as a fresh list of strings.
+
+    'traffic light' sits at index 6.
+
+    NOTE(review): list positions presumably match the label ids emitted by
+    the segmentation model and the rows of `cityscapes_palette()` - confirm.
+    """
+    return [
+        'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
+        'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
+        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
+        'bicycle'
+    ]
+def annotation(image:ImageDraw, color_seg:np.array):
+    assert image.size == (1024, 1024)
+    assert color_seg.shape == (1024, 1024, 3)
+    blocks = 4 # 4x4 sub grid
+    step_size = 256 # sub square edge size
+    draw = ImageDraw.Draw(image)
+    sub_square_xy = [(x,y) for x in range(0, blocks * step_size, step_size) for y in range(0, blocks * step_size, step_size)]
+    # print(f"{sub_square_xy=}")
+    for (x,y) in sub_square_xy:
+        reduced_seg = color_seg.sum(axis=2) # collapsing all colors into 1024 x 1024
+        # print(f"{reduced_seg.shape=}")
+        sub_square_seg = reduced_seg[ y:y+step_size, x:x+step_size]
+        # print(f"{sub_square_seg.shape=}, {sub_square_seg.sum()}")
+        if (sub_square_seg.sum() > 1000000):
+            print("light found at square ", x, y)
+            draw.rectangle([(x, y), (x + step_size, y + step_size)], outline=128, width=3)
+def call(image: Image):
+    resized_image = original_image.resize((1024,1024))
+    print(f"{np.array(resized_image).shape=}") # 1024, 1024, 3
+    inputs = feature_extractor(images=resized_image, return_tensors="pt")
+    outputs  = model(**inputs)
+    print(f"{outputs.logits.shape=}") # shape (batch_size, num_labels, height/4, width/4) -> 3, 19, 256 ,256
+    # print(f"{logits}")
+    # First, rescale logits to original image size
+    interpolated_logits =  nn.functional.interpolate(
+        outputs.logits,
+        size=resized_image.size[::-1], # (height, width)
+        mode='bilinear',
+        align_corners=False)
+    print(f"{interpolated_logits.shape=}, {outputs.logits.shape=}") # 1, 19, 1024, 1024
+    # Second, apply argmax on the class dimension
+    seg = interpolated_logits.argmax(dim=1)[0]
+    print(f"{seg.shape=}")
+    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
+    print(f"{color_seg.shape=}")
+    for label, color in enumerate(cityscapes_palette()):
+        if (label == 6): color_seg[seg == label, :] = color
+    # Convert to BGR
+    color_seg = color_seg[..., ::-1]
+    print(f"{color_seg.shape=}")
+    # Show image + mask
+    img = np.array(resized_image) * 0.5 + color_seg * 0.5
+    img = img.astype(np.uint8)
+    out_im_file = Image.fromarray(img)
+    annotation(out_im_file, color_seg)
+    return out_im_file
+# Sample picture opened at import time; importing this module therefore fails
+# if ./examples/1.jpg is missing. It feeds the manual smoke test that is
+# commented out below.
+original_image = Image.open("./examples/1.jpg")
+print(f"{np.array(original_image).shape=}") # eg 729, 1000, 3
+# Manual smoke test: uncomment to run the pipeline once and save the result.
+# out = call(original_image)
+# out.save("out2.jpeg")
+# Texts for the web UI (their use is not visible in this part of the diff).
 title = "Traffic Light Detector"
 description = "Experiment traffic light detection to evaluate the value of captcha security controls"

app.pybak ADDED Viewed

	@@ -0,0 +1,54 @@

+import gradio as gr
+import fastai.vision.all as fv
+from PIL import Image, ImageDraw
+import skimage
+import os
+# Load the exported fastai learner once at import time.
+# NOTE(review): load_learner presumably unpickles the file - only load a
+# model.pkl that comes from a trusted source.
+learn = fv.load_learner("model.pkl")
+def call(image, step_size:int=100, blocks:int=4):
+  # print(image)
+  original_image = Image.fromarray(image).resize((400,400))
+  image = Image.new(mode='RGB', size=(step_size*blocks, step_size*blocks)) #, color=255
+  draw = ImageDraw.Draw(image)
+  for (x,y) in [ (x,y) for x in range(0, blocks * step_size, step_size) for y in range(0, blocks * step_size, step_size)]:
+    cropped_image = original_image.crop((x, y, x+step_size, y+step_size))
+    image.paste(cropped_image, (x,y))
+    prediction = learn.predict(cropped_image)
+    print(prediction)
+    marker = f"{prediction[0][0].upper()} {prediction[2][prediction[1].item()].item()*100:.0f}"
+    position = (x+10, y+10)
+    bbox = draw.textbbox(position, marker, font=None)
+    draw.rectangle(bbox, fill="white")
+    draw.text(position, marker, font=None, fill="black")
+  draw = ImageDraw.Draw(image)
+  for x in range(0, blocks * step_size, step_size):
+    # vertical line
+    line = ((x, 0), (x, blocks * step_size))
+    draw.line(line, fill=128, width=3)
+    # horizontal line
+    line = ((0, x), (blocks * step_size, x))
+    draw.line(line, fill=128, width=3)
+  return image
+# Texts shown in the Gradio interface built below.
+title = "Traffic Light Detector"
+description = "Experiment traffic light detection to evaluate the value of captcha security controls"
+# Build the web UI: one image in, one image out, handled by `call`, with two
+# example pictures resolved relative to this file.
+iface = gr.Interface(fn=call,
+                     inputs="image",
+                     outputs="image",
+                     title=title,
+                     description=description,
+                     examples=[
+                       os.path.join(os.path.dirname(__file__), "examples/1.jpg"),
+                       os.path.join(os.path.dirname(__file__), "examples/2.jpg")
+                     ],
+                     thumbnail="thumbnail.webp")
+# Start the app as soon as the module is executed.
+iface.launch()