File size: 5,490 Bytes
1b7e337
968f0c3
1b7e337
 
 
60d260f
94c65ad
43353bb
968f0c3
1b7e337
 
 
8a73c52
 
 
 
 
b403896
8a73c52
 
487a9af
 
1b7e337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487a9af
1b7e337
e4a77bc
1b7e337
487a9af
43353bb
487a9af
 
 
1b7e337
487a9af
8a73c52
487a9af
 
 
b403896
 
8a73c52
 
1b7e337
b403896
 
 
1b7e337
 
8a73c52
1b7e337
 
b403896
487a9af
1b7e337
 
b403896
1b7e337
 
 
 
 
 
 
 
 
 
 
 
 
 
8a73c52
 
1b7e337
 
 
 
 
 
 
8a73c52
43353bb
1b7e337
 
487a9af
 
968f0c3
1b7e337
 
968f0c3
 
 
 
60d260f
 
 
 
 
 
 
487a9af
4704c26
 
 
 
b2f5c8e
 
968f0c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image, ImageDraw
import numpy as np
from torch import nn
import gradio as gr
import os
import torch
import time

# Hugging Face Hub checkpoint shared by the pre-processor and the model.
# Keeping the name in one place guarantees both are loaded from the same
# checkpoint (it used to be spelled out twice).
_CHECKPOINT = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"

feature_extractor = SegformerFeatureExtractor.from_pretrained(_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(_CHECKPOINT)

# Ask for CUDA availability once and reuse the answer below.
_cuda_available = torch.cuda.is_available()
device = 'cuda' if _cuda_available else 'cpu'

print(f"Is CUDA available: {_cuda_available} --> {device=}")
if _cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Move the weights to the selected device; call() sends its inputs to the
# same device before running the model.
model.to(device)

# https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SegFormer/Segformer_inference_notebook.ipynb

def cityscapes_palette():
    """Return the Cityscapes colour table as a list of ``[r, g, b]`` lists.

    There are 19 entries. A new outer list and new inner lists are built on
    every call, so the result can be modified without affecting later calls.
    """
    # Class names in the trailing comments follow the order used by
    # cityscapes_classes().
    rgb_table = (
        (128, 64, 128),   # road
        (244, 35, 232),   # sidewalk
        (70, 70, 70),     # building
        (102, 102, 156),  # wall
        (190, 153, 153),  # fence
        (153, 153, 153),  # pole
        (250, 170, 30),   # traffic light
        (220, 220, 0),    # traffic sign
        (107, 142, 35),   # vegetation
        (152, 251, 152),  # terrain
        (70, 130, 180),   # sky
        (220, 20, 60),    # person
        (255, 0, 0),      # rider
        (0, 0, 142),      # car
        (0, 0, 70),       # truck
        (0, 60, 100),     # bus
        (0, 80, 100),     # train
        (0, 0, 230),      # motorcycle
        (119, 11, 32),    # bicycle
    )
    return [list(rgb) for rgb in rgb_table]

def cityscapes_classes():
    """Return the Cityscapes class names as a new list of strings.

    The list has 19 entries; 'traffic light' sits at index 6.
    """
    names = (
        'road',
        'sidewalk',
        'building',
        'wall',
        'fence',
        'pole',
        'traffic light',
        'traffic sign',
        'vegetation',
        'terrain',
        'sky',
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    )
    return list(names)

def annotation(image: "Image.Image", color_seg: np.ndarray) -> None:
    """Outline, in place, every grid cell of *image* that contains mask pixels.

    The picture is divided into a 4x4 grid of 256-pixel squares. For each
    square, the values of ``color_seg`` (all three channels together) are
    added up; when that total exceeds 100000 the square's coordinates are
    printed and a white, 3-pixel-wide outline is drawn onto ``image``.

    Args:
        image: PIL image to draw on; its ``size`` must be ``(1024, 1024)``.
        color_seg: colour mask of shape ``(1024, 1024, 3)``. Every non-zero
            value counts towards the total of the cell it falls in.

    Raises:
        ValueError: if ``image`` or ``color_seg`` does not have the expected
            size. (Explicit raises are used instead of ``assert`` so the
            checks survive ``python -O``.)
    """
    blocks = 4        # 4x4 sub grid
    step_size = 256   # sub square edge size
    threshold = 100000  # minimum summed mask value for a cell to be flagged
    side = blocks * step_size

    if image.size != (side, side):
        raise ValueError(f"image must be {side}x{side}, got {image.size}")
    if color_seg.shape != (side, side, 3):
        raise ValueError(
            f"color_seg must have shape {(side, side, 3)}, got {color_seg.shape}")

    draw = ImageDraw.Draw(image)

    # Collapse the colour channels once, up front: the result does not depend
    # on the cell being inspected, so there is no reason to redo it per cell.
    intensity = color_seg.sum(axis=2)

    cell_origins = range(0, side, step_size)
    # x is the outer loop and y the inner one, so cells are visited (and
    # reported) column by column.
    for x in cell_origins:
        for y in cell_origins:
            # Arrays are indexed [row, column], i.e. [y, x].
            cell_total = intensity[y:y + step_size, x:x + step_size].sum()
            if cell_total > threshold:
                print("light found at square ", x, y)
                draw.rectangle([(x, y), (x + step_size, y + step_size)],
                               outline="white", width=3)

def call(image):  # nparray
    """Highlight traffic lights in *image* and return the annotated picture.

    Pipeline: resize the input to 1024x1024, run the SegFormer model, upsample
    the logits back to 1024x1024, take the per-pixel argmax, paint the pixels
    classified as 'traffic light', blend that mask 50/50 with the resized
    picture and finally let ``annotation`` outline the grid cells that contain
    mask pixels. Timing and shape information is printed along the way.

    Args:
        image: the input picture as a numpy array (what the Gradio "image"
            input passes in), or any object ``np.asarray`` can turn into an
            image array, such as a PIL image.

    Returns:
        A 1024x1024 PIL image with the overlay and the cell outlines.

    Raises:
        ValueError: if no image was supplied.
    """
    if image is None:
        # Fail with a clear message instead of an obscure error further down.
        raise ValueError("no image provided")

    # perf_counter is monotonic, which makes it the right clock for measuring
    # elapsed time.
    start = time.perf_counter()

    # Force 3-channel RGB so grayscale / RGBA inputs neither break the model
    # input nor the blending step below.
    resized = Image.fromarray(np.asarray(image)).convert("RGB").resize((1024, 1024))
    resized_image = np.array(resized)
    print(f"{np.array(resized_image).shape=}") # 1024, 1024, 3

    print(f"*processing time: {(time.perf_counter() - start):.2f} s")

    inputs = feature_extractor(images=resized_image, return_tensors="pt").to(device)

    print(f"**processing time: {(time.perf_counter() - start):.2f} s")

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.cpu()

    print(f"{logits.shape=}") # shape (batch_size, num_labels, height/4, width/4) -> 1, 19, 256, 256

    print(f"***processing time: {(time.perf_counter() - start):.2f} s")
    # First, rescale logits to the size of the resized image
    interpolated_logits = nn.functional.interpolate(
        logits,
        size=[1024, 1024],  # (height, width)
        mode='bilinear',
        align_corners=False)
    print(f"{interpolated_logits.shape=}, {logits.shape=}") # 1, 19, 1024, 1024

    # Second, apply argmax on the class dimension
    seg = interpolated_logits.argmax(dim=1)[0]
    print(f"{seg.shape=}")
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
    print(f"{color_seg.shape=}")

    # Only traffic lights are painted; every other class stays black.
    light_label = cityscapes_classes().index('traffic light')
    # Convert the boolean mask to numpy explicitly before indexing a numpy array.
    light_mask = (seg == light_label).numpy()
    color_seg[light_mask, :] = cityscapes_palette()[light_label]

    # Convert to BGR
    # NOTE(review): the palette and the resized picture are both RGB and the
    # result is returned as a PIL image, so this flip only swaps the red and
    # blue components of the overlay colour. Kept to preserve the current
    # look -- confirm whether it is intended.
    color_seg = color_seg[..., ::-1]
    print(f"{color_seg.shape=}")

    print(f"****processing time: {(time.perf_counter() - start):.2f} s")

    # Show image + mask
    img = resized_image * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)

    out_im_file = Image.fromarray(img)
    # Draws the cell outlines directly onto out_im_file.
    annotation(out_im_file, color_seg)

    print(f"--> processing time: {(time.perf_counter() - start):.2f} s")

    return out_im_file

# original_image = Image.open("./examples/1.jpg")
# print(f"{np.array(original_image).shape=}") # eg 729, 1000, 3

# out = call(original_image)
# out.save("out2.jpeg")

title = "Traffic Light Detector"
description = "Experiment traffic light detection to evaluate the value of captcha security controls"

# Build the Gradio demo: one image in, one image out, both handled by call().
iface = gr.Interface(
    fn=call,
    inputs="image",
    outputs="image",
    title=title,
    description=description,
    # Sample pictures examples/1.jpg ... examples/6.jpg, resolved relative to
    # the directory that contains this file.
    examples=[
        os.path.join(os.path.dirname(__file__), f"examples/{number}.jpg")
        for number in range(1, 7)
    ],
    thumbnail="thumbnail.webp",
)
# Start serving the demo as soon as the script is run.
iface.launch()