import gradio as gr
import torch
from PIL import Image
from torchvision import transforms
import numpy as np
from matplotlib import pyplot as plt
from torch import nn
from transformers import SegformerForSemanticSegmentation
import io

###################
# Setup label names
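# 19 classes in total: 13 damage types and 6 bridge components (cf. the dacl10k paper linked below)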
target_list = ['Crack', 'ACrack', 'Wetspot', 'Efflorescence', 'Rust', 'Rockpocket', 'Hollowareas', 'Cavity',
               'Spalling', 'Graffiti', 'Weathering', 'Restformwork', 'ExposedRebars', 
               'Bearing', 'EJoint', 'Drainage', 'PEquipment', 'JTape', 'WConccor']
target_list_all = ["All"] + target_list
classes, nclasses = target_list, len(target_list)
label2id = dict(zip(classes, range(nclasses)))
id2label = dict(zip(range(nclasses), classes))

############
# Load model
device = torch.device('cpu')
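# id2label/label2id are written into the model config, so output channel i of the logits corresponds to target_list[i]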
segformer = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b1", id2label=id2label, label2id=label2id)

class SegModel(nn.Module):
    """SegFormer backbone plus upsampling of the logits back to the input resolution."""

    def __init__(self, segformer):
        super().__init__()
        self.segformer = segformer
        # SegFormer predicts logits at 1/4 of the input resolution
        self.upsample = nn.Upsample(scale_factor=4, mode='nearest')

    def forward(self, x):
        return self.upsample(self.segformer(x).logits)
 
model = SegModel(segformer)
path = "runs/2023-08-31_rich-paper-12/best_model_cpu.pth"
print(f"Loading SegFormer weights from {path}")
# The checkpoint stores the full SegModel object, so torch.load() replaces the instance created above
model = torch.load(path, map_location=device)
model.eval()

##################
# Image preprocess
##################

to_tensor = transforms.ToTensor()
to_array = transforms.ToPILImage()
resize = transforms.Resize((512, 512))
resize_small = transforms.Resize((369,369))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
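# The 512x512 resize matches the training resolution mentioned in the description below;
# the mean/std values are the standard ImageNet statistics used with the SegFormer backbone.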

def process_pil(img):
    img = to_tensor(img)
    img = resize(img)
    img = normalize(img)
    return img

# Prepare the background image that the transparent mask is pasted onto
def resize_pil(img):
    img = to_tensor(img)
    img = resize_small(img)
    img = to_array(img)
    return img
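# Note: the 369x369 size is assumed to roughly match the size of the saved mask PNGs,
# so that show_img() can paste a mask onto this background without further resizing.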

# combine the foreground (mask_all) and background (original image) to create one image
def transparent(fg, bg, alpha_factor):
    foreground = Image.fromarray(np.array(fg))
    background = Image.fromarray(np.array(bg))
    new_alpha_factor = int(255 * alpha_factor)
    foreground.putalpha(new_alpha_factor)
    background.paste(foreground, (0, 0), foreground)

    return background
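# Note: transparent() is a standalone helper; the Blocks UI below uses show_img() for the overlay.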

def show_img(all_imgs, dropdown, bg, alpha_factor):
    # Pick the saved mask that belongs to the selected label
    idx = target_list_all.index(dropdown)
    fg = all_imgs[idx]["name"]  # each gallery entry is a dict holding the temp file path (Gradio 3.x behaviour)

    foreground = Image.open(fg)
    background = Image.fromarray(np.array(bg))
    new_alpha_factor = int(255 * alpha_factor)
    foreground.putalpha(new_alpha_factor)
    background.paste(foreground, (0, 0), foreground)

    return background

###########
# Inference


def inference(img):
    background = resize_pil(img)

    img = process_pil(img)

    mask = model(img.unsqueeze(0))  # the model expects a batch, hence the extra dimension at position 0 (unsqueeze)
    mask = mask[0]

    # Convert logits to probabilities (multi-label, hence a sigmoid per class)
    mask_probs = torch.sigmoid(mask)
    mask_probs = mask_probs.detach().numpy()

    # Binarize: every class gets an independent yes/no decision (multi-label)
    THRESHOLD = 0.5
    mask_preds = mask_probs > THRESHOLD

    # Combine all classes into a single "All" mask
    mask_all = mask_preds.sum(axis=0)
    mask_all = np.expand_dims(mask_all, axis=0)

    # Prepend the combined mask to the per-class predictions
    mask_preds = np.concatenate((mask_all, mask_preds), axis=0)
    labs = target_list_all  # "All" + the 19 class names, in the same order as mask_preds

    fig, axes = plt.subplots(5, 4, figsize=(10, 10))  # 20 panels: "All" + 19 classes
    
    # Collect the individual masks so they can be rendered one by one later
    all_masks = []

    for i, ax in enumerate(axes.flat):
        label = labs[i]
        
        all_masks.append(mask_preds[i])

        ax.imshow(mask_preds[i])
        ax.set_title(label)
          
    plt.tight_layout()        

    # Convert the matplotlib figure to a PIL image
    img_buf = io.BytesIO()
    fig.savefig(img_buf, format='png')
    im = Image.open(img_buf)

    # Save each individual mask with hidden x- and y-axes and without white padding,
    # so it can later be overlaid on the original image.
    all_images = []
    for i in range(len(all_masks)):
        plt.figure()
        ax_img = plt.imshow(all_masks[i])
        plt.axis('off')
        ax_img.axes.get_xaxis().set_visible(False)
        ax_img.axes.get_yaxis().set_visible(False)
        img_buf = io.BytesIO()
        plt.savefig(img_buf, bbox_inches='tight', pad_inches=0, format='png')
        all_images.append(Image.open(img_buf))

    return im, all_images, background
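# Standalone usage outside the Gradio UI could look like this (illustrative only):
#   overview, masks, bg = inference(Image.open("assets/dacl10k_v2_validation_0026.jpg"))
#   overview.save("all_masks_overview.png")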



examples = [
    ["assets/dacl10k_v2_validation_0026.jpg"],
    ["assets/dacl10k_v2_validation_0037.jpg"],
    ["assets/dacl10k_v2_validation_0053.jpg"],
    ["assets/dacl10k_v2_validation_0068.jpg"],
    ["assets/dacl10k_v2_validation_0153.jpg"],
    ["assets/dacl10k_v2_validation_0263.jpg"],
    ["assets/dacl10k_v2_validation_0336.jpg"],
    ["assets/dacl10k_v2_validation_0500.jpg"],
    ["assets/dacl10k_v2_validation_0549.jpg"],
    ["assets/dacl10k_v2_validation_0609.jpg"],
]



title = "dacl-challenge @ WACV2024"
description = """
<p style="text-align:center">
<h1>dacl-challenge @ WACV2024</h1>
</p>
<b>
<p style="text-align:center">
<a href='https://twitter.com/dacl_ai' target='_blank'>Twitter</a><a href='https://x.com/dacl_ai' target='_blank'>/X</a> |
<a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>WACV2024</a> |
<a href='https://arxiv.org/abs/2309.00460' target='_blank'>arXiv</a> | 
<a href='https://github.com/phiyodr/dacl10k-toolkit' target='_blank'>Python Toolkit</a> | 
<a href='https://try.fiftyone.ai/datasets/dacl10k/samples' target='_blank'>voxel51.com</a> | 
<a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>eval.ai</a> | 
<a href='https://dacl.ai/workshop.html' target='_blank'>dacl.ai workshop page</a> 
</p>

<p></p>
<p>📛 The challenge uses the dacl10k dataset, which stands for <i>damage classification 10k images</i> and is a <b>multi-label semantic segmentation dataset</b> for 19 classes (13 damages and 6 objects) present on bridges.</p>
<p></p>

<p>🏆 The dataset is used in the <a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>dacl-challenge</a> associated with the "<a href='https://dacl.ai/workshop' target='_blank'>1st Workshop on Vision-Based Structural Inspections in Civil Engineering</a>" at <a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>WACV2024</a>.</p>
<p></p>
</b>

<p>Civil engineering structures such as power plants, sewers, and bridges form essential components of the public infrastructure, and it is mandatory to keep them in a safe and operational state. To ensure this, they are inspected frequently; at present, the recognition and documentation of defects and building components is mostly carried out manually. A failure of individual structures results in enormous costs. For example, the economic costs caused by the closure of a bridge due to congestion are many times the cost of the bridge itself and its maintenance.</p>
<p></p>

<p>Recent advancements in hardware and software offer great potential for increasing the quality, traceability, and efficiency of the structural inspection process. In particular, methods from the field of computer vision play an important role: they support the inspection engineer on site and raise the quality and efficiency of the inspection. There is a high demand worldwide for the automation of structural inspections in the areas of building construction, bridge construction, tunnel construction, sewage plants, and other critical infrastructure.</p>
<p></p>

<p>In the “<a href='https://dacl.ai/workshop' target='_blank'>1st Workshop on Vision-Based Structural Inspections in Civil Engineering</a>,” approaches utilizing computer vision for analyzing and assessing civil engineering structures will be explored. The workshop will provide a platform for experts from both the academic and application communities. The core of the workshop is the “<a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>dacl-challenge</a>,” which aims to find the best models for recognizing bridge defects and bridge components by means of semantic segmentation. The challenge is based on the “<b>dacl10k</b>” dataset, a novel, real-world, large-scale benchmark for multi-label semantic segmentation that distinguishes between <b>13 defect types</b> and <b>six building components</b>. The workshop will take place at the <a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2024</a>.</p>
<p></p>

<p>Details: 
 <ul>
  <li>Model: <a href='https://huggingface.co/nvidia/mit-b1' target='_blank'>SegFormer mit-b1</a>, trained on resized 512x512 images for (only) 10 epochs.</li>
  <li>Label description of the dacl10k dataset: "A.3. Class descriptions" in <a href='https://arxiv.org/pdf/2309.00460.pdf' target='_blank'>J. Flotzinger, P.J. Rösch, T. Braml: "dacl10k: Benchmark for Semantic Bridge Damage Segmentation"</a>.</li>
</ul> 
<p></p>


<p>Workflow: 
 <ul>
  <li>Upload an image or select one from "Examples".</li>
  <li>Click "1) Generate Masks".</li>
  <li>Select a damage or object type under "Select Label" and choose an "Alpha Factor" for the transparency.</li>
  <li>Click "2) Generate Transparent Mask (with Alpha Factor)".</li>
</ul> 
"""

article = "<p style='text-align: center'><a href='https://github.com/phiyodr/dacl10k-toolkit' target='_blank'>Github Repo</a></p>"



with gr.Blocks() as app:
    with gr.Row():
        gr.Markdown(description)
    with gr.Row():
        input_img = gr.Image(type="pil", label="Original Image")
        gr.Examples(examples=examples, inputs=[input_img])
    with gr.Row():
        img = gr.Image(type="pil", label="All Masks")
        transparent_img = gr.Image(type="pil", label="Transparent Image")
    with gr.Row():
        dropdown = gr.Dropdown(choices=target_list_all, label="Select Label", value="All")
        slider = gr.Slider(minimum=0, maximum=1, value=0.4, label="Alpha Factor")
    
    all_masks = gr.Gallery(visible=False)
    background = gr.Image(visible=False)

    generate_mask_slider = gr.Button("1) Generate Masks")
    generate_mask_slider.click(inference, inputs=[input_img], outputs=[img, all_masks, background])

    submit_transparent_img = gr.Button("2) Generate Transparent Mask (with Alpha Factor)")
    submit_transparent_img.click(show_img, inputs=[all_masks, dropdown, background, slider], outputs=[transparent_img])


app.launch()