# File size: 3,403 Bytes
# a859642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import cv2
import numpy as np
import os
import json
from openvino.runtime import Core  # Assuming you're using OpenVINO
from tqdm import tqdm
from PIL import Image

from tf_post_processing import non_max_suppression #,optimized_object_detection

# Read the OpenVINO model from disk and compile it for the CPU device
classification_model_xml = "./model/best.xml"
core = Core()
# Options handed to compile_model below
config = dict(
    INFERENCE_NUM_THREADS=2,
    ENABLE_CPU_PINNING=True,
)
model = core.read_model(model=classification_model_xml)
compiled_model = core.compile_model(model=model, device_name="CPU", config=config)

# Class names listed in class-id order: the name at position i is the label
# for class id i. The spacing inside the names is kept exactly as-is because
# these strings are emitted verbatim in the JSON output.
label_to_class_text = dict(enumerate((
    'range',
    ' entry door',
    'kitchen sink',
    ' bathroom sink',
    'toilet',
    'double  folding door',
    'window',
    'shower',
    'bathtub',
    'single folding door',
    'dishwasher',
    'refrigerator',
)))

# Function to perform inference
def predict_image(image):
    """Run object detection on an uploaded image and report the detections.

    Args:
        image: The uploaded picture as a ``PIL.Image.Image`` (any mode; it is
            converted to RGB), or ``None`` when nothing was uploaded. Any
            other value is passed to ``np.array`` unchanged and is assumed to
            already be an H x W x 3 RGB array.

    Returns:
        A ``(annotated_image, predictions_json)`` tuple. ``annotated_image``
        is an RGB ``PIL.Image.Image`` with a box and a ``"<class id> <prob>"``
        caption drawn for every kept detection (``None`` if ``image`` was
        ``None``). ``predictions_json`` is a JSON string holding a list of
        ``{"class", "probability", "coordinates"}`` objects, ``"[]"`` when
        there is nothing to report.
    """
    # Without this guard a missing upload would crash inside cv2.resize.
    if image is None:
        return None, json.dumps([], indent=4)

    # Normalise every PIL mode (grayscale, RGBA, palette, ...) to 3-channel
    # RGB so the transpose below always sees an H x W x 3 array.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    # np.array makes a copy, so the drawing below never alters the caller's data.
    image = np.array(image)
    im_height, im_width = image.shape[:2]

    # Resize, scale pixel values to [0, 1], and reorder HWC -> NCHW float32
    img_size = 960
    resized_image = cv2.resize(image, (img_size, img_size)) / 255.0
    reshaped_image = np.expand_dims(resized_image.transpose(2, 0, 1), axis=0).astype(np.float32)

    conf_thres = 0.2
    output_numpy = compiled_model(reshaped_image)[0]
    results = non_max_suppression(output_numpy, conf_thres=conf_thres, iou_thres=0.6, max_wh=15000)[0]

    # The array is RGB from start to finish, so colours are RGB triples.
    # Cyan boxes and green captions are what the UI has always displayed.
    box_color = (0, 255, 255)
    text_color = (0, 255, 0)

    predictions = []

    # Each result row is read as [x1, y1, x2, y2, probability, class id].
    # NOTE(review): the box values are multiplied by the image size, i.e. they
    # are treated as normalised to [0, 1] - confirm against non_max_suppression.
    for result in results:
        prob = float(result[4])
        if not prob > conf_thres:
            continue

        class_id = int(result[5])

        # Clamp to the image and convert to plain ints: an unsigned 16-bit
        # cast would wrap slightly negative coordinates into huge values,
        # and OpenCV's drawing calls expect built-in ints for points.
        x1 = int(np.clip(result[0] * im_width, 0, im_width))
        y1 = int(np.clip(result[1] * im_height, 0, im_height))
        x2 = int(np.clip(result[2] * im_width, 0, im_width))
        y2 = int(np.clip(result[3] * im_height, 0, im_height))

        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
        label_text = f"{class_id} {round(prob, 2)}"
        cv2.putText(image, label_text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2)

        # Store prediction info in a JSON-compatible format. An id missing
        # from the label table falls back to the id itself rather than
        # failing the whole request.
        predictions.append({
            "class": label_to_class_text.get(class_id, str(class_id)),
            "probability": round(prob, 2),
            "coordinates": {
                "xmin": x1,
                "ymin": y1,
                "xmax": x2,
                "ymax": y2
            }
        })

    # The array is already RGB, so it goes back to PIL without a channel
    # swap (a BGR->RGB conversion here would exchange red and blue).
    pil_image = Image.fromarray(image)

    return pil_image, json.dumps(predictions, indent=4)

# Example images offered in the Gradio UI for user convenience
sample_images = [f"./sample/{stem}.jpg" for stem in ("10_2", "10_10", "10_12")]


# Gradio UI: one PIL image in; the annotated PIL image and a text box out
gr_interface = gr.Interface(
    title="House CAD Design Object Detection",
    description="Upload a CAD design image of a house to detect objects with bounding boxes and probabilities.",
    fn=predict_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),
        gr.Textbox(),
    ],
    examples=sample_images,
)

# Start the web UI only when this file is executed as a script
if __name__ == "__main__":
    gr_interface.launch()