import gradio as gr
import spaces
from transformers import AutoImageProcessor, DFineForObjectDetection
from PIL import Image, ImageDraw, ImageFont
import torch

# Load model and processor (keep on CPU initially for Zero GPU)
processor = AutoImageProcessor.from_pretrained("ustc-community/dfine-medium-obj2coco")
model = DFineForObjectDetection.from_pretrained("ustc-community/dfine-medium-obj2coco")

# IMPORTANT: For Zero GPU, keep model on CPU initially
model = model.to("cpu")

# Inference function with Zero GPU decorator
@spaces.GPU(duration=15)  # Specify duration for Zero GPU
def detect_objects(image):
    # Move model to GPU only during inference
    device = torch.device("cuda")
    model.to(device)
    
    # Process image
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process results
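    # target_sizes expects (height, width) per image; PIL's Image.size is (width, height),
    # hence the [::-1] below.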
    results = processor.post_process_object_detection(
        outputs,
        target_sizes=torch.tensor([image.size[::-1]]),
        threshold=0.3
    )
    
    # Filter to keep only logos
    if len(results) > 0:
        # Find the label ID for "logo" in the model's label mapping
        logo_label_id = None
        for label_id, label_name in model.config.id2label.items():
            if label_name.lower() == "logo":
                logo_label_id = label_id
                break
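        # Note: if the model's label map has no "logo" entry, logo_label_id stays None
        # and the detections are left unfiltered below.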
        
        # Filter results to keep only logos
        if logo_label_id is not None and len(results[0]["boxes"]) > 0:
            logo_mask = results[0]["labels"] == logo_label_id
            results[0]["boxes"] = results[0]["boxes"][logo_mask]
            results[0]["labels"] = results[0]["labels"][logo_mask]
            results[0]["scores"] = results[0]["scores"][logo_mask]

    # Move model back to CPU after inference (important for Zero GPU)
    model.to("cpu")
    torch.cuda.empty_cache()  # Clear GPU cache
    
    # Draw bounding boxes on the original image
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)
    
    # Try to use a larger font if available
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 24)
    except OSError:
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", 24)
        except OSError:
            font = ImageFont.load_default()
    
    detection_results = []
    
    if len(results) > 0 and len(results[0]["boxes"]) > 0:
        object_counter = 1
        for box, label, score in zip(results[0]["boxes"], results[0]["labels"], results[0]["scores"]):
            # Convert tensors to CPU before processing
            box = box.cpu().tolist()
            label_id = label.cpu().item()
            score_val = score.cpu().item()
            
            # Calculate width and height
            width_px = box[2] - box[0]
            height_px = box[3] - box[1]
            
            # Convert to mm (divide by 11.91 and round to 2 decimals)
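            # (Assumption: 11.91 px/mm ≈ 302.5 DPI, i.e. a fixed input resolution; adjust if inputs differ.)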
            width_mm = round(width_px / 11.91, 2)
            height_mm = round(height_px / 11.91, 2)
            
            # Round coordinates
            box = [round(x, 2) for x in box]
            
            # Get generic object name
            object_name = f"Object {object_counter}"
            label_text = object_name
            
            # Draw bounding box
            draw.rectangle(box, outline=(45, 136, 58), width=4)
            
            # Draw label only (no score, no size info)
            text_bbox = draw.textbbox((box[0], box[1] - 2), label_text, font=font)
            draw.rectangle([text_bbox[0]-2, text_bbox[1]-2, text_bbox[2]+2, text_bbox[3]+2], fill=(45, 136, 58))
            draw.text((box[0], box[1] - 2), label_text, fill="white", font=font)
            
            # Store detection info with generic name
            detection_results.append({
                "label": object_name,
                "actual_label": model.config.id2label[label_id],  # Store actual label internally if needed
                "score": score_val,
                "box": box,
                "width_px": int(width_px),
                "height_px": int(height_px),
                "width_mm": width_mm,
                "height_mm": height_mm
            })
            
            object_counter += 1
    
    # Create detection summary
    summary = f"Detected {len(detection_results)} object(s)\n\n"
    for i, det in enumerate(detection_results[:10], 1):  # Show top 10 detections
        summary += f"{det['label']}: {det['score']:.2%}\n"
        summary += f"   Size: {det['width_px']} × {det['height_px']} px | {det['width_mm']} × {det['height_mm']} mm\n\n"
        summary += f"   Bounding Box: TL({det['box'][0]}, {det['box'][1]}) TR({det['box'][2]}, {det['box'][1]}) BR({det['box'][2]}, {det['box'][3]}) BL({det['box'][0]}, {det['box'][3]})\n\n"
    
    return image_with_boxes, summary
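
# Minimal local smoke test (a sketch, not part of the Space UI). Assumptions: a CUDA-capable
# machine, and that @spaces.GPU passes through outside the Spaces runtime; "sample.jpg" and
# "annotated.jpg" are placeholder paths. Flip the flag to try it.
RUN_LOCAL_SMOKE_TEST = False
if RUN_LOCAL_SMOKE_TEST:
    sample = Image.open("sample.jpg").convert("RGB")
    annotated, report = detect_objects(sample)
    annotated.save("annotated.jpg")
    print(report)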

# Create Gradio interface
with gr.Blocks(title="Logo Detection", css="""
    .green-button {
        background-color: rgb(145, 236, 158) !important;
        border-color: rgb(145, 236, 158) !important;
        color: #333 !important;
    }
    .green-button:hover {
        background-color: rgb(125, 216, 138) !important;
        border-color: rgb(125, 216, 138) !important;
    }
    
    /* Override Gradio's orange with green */
    .gr-button-primary {
        background-color: rgb(145, 236, 158) !important;
        border-color: rgb(145, 236, 158) !important;
    }
    
    /* Progress bars */
    .progress-bar {
        background-color: rgb(145, 236, 158) !important;
    }
    
    /* Input focus states */
    .gr-input:focus, .gr-textarea:focus {
        border-color: rgb(145, 236, 158) !important;
        outline-color: rgb(145, 236, 158) !important;
    }
    
    /* Override orange in various Gradio elements */
    .gr-check-radio:checked {
        background-color: rgb(145, 236, 158) !important;
        border-color: rgb(145, 236, 158) !important;
    }
    
    /* Links */
    a {
        color: rgb(45, 136, 58) !important;
    }
    
    /* Loading spinner */
    .gr-loading {
        color: rgb(145, 236, 158) !important;
    }
    
    /* Slider handles and tracks */
    .gr-slider input[type="range"]::-webkit-slider-thumb {
        background-color: rgb(145, 236, 158) !important;
    }
    .gr-slider input[type="range"]::-moz-range-thumb {
        background-color: rgb(145, 236, 158) !important;
    }
    
    /* Any element using Gradio's primary color */
    [style*="rgb(249, 115, 22)"] {
        color: rgb(145, 236, 158) !important;
    }
    [style*="background-color: rgb(249, 115, 22)"] {
        background-color: rgb(145, 236, 158) !important;
    }
""") as demo:
    gr.Markdown("""
    # Logo Detection with Size Measurements
    
    Upload an image to detect logos.
    This Space uses Zero GPU for efficient inference.
    
    **Features:**
    - Logo detection only
    - Sizes reported in pixels and in millimeters (converted at 11.91 pixels/mm) in the detection summary
    - Objects are labeled generically as "Object 1", "Object 2", etc.
    """)
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            detect_btn = gr.Button("Detect Objects", variant="primary", elem_classes="green-button")
        
        with gr.Column():
            output_image = gr.Image(label="Detection Results")
            output_text = gr.Textbox(label="Detection Summary", lines=12)
    
    # Set up event handler
    detect_btn.click(
        fn=detect_objects,
        inputs=input_image,
        outputs=[output_image, output_text]
    )
    
    # Add examples (comment out if you don't have example images)
    # gr.Examples(
    #     examples=[
    #         ["example1.jpg"],
    #         ["example2.jpg"],
    #     ],
    #     inputs=input_image,
    #     outputs=[output_image, output_text],
    #     fn=detect_objects,
    #     cache_examples=False  # Don't cache for Zero GPU
    # )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)