import gradio as gr
from pydantic import BaseModel
from typing import List
from PIL import Image, ImageDraw, ImageFont
import random
import torch
from transformers import Owlv2Processor, Owlv2ForObjectDetection
import logging
from logging.handlers import RotatingFileHandler
import base64
import io
import os
import numpy as np

class DetectionRequest(BaseModel):
    image_data: str
    texts: List[List[str]]

class DetectionResult(BaseModel):
    detections: List[str]
    image_with_boxes: str
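
# Illustrative only: these Pydantic schemas mirror the inputs/outputs of
# detect_objects_logic below, e.g. for a hypothetical REST wrapper:
#   req = DetectionRequest(image_data="<base64-encoded JPEG>", texts=[["cat", "dog"]])
#   annotated, detections = detect_objects_logic(req.image_data, req.texts)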

processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")
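
# Explicitly mark the model as inference-only. from_pretrained already loads
# weights in eval mode, so this is a harmless safeguard rather than a required step.
model.eval()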
# Create the logs directory if it doesn't exist
os.makedirs('logs', exist_ok=True)
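
# Route log output to a rotating file in the directory created above; the file
# name, size limit, and backup count below are assumptions, not fixed
# requirements of the app.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[RotatingFileHandler("logs/app.log", maxBytes=1_000_000, backupCount=3)],
)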

def draw_bounding_boxes(image: Image.Image, boxes, scores, labels, text_labels):
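    """Draw each detection box and a "<label>: <score>" caption onto the image.

    `labels` holds indices into `text_labels`, the candidate label strings.
    The image is modified in place and returned for convenience.
    """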
    draw = ImageDraw.Draw(image)

    # Define the color bank
    color_bank = ["#0AC2FF", "#47FF0A", "#FF0AC2", "#ADD8E6", "#FF0A47"]

    # Use default font
    font = ImageFont.load_default()

    for box, score, label in zip(boxes, scores, labels):
        # Choose a random color
        color = random.choice(color_bank)

        # Convert the box to a Python list if it's not already
        if isinstance(box, torch.Tensor):
            box = box.tolist()
        elif not isinstance(box, (list, tuple)):
            raise TypeError("Box must be a list or tuple of coordinates.")

        # Draw the rectangle
        draw.rectangle(box, outline=color, width=2)

        # Get the text to display
        display_text = f"{text_labels[label]}: {score:.2f}"

        # Position the label just above the box, clamped so it stays inside
        # the image when the box touches the top edge
        text_position = (box[0], max(0, box[1] - 10))

        # Draw the text
        draw.text(text_position, display_text, fill=color, font=font)

    return image

def detect_objects_logic(image_data, texts):
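    """Decode a base64 image, run OWLv2 open-vocabulary detection, and return
    (annotated PIL image, list of human-readable detection strings).

    `texts` is one list of candidate label strings per image (a single image here).
    """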
    try:
        # Decode the base64 image
        image_data_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_data_bytes))
        width, height = image.size

        inputs = processor(text=texts, images=image, return_tensors="pt")
        # Inference only: skip gradient tracking to save memory and time
        with torch.no_grad():
            outputs = model(**inputs)

        # Post-processing expects target sizes as (height, width); PIL's
        # image.size is (width, height), hence the reversal
        target_sizes = torch.Tensor([image.size[::-1]])
        results = processor.post_process_object_detection(outputs=outputs, threshold=0.1, target_sizes=target_sizes)

        detection_strings = []
        image_with_boxes = image.copy()  # Copy the image only once
        
        for i, text_group in enumerate(texts):
            if i >= len(results):
                logging.error(f"Text group index {i} exceeds results length.")
                continue
            logging.info(f"Processing texts: {texts}")
            results_per_group = results[i]
            boxes = results_per_group["boxes"]
            scores = results_per_group["scores"]
            labels = results_per_group["labels"]

            image_with_boxes = draw_bounding_boxes(image_with_boxes, boxes, scores, labels, text_group)

            for box, score, label in zip(boxes, scores, labels):
                # Boxes from post_process_object_detection are already in pixel
                # coordinates (target_sizes was supplied), so no rescaling is needed
                pixel_box = [round(coord.item(), 2) for coord in box]
                detection_string = f"Detected {text_group[label]} with confidence {round(score.item(), 3)} at location {pixel_box}"
                detection_strings.append(detection_string)

        logging.info("Bounding boxes and labels have been drawn on the image.")

        return image_with_boxes, detection_strings

    except IndexError as e:
        logging.error(f"Index error: {e}. Check if the number of text groups matches the model's output.")
        raise
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
        raise
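
# Direct usage (illustrative; the Gradio UI below is the normal entry point):
#   with open("assets/snowman.jpg", "rb") as f:
#       b64 = base64.b64encode(f.read()).decode("utf-8")
#   boxed_image, detection_strings = detect_objects_logic(b64, [["snowman"]])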

def gradio_detect_and_draw(image, text_labels):
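    """Gradio callback: bridge the UI inputs to detect_objects_logic.

    `image` arrives as a PIL image (or numpy array) from the Image component;
    `text_labels` is a comma-separated string of candidate labels.
    """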
    # Check if the image is None
    if image is None:
        raise ValueError("No image was provided.")

    # Convert the input image to PIL Image if it's a numpy array
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'), 'RGB')

    # Convert the PIL image to base64 for the detection logic; force RGB first,
    # since JPEG cannot encode an alpha channel
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    image_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Split the comma-separated labels and strip surrounding whitespace
    if not text_labels:
        raise ValueError("No detection labels were provided.")
    text_labels = [[label.strip() for label in text_labels.split(',')]]

    # Run the detection logic
    processed_image, detections = detect_objects_logic(image_data, text_labels)

    # Convert the output image back to PIL if the logic returned a numpy array
    if isinstance(processed_image, np.ndarray):
        processed_image = Image.fromarray(processed_image.astype('uint8'), 'RGB')

    # Join the per-detection strings so the Text output renders one per line
    return processed_image, "\n".join(detections)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## Owlv2 Object Detection Demo")
    gr.Markdown("Run this space on your own hardware with this command: "
                "```docker run -it -p 7860:7860 --platform=linux/amd64 "
                "registry.hf.space/macadeliccc-owlv2-base-patch-16-ensemble-demo:latest python app.py```")
    
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload or draw an image")
            text_input = gr.Textbox(label="Enter comma-separated labels for detection")
            submit_button = gr.Button("Detect")
        with gr.Column():
            image_output = gr.Image(label="Processed Image")
            text_output = gr.Text(label="Detections")
    

    submit_button.click(
        gradio_detect_and_draw, 
        inputs=[image_input, text_input], 
        outputs=[image_output, text_output]
    )
    # Add clickable example inputs
    examples = [
        ["assets/snowman.jpg", "snowman"],
        ["assets/traffic.jpg", "taxi,traffic light"],
        ["assets/umbrellas.jpg", "umbrella"]
    ]
    gr.Examples(examples, inputs=[image_input, text_input])


demo.launch()