import re

import gradio as gr
import PIL.Image
import spaces
import torch
from PIL import ImageDraw
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor

model_id = "mattraj/curacel-autodamage-1"
COLORS = ['#4285f4', '#db4437', '#f4b400', '#0f9d58', '#e48ef1']
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).eval().to(device)
processor = PaliGemmaProcessor.from_pretrained(model_id)

###### Transformers Inference
@spaces.GPU
def infer(
        image: PIL.Image.Image,
        text: str,
        max_new_tokens: int = 2048
) -> tuple:
    inputs = processor(text=text, images=image, return_tensors="pt", padding="longest", do_convert_rgb=True).to(device).to(dtype=model.dtype)
    input_len = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens
        )
    # Decode only the newly generated tokens; generate() echoes the prompt back first
    result = processor.decode(generated_ids[0][input_len:], skip_special_tokens=True)

    # Parse bounding boxes and labels out of the decoded output
    bounding_boxes = extract_bounding_boxes(result, image)
    
    # Draw bounding boxes on the image
    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)
    
    # Draw each detected box and its label, cycling through the color palette
    for idx, (box, label) in enumerate(bounding_boxes):
        color = COLORS[idx % len(COLORS)]
        draw.rectangle(box, outline=color, width=3)
        draw.text((box[0], box[1]), label, fill=color)
    
    return result, annotated_image
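
# A minimal sketch of calling infer() directly, outside the Gradio UI. The image path
# and prompt below are purely illustrative examples, not files shipped with this Space:
#
#   img = PIL.Image.open("example_car.png")
#   text_out, boxed_img = infer(img, "detect doorouter-dent ; front-bumper-scratch")
#   boxed_img.save("example_car_annotated.png")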


def extract_bounding_boxes(result, image):
    """
    Extract bounding boxes and labels from the model result.
    
    The model emits four <locXXXX> tokens per box in (y_min, x_min, y_max, x_max) order,
    normalized to 0-1023; they are rescaled to pixel coordinates by dividing by 1024 and
    multiplying by the image width/height.
    
    Args:
        result (str): The model's output string containing bounding box data.
        image (PIL.Image.Image): The image to use for scaling the bounding boxes.
        
    Returns:
        List[Tuple[Tuple[int, int, int, int], str]]: A list of bounding boxes and labels.
    """
    # Regular expression to capture the <loc> tags and their associated labels
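    # Illustrative example (assumed output format) of a string this pattern would match:
    #   "<loc0123><loc0456><loc0789><loc0912> doorouter-dent ; <loc0100><loc0200><loc0300><loc0400> paint-chip"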
    loc_pattern = re.compile(r"<loc(\d{4})><loc(\d{4})><loc(\d{4})><loc(\d{4})>\s*([a-zA-Z\-]+)")
    
    # Get image dimensions
    width, height = image.size
    
    # Find all matches of bounding box coordinates and labels in the result string
    matches = loc_pattern.findall(result)
    
    bounding_boxes = []
    
    for match in matches:
        # PaliGemma orders the four location tokens as y_min, x_min, y_max, x_max
        y1, x1, y2, x2, label = match
        
        # Convert coordinates from string to integer
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        
        # Scale coordinates
        x1 = int((x1 / 1024) * width)
        y1 = int((y1 / 1024) * height)
        x2 = int((x2 / 1024) * width)
        y2 = int((y2 / 1024) * height)
        
        # Append the scaled bounding box and label as a tuple
        bounding_boxes.append(((x1, y1, x2, y2), label))
    
    return bounding_boxes
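
# Worked example (illustrative numbers only): for a 1024x768 image, a match like
# "<loc0100><loc0200><loc0300><loc0400> paint-chip" scales to
# ((x1, y1, x2, y2), label) == ((200, 75, 400, 225), "paint-chip").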
    
######## Demo

INTRO_TEXT = """## Curacel Auto Damage demo\n\n
Finetuned from: google/paligemma-3b-pt-448
"""

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(INTRO_TEXT)
    with gr.Tab("Text Generation"):
        with gr.Column():
            image = gr.Image(type="pil")
            text_input = gr.Text(label="Input Text")

            text_output = gr.Text(label="Text Output")
            output_image = gr.Image(label="Annotated Image")
            chat_btn = gr.Button()

        chat_inputs = [image, text_input]
        chat_outputs = [text_output, output_image]

        chat_btn.click(
            fn=infer,
            inputs=chat_inputs,
            outputs=chat_outputs,
        )

        examples = [["./car-1.png", "detect Front-Windscreen-Damage ; Headlight-Damage ; Major-Rear-Bumper-Dent ; Rear-windscreen-Damage ; RunningBoard-Dent ; Sidemirror-Damage ; Signlight-Damage ; Taillight-Damage ; bonnet-dent ; doorouter-dent ; doorouter-scratch ; fender-dent ; front-bumper-dent ; front-bumper-scratch ; medium-Bodypanel-Dent ; paint-chip ; paint-trace ; pillar-dent ; quaterpanel-dent ; rear-bumper-dent ; rear-bumper-scratch ; roof-dent"]]
        gr.Markdown("")

        gr.Examples(
            examples=examples,
            inputs=chat_inputs,
        )

#########

if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)