Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -116,6 +116,8 @@ def annotate_image(image: Image.Image, result: dict):
     if not isinstance(image, Image.Image) or not isinstance(result, dict):
         return image
 
+    # Ensure image is mutable
+    image = image.convert("RGB")
     original_width, original_height = image.size
 
     if "points" in result and result["points"]:
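Note on the hunk above: Image.convert("RGB") returns a new image, so the function stops drawing on the caller's object and also normalizes palette or RGBA uploads to three channels before the NumPy conversions further down. A minimal standalone sketch of that behavior (not code from the app):

# Sketch: convert("RGB") yields a fresh 3-channel copy, so a palette
# or RGBA upload becomes plain RGB before np.array() is called.
from PIL import Image

rgba = Image.new("RGBA", (8, 8), (255, 0, 0, 128))
rgb = rgba.convert("RGB")
assert rgb.mode == "RGB" and rgb is not rgba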
@@ -129,7 +131,8 @@ def annotate_image(image: Image.Image, result: dict):
         points_array = np.array(points_list).reshape(1, -1, 2)
         key_points = sv.KeyPoints(xy=points_array)
         vertex_annotator = sv.VertexAnnotator(radius=4, color=sv.Color.RED)
-
+        annotated_image = vertex_annotator.annotate(scene=np.array(image.copy()), key_points=key_points)
+        return Image.fromarray(annotated_image)
 
     if "objects" in result and result["objects"]:
         boxes = []
@@ -139,17 +142,18 @@ def annotate_image(image: Image.Image, result: dict):
             x_max = obj.get("x_max", 0.0) * original_width
             y_max = obj.get("y_max", 0.0) * original_height
             boxes.append([x_min, y_min, x_max, y_max])
-
+
         if not boxes:
             return image
-
+
         detections = sv.Detections(xyxy=np.array(boxes))
 
         if len(detections) == 0:
             return image
 
         box_annotator = sv.BoxAnnotator(color_lookup=sv.ColorLookup.INDEX, thickness=2)
-
+        annotated_image = box_annotator.annotate(scene=np.array(image.copy()), detections=detections)
+        return Image.fromarray(annotated_image)
 
     return image
 
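Both annotation paths added in the two hunks above follow the same round-trip: supervision's annotators draw on NumPy arrays, so the PIL image is converted with np.array(...), annotated, and converted back with Image.fromarray(...). A minimal standalone sketch of that round-trip; the coordinates are made-up illustration values, not the app's:

# PIL -> NumPy -> PIL round-trip used by annotate_image.
import numpy as np
import supervision as sv
from PIL import Image

image = Image.new("RGB", (640, 480), "white")

# Keypoints: shape (num_instances, num_points, 2), pixel coordinates.
key_points = sv.KeyPoints(xy=np.array([[[320.0, 240.0]]]))
vertex_annotator = sv.VertexAnnotator(radius=4, color=sv.Color.RED)
points_img = vertex_annotator.annotate(scene=np.array(image.copy()), key_points=key_points)

# Boxes: shape (num_boxes, 4) as [x_min, y_min, x_max, y_max].
detections = sv.Detections(xyxy=np.array([[100.0, 100.0, 300.0, 250.0]]))
box_annotator = sv.BoxAnnotator(color_lookup=sv.ColorLookup.INDEX, thickness=2)
boxes_img = box_annotator.annotate(scene=np.array(image.copy()), detections=detections)

# Back to PIL, matching the gr.Image(type="pil") output the app uses.
result = Image.fromarray(boxes_img)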
@@ -243,6 +247,18 @@ def process_inputs(image, category, prompt):
 
     return qwen_annotated_image, qwen_text
 
+def on_category_change(category: str):
+    if category == "Query":
+        return gr.Textbox(placeholder="e.g., Count the total number of boats and describe the environment.")
+    elif category == "Caption":
+        return gr.Textbox(placeholder="e.g., short, normal, detailed")
+    elif category == "Point":
+        return gr.Textbox(placeholder="e.g., The gun held by the person.")
+    elif category == "Detect":
+        return gr.Textbox(placeholder="e.g., The headlight of the car.")
+    return gr.Textbox(placeholder="e.g., detect the object.")
+
+
 css = """
 #main-title h1 {
     font-size: 2.3em !important;
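The on_category_change handler added above relies on Gradio's component-as-update pattern: returning a gr.Textbox from an event handler patches only the properties passed to its constructor (here, the placeholder) onto the wired output component. A minimal sketch of the pattern, assuming Gradio 4.x; the component names are illustrative, not the app's:

import gradio as gr

def on_mode_change(mode: str):
    # Only the properties set here (the placeholder) are updated;
    # whatever the user already typed stays in the box.
    if mode == "Caption":
        return gr.Textbox(placeholder="e.g., short, normal, detailed")
    return gr.Textbox(placeholder="e.g., describe the image.")

with gr.Blocks() as demo:
    mode = gr.Radio(["Caption", "Query"], value="Query", label="Mode")
    prompt = gr.Textbox(label="Prompt")
    mode.change(fn=on_mode_change, inputs=[mode], outputs=[prompt])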
@@ -255,7 +271,7 @@ css = """
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **Qwen-3VL: Multimodal Understanding**", elem_id="main-title")
-
+
         with gr.Row():
             with gr.Column(scale=1):
                 image_input = gr.Image(type="pil", label="Upload Image")
@@ -266,7 +282,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                     interactive=True,
                 )
                 prompt_input = gr.Textbox(
-                    placeholder="e.g., 
+                    placeholder="e.g., Count the total number of boats and describe the environment.",
                     label="Prompt",
                     lines=1,
                 )
@@ -283,12 +299,11 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                 ["examples/4.jpg", "Detect", "Headlight"],
                 ["examples/3.jpg", "Point", "Gun"],
                 ["examples/1.jpg", "Query", "Count the total number of boats and describe the environment."],
-                ["examples/2.jpg", "Caption", "
-
+                ["examples/2.jpg", "Caption", "a brief"],
             ],
             inputs=[image_input, category_select, prompt_input],
         )
-
+
         category_select.change(
             fn=on_category_change,
             inputs=[category_select],
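For reference, each row in the examples list above maps positionally onto the components named in inputs=. A minimal sketch of that wiring; the file paths are placeholders and the category widget type is an assumption, since the hunk does not show its definition:

import gradio as gr

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil", label="Upload Image")
    category_select = gr.Radio(["Query", "Caption", "Detect", "Point"], value="Query", label="Category")
    prompt_input = gr.Textbox(label="Prompt")
    gr.Examples(
        examples=[
            # Row order matches inputs: image, category, prompt.
            ["examples/1.jpg", "Query", "Count the total number of boats and describe the environment."],
            ["examples/2.jpg", "Caption", "a brief"],
        ],
        inputs=[image_input, category_select, prompt_input],
    )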