# valegro's picture
# Update app.py
# ed309ba verified
import gradio as gr, numpy as np
from utils import SAM, GD
from groundingdino.util.utils import clean_text
from PIL import Image
import cv2, torch
def _mask_to_box(mask):
    """Return the tight ``[x1, y1, x2, y2]`` box around the non-zero pixels of *mask*.

    Returns ``None`` when the mask has no non-zero pixel, since an empty mask
    has no bounding box (``min``/``max`` over zero rows would raise).
    """
    coords = np.argwhere(mask)
    if coords.size == 0:
        return None
    # argwhere yields (row, col) pairs, i.e. (y, x).
    y1, x1 = coords.min(0)
    y2, x2 = coords.max(0)
    return np.array([x1, y1, x2, y2])


def pipeline(image, prompt):
    """Segment *image* with SAM, ground the mask boxes with GD and draw the result.

    Args:
        image: PIL image as delivered by ``gr.Image(type="pil")``; ``None``
            when the user submits without uploading anything.
        prompt: Text prompt; the same prompt is sent once per candidate box.

    Returns:
        A new PIL image with a red rectangle and a text label for every box
        returned by ``GD.predict_with_caption``, or ``None`` when *image* is
        ``None``. If no mask yields a box, the unannotated image is returned.
    """
    if image is None:
        return None

    # Work on a 3-channel RGB array so the colour conversion and the drawing
    # calls below behave the same for RGBA / greyscale uploads.
    rgb = np.array(image.convert("RGB"))

    # 1. Segment with SAM.
    # NOTE(review): SAM receives a BGR array here. If SAM is a
    # segment_anything SamPredictor, set_image defaults to
    # image_format="RGB" - confirm against utils.SAM.
    img_cv = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    SAM.set_image(img_cv)
    masks, _, _ = SAM.predict(
        box=None, point_coords=None, point_labels=None, multimask_output=False
    )

    # OpenCV drawing functions need a NumPy array, not a PIL image.
    annotated = rgb.copy()
    boxes = [box for box in map(_mask_to_box, masks) if box is not None]

    if boxes:
        # 2. Zero-shot grounding with GroundingDINO.
        dino_out = GD.predict_with_caption(
            image=rgb,
            captions=[prompt] * len(boxes),
            boxes=np.vstack(boxes),
        )
        for box, text in zip(dino_out["boxes"], dino_out["captions"]):
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)
            # Keep the text baseline inside the image when the box touches
            # the top edge; otherwise the label would be drawn off-canvas.
            cv2.putText(
                annotated,
                clean_text(text),
                (x1, max(y1 - 6, 14)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (255, 0, 0),
                2,
            )
    return Image.fromarray(annotated)
# Comma-separated labels pre-filled in the prompt textbox.
_DEFAULT_PROMPT = "lamiera, foro circolare, vite, bullone, scanalatura"

# User-facing description shown under the title.
_DESCRIPTION = (
    "Carica una foto di componenti meccanici a fine vita e scrivi le etichette "
    "che vuoi cercare (separate da virgole). Il sistema segmenta con SAM e fa "
    "grounding zero‑shot con GroundingDINO."
)

# Components are created in the same order as the original inline call:
# image input, prompt input, then the annotated-image output.
_image_input = gr.Image(type="pil")
_prompt_input = gr.Textbox(value=_DEFAULT_PROMPT)
_annotated_output = gr.Image(type="pil")

# Gradio UI: (image, prompt) -> pipeline -> annotated image.
demo = gr.Interface(
    fn=pipeline,
    inputs=[_image_input, _prompt_input],
    outputs=_annotated_output,
    title="Zero‑Shot Mechanical Part Finder",
    description=_DESCRIPTION,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()