import gradio as gr, numpy as np
from utils import SAM, GD
from groundingdino.util.utils import clean_text
from PIL import Image
import cv2, torch

def pipeline(image, prompt):
    """Segment *image* with SAM and annotate the regions captioned by GD.

    Args:
        image: PIL image supplied by the Gradio image input.
        prompt: Text prompt; it is repeated once per candidate box and
            handed to ``GD.predict_with_caption``.

    Returns:
        A new PIL image with a red rectangle and caption drawn for every
        box/caption pair returned by ``GD.predict_with_caption``. If no
        non-empty mask is found, the image is returned unannotated.
    """
    rgb = np.array(image)

    # 1. segment with SAM
    # NOTE(review): SAM is fed a BGR array here; confirm that the wrapper in
    # utils expects BGR rather than RGB.
    SAM.set_image(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
    masks, _, _ = SAM.predict(
        box=None,
        point_coords=None,
        point_labels=None,
        multimask_output=False,
    )

    # OpenCV drawing functions and Image.fromarray both need an ndarray, not a
    # PIL image, so the annotations go onto a separate array copy.
    annotated = rgb.copy()

    boxes = []
    for mask in masks:
        coords = np.argwhere(mask)
        if coords.size == 0:
            # An all-False mask has no extent; min()/max() would raise.
            continue
        # argwhere yields (row, col) pairs, i.e. (y, x).
        y1, x1 = coords.min(0)
        y2, x2 = coords.max(0)
        boxes.append(np.array([x1, y1, x2, y2]))

    if boxes:
        # 2. grounding DINO zero-shot
        dino_out = GD.predict_with_caption(
            image=rgb,
            captions=[prompt] * len(boxes),
            boxes=np.vstack(boxes),
        )
        for box, text in zip(dino_out["boxes"], dino_out["captions"]):
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(annotated, clean_text(text), (x1, y1 - 6),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

    return Image.fromarray(annotated)

# Gradio front-end: an image and a comma-separated label prompt go in,
# the image returned by `pipeline` comes out.
demo = gr.Interface(
    title="Zero‑Shot Mechanical Part Finder",
    description=(
        "Carica una foto di componenti meccanici a fine vita e scrivi le etichette "
        "che vuoi cercare (separate da virgole). Il sistema segmenta con SAM e fa "
        "grounding zero‑shot con GroundingDINO."
    ),
    fn=pipeline,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(value="lamiera, foro circolare, vite, bullone, scanalatura"),
    ],
    outputs=gr.Image(type="pil"),
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()