import io
import json

import gradio as gr
import numpy as np
import requests
import supervision as sv
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
 
# Pre-/post-processing comes from the base DETR checkpoint, while the
# detection weights are loaded from the "Guy2/AirportSec-150epoch" checkpoint.
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("Guy2/AirportSec-150epoch")

# Class index -> label text used when captioning detections.
id2label = {
    0: 'dangerous-items',
    1: 'Gun',
    2: 'Knife',
    3: 'Pliers',
    4: 'Scissors',
    5: 'Wrench',
}
def anylize(url):
    """Detect objects in the image at ``url`` and draw labelled boxes on it.

    Args:
        url: HTTP(S) address of the image to analyse.

    Returns:
        An RGB ``numpy`` array: a copy of the downloaded image with one box
        and a ``"<label> <score>"`` caption for every detection that passes
        the 0.8 score threshold and non-maximum suppression at 0.5.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        OSError: The response body could not be decoded as an image by PIL.
    """
    # Bound the wait so an unresponsive host cannot hang the UI, and fail
    # loudly on 4xx/5xx instead of handing an error page to the decoder.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # `.content` is already content-decoded (gzip/deflate), unlike `.raw`.
    # Force three channels: RGBA, palette and grayscale images would otherwise
    # reach the processor with an unexpected shape.
    with Image.open(io.BytesIO(response.content)) as downloaded:
        image = np.array(downloaded.convert("RGB"))

    with torch.no_grad():
        inputs = image_processor(images=image, return_tensors='pt')
        outputs = model(**inputs)

        # (height, width) of the original image, so boxes are scaled back to
        # pixel coordinates of the frame we annotate.
        target_sizes = torch.tensor([image.shape[:2]])
        results = image_processor.post_process_object_detection(
            outputs=outputs,
            threshold=0.8,
            target_sizes=target_sizes,
        )[0]

    # annotate
    detections = sv.Detections.from_transformers(transformers_results=results).with_nms(threshold=0.5)
    # Read the fields directly rather than unpacking the per-detection tuple,
    # whose width is not the same across supervision releases.
    labels = [
        f"{id2label.get(int(class_id), str(class_id))} {confidence:.2f}"
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]
    box_annotator = sv.BoxAnnotator()
    frame = box_annotator.annotate(scene=image.copy(), detections=detections, labels=labels)
    return frame

# Output widget: `anylize` hands back the annotated frame as a NumPy array.
output = gr.components.Image(type="numpy", label="Output Image")

# One text box (the image URL) in, one annotated image out; start serving.
gr.Interface(
    fn=anylize,
    inputs="text",
    outputs=output,
).launch()