import io 

import requests 
import numpy as np
import gradio as gr 
from PIL import Image
import matplotlib.pyplot as plt 
from transformers import pipeline 

# Build the object-detection pipeline once, at import time, so that every
# call to detect_objects() reuses the same pipeline object instead of
# constructing a new one per request.
obj_detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50"
)

# Object detection utilities
def load_image_from_url(url: str, timeout: float = 10.0) -> "Image.Image":
    """Download an image over HTTP(S) and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The downloaded image converted to RGB mode.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to PIL.
    response.raise_for_status()
    # Decode from the fully-read body rather than ``response.raw``: the body
    # has any Content-Encoding (e.g. gzip) already removed, and the
    # connection is released back to the pool once it has been read.
    return Image.open(io.BytesIO(response.content)).convert("RGB")

def render_results_in_image(img, detection_results):
    """Draw detection boxes and labels over *img* and return the rendering.

    Args:
        img: Image to annotate; passed straight to ``Axes.imshow``.
        detection_results: Iterable of prediction dicts. Each one must hold a
            ``"box"`` dict with ``xmin``/``ymin``/``xmax``/``ymax``, a
            ``"label"`` and a ``"score"`` (shown multiplied by 100 as a
            percentage).

    Returns:
        A PIL image decoded from the PNG rendering of the annotated figure.
    """
    # Function-scope imports keep this block self-contained.
    from matplotlib.figure import Figure
    from matplotlib.patches import Rectangle

    # Build the figure directly rather than through pyplot. pyplot tracks a
    # process-wide "current figure", so concurrent calls could draw onto each
    # other's axes, and a figure registered with pyplot stays alive until it
    # is explicitly closed (which the old code skipped on any exception).
    # A standalone Figure is simply garbage-collected when this call ends.
    fig = Figure(figsize=(16, 10))
    ax = fig.subplots()
    ax.imshow(img)

    for prediction in detection_results:
        box = prediction["box"]
        x, y = box["xmin"], box["ymin"]
        w = box["xmax"] - box["xmin"]
        h = box["ymax"] - box["ymin"]

        ax.add_patch(
            Rectangle(
                (x, y),
                w,
                h,
                fill=False,
                color="green",
                linewidth=2,
            )
        )

        # Label is anchored at the box's (xmin, ymin) corner.
        ax.text(
            x,
            y,
            f"{prediction['label']}: {round(prediction['score']*100, 1)}%"
        )

    ax.axis("off")

    # Render to an in-memory PNG; tight bbox with zero padding trims the
    # empty margin around the axes.
    img_buf = io.BytesIO()
    fig.savefig(img_buf, format="png", bbox_inches="tight", pad_inches=0)
    img_buf.seek(0)

    modified_image = Image.open(img_buf)
    # Image.open is lazy; decode now so the result is fully materialized
    # before it is handed back to the caller.
    modified_image.load()

    return modified_image

# Irregular or already-plural nouns that the suffix rules in
# _pluralize_label() would get wrong.
_IRREGULAR_PLURALS = {
    "knife": "knives",
    "mouse": "mice",
    "sheep": "sheep",
    "skis": "skis",
    "scissors": "scissors",
}


def _pluralize_label(label):
    """Return a plural form of *label* using a few basic English rules."""
    word = str(label)
    if word in _IRREGULAR_PLURALS:
        return _IRREGULAR_PLURALS[word]
    # bus -> buses, bench -> benches, toothbrush -> toothbrushes
    if word.endswith(("s", "x", "z", "ch", "sh")):
        return word + "es"
    # consonant + y -> ies (but "key" -> "keys")
    if len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou":
        return word[:-1] + "ies"
    return word + "s"


def summarize_detection_results(detection_results):
    """Build a one-sentence, human-readable count of the detected labels.

    Args:
        detection_results: Iterable of prediction dicts; only the
            ``"label"`` key of each entry is read.

    Returns:
        A sentence such as ``"In this image, there are 1 cat, 2 dogs, and
        1 bird."``. Labels appear in order of first occurrence. When nothing
        was detected the sentence says so explicitly instead of trailing off.
    """
    from collections import Counter

    # Counter preserves first-seen order, like the plain dict it replaces.
    counts = Counter(prediction["label"] for prediction in detection_results)

    if not counts:
        return "In this image, there are no detected objects."

    phrases = [
        f"{count} {label if count == 1 else _pluralize_label(label)}"
        for label, count in counts.items()
    ]
    if len(phrases) > 1:
        phrases[-1] = f"and {phrases[-1]}"

    # "there is 1 cat" only when exactly one object was found overall.
    verb = "is" if sum(counts.values()) == 1 else "are"

    return f"In this image, there {verb} {', '.join(phrases)}."

def detect_objects(image):
    """Run object detection on *image* and prepare both UI outputs.

    Args:
        image: The input image handed over by the interface, or ``None``
            when no image was supplied.

    Returns:
        A ``(processed_image, summary_string)`` tuple: the image annotated
        by ``render_results_in_image`` and the sentence produced by
        ``summarize_detection_results``.

    Raises:
        gr.Error: If *image* is ``None``, so the user sees a clear message
            instead of an opaque failure from inside the pipeline.
    """
    if image is None:
        raise gr.Error("Please provide an image before running detection.")

    detection_results = obj_detector(image)

    processed_image = render_results_in_image(image, detection_results)
    summary_string = summarize_detection_results(detection_results)

    return processed_image, summary_string

# Gradio UI definition: a single PIL image input wired to detect_objects().
# The two output components line up, in order, with the
# (processed_image, summary_string) tuple that detect_objects() returns.
obj_detection_interface = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        gr.Image(label="Output image with predicted objects", type="pil"),
        gr.Textbox(label="Object detection summary")
    ],
    title="Object Detection Application",
    description="This app detects objects from an image.",
    # Relative path to a sample input -- presumably resolved against the
    # directory the app is started from; TODO confirm the file is shipped.
    examples=["./examples/image1.jpg"]
)