import cv2
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from transformers import pipeline


# Object-detection pipeline built once at import time so every request reuses
# the same instance ("facebook/detr-resnet-50" checkpoint).
model = pipeline(task="object-detection", model="facebook/detr-resnet-50")

#render function

def _draw_detection(image, detection):
    """Draw one detection's bounding box and "label: score" caption on image."""
    color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2
    margin = 10

    box = detection['box']
    # OpenCV drawing calls require plain integer pixel coordinates.
    xmin, ymin = int(box['xmin']), int(box['ymin'])
    xmax, ymax = int(box['xmax']), int(box['ymax'])

    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

    caption = f"{detection['label']}: {detection['score']:.2f}"
    (_, text_height), _ = cv2.getTextSize(caption, font, font_scale, thickness)

    # Default position is just above the box. When the box touches the top of
    # the image the caption would be clipped, so move it inside the box.
    baseline_y = ymin - margin
    if baseline_y - text_height < 0:
        baseline_y = ymin + text_height + margin

    cv2.putText(image, caption, (xmin, baseline_y), font, font_scale, color, thickness)


def render_results(raw_image, model_output):
    """Return a copy of the image annotated with every detection.

    Args:
        raw_image: Image to annotate; anything ``np.array`` can convert
            (the caller in this file passes a PIL image).
        model_output: Iterable of detection dicts, each providing ``'label'``,
            ``'score'`` and a ``'box'`` dict with ``'xmin'``, ``'ymin'``,
            ``'xmax'`` and ``'ymax'``.

    Returns:
        A numpy array holding the image with a green rectangle and a
        "label: score" caption drawn for each detection.
    """
    # np.array makes a copy, so the caller's image is never drawn on.
    annotated = np.array(raw_image)

    for detection in model_output:
        _draw_detection(annotated, detection)

    return annotated


def get_object_counts(detections):
    """Tally how many detections carry each label.

    Args:
        detections: Iterable of dicts, each with a ``'label'`` key.

    Returns:
        A dict mapping each label to the number of times it occurred, with
        keys in order of first appearance.
    """
    tally = {}
    for item in detections:
        name = item['label']
        tally[name] = tally.get(name, 0) + 1
    return tally

def generate_output_text(object_counts):
    """Build a one-sentence summary of the detected objects.

    Args:
        object_counts: Mapping of label to the number of detections with
            that label.

    Returns:
        A sentence such as ``"In this image there are 2 cat, 1 dog."``, or a
        "no objects" message when the mapping is empty (previously this
        produced the dangling sentence "In this image there are.").
    """
    if not object_counts:
        return "No objects were detected in this image."

    # Each entry carries its own leading space, so joining on "," yields
    # " 2 cat, 1 dog" without a trailing comma to strip.
    listing = ",".join(
        f" {count} {label}" for label, count in object_counts.items()
    )
    return f"In this image there are{listing}."


def main(pil_image):
    """Detect objects in an image and return the annotated image plus a summary.

    Args:
        pil_image: Image handed over by the Gradio input component, or
            ``None`` when the form is submitted without an image.

    Returns:
        A ``(image, text)`` tuple: the image with bounding boxes drawn on it
        and a sentence listing the detected object counts. When no image was
        supplied the tuple is ``(None, <message>)``.
    """
    # Without this guard a submission with no image would pass None to the
    # model and surface as an error in the UI.
    if pil_image is None:
        return None, "Please provide an image."

    pipeline_output = model(pil_image)
    processed_image = render_results(pil_image, pipeline_output)
    output_text = generate_output_text(get_object_counts(pipeline_output))

    return processed_image, output_text


# Gradio UI: one image input, with the annotated image and the text summary
# returned by main() as outputs.
demo = gr.Interface(
    fn=main,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        # Label typo fixed ("Modle" -> "Model").
        gr.Image(label="Model Output Predictions", type="numpy"),
        gr.Text(label="Output Text"),
    ],
)

demo.launch()