"""Gradio demo that runs DETR object detection on an uploaded image.

For every detection the app draws a labelled bounding box on the image and
returns a one-sentence summary of how many objects of each label were found.
"""

from collections import Counter

import cv2
import gradio as gr
import matplotlib.pyplot as plt  # noqa: F401 -- unused in this file's visible code; kept deliberately
import numpy as np
from transformers import pipeline

# Load the detector once at import time so every request reuses it.
model = pipeline("object-detection", "facebook/detr-resnet-50")

# Drawing parameters shared by the box and its caption.
_BOX_COLOR = (0, 255, 0)  # green
_FONT = cv2.FONT_HERSHEY_SIMPLEX
_FONT_SCALE = 0.5
_THICKNESS = 2
_LABEL_GAP = 10  # pixels between the box's top edge and the caption baseline


def render_results(raw_image, model_output) -> np.ndarray:
    """Return a copy of *raw_image* with every detection drawn on it.

    Args:
        raw_image: Anything ``np.array`` can convert to an image array
            (``main`` passes the PIL image received from Gradio).
        model_output: Iterable of detection dicts, each with the keys
            ``'label'``, ``'score'`` and ``'box'``; ``'box'`` is a dict with
            ``'xmin'``, ``'ymin'``, ``'xmax'`` and ``'ymax'``.

    Returns:
        A new array with a rectangle and a ``"label: score"`` caption per
        detection. The input image is not modified.
    """
    # np.array() copies, so the drawing below never touches the caller's image.
    canvas = np.array(raw_image)

    for detection in model_output:
        box = detection['box']
        # OpenCV only accepts integer pixel coordinates.
        xmin, ymin, xmax, ymax = (
            int(box[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        caption = f"{detection['label']}: {detection['score']:.2f}"

        # Draw the bounding box.
        cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), _BOX_COLOR, _THICKNESS)

        # Put the caption just above the box; if that would push the text
        # past the top of the image, draw it just inside the box instead.
        (_, text_height), _ = cv2.getTextSize(
            caption, _FONT, _FONT_SCALE, _THICKNESS
        )
        baseline_y = ymin - _LABEL_GAP
        if baseline_y < text_height:
            baseline_y = max(ymin, 0) + text_height + _LABEL_GAP
        cv2.putText(
            canvas, caption, (xmin, baseline_y),
            _FONT, _FONT_SCALE, _BOX_COLOR, _THICKNESS,
        )

    return canvas


def get_object_counts(detections) -> dict:
    """Count how many detections carry each label.

    Args:
        detections: Iterable of detection dicts with a ``'label'`` key.

    Returns:
        A plain ``dict`` mapping label -> number of occurrences, in the order
        each label was first seen.
    """
    return dict(Counter(detection['label'] for detection in detections))


def generate_output_text(object_counts) -> str:
    """Build the one-sentence summary shown next to the annotated image.

    Args:
        object_counts: Mapping of label -> count, as produced by
            ``get_object_counts``.

    Returns:
        ``"In this image there are <count> <label>, ... ."`` for a non-empty
        mapping, or a "no detected objects" sentence when it is empty (the
        bare prefix followed by a period is not a meaningful sentence).
    """
    if not object_counts:
        return "In this image there are no detected objects."

    listing = ", ".join(
        f"{count} {label}" for label, count in object_counts.items()
    )
    return f"In this image there are {listing}."


def main(pil_image):
    """Gradio callback: detect objects, annotate the image, summarise counts.

    Args:
        pil_image: The image supplied by the ``gr.Image`` input
            (``type="pil"``); ``None`` when nothing was uploaded.

    Returns:
        A ``(annotated_image, summary_text)`` tuple matching the two output
        components of ``demo``.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a failure from inside the model call.
    """
    if pil_image is None:
        raise gr.Error("Please provide an input image.")

    pipeline_output = model(pil_image)
    processed_image = render_results(pil_image, pipeline_output)
    output_text = generate_output_text(get_object_counts(pipeline_output))
    return processed_image, output_text


demo = gr.Interface(
    fn=main,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        gr.Image(label="Model output Predictions", type="numpy"),
        gr.Text(label="Output Text"),
    ],
)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse the helpers) does not block on a running web app.
if __name__ == "__main__":
    demo.launch()