"""Gradio application that detects objects in an image with DETR."""

import io
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import requests
from PIL import Image
from transformers import pipeline

# Load the pipeline
obj_detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)


# Object detection utilities
def load_image_from_url(url: str, timeout: float = 10.0):
    """Download the image at *url* and return it as an RGB PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The downloaded image converted to RGB mode.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()
    # Reading ``content`` consumes the whole body (so the connection is
    # released) and applies any content-encoding, unlike the ``raw`` stream.
    return Image.open(io.BytesIO(response.content)).convert("RGB")


def render_results_in_image(img, detection_results):
    """Draw the predicted boxes and labels on *img* and return the result.

    Args:
        img: Image to draw on; anything ``Axes.imshow`` accepts.
        detection_results: Iterable of predictions, each a mapping with
            ``"box"`` (holding ``xmin``/``ymin``/``xmax``/``ymax``),
            ``"label"`` and ``"score"`` entries.

    Returns:
        A PIL image of the rendered figure, encoded as PNG.
    """
    # Work on an explicit figure rather than pyplot's implicit "current
    # figure", and always close it so a failure while drawing or saving
    # does not leave the figure registered with pyplot.
    fig, ax = plt.subplots(figsize=(16, 10))
    try:
        ax.imshow(img)
        for prediction in detection_results:
            box = prediction["box"]
            x, y = box["xmin"], box["ymin"]
            w = box["xmax"] - box["xmin"]
            h = box["ymax"] - box["ymin"]
            ax.add_patch(
                plt.Rectangle(
                    (x, y), w, h, fill=False, color="green", linewidth=2
                )
            )
            ax.text(
                x,
                y,
                f"{prediction['label']}: {round(prediction['score'] * 100, 1)}%",
            )
        ax.axis("off")

        # save the modified image to a BytesIO object
        img_buf = io.BytesIO()
        fig.savefig(img_buf, format="png", bbox_inches="tight", pad_inches=0)
    finally:
        # close the plot to prevent it from being displayed
        plt.close(fig)

    img_buf.seek(0)
    return Image.open(img_buf)


def summarize_detection_results(detection_results):
    """Return an English sentence counting the detected objects per label.

    Args:
        detection_results: Iterable of predictions, each a mapping with a
            ``"label"`` entry.

    Returns:
        A sentence such as ``"In this image, there are 2 cats, and 1 dog."``,
        or a dedicated message when nothing was detected.
    """
    # Counter keeps first-seen order, so labels appear in detection order.
    label_counts = Counter(
        prediction["label"] for prediction in detection_results
    )
    if not label_counts:
        return "No objects were detected in this image."

    # Pluralization is deliberately naive: an "s" is appended for counts > 1.
    phrases = [
        f"{count} {label}{'s' if count > 1 else ''}"
        for label, count in label_counts.items()
    ]
    if len(phrases) > 1:
        phrases[-1] = "and " + phrases[-1]
    return "In this image, there are " + ", ".join(phrases) + "."


def detect_objects(image):
    """Run object detection on *image* for the Gradio interface.

    Args:
        image: The PIL image supplied by the input component, or ``None``
            when the form is submitted without an image.

    Returns:
        A ``(annotated_image, summary_text)`` tuple matching the two output
        components of the interface.
    """
    if image is None:
        # Nothing to analyze; report it instead of failing in the pipeline.
        return None, "Please provide an image to analyze."

    detection_results = obj_detector(image)
    processed_image = render_results_in_image(image, detection_results)
    summary_string = summarize_detection_results(detection_results)
    return processed_image, summary_string


obj_detection_interface = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(label="Input Image", type="pil"),
    outputs=[
        gr.Image(label="Output image with predicted objects", type="pil"),
        gr.Textbox(label="Object detection summary"),
    ],
    title="Object Detection Application",
    description="This app detects objects from an image.",
    examples=["./examples/image1.jpg"],
)