"""Gradio demo: YOLOv8 instance segmentation on uploaded images and webcam frames.

Two tabs are exposed: one that annotates live webcam frames and one that
annotates an uploaded image file. Only detections whose class is listed in
``listClasses`` and whose confidence exceeds ``CONF_THRESHOLD`` are drawn.
"""

import math
import os
import random

import cv2
import cvzone
import gradio as gr
import numpy as np
import requests
from ultralytics import YOLO

model = YOLO('yolov8x-seg.pt')
path = []
video_path = []
listClasses = ['person', 'bicycle', 'car']

# A detection is drawn only when its (rounded-up) confidence is above this.
CONF_THRESHOLD = 0.70
# Bounding-box colour in BGR order (red); drawing always happens on BGR images.
BOX_COLOR_BGR = (0, 0, 255)

# Class names indexed by class id, in the order the model reports them.
_CLASS_NAMES = list(model.names.values())
_TARGET_CLASSES = frozenset(listClasses)
# One random fill/text colour per class, generated once so that colours stay
# stable across calls (and do not flicker between webcam frames).
_CLASS_COLORS = [tuple(random.choices(range(256), k=3)) for _ in _CLASS_NAMES]


def _annotate(image, outputs):
    """Draw masks, boxes and labels for the accepted detections onto *image*.

    Args:
        image: BGR ``numpy`` image; it is modified in place.
        outputs: Iterable of prediction results as returned by
            ``model.predict``.

    Returns:
        The same *image* object, for convenience.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    text_thickness = 1

    for result in outputs:
        # No masks object means nothing was segmented in this image.
        if result.masks is None:
            continue

        for polygon, box in zip(result.masks.xy, result.boxes):
            class_id = int(box.cls[0])
            # Round the confidence up to two decimals, as shown in the label.
            conf = math.ceil(float(box.conf[0]) * 100) / 100
            name = _CLASS_NAMES[class_id]

            # For the COCO-trained weights loaded above, listClasses holds
            # exactly the classes with ids 0, 1 and 2.
            if name not in _TARGET_CLASSES or conf <= CONF_THRESHOLD:
                continue

            color = _CLASS_COLORS[class_id]

            # Skip the fill when the mask polygon has no points.
            if len(polygon) > 0:
                cv2.fillPoly(image, np.int32([polygon]), color)

            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            # Pass two corner points: a single 4-tuple would be read by
            # OpenCV as (x, y, width, height) and draw an oversized box.
            cv2.rectangle(
                image,
                (x1, y1),
                (x2, y2),
                color=BOX_COLOR_BGR,
                thickness=2,
                lineType=cv2.LINE_AA,
            )

            # Keep the label inside the image near the top-left box corner.
            cv2.putText(
                image,
                f"{name} {conf}",
                (max(0, x1), max(35, y1)),
                font,
                font_scale,
                color,
                text_thickness,
                cv2.LINE_AA,
            )
    return image


def show_preds_image(image_path):
    """Segment the image stored at *image_path* and return it annotated.

    Args:
        image_path: Path of the image file to analyse. A falsy value (no
            image supplied) yields ``None``.

    Returns:
        The annotated image as an RGB ``numpy`` array, or ``None`` when no
        path was supplied.

    Raises:
        ValueError: If the file cannot be decoded as an image.
    """
    if not image_path:
        return None

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Predict on the decoded array so that the masks are guaranteed to be
    # aligned with the very pixels we draw on.
    outputs = model.predict(source=image)
    _annotate(image, outputs)
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def show_preds_webcam(pil_image):
    """Segment a single webcam frame and return it annotated.

    Args:
        pil_image: The frame as a PIL image, or ``None`` when no frame is
            available yet.

    Returns:
        The annotated frame as an RGB ``numpy`` array, or ``None`` when no
        frame was supplied.
    """
    if pil_image is None:
        return None

    # The PIL frame is RGB, but numpy inputs to the model are treated as BGR,
    # so convert before predicting and drawing.
    frame_rgb = np.array(pil_image.convert("RGB"))
    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

    outputs = model.predict(frame_bgr)
    _annotate(frame_bgr, outputs)
    return cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)


inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
outputs_image = [
    gr.components.Image(type="numpy", label="Output Image"),
]
interface_image = gr.Interface(
    fn=show_preds_image,
    inputs=inputs_image,
    outputs=outputs_image,
    title="Object segmentation",
    examples=path,
    cache_examples=False,
)

outputs_video = [
    gr.components.Image(type="numpy", label="Output Image"),
]
interface_webcam = gr.Interface(
    fn=show_preds_webcam,
    live=True,
    inputs=gr.Image(source="webcam", streaming=True, type="pil"),
    outputs=outputs_video,
)

# Build the two-tab app and start serving it with request queuing enabled.
gr.TabbedInterface(
    [interface_webcam, interface_image],
    tab_names=['Webcam', "Image"]
).queue().launch()