Spaces:
Sleeping
Sleeping
from PIL import Image | |
import io | |
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
import cv2 | |
import requests | |
import os | |
from ultralytics import YOLO | |
from ultralytics.utils.plotting import Annotator, colors | |
from render import custom_render_result | |
# Remote sample assets (two still images and one video clip) that the
# start-up loop below hands to download_file().
file_urls = [
    'https://www.dropbox.com/s/b5g97xo901zb3ds/pothole_example.jpg?dl=1',
    'https://www.dropbox.com/s/86uxlxxlm1iaexa/pothole_screenshot.png?dl=1',
    'https://www.dropbox.com/s/7sjfwncffg8xej2/video_7.mp4?dl=1',
]
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless that file already exists.

    Args:
        url: Address of the file to fetch.
        save_name: Local path the response body is written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if os.path.exists(save_name):
        # Already present from an earlier run; skip the network round-trip.
        return
    response = requests.get(url, timeout=60)
    # Fail loudly instead of storing an error page under a media file name,
    # which the existence check above would then keep forever.
    response.raise_for_status()
    # The context manager guarantees the handle is flushed and closed.
    with open(save_name, 'wb') as out_file:
        out_file.write(response.content)
# Fetch every sample asset at import time. A URL containing "mp4" is stored
# as "video.mp4"; every other URL is stored as "image_<index>.jpg".
for i, url in enumerate(file_urls):
    download_file(url, "video.mp4" if 'mp4' in url else f"image_{i}.jpg")
def get_image_from_bytes(binary_image: bytes) -> "Image.Image":
    """Decode an in-memory encoded image into a PIL image in RGB mode.

    Args:
        binary_image (bytes): The encoded image file contents.

    Returns:
        PIL.Image.Image: The decoded image, converted to RGB.
    """
    # `Image` is the PIL module in this file, so the instance type is
    # `Image.Image`; the annotation is quoted to keep it a lazy reference.
    input_image = Image.open(io.BytesIO(binary_image)).convert("RGB")
    return input_image
def get_bytes_from_image(image: "Image.Image") -> io.BytesIO:
    """Encode a PIL image as JPEG (quality 85) into an in-memory buffer.

    Args:
        image (PIL.Image.Image): The image to encode.

    Returns:
        io.BytesIO: Buffer holding the JPEG data, rewound to offset 0 so it
        can be read straight away.
    """
    # The JPEG writer rejects modes such as RGBA, LA or P. Convert those to
    # RGB first; modes it can already write are passed through unchanged.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    return_image = io.BytesIO()
    image.save(return_image, format='JPEG', quality=85)
    return_image.seek(0)  # rewind so callers read from the start
    return return_image
def transform_predict_to_df(results: list, labeles_dict: dict) -> pd.DataFrame:
    """Turn the first prediction result into a pandas DataFrame.

    Args:
        results (list): Prediction output; only ``results[0]`` is read. It
            must support ``.to("cpu").numpy()`` and expose ``boxes`` with
            ``xyxy``, ``conf`` and ``cls`` arrays.
        labeles_dict (dict): Mapping from class id to label name.

    Returns:
        pd.DataFrame: One row per box with the columns ``xmin``, ``ymin``,
        ``xmax``, ``ymax``, ``confidence``, ``class`` and ``name``.
    """
    # Convert to CPU/NumPy once and reuse the result for every column
    # instead of repeating the conversion per field.
    boxes = results[0].to("cpu").numpy().boxes

    predict_bbox = pd.DataFrame(boxes.xyxy, columns=['xmin', 'ymin', 'xmax', 'ymax'])
    predict_bbox['confidence'] = boxes.conf
    predict_bbox['class'] = boxes.cls.astype(int)
    # Ids missing from labeles_dict are left as-is by replace().
    predict_bbox['name'] = predict_bbox["class"].replace(labeles_dict)
    return predict_bbox
def get_model_predict(model: YOLO, input_image: Image, save: bool = False, image_size: int = 1248, conf: float = 0.5,
                      augment: bool = False) -> pd.DataFrame:
    """Run ``model`` on ``input_image`` and return the detections as a table.

    Args:
        model (YOLO): The trained YOLO model.
        input_image (Image): The image to run the model on.
        save (bool, optional): Forwarded to ``model.predict`` as ``save``. Defaults to False.
        image_size (int, optional): Forwarded to ``model.predict`` as ``imgsz``. Defaults to 1248.
        conf (float, optional): Confidence threshold. Defaults to 0.5.
        augment (bool, optional): Forwarded to ``model.predict`` as ``augment``. Defaults to False.

    Returns:
        pd.DataFrame: The predictions, as built by ``transform_predict_to_df``.
    """
    # Collect the predict() arguments in one place; the flip and mosaic
    # settings are pinned to 0.0 on every call.
    predict_kwargs = dict(
        imgsz=image_size,
        source=input_image,
        conf=conf,
        save=save,
        augment=augment,
        flipud=0.0,
        fliplr=0.0,
        mosaic=0.0,
    )
    raw_results = model.predict(**predict_kwargs)

    # Label names come from the model itself.
    return transform_predict_to_df(raw_results, model.model.names)
def get_model_segment(model: YOLO, input_image: Image, save: bool = False, image_size: int = 1248, conf: float = 0.25,
                      augment: bool = False) -> pd.DataFrame:
    """Get the predictions of a model on an input image (default conf 0.25).

    This performs exactly the same prediction as ``get_model_predict``; the
    only difference is the lower default confidence threshold, so it
    delegates instead of duplicating the call.

    Args:
        model (YOLO): The trained YOLO model.
        input_image (Image): The image on which the model will make predictions.
        save (bool, optional): Whether to save the image with the predictions. Defaults to False.
        image_size (int, optional): The size of the image the model will receive. Defaults to 1248.
        conf (float, optional): The confidence threshold for the predictions. Defaults to 0.25.
        augment (bool, optional): Forwarded to ``model.predict`` as ``augment``. Defaults to False.

    Returns:
        pd.DataFrame: A DataFrame containing the predictions.
    """
    return get_model_predict(
        model=model,
        input_image=input_image,
        save=save,
        image_size=image_size,
        conf=conf,
        augment=augment,
    )
################################# BBOX Func ##################################### | |
def add_bboxs_on_img(image: "Image.Image", predict: pd.DataFrame) -> "Image.Image":
    """Draw every predicted bounding box, with its label, on a copy of the image.

    Args:
        image (PIL.Image.Image): Input image.
        predict (pd.DataFrame): Predictions with the columns ``xmin``,
            ``ymin``, ``xmax``, ``ymax``, ``confidence``, ``class`` and
            ``name``.

    Returns:
        PIL.Image.Image: The image with bounding boxes drawn on it.
    """
    # The annotator draws on a NumPy copy of the image.
    annotator = Annotator(np.array(image))

    # Process boxes left to right.
    predict = predict.sort_values(by=['xmin'], ascending=True)

    for _, row in predict.iterrows():
        # Label shown next to the box, e.g. "name: 87%".
        text = f"{row['name']}: {int(row['confidence'] * 100)}%"
        bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
        # The colour is chosen from the class id.
        annotator.box_label(bbox, text, color=colors(row['class'], True))

    # Convert the annotated array back to a PIL image.
    return Image.fromarray(annotator.result())
################################# Models ##################################### | |
def detect_sample_model(input_image: "Image.Image") -> pd.DataFrame:
    """Run the module-level YOLOv8 model on an image.

    Args:
        input_image (PIL.Image.Image): The input image.

    Returns:
        pd.DataFrame: DataFrame containing the object locations.
    """
    # The original code referenced `model_sample_detect`, a name that is
    # defined nowhere in this file, so every call raised NameError. The only
    # model this module loads is the global `model`, so use that.
    predict = get_model_predict(
        model=model,
        input_image=input_image,
        save=False,
        image_size=640,
        augment=False,
        conf=0.2,
    )
    return predict
def yoloV8_func(image: "np.ndarray | None" = None,
                image_size: int = 640,
                conf_threshold: float = 0.4,
                iou_threshold: float = 0.5,
                model_name: str = 'YOLOv8-medium'):
    """Run YOLOv8 object detection on an image and return the rendered result.

    Args:
        image (optional): Image supplied by the Gradio image input (a NumPy
            array with the component's default settings). Defaults to None.
        image_size (int, optional): Desired image size for the model. Defaults to 640.
        conf_threshold (float, optional): Confidence threshold for object detection. Defaults to 0.4.
        iou_threshold (float, optional): Intersection over Union threshold for object detection. Defaults to 0.50.
        model_name (str, optional): Accepted so the signature matches the UI
            inputs, but not used: the module-level ``model`` always runs.

    Returns:
        The output of ``custom_render_result`` for the first prediction, or
        None when no image was supplied.
    """
    if image is None:
        # Submitted without an image: nothing to detect or render.
        return None

    # The slider may deliver the size as a float; the model expects an int.
    results = model.predict(image,
                            conf=conf_threshold,
                            iou=iou_threshold,
                            imgsz=int(image_size))

    # Render the output image with bounding boxes around detected objects.
    render = custom_render_result(model=model, image=image, result=results[0])
    return render
# Detector used by both the image tab and the video tab.
model = YOLO('best.pt')

# Example inputs offered in the image and video tabs.
# NOTE(review): the start-up download loop writes "image_<index>.jpg" and
# "video.mp4" only; confirm the two PNG examples ship with the app.
path = [['image_tyre.png'], ['image_ladder.png']]
video_path = [['video.mp4']]

outputs_image = gr.components.Image(label="Output Image")

# One component per yoloV8_func parameter, in signature order.
inputs_image = [
    gr.components.Image(label="Input Image"),
    gr.Slider(
        minimum=320,
        maximum=1280,
        step=32,
        label="Image Size",
        value=640,
    ),
    gr.Slider(
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        label="Confidence Threshold",
        value=0.4,
        info="Usual value is 0.5",
    ),
    gr.Slider(
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        label="IOU Threshold",
        value=0.5,
        info="Usual value greater than 0.2",
    ),
    gr.components.Dropdown(
        ["YOLOv8-nano", "YOLOv8-small", "YOLOv8-medium", "YOLOv8-large", "YOLOv8-xlarge"],
        value="YOLOv8-medium",
        label="YOLOv8 Model",
    ),
]

interface_image = gr.Interface(
    fn=yoloV8_func,
    inputs=inputs_image,
    outputs=[outputs_image],
    title="NonConforming Detector",
    examples=path,
    cache_examples=False,
)
def show_preds_video(video_path):
    """Yield one rendered detection result per frame of a video.

    Args:
        video_path: Path of the video file to read. When it is empty or
            None, the generator finishes without yielding anything.

    Yields:
        The output of ``custom_render_result`` for each decoded frame.
    """
    if not video_path:
        # Nothing was uploaded, so there is nothing to decode.
        return

    conf_threshold = 0.4
    iou_threshold = 0.5
    image_size = 640

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a decode failure). The capture stays
                # "opened" after the last frame, so without this break the
                # loop would spin forever.
                break

            results = model.predict(frame,
                                    conf=conf_threshold,
                                    iou=iou_threshold,
                                    imgsz=image_size)

            # NOTE(review): OpenCV delivers frames in BGR channel order;
            # confirm custom_render_result returns RGB for display.
            yield custom_render_result(model=model, image=frame, result=results[0])
    finally:
        # Release the capture even if the consumer stops iterating early.
        cap.release()
# Video tab: takes an uploaded clip and shows each result yielded by
# show_preds_video in an image component.
inputs_video = [gr.components.Video(label="Input Video")]
outputs_video = [gr.components.Image(label="Output Image")]

interface_video = gr.Interface(
    fn=show_preds_video,
    inputs=inputs_video,
    outputs=outputs_video,
    title="NonConforming Video Detector",
    examples=video_path,
    cache_examples=False,
)

# Put both interfaces in one tabbed app, enable the queue, and start serving.
gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=['Image inference', 'Video inference'],
).queue().launch()