# -*- coding: utf-8 -*-
"""Judol Gradio YOLO11.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1oiuTAi-cys1ydtUhSDJSRdeA02mAmZQH
"""
import os

import cv2
import gradio as gr
import imageio
from ultralytics import YOLO

# Load the trained weights; ultralytics downloads the file when given a URL.
model = YOLO('https://huggingface.co/JrEasy/Judol-Detection-YOLO11/resolve/main/best.pt')
# Minimum confidence for a detection to be drawn and logged.
confidence_threshold = 0.6

class_names = {
    0: "BK8",
    1: "Gate of Olympus",
    2: "Princess",
    3: "Starlight Princess",
    4: "Zeus",
}

# Box colors in OpenCV's BGR channel order.
class_colors = {
    0: (0, 255, 0),    # Green for BK8
    1: (255, 0, 0),    # Blue for Gate of Olympus
    2: (0, 0, 255),    # Red for Princess
    3: (255, 255, 0),  # Cyan for Starlight Princess
    4: (255, 0, 255),  # Magenta for Zeus
}
def format_time_ranges(timestamps, classes):
    """Group per-detection timestamps into 'Class = start-end, ...' strings."""
    if not timestamps:
        return ""
    # Bucket timestamps by class name.
    class_timestamps = {}
    for timestamp, class_id in zip(timestamps, classes):
        class_name = class_names.get(class_id, 'Unknown')
        if class_name not in class_timestamps:
            class_timestamps[class_name] = []
        class_timestamps[class_name].append(timestamp)
    formatted_ranges = []
    for class_name, times in class_timestamps.items():
        times = sorted(times)
        ranges = []
        start = times[0]
        # Merge timestamps that are at most one second apart into one range.
        for i in range(1, len(times)):
            if times[i] - times[i - 1] <= 1:
                continue
            ranges.append(f"{int(start)}-{int(times[i - 1])}")
            start = times[i]
        ranges.append(f"{int(start)}-{int(times[-1])}")
        formatted_ranges.append(f"{class_name} = {', '.join(ranges)}")
    return ", ".join(formatted_ranges)
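# A quick trace of the grouping above (hypothetical values): timestamps
# [1, 2, 3, 7, 8] all labelled class 0 would come back as "BK8 = 1-3, 7-8",
# since a gap of more than one second starts a new range.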
def process_video(input_video):
    """Run detection on every frame and write an annotated copy of the video."""
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print("Error: Could not open input video.")
        return None, ""
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the output video path in the current directory.
    output_video_path = os.path.join(os.getcwd(), "processed_video.mp4")
    writer = imageio.get_writer(output_video_path, fps=fps, codec="h264")

    frame_count = 0
    timestamps = []
    classes_detected = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        timestamp = frame_count / fps
        frame_count += 1

        # Resize the frame to 640x640 before passing it to the model.
        resized_frame = cv2.resize(frame, (640, 640))
        # Convert to grayscale and re-stack into 3 channels for inference.
        gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
        input_frame = cv2.merge([gray_frame, gray_frame, gray_frame])

        results = model.predict(input_frame)
        for result in results:
            for box in result.boxes:
                if box.conf[0] >= confidence_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    class_id = int(box.cls[0])
                    class_name = class_names.get(class_id, f"Class {class_id}")
                    color = class_colors.get(class_id, (0, 255, 0))
                    cv2.rectangle(resized_frame, (x1, y1), (x2, y2), color, 2)
                    text = f'{class_name}, Conf: {box.conf[0]:.2f}'
                    # Keep the label inside the frame when the box touches the top edge.
                    text_position = (x1, y1 - 10 if y1 > 20 else y1 + 20)
                    cv2.putText(resized_frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    timestamps.append(timestamp)
                    classes_detected.append(class_id)

        # Resize the frame back to the original size for the output video.
        output_frame = cv2.resize(resized_frame, (frame_width, frame_height))
        # imageio expects RGB; OpenCV frames are BGR.
        writer.append_data(cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB))

    cap.release()
    writer.close()

    formatted_time_ranges = format_time_ranges(timestamps, classes_detected)
    print(f"Processed video saved at: {output_video_path}")
    return output_video_path, formatted_time_ranges
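# Minimal sketch of calling process_video outside Gradio (the file name is an
# assumption; Gradio normally passes the uploaded file's path):
# video_path, log = process_video("sample.mp4")
# print(log)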
def process_image(input_image):
    """Run detection on a single RGB image; return it annotated, plus a count log."""
    # Convert the image from RGB (Gradio) to BGR for OpenCV processing.
    bgr_frame = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    # Resize the frame to 640x640 before passing it to the model.
    resized_frame = cv2.resize(bgr_frame, (640, 640))
    # Convert to grayscale and re-stack into 3 channels, as in process_video.
    gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
    input_frame = cv2.merge([gray_frame, gray_frame, gray_frame])

    results = model.predict(input_frame)
    detections_log = []
    classes_detected = []
    for result in results:
        for box in result.boxes:
            if box.conf[0] >= confidence_threshold:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_id = int(box.cls[0])
                class_name = class_names.get(class_id, f"Class {class_id}")
                color = class_colors.get(class_id, (0, 255, 0))  # Default green color
                cv2.rectangle(resized_frame, (x1, y1), (x2, y2), color, 2)
                text = f'{class_name}, Conf: {box.conf[0]:.2f}'
                text_position = (x1, y1 - 10 if y1 > 20 else y1 + 20)
                cv2.putText(resized_frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                detections_log.append({
                    "class": class_name,
                    "confidence": float(box.conf[0]),
                })
                classes_detected.append(class_id)

    # Count occurrences of each class detected.
    class_count = {class_names.get(cls, f"Class {cls}"): classes_detected.count(cls) for cls in set(classes_detected)}
    # Format the detections as 'Class = Count' pairs.
    formatted_log = ", ".join(f"{class_name} = {count}" for class_name, count in class_count.items())
    # Convert the output frame back to RGB for Gradio.
    output_image = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
    return output_image, formatted_log
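# Minimal sketch of calling process_image directly (the file name is an
# assumption); cv2.imread returns BGR, so convert to RGB as Gradio would:
# img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)
# annotated, log = process_image(img)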
with gr.Blocks() as app:
    gr.Markdown("## Judol Detection using YOLO11")

    with gr.Tab("Video Detection"):
        with gr.Row():
            input_video = gr.Video(label="Upload a video")
            output_video = gr.Video(label="Processed Video")
        detections_log = gr.Textbox(label="Detections Log", lines=10)
        input_video.change(
            fn=lambda input_video: process_video(input_video) if input_video else (None, ""),
            inputs=input_video,
            outputs=[output_video, detections_log],
        )

    with gr.Tab("Image Detection"):
        with gr.Row():
            input_image = gr.Image(label="Upload an image")
            output_image = gr.Image(label="Processed Image")
        image_detections_log = gr.Textbox(label="Detections Log", lines=10)
        input_image.change(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, image_detections_log],
        )

app.launch()