Spaces:
Running
Running
# --------------------------------------------------------
# YOLOv12 Streamlit App with Emoji-Powered UI
# Based on yolov10: https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------
import streamlit as st | |
import cv2 | |
import tempfile | |
from ultralytics import YOLO | |
from PIL import Image | |
import os | |
# Configure the browser tab (title and icon) and use the wide page layout.
st.set_page_config(layout="wide", page_icon="๐", page_title="YOLOv12 Detector ๐ต๏ธโโ๏ธ")
def _remove_file(path):
    """Delete *path*; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _annotate_image(model, uploaded_file, image_size, conf_threshold):
    """Run detection on one uploaded image and return the annotated array."""
    with st.spinner("๐ผ๏ธ Painting detections..."):
        image = Image.open(uploaded_file)
        results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
        annotated_image = results[0].plot()
        # Reverse the channel axis before handing the array to the UI
        # (presumably BGR -> RGB; confirm against the ultralytics plot() docs).
        return annotated_image[:, :, ::-1]


def _annotate_video(model, uploaded_file, image_size, conf_threshold):
    """Run detection on every frame of an uploaded video.

    Returns the path of a temporary ``.mp4`` holding the annotated frames.
    The caller owns that file and is responsible for deleting it.
    """
    fallback_fps = 30.0

    with st.spinner("๐ฅ Cooking up a detected video..."):
        # NamedTemporaryFile(delete=False) creates the file atomically, which
        # avoids the name-guessing race of the deprecated tempfile.mktemp().
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as src:
            src.write(uploaded_file.read())
            video_path = src.name
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as dst:
            output_video_path = dst.name

        cap = None
        out = None
        finished = False
        try:
            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Some containers report 0 (or NaN) FPS; a writer opened with such
            # a rate produces an unusable file, so fall back to a sane default.
            if not fps > 0:
                fps = fallback_fps
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            out = cv2.VideoWriter(
                output_video_path,
                cv2.VideoWriter_fourcc(*'mp4v'),
                fps,
                (frame_width, frame_height),
            )

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
                out.write(results[0].plot())
                frame_count += 1
                if frame_count % 30 == 0:
                    st.text(f"๐ณ Processed {frame_count} frames...")
            finished = True
        finally:
            # Release handles before deleting so the removals also succeed on
            # platforms that refuse to unlink open files.
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()
            _remove_file(video_path)
            if not finished:
                # Do not leave a half-written output behind on failure.
                _remove_file(output_video_path)

        return output_video_path


def yolov12_inference(uploaded_file, model_id, image_size, conf_threshold, input_type):
    """Run YOLO detection on an uploaded image or video.

    Args:
        uploaded_file: File-like object from the uploader (falsy means "none").
        model_id: Weights identifier passed to ``YOLO``.
        image_size: Inference size forwarded as ``imgsz``.
        conf_threshold: Confidence threshold forwarded as ``conf``.
        input_type: ``"Image"`` or ``"Video"``.

    Returns:
        ``(annotated_image, None)`` for images, ``(None, output_video_path)``
        for videos, and ``(None, None)`` when there is no file or the input
        type is not recognised.
    """
    if not uploaded_file or input_type not in ("Image", "Video"):
        return None, None

    # Load the weights only once we know there is work to do.
    model = YOLO(model_id)

    if input_type == "Image":
        return _annotate_image(model, uploaded_file, image_size, conf_threshold), None
    return None, _annotate_video(model, uploaded_file, image_size, conf_threshold)
def main():
    """Render the two-column Streamlit UI: controls on the left, results on the right.

    The left column collects the upload, input type, model, image size and
    confidence threshold, and runs ``yolov12_inference`` when the button is
    pressed. Results are kept in ``st.session_state['results']`` as
    ``(annotated_image, annotated_video_bytes)`` so they survive reruns.
    """
    # Header with flair
    st.title("YOLOv12: Object Detection Superhero ๐ฆธโโ๏ธ")
    st.markdown("Powered by xAI's cosmic tech ๐ | [arXiv ๐](https://arxiv.org/abs/2502.12524) | [GitHub ๐](https://github.com/sunsmarterjie/yolov12)")

    # Layout in two columns
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("๐ฎ Control Room")

        # Upload section
        uploaded_file = st.file_uploader(
            "Drop your file here ๐ฅ - Images or Videos welcome!",
            type=['jpg', 'jpeg', 'png', 'mp4'],
            help="Upload an image or video to detect objects in!"
        )

        # Input type selector
        input_type = st.radio(
            "What's your flavor? ๐ฆ",
            ("Image", "Video"),
            help="Tell me if it's a still or moving picture!"
        )

        # Model selection
        model_id = st.selectbox(
            "Pick your YOLO weapon โ๏ธ",
            ["yolov12n.pt", "yolov12s.pt", "yolov12m.pt", "yolov12l.pt", "yolov12x.pt"],
            index=2,
            help="Choose your model power level: n (nano) to x (extra spicy)!"
        )

        # Image size slider
        image_size = st.slider(
            "Zoom level ๐",
            min_value=320,
            max_value=1280,
            value=640,
            step=32,
            help="Bigger numbers = sharper eyes (but slower)!"
        )

        # Confidence threshold
        conf_threshold = st.slider(
            "Certainty meter ๐ฏ",
            min_value=0.0,
            max_value=1.0,
            value=0.25,
            step=0.05,
            help="How sure should I be? Higher = pickier!"
        )

        # The big red button
        if st.button("Detect Objects! ๐", help="Click to unleash the detection magic!"):
            if uploaded_file is None:
                st.error("Yo! Upload something first ๐")
            elif uploaded_file.name.lower().endswith(".mp4") != (input_type == "Video"):
                # An mp4 fed to the image path (or an image fed to the video
                # path) cannot be decoded, so stop before running inference.
                st.error("The uploaded file does not match the selected input type.")
            else:
                annotated_image, annotated_video = yolov12_inference(
                    uploaded_file, model_id, image_size, conf_threshold, input_type
                )
                annotated_video_bytes = None
                if annotated_video is not None:
                    # Keep the bytes rather than the path: the temp file is
                    # removed right away, and a stored path would point at a
                    # missing file on the next rerun.
                    try:
                        with open(annotated_video, "rb") as video_file:
                            annotated_video_bytes = video_file.read()
                    finally:
                        if os.path.exists(annotated_video):
                            os.remove(annotated_video)
                st.session_state['results'] = (annotated_image, annotated_video_bytes)

    with col2:
        st.subheader("๐ฅ๏ธ Detection HQ")

        # Display results
        if 'results' in st.session_state:
            annotated_image, annotated_video = st.session_state['results']
            if input_type == "Image" and annotated_image is not None:
                # NOTE(review): newer Streamlit releases deprecate
                # use_column_width in favour of use_container_width; confirm
                # against the pinned Streamlit version before switching.
                st.image(
                    annotated_image,
                    caption="Your Detected Masterpiece ๐จ",
                    use_column_width=True
                )
            elif input_type == "Video" and annotated_video is not None:
                st.video(
                    annotated_video,
                    format="video/mp4",
                    start_time=0
                )
            else:
                st.warning("Nothing to show yet! Hit the button! โก")
        else:
            st.info("Awaiting your command, captain! ๐ Upload and detect to see results!")

    # Footer with sass
    st.markdown("---")
    st.markdown("Built with ๐ by xAI's minions | Objects beware, YOLOv12 is here! ๐")
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()