# Hugging Face Spaces — status: Running
from ultralytics import YOLO | |
from PIL import Image | |
import gradio as gr | |
from huggingface_hub import snapshot_download | |
import os | |
import zipfile | |
import cv2 | |
from tqdm import tqdm | |
def load_model(repo_id):
    """Download a model snapshot from the Hub, unpack it, and load it with YOLO.

    The snapshot for ``repo_id`` is fetched with ``snapshot_download``. The
    ``best_int8_openvino_model.zip`` archive found in the snapshot directory
    is extracted into that same directory, and the resulting
    ``best_int8_openvino_model`` path is handed to ``YOLO``.

    Args:
        repo_id: Repository identifier forwarded to ``snapshot_download``.

    Returns:
        The ``YOLO`` object created with ``task='detect'``.
    """
    snapshot_dir = snapshot_download(repo_id)
    print(snapshot_dir)

    # Unpack the zipped model next to the archive itself.
    archive_path = os.path.join(snapshot_dir, "best_int8_openvino_model.zip")
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(snapshot_dir)

    model_path = os.path.join(snapshot_dir, "best_int8_openvino_model")
    print(model_path)

    return YOLO(model_path, task='detect')
def predict(pilimg):
    """Run detection on a single image and return the annotated picture.

    Args:
        pilimg: Image handed unchanged to ``detection_model.predict``.

    Returns:
        A PIL image built from the plotted first result, with the channel
        axis reversed so that it is in RGB order.
    """
    detections = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    annotated_bgr = detections[0].plot()
    # Reverse the last axis (BGR -> RGB) before wrapping the array for PIL.
    return Image.fromarray(annotated_bgr[..., ::-1])
def predict_video(video_input):
    """Annotate every frame of a video with detections and save the result.

    Frames are read one by one from ``video_input``, passed through
    ``detection_model`` on the CPU, and the plotted result of each frame is
    written to a new ``mp4v``-encoded file that keeps the input's frame size
    and frame rate.

    Args:
        video_input: Path of the video file to process.

    Returns:
        Path of the annotated video, i.e. ``f"{video_input}_output.mp4"``.
    """
    video_out_filepath = f"{video_input}_output.mp4"
    video_reader = cv2.VideoCapture(video_input)
    video_writer = None
    try:
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = video_reader.get(cv2.CAP_PROP_FPS)

        video_writer = cv2.VideoWriter(
            video_out_filepath,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (frame_w, frame_h),
        )

        # The reported frame count is container metadata and may be missing
        # or inaccurate, so it only sizes the progress bar. The loop itself
        # runs until the reader stops delivering frames.
        with tqdm(total=nb_frames if nb_frames > 0 else None) as progress:
            while True:
                success, frame = video_reader.read()
                if not success:
                    break
                results = detection_model(frame, device='cpu')
                # Draw the detections on the frame and append it to the output.
                video_writer.write(results[0].plot())
                progress.update(1)
    finally:
        # Release both handles even if inference fails midway, so the input
        # is closed and the output file is finalized.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()
    # No OpenCV windows are ever opened here, so no window cleanup is needed.
    return video_out_filepath
# Hub repository the detection model is downloaded from.
REPO_ID = "GranularFireplace/food_yolov8"

# Loaded once at import time; the prediction callbacks read this global.
detection_model = load_model(REPO_ID)

# Tab 1: single photo in, annotated photo out.
image_interface = gr.Interface(
    inputs=gr.Image(type="pil", label="Upload photo"),
    outputs=gr.Image(type="pil", label="Result"),
    fn=predict,
)

# Tab 2: video in, annotated video out.
video_interface = gr.Interface(
    inputs=gr.Video(label="Upload video"),
    outputs=gr.Video(label="Result"),
    fn=predict_video,
)

# Combine both interfaces into one tabbed app and start serving it.
demo = gr.TabbedInterface(
    [image_interface, video_interface],
    ["Photo", "Video"],
)
demo.launch(share=True)