# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# Build the MMPose inferencers and the YOLOv8 detection model
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO("yolov8n.pt")  # Load an official Ultralytics detect model

# Inferencer models to look up by task name
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")


def tracking(video, model, boxes=True):
    """Run the given Ultralytics tracking method on a video file."""
    print("[INFO] Starting tracking!")
    # https://docs.ultralytics.com/modes/predict/
    annotated_frames = model(video, boxes=boxes)
    return annotated_frames


def show_tracking(video_content):
    # Open the source video only to read its frame rate
    video = cv2.VideoCapture(video_content)

    # Track objects across the whole clip
    video_track = tracking(video_content, track_model.track)

    # Prepare to save the annotated video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Plot each frame's predictions as a BGR numpy array and write it out
    for frame_track in video_track:
        result_track = frame_track.plot()
        out_track.write(result_track)
    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()

    return out_file


def pose3d(video):
    # Save visualizations to a unique directory next to the input video
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               rebase_keypoint_height=True,
                               device="cuda")

    # Exhaust the generator so every frame is processed and saved
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    return out_file


def pose2d(video):
    # Save visualizations to a unique directory next to the input video
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             thickness=2,
                             rebase_keypoint_height=True,
                             device="cuda")

    # Exhaust the generator so every frame is processed and saved
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    return out_file


def pose2dhand(video):
    # Save visualizations to a unique directory next to the input video
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            thickness=2,
                            rebase_keypoint_height=True,
                            device="cuda")

    # Exhaust the generator so every frame is processed and saved
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    return out_file


with gr.Blocks() as demo:
    with gr.Column():
        with gr.Tab("Upload video"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(source="upload", type="filepath", height=512)
                    submit_pose_file = gr.Button("Make 2d pose estimation")
                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
                    submit_hand_file = gr.Button("Make 2d hand estimation")
                    submit_detect_file = gr.Button("Detect and track objects")
                video_output = gr.Video(height=512)

        with gr.Tab("Record video with webcam"):
            with gr.Row():
                with gr.Column():
                    webcam_input = gr.Video(source="webcam", height=512)
                    submit_pose_web = gr.Button("Make 2d pose estimation")
                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
                    submit_hand_web = gr.Button("Make 2d hand estimation")
                    submit_detect_web = gr.Button("Detect and track objects")
                webcam_output = gr.Video(height=512)

    # From file
    submit_pose_file.click(fn=pose2d, inputs=video_input, outputs=video_output)
    submit_pose3d_file.click(fn=pose3d, inputs=video_input, outputs=video_output)
    submit_hand_file.click(fn=pose2dhand, inputs=video_input, outputs=video_output)
    submit_detect_file.click(fn=show_tracking, inputs=video_input, outputs=video_output)

    # From webcam: wire the webcam tab to its own input and output components
    submit_pose_web.click(fn=pose2d, inputs=webcam_input, outputs=webcam_output)
    submit_pose3d_web.click(fn=pose3d, inputs=webcam_input, outputs=webcam_output)
    submit_hand_web.click(fn=pose2dhand, inputs=webcam_input, outputs=webcam_output)
    submit_detect_web.click(fn=show_tracking, inputs=webcam_input, outputs=webcam_output)

demo.launch()
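# NOTE (assumed environment, not pinned by the original script): the inferencers
# are called with device="cuda", so a CUDA-capable GPU is expected, along with
# the MMPose 1.x and Ultralytics stacks. A setup along these lines should work:
#   pip install openmim gradio ultralytics
#   mim install mmengine "mmcv>=2.0.0" "mmdet>=3.0.0" "mmpose>=1.0.0"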