# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# Build the MMPose inferencers. kpt_thr (float) is the threshold used to
# visualize keypoints and defaults to 0.3; it is exposed as a slider below.
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")

# Load an official Ultralytics detection model for tracking
track_model = YOLO('yolov8n.pt')

# Map the UI checkbox labels to their models
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")


def tracking(video, model, boxes=True):
    """Run the Ultralytics tracker on a video and return the per-frame results."""
    print("[INFO] Starting tracking!")
    # https://docs.ultralytics.com/modes/predict/
    results = model(video, boxes=boxes)
    return results


def show_tracking(video_content, vis_out_dir, model):
    """Track objects in a video and write the annotated frames to track.mp4."""
    video = cv2.VideoCapture(video_content)

    # Track
    video_track = tracking(video_content, model.track)

    # Prepare to save the annotated video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Plot each frame's predictions (a BGR numpy array) and write it out
    for frame_track in video_track:
        result_track = frame_track.plot()
        out_track.write(result_track)

    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Close any OpenCV windows

    return out_file


def poses(inferencer, video, vis_out_dir, kpt_thr):
    """Run a pose inferencer on a video and return the rendered .mp4 file(s)."""
    print("[INFO] VIDEO INPUT: ", video)

    result_generator = inferencer(video,
                                  vis_out_dir=vis_out_dir,
                                  return_vis=True,
                                  thickness=2,
                                  rebase_keypoint_height=True,
                                  kpt_thr=kpt_thr,
                                  device="cuda" if torch.cuda.is_available() else "cpu")

    # Consume the generator to actually run inference
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return out_file


def infer(video, check, kpt_thr, webcam=True):
    """Run every selected method on the input video and return the output files."""
    print("[INFO] VIDEO INPUT: ", video)

    out_files = []
    for i in check:
        # Create a fresh output directory per method
        vis_out_dir = str(uuid.uuid4())
        inferencer = inferencers[i]

        if i == "Detect and track":
            trackfile = show_tracking(video, vis_out_dir, inferencer)
        elif webcam:
            # Write the rendered video into a new directory next to the input
            add_dir = str(uuid.uuid4())
            vidname = video.split("/")[-1]
            vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
            poses(inferencer, video, vis_out_dir_web, kpt_thr)
            out_files.append(os.path.join(vis_out_dir_web, vidname))
        else:
            out_files.extend(poses(inferencer, video, vis_out_dir, kpt_thr))

    print(out_files)

    # Pad so all four Gradio outputs receive a value even when fewer
    # methods are selected
    out_files += [None] * (3 - len(out_files))
    return "track.mp4", out_files[0], out_files[1], out_files[2]
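
# A minimal sketch of what `poses` does with MMPoseInferencer, outside the app
# ("demo.mp4" is a hypothetical local file used only for illustration):
#
#   gen = human("demo.mp4", vis_out_dir="vis_results", return_vis=True)
#   _ = [r for r in gen]   # the generator must be consumed to run inference
#   # the rendered video is then picked up via glob("vis_results/*.mp4")
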
poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want") check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want") description = """ \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d. """ # Insert slider with kpt_thr web_kpthr = gr.Slider(0, 1, value=0.3) file_kpthr = gr.Slider(0, 1, value=0.3) webcam = gr.Interface( fn=infer, inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)], title = 'Tracking and pose estimation', description = description, allow_flagging=False ) file = gr.Interface( infer, inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr], outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)], title = 'Tracking and pose estimation', description = description, allow_flagging=False ) demo = gr.TabbedInterface( interface_list=[file, webcam], tab_names=["From a File", "From your Webcam"] ) demo.launch(server_name="0.0.0.0", server_port=7860) if __name__ == "__main__": run() # https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift # motionbert_ft_h36m-d80af323_20230531.pth # simple3Dbaseline_h36m-f0ad73a4_20210419.pth # videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth # videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth # videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth # videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth # https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py # 00000.mp4 # 000000.mp4