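"""Gradio demo: MMPose 2d/3d pose estimation and YOLOv8 tracking on a video.

The user uploads a clip (or records one from the webcam), ticks the methods to
run, and gets back one annotated video per method. Note that the Gradio calls
below (gr.Video(source=...), allow_flagging) follow the gradio 3.x API; later
releases renamed or removed some of these arguments.
"""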
import mmpose
from mmpose.apis import MMPoseInferencer

from ultralytics import YOLO

import gradio as gr

import os
import glob
import uuid

import numpy as np
import cv2

print("[INFO]: Imported modules!")

human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")

inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
}

track_model = YOLO("yolov8n.pt")

print("[INFO]: Downloaded models!")
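
# Note: tracking() below hard-codes device="cuda". On a CPU-only host, a
# fallback along these lines (using torch, which ultralytics already depends
# on) could be swapped in:
#
#   import torch
#   DEVICE = "cuda" if torch.cuda.is_available() else "cpu"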


def tracking(video, model, boxes=True):
    print("[INFO] Starting tracking!")

    # Run the tracker over the whole clip; ultralytics returns one Results
    # object per frame. device="cuda" assumes a CUDA-capable GPU.
    annotated_frames = model(video, device="cuda", boxes=boxes)

    return annotated_frames
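

# A minimal usage sketch (hypothetical file name): run the tracker on a clip
# and save the first annotated frame, using only the Results API relied on above.
#
#   results = tracking("demo.mp4", track_model.track)
#   cv2.imwrite("first_frame.jpg", results[0].plot())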


def show_tracking(video_content, vis_out_dir):
    # Open the source only to read its frame rate for the output writer.
    video = cv2.VideoCapture(video_content)

    video_track = tracking(video_content, track_model.track)

    out_file = os.path.join(vis_out_dir, "track.mp4")

    # mp4v keeps OpenCV happy everywhere, but some browsers will not play it
    # inline; an H.264 fourcc ("avc1") can be substituted if the local OpenCV
    # build supports it.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Plot each frame's boxes and track IDs, then append it to the output clip.
    # Indexing into the Results object (frame_track[0]) would raise IndexError
    # on frames with no detections, so plot the whole Results instead.
    for frame_track in video_track:
        result_track = frame_track.plot()
        out_track.write(result_track)

    out_track.release()
    video.release()
    cv2.destroyAllWindows()

    return out_file


def poses(inferencer, video, vis_out_dir):
    # MMPoseInferencer writes an annotated copy of the video into vis_out_dir.
    result_generator = inferencer(video,
                                  vis_out_dir=vis_out_dir,
                                  return_vis=True,
                                  thickness=2,
                                  rebase_keypoint_height=True)

    # Consume the generator so every frame is actually processed and rendered.
    results = [result for result in result_generator]

    # Return the rendered file; the inferencer picks its own file name, so
    # locate it by extension.
    matches = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return matches[0] if matches else None
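

# A minimal usage sketch (hypothetical names): render 2d human poses for one
# clip into a scratch directory and get back the rendered file path.
#
#   out = poses(human, "demo.mp4", "scratch_dir")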


def infer(video, check):
    out_files = []

    for i in check:
        # Give every run its own output directory so results never collide.
        vis_out_dir = str(uuid.uuid4())
        os.makedirs(vis_out_dir, exist_ok=True)

        print("[INFO]: Running inference!")
        if i == "Detect and track":
            # Tracking has no MMPose inferencer; it goes through YOLO instead.
            out_file = show_tracking(video, vis_out_dir)
        else:
            inferencer = inferencers[i]
            out_file = poses(inferencer, video, vis_out_dir)

        out_files.append(out_file)

    # The interfaces declare four video outputs, so pad for unchecked methods.
    while len(out_files) < 4:
        out_files.append(None)

    return out_files


def run():
    methods = ["Estimate human 2d poses", "Estimate human 2d hand poses",
               "Estimate human 3d poses", "Detect and track"]

    # Gradio components cannot be shared between interfaces, so build one
    # CheckboxGroup per tab.
    check_web = gr.CheckboxGroup(choices=methods, label="Methods", type="value",
                                 info="Select the model(s) you want")
    check_file = gr.CheckboxGroup(choices=methods, label="Methods", type="value",
                                  info="Select the model(s) you want")

    webcam = gr.Interface(
        fn=infer,
        inputs=[gr.Video(source="webcam", height=412), check_web],
        outputs=[gr.PlayableVideo(), gr.PlayableVideo(),
                 gr.PlayableVideo(), gr.PlayableVideo()],
        title="Pose estimation",
        description="Pose estimation on video",
        allow_flagging="never",
    )

    file = gr.Interface(
        fn=infer,
        inputs=[gr.Video(source="upload", height=412), check_file],
        outputs=[gr.PlayableVideo(), gr.PlayableVideo(),
                 gr.PlayableVideo(), gr.PlayableVideo()],
        title="Pose estimation",
        description="Pose estimation on video",
        allow_flagging="never",
    )

    demo = gr.TabbedInterface(
        interface_list=[file, webcam],
        tab_names=["From a File", "From your Webcam"],
    )

    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run()