|
|
|
import mmpose |
|
from mmpose.apis import MMPoseInferencer |
|
|
|
|
|
from ultralytics import YOLO |
|
import torch |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
import os |
|
import glob |
|
import uuid |
|
|
|
|
|
import numpy as np |
|
import cv2 |
|
|
|
print("[INFO]: Imported modules!")

# Instantiate the pose-estimation inferencers once at import time.
# NOTE(review): constructing an MMPoseInferencer downloads model weights on
# first use, so importing this module can be slow and needs network access.
human = MMPoseInferencer("human")

hand = MMPoseInferencer("hand")

human3d = MMPoseInferencer(pose3d="human3d")

# YOLOv8-nano model used for the "Detect and track" option.
track_model = YOLO('yolov8n.pt')

# Maps the UI checkbox labels (see run()) to the model that implements each one.
inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}

print("[INFO]: Downloaded models!")
|
|
|
def tracking(video, model, boxes=True):
    """Invoke a detection/tracking model on *video* and return its results.

    Args:
        video: Video source (e.g. a file path) passed straight to the model.
        model: Callable model, typically ``YOLO(...).track``; invoked as
            ``model(video, boxes=boxes)``.
        boxes: Forwarded to the model; whether bounding boxes are drawn.

    Returns:
        Whatever the model call yields (per-frame tracking results).
    """
    print("[INFO] Loading model...")

    print("[INFO] Starting tracking!")

    results = model(video, boxes=boxes)
    return results
|
|
|
def show_tracking(video_content, vis_out_dir, model):
    """Track objects in a video and write an annotated copy to "track.mp4".

    Args:
        video_content: Path to the input video file.
        vis_out_dir: Currently unused — the output path is hard-coded below.
            NOTE(review): consider writing into this directory instead.
        model: A YOLO model object; its ``.track`` method performs tracking.

    Returns:
        The hard-coded output filename "track.mp4".
    """
    # Opened only to read the source FPS; YOLO decodes the file itself.
    video = cv2.VideoCapture(video_content)

    video_track = tracking(video_content, model.track)

    # Output always lands in the current working directory.
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fps = video.get(cv2.CAP_PROP_FPS)
    # Frame size comes from the first tracked frame's original image
    # (assumes at least one frame was produced — TODO confirm for empty videos).
    height, width, _ = video_track[0][0].orig_img.shape
    size = (width,height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Render each tracked frame (boxes/ids drawn by .plot()) into the writer.
    for frame_track in video_track:
        result_track = frame_track[0].plot()
        out_track.write(result_track)

    print("[INFO] Done with frames")

    # Release writer and reader so the mp4 is flushed to disk.
    out_track.release()

    video.release()
    cv2.destroyAllWindows()

    return out_file
|
|
|
|
|
def poses(inferencer, video, vis_out_dir, kpt_thr):
    """Run a pose inferencer over a video and collect its rendered output.

    Args:
        inferencer: An ``MMPoseInferencer``-style callable that returns a
            generator of per-frame results and writes a visualised video
            into ``vis_out_dir``.
        video: Path to the input video.
        vis_out_dir: Directory where the inferencer saves its visualisation.
        kpt_thr: Keypoint score threshold forwarded to the inferencer.

    Returns:
        List of ``.mp4`` paths found in *vis_out_dir* after inference
        (empty if nothing was written).
    """
    print("[INFO] VIDEO INPUT: ", video)

    # BUG FIX: kpt_thr was accepted but never forwarded, so the UI slider
    # had no effect on the rendered keypoints. It is now passed through.
    result_generator = inferencer(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        kpt_thr=kpt_thr,
        device="cuda",
    )

    # Drain the generator so inference actually runs and files are written.
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

    return out_file
|
|
|
def infer(video, check, kpt_thr, webcam=True):
    """Dispatch the selected methods over a video and return output paths.

    Args:
        video: Path to the input video (upload or webcam recording).
        check: List of selected method labels (keys of ``inferencers``).
        kpt_thr: Keypoint score threshold forwarded to the pose models.
        webcam: When True, the visualisation is written next to the input
            video under a fresh UUID directory; otherwise into a UUID
            directory in the working directory.

    Returns:
        4-tuple for the Gradio outputs: the tracking video path followed by
        up to three pose-visualisation paths (``None`` for unselected slots).
    """
    print("[INFO] VIDEO INPUT: ", video)

    out_files = []

    for i in check:
        # Fresh output directory per method so runs never collide.
        vis_out_dir = str(uuid.uuid4())
        inferencer = inferencers[i]

        if i == "Detect and track":
            trackfile = show_tracking(video, vis_out_dir, inferencer)
        else:
            if webcam == True:
                add_dir = str(uuid.uuid4())
                vidname = video.split("/")[-1]
                # Write next to the recorded webcam video.
                vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
                out_file = poses(inferencer, video, vis_out_dir, kpt_thr)
                out_file = os.path.join(vis_out_dir, vidname)
                out_files.append(out_file)
            else:
                # BUG FIX: this branch previously extended out_files with an
                # undefined/stale `out_file` and never ran inference for
                # uploaded files. Run the inferencer and collect its outputs.
                out_file = poses(inferencer, video, vis_out_dir, kpt_thr)
                out_files.extend(out_file)

    # BUG FIX: pad so unselected outputs yield None instead of IndexError
    # when fewer than three pose methods are chosen.
    while len(out_files) < 3:
        out_files.append(None)

    print(out_files)

    return "track.mp4", out_files[0], out_files[1], out_files[2]
|
|
|
def run():
    """Build the Gradio UI (file-upload and webcam tabs) and launch it."""
    methods = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"]

    # Separate widget instances per tab — Gradio components are not shared.
    check_web = gr.CheckboxGroup(choices=methods, label="Methods", type="value", info="Select the model(s) you want")
    check_file = gr.CheckboxGroup(choices=methods, label="Methods", type="value", info="Select the model(s) you want")

    description = """
    \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
    """

    web_kpthr = gr.Slider(0, 1, value=0.3)
    file_kpthr = gr.Slider(0, 1, value=0.3)

    def make_outputs():
        # One output slot per selectable method, matching infer()'s 4-tuple.
        return [
            gr.Video(format='mp4', height=512, label="Detect and track", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 2d hand poses", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True),
        ]

    webcam = gr.Interface(
        fn=infer,
        inputs=[gr.Video(source="webcam", height=512), check_web, web_kpthr],
        outputs=make_outputs(),
        title='Tracking and pose estimation',
        description=description,
        allow_flagging=False,
    )

    file = gr.Interface(
        infer,
        inputs=[gr.Video(source="upload", height=512), check_file, file_kpthr],
        outputs=make_outputs(),
        title='Tracking and pose estimation',
        description=description,
        allow_flagging=False,
    )

    demo = gr.TabbedInterface(
        interface_list=[file, webcam],
        tab_names=["From a File", "From your Webcam"],
    )

    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
# Entry point: build and launch the Gradio app when run as a script.
if __name__ == "__main__":
    run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|