File size: 9,109 Bytes
3bbef49 34a9259 afd2199 34a9259 22e7a27 91f9212 59f93ed 22e7a27 34a9259 afd2199 4ce546f 34a9259 cd41c0c a21de06 afd2199 e2522a6 0e929df 3bbef49 0e929df 7d480ef 34a9259 0e929df 34a9259 7d480ef 08528b3 8db8164 b8eab7d 22e7a27 d567440 3e81a23 22e7a27 c76c2fc 3bbef49 7c1031f 91f9212 3bbef49 91f9212 5a730a3 08d7883 8ce2567 91f9212 559fc59 b6358bf 91f9212 3bbef49 91f9212 3bbef49 b8eab7d c186703 3bbef49 ae92d0e 435673b 3bbef49 577ab65 582b742 577ab65 3bbef49 91f9212 c186703 41707d0 c186703 91f9212 92499cd b8eab7d 4ce546f c186703 3bbef49 ae92d0e 435673b 3bbef49 aa67e27 4ce546f b8eab7d 4ce546f 3bbef49 fc40636 3bbef49 c186703 879a6e4 3bbef49 92499cd b8eab7d c186703 3bbef49 ae92d0e 435673b 3bbef49 582b742 3bbef49 b8eab7d 3bbef49 0c80e1d c186703 92499cd 879a6e4 d151311 c186703 d151311 4ce546f d151311 b8eab7d 5d648b4 d151311 5d648b4 02cf03c d151311 3bbef49 d151311 b8eab7d d151311 b8eab7d d151311 b8eab7d d151311 b8eab7d 879a6e4 d151311 879a6e4 97f0668 d151311 879a6e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer
# Ultralytics
from ultralytics import YOLO
import torch
# Gradio
import gradio as gr
import moviepy.editor as moviepy
# System and files
import os
import glob
import uuid
# Image manipulation
import numpy as np
import cv2
print("[INFO]: Imported modules!")
# Instantiate one MMPose inferencer per task; each downloads its checkpoint
# on first construction, so this block is slow on a cold start.
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO('yolov8n.pt') # Official Ultralytics detection model (auto-downloaded)
# Lookup table from UI action label to model.
# NOTE(review): `inferencers` is not referenced anywhere in the visible code —
# possibly dead, or consumed elsewhere; confirm before removing.
inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}
print("[INFO]: Downloaded models!")
def check_extension(video):
    """Ensure *video* is an .mp4 file, converting it with moviepy if needed.

    Parameters
    ----------
    video : str
        Path to the input video file.

    Returns
    -------
    str
        Path to an .mp4 version of the video — the original path when it
        already has an .mp4 extension, otherwise a sibling file with the
        same stem and an .mp4 suffix written via moviepy.
    """
    file_name, file_extension = os.path.splitext(video)
    # BUG FIX: the original used `is not ".mp4"`, an identity check that is
    # always True for a computed string, so every input (even .mp4) was
    # re-encoded. Compare by value, case-insensitively.
    if file_extension.lower() != ".mp4":
        clip = moviepy.VideoFileClip(video)
        video = file_name + ".mp4"
        clip.write_videofile(video)
    return video
def tracking(video, model, boxes=True):
    """Run a detection/tracking callable over *video*.

    Parameters
    ----------
    video : str
        Path of the video to process.
    model : callable
        Inference entry point (e.g. ``YOLO(...).track``); invoked as
        ``model(video, boxes=boxes)``.
    boxes : bool, optional
        Whether bounding boxes are rendered in the results (default True).

    Returns
    -------
    Whatever per-frame results *model* produces.
    """
    print("[INFO] Loading model...")
    print("[INFO] Starting tracking!")
    # Result format: https://docs.ultralytics.com/modes/predict/
    return model(video, boxes=boxes)
def show_tracking(video_content):
    """Track objects in a video and write an annotated copy to 'track.mp4'.

    Runs the YOLO tracker over *video_content*, plots each frame's
    detections, and re-encodes the plotted frames into a new mp4 at the
    source frame rate. Returns the output path for display in the UI.

    NOTE(review): the output name is a fixed relative path, so concurrent
    requests overwrite each other's file — unlike the pose functions,
    which use per-request uuid folders; confirm whether that matters here.
    """
    # https://docs.ultralytics.com/datasets/detect/coco/
    # The capture is opened only to read the source FPS below.
    video = cv2.VideoCapture(video_content)
    # Run the tracker over the whole clip (list of per-frame Results).
    video_track = tracking(video_content, track_model.track)
    # Prepare to save video
    #out_file = os.path.join(vis_out_dir, "track.mp4")
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v") # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    # Frame size taken from the first result's original image
    # (presumably rows x cols x channels — TODO confirm against ultralytics).
    height, width, _ = video_track[0][0].orig_img.shape
    size = (width,height)
    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)
    # Go through frames and write them
    for frame_track in video_track:
        result_track = frame_track[0].plot() # BGR numpy array with predictions drawn
        print("[INFO] Done with frames")
        #print(type(result_pose)) numpy ndarray
        out_track.write(result_track)
    # Release writer and capture so the mp4 is flushed to disk.
    out_track.release()
    video.release()
    cv2.destroyAllWindows() # Closing window
    return out_file
def pose3d(video):
    """Estimate and visualise 3d human poses in a video.

    Parameters
    ----------
    video : str
        Path to the input video; converted to .mp4 first if needed.

    Returns
    -------
    str
        Path of the rendered visualisation .mp4 ("" if none was produced).
    """
    video = check_extension(video)
    # Unique output folder next to the input so concurrent requests cannot
    # clobber each other. os.path.dirname replaces the original
    # "/".join(video.split("/")[:-1]), which broke on OS-specific separators.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)
    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               return_vis=True,
                               rebase_keypoint_height=True,
                               device="cuda")
    # The inferencer is lazy: exhaust the generator so the visualisation is
    # actually written. The per-frame results themselves are unused, so do
    # not keep them in memory.
    for _ in result_generator:
        pass
    # The inferencer names the output after the input clip; pick up the
    # .mp4 it produced in the unique folder.
    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def pose2d(video, kpt_threshold):
    """Estimate and visualise 2d human body poses in a video.

    Parameters
    ----------
    video : str
        Path to the input video; converted to .mp4 first if needed.
    kpt_threshold : float
        Minimum keypoint confidence score for a keypoint to be drawn.

    Returns
    -------
    str
        Path of the rendered visualisation .mp4 ("" if none was produced).
    """
    video = check_extension(video)
    # Unique output folder next to the input so concurrent requests cannot
    # clobber each other. os.path.dirname replaces the original
    # "/".join(video.split("/")[:-1]), which broke on OS-specific separators.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)
    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             return_vis=True,
                             thickness=2,
                             rebase_keypoint_height=True,
                             kpt_thr=kpt_threshold,
                             device="cuda")
    # The inferencer is lazy: exhaust the generator so the visualisation is
    # actually written; the per-frame results themselves are unused.
    for _ in result_generator:
        pass
    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def pose2dhand(video, kpt_threshold):
    """Estimate and visualise 2d hand poses in a video.

    Parameters
    ----------
    video : str
        Path to the input video; converted to .mp4 first if needed.
    kpt_threshold : float
        Minimum keypoint confidence score for a keypoint to be drawn.

    Returns
    -------
    str
        Path of the rendered visualisation .mp4 ("" if none was produced).
    """
    video = check_extension(video)
    # Unique output folder next to the input so concurrent requests cannot
    # clobber each other. os.path.dirname replaces the original
    # "/".join(video.split("/")[:-1]), which broke on OS-specific separators.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)
    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            return_vis=True,
                            thickness=2,
                            rebase_keypoint_height=True,
                            kpt_thr=kpt_threshold,
                            device="cuda")
    # The inferencer is lazy: exhaust the generator so the visualisation is
    # actually written; the per-frame results themselves are unused.
    for _ in result_generator:
        pass
    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def run_UI():
    """Build and launch the Gradio UI on 0.0.0.0:7860 (blocking).

    Two tabs — file upload and webcam capture — each expose the same four
    actions (2d pose, 3d pose, 2d hand pose, detect-and-track); every
    button streams its result into the matching video component.
    """
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", type="filepath", height=612)
                        # kpt_thr is a confidence score in [0, 1]; mirror the
                        # webcam slider (the previous 1e3..1e6 range made the
                        # threshold unusable).
                        file_kpthr = gr.Slider(minimum=0.1, maximum=1, step=0.05, default=0.3, label='Keypoint threshold')
                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")
                    with gr.Column():
                        video_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")
            with gr.Tab("Record video with webcam"):
                # BUG FIX: was `with gr.Column:` — the bare class is not a
                # context manager, so opening this tab's layout raised at
                # startup. It must be instantiated.
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            webcam_input = gr.Video(source="webcam", height=612)
                            # step was 1e3 — larger than the whole [0.1, 1]
                            # range, so the slider could never move.
                            web_kpthr = gr.Slider(minimum=0.1, maximum=1, step=0.05, default=0.3, label='Keypoint threshold')
                            submit_pose_web = gr.Button("Make 2d pose estimation")
                            submit_pose3d_web = gr.Button("Make 3d pose estimation")
                            submit_hand_web = gr.Button("Make 2d hand estimation")
                            submit_detect_web = gr.Button("Detect and track objects")
                    with gr.Row():
                        webcam_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
                        webcam_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
                        webcam_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand position", show_label=True)
                        webcam_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")

        # Wire buttons — file-upload tab.
        submit_pose_file.click(fn=pose2d,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d,
                                 inputs=video_input,
                                 outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output3)
        submit_detect_file.click(fn=show_tracking,
                                 inputs=video_input,
                                 outputs=video_output4)
        # Wire buttons — webcam tab.
        submit_pose_web.click(fn=pose2d,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d,
                                inputs=webcam_input,
                                outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking,
                                inputs=webcam_input,
                                outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)
# Entry point: start the (blocking) Gradio server when run as a script.
if __name__ == "__main__":
    run_UI()
|