# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer
# Ultralytics
from ultralytics import YOLO
import torch
# Gradio
import gradio as gr
import moviepy.editor as moviepy
# System and files
import os
import glob
import uuid
# Image manipulation
import numpy as np
import cv2
print("[INFO]: Imported modules!")
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO('yolov8n.pt')  # Load an official Ultralytics detection model

# Pick the inference device once, shared by mmpose and ultralytics
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("[INFO]: Downloaded models!")
def check_extension(video):
    # Extract the file name and extension
    file_name, file_extension = os.path.splitext(video)

    # Convert the clip to mp4 if it arrives in another container
    if file_extension != ".mp4":
        print("Converting to mp4")
        clip = moviepy.VideoFileClip(video)
        video = file_name + ".mp4"
        clip.write_videofile(video)
    return video
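
# Usage sketch (hypothetical path): check_extension("/tmp/clip.avi") converts the
# clip with moviepy, writes /tmp/clip.mp4, and returns the new path; ".mp4" inputs
# are returned unchanged.
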
def tracking(video, model, boxes=True):
    print("[INFO] Is cuda available? ", torch.cuda.is_available())
    print(device)

    # Perform tracking with the (official or custom) model
    # https://docs.ultralytics.com/modes/predict/
    print("[INFO] Starting tracking!")
    annotated_frame = model(video, boxes=boxes, device=device)
    return annotated_frame
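
# A minimal sketch (not called by the app) of how the per-frame Results returned
# by tracking() can be inspected programmatically; attribute names follow the
# Ultralytics Results API (boxes.xyxy, boxes.id, boxes.cls, names).
def print_track_summary(results):
    for i, frame in enumerate(results):
        if frame.boxes is None or frame.boxes.id is None:
            continue  # no tracked objects in this frame
        for box, track_id, cls in zip(frame.boxes.xyxy, frame.boxes.id, frame.boxes.cls):
            print(f"frame {i}: id {int(track_id)} {frame.names[int(cls)]} at {box.tolist()}")
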
def show_tracking(video_content):
    # Classes follow COCO: https://docs.ultralytics.com/datasets/detect/coco/
    video = cv2.VideoCapture(video_content)

    # Track
    video_track = tracking(video_content, track_model.track)

    # Prepare to save the annotated video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Go through the frames and write them
    for frame_track in video_track:
        result_track = frame_track.plot()  # Plot a BGR numpy array of predictions
        out_track.write(result_track)
    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Close any OpenCV windows
    return out_file
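
# Note: show_tracking() writes "track.mp4" into the current working directory, so
# concurrent requests overwrite each other; the pose functions below avoid this by
# writing into a fresh uuid folder per request.
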
def pose3d(video):
    video = check_extension(video)
    print(device)

    # Define a new unique folder for the visualization output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               return_vis=True,
                               rebase_keypoint_height=True,
                               device=device)

    # Consume the generator so the visualization video gets written to disk
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)

def pose2d(video, kpt_threshold):
    video = check_extension(video)
    print(device)

    # Define a new unique folder for the visualization output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             return_vis=True,
                             thickness=2,
                             rebase_keypoint_height=True,
                             kpt_thr=kpt_threshold,
                             device=device)

    # Consume the generator so the visualization video gets written to disk
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)

def pose2dhand(video, kpt_threshold):
    video = check_extension(video)
    print(device)

    # Define a new unique folder for the visualization output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            return_vis=True,
                            thickness=2,
                            rebase_keypoint_height=True,
                            kpt_thr=kpt_threshold,
                            device=device)

    # Consume the generator so the visualization video gets written to disk
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def run_UI():
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            video_input = gr.Video(source="upload", type="filepath", height=612)
                            # Slider for the keypoint confidence threshold
                            file_kpthr = gr.Slider(minimum=0.1, maximum=1.0, step=0.05, value=0.3, label="Keypoint threshold")
                            submit_pose_file = gr.Button("Make 2d pose estimation")
                            submit_pose3d_file = gr.Button("Make 3d pose estimation")
                            submit_hand_file = gr.Button("Make 2d hand estimation")
                            submit_detect_file = gr.Button("Detect and track objects")
                    with gr.Row():
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")
with gr.Tab("Record video with webcam"):
with gr.Column():
with gr.Row():
with gr.Column():
webcam_input = gr.Video(source="webcam", height=612)
web_kpthr = gr.Slider(minimum=0.1, maximum=1, step=20, default=0.3, label='Keypoint threshold')
submit_pose_web = gr.Button("Make 2d pose estimation")
submit_pose3d_web = gr.Button("Make 3d pose estimation")
submit_hand_web = gr.Button("Make 2d hand estimation")
submit_detect_web = gr.Button("Detect and track objects")
with gr.Row():
webcam_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
webcam_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
webcam_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand position", show_label=True)
webcam_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")
with gr.Tab("General information"):
gr.Markdown("You can load the keypoints in python in the following way: ")
gr.Code(
value="""def hello_world():
return "Hello, world!"
print(hello_world())""",
language="python",
interactive=True,
show_label=False,
)
gr.Markdown("""Information about the models
Pose models: `mmpose` is a library for human pose estimation that provides pre-trained models for 2D and 3D pose estimation.
The 2D pose model is used for estimating the 2D coordinates of human body joints from an image or a video frame. The model uses a convolutional neural network (CNN) to predict the joint locations and their confidence scores.
The 2D hand model is a specialized version of the 2D pose model that is designed for hand pose estimation. It uses a similar CNN architecture to the 2D pose model but is trained specifically for detecting the joints in the hand.
The 3D pose model is used for estimating the 3D coordinates of human body joints from an image or a video frame. The model uses a combination of 2D pose estimation and depth estimation to infer the 3D joint locations.
All of these models are pre-trained on large datasets and can be fine-tuned on custom datasets for specific applications.
Ultralight detection and tracking model: The `track()` method in the Ultralight model is used for object tracking in videos. It takes a video file or a camera stream as input and returns the tracked objects in each frame. The method uses the COCO dataset classes for object detection and tracking. The COCO dataset contains 80 classes of objects such as person, car, bicycle, etc. See https://docs.ultralytics.com/datasets/detect/coco/ for all available classes. The `track()` method uses the COCO classes to detect and track the objects in the video frames.
The tracked objects are represented as bounding boxes with labels indicating the class of the object. The Ultralight model is designed to be fast and efficient, making it suitable for real-time object tracking applications.""")
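                # Hypothetical snippet shown to the user; it mirrors how this app
                # drives the tracker and uses only documented Ultralytics calls.
                gr.Code(
                    value="""from ultralytics import YOLO

model = YOLO("yolov8n.pt")
results = model.track("video.mp4")  # one Results object per frame
for frame in results:
    print(frame.boxes.xyxy)  # tracked bounding boxes""",
                    language="python",
                    interactive=True,
                    show_label=False,
                )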
        # From file
        submit_pose_file.click(fn=pose2d,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d,
                                 inputs=video_input,
                                 outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output3)
        submit_detect_file.click(fn=show_tracking,
                                 inputs=video_input,
                                 outputs=video_output4)

        # From webcam
        submit_pose_web.click(fn=pose2d,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d,
                                inputs=webcam_input,
                                outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking,
                                inputs=webcam_input,
                                outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    run_UI()