blocks

- Dockerfile +1 -1
- main.py +109 -94
- main_blocks.py → main_webcamtest.py +94 -109
Dockerfile
CHANGED

@@ -42,4 +42,4 @@ WORKDIR $HOME/app
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
-CMD ["python", "
+CMD ["python", "main.py"]
main.py
CHANGED

@@ -1,3 +1,5 @@
+
+
 # Pose inferencing
 import mmpose
 from mmpose.apis import MMPoseInferencer
@@ -20,14 +22,12 @@ import cv2
 
 print("[INFO]: Imported modules!")
 human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
+hand = MMPoseInferencer("hand")
 human3d = MMPoseInferencer(pose3d="human3d")
 track_model = YOLO('yolov8n.pt') # Load an official Detect model
 
 # ultraltics
 
-# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-
 # Defining inferencer models to lookup in function
 inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}
 
@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):
 
     return annotated_frame
 
-def show_tracking(video_content, vis_out_dir, model):
+def show_tracking(video_content):
     video = cv2.VideoCapture(video_content)
 
     # Track
-    video_track = tracking(video_content, model.track)
+    video_track = tracking(video_content, track_model.track)
 
     # Prepare to save video
     #out_file = os.path.join(vis_out_dir, "track.mp4")
@@ -65,11 +65,9 @@ def show_tracking(video_content, vis_out_dir, model):
     # Go through frames and write them
     for frame_track in video_track:
         result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
-        out_track.write(result_track)
-
     print("[INFO] Done with frames")
     #print(type(result_pose)) numpy ndarray
+        out_track.write(result_track)
 
-
     out_track.release()
 
@@ -79,112 +77,129 @@ def show_tracking(video_content, vis_out_dir, model):
     return out_file
 
 
-def poses(inferencer, video, vis_out_dir, kpt_thr):
-    print("[INFO] VIDEO INPUT: ", video)
-    result_generator = inferencer(video,
-        vis_out_dir = vis_out_dir,
-        return_vis=True,
-        thickness=2,
-        rebase_keypoint_height=True,
-        #kpt_thr=kpt_thr,
-        device="cuda"
-        )
-
-    result = [result for result in result_generator] #next(result_generator)
-
-    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
-
-    return out_file
-
-def infer(video, check, kpt_thr, webcam=True):
-    print("[INFO] VIDEO INPUT: ", video)
-
-    # Selecting the specific inferencer
-    out_files=[]
-
-    for i in check:
-        # Create out directory
-        vis_out_dir = str(uuid.uuid4())
-        inferencer = inferencers[i] # 'hand', 'human , device='cuda'
-
-        if i == "Detect and track":
-            #continue
-            trackfile = show_tracking(video, vis_out_dir, inferencer)
-
-        else:
-            if webcam==True:
-                print("WEBCAM")
-                add_dir = str(uuid.uuid4())
-                vidname = video.split("/")[-1]
-                vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-                out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
-                fullname = os.path.join(vis_out_dir_web, vidname)
-                #if i == "Estimate human 3d poses":
-                #    fullname = fullname[:-4]+"mp4" #Change to .mp4
-                #    out_files.append(fullname)
-                #else:
-                out_files.append(fullname)
-
-            else:
-                out_files.extend(out_file)
-
-    print(out_files)
-
-    return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]
-
-def run():
-    #https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
-    check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
-    check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
-
-    description = """
-    \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
-    """
-
-    # Insert slider with kpt_thr
-    web_kpthr = gr.Slider(0, 1, value=0.3)
-    file_kpthr = gr.Slider(0, 1, value=0.3)
-
-    webcam = gr.Interface(
-        fn=infer,
-        inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-        title = 'Tracking and pose estimation',
-        description = description,
-        allow_flagging=False
-    )
-
-    file = gr.Interface(
-        infer,
-        inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
-        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-        title = 'Tracking and pose estimation',
-        description = description,
-        allow_flagging=False
-    )
-
-    demo = gr.TabbedInterface(
-        interface_list=[file, webcam],
-        tab_names=["From a File", "From your Webcam"]
-    )
-
-    demo.launch(server_name="0.0.0.0", server_port=7860)
-
-
-if __name__ == "__main__":
-    run()
-
-# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
-# motionbert_ft_h36m-d80af323_20230531.pth
-# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
-# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
-# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
-# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
-# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
-# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py
-
-
-# 00000.mp4
-# 000000.mp4
-
-
+def pose3d(video):
+    add_dir = str(uuid.uuid4())
+    #vidname = video.split("/")[-1]
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+    #full name = os.path.join(vis_out_dir, vidname)
+
+    result_generator = human3d(video,
+        vis_out_dir = vis_out_dir,
+        thickness=2,
+        rebase_keypoint_height=True,
+        device="cuda")
+
+    result = [result for result in result_generator] #next(result_generator)
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+    return out_file
+
+
+def pose2d(video):
+    add_dir = str(uuid.uuid4())
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+
+    result_generator = human(video,
+        vis_out_dir = vis_out_dir,
+        thickness=2,
+        rebase_keypoint_height=True,
+        device="cuda")
+
+    result = [result for result in result_generator] #next(result_generator)
+
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+    return out_file
+
+
+def pose2dhand(video):
+    add_dir = str(uuid.uuid4())
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+    vis_out_dir = str(uuid.uuid4())
+
+    result_generator = hand(video,
+        vis_out_dir = vis_out_dir,
+        thickness=2,
+        rebase_keypoint_height=True,
+        device="cuda")
+
+    result = [result for result in result_generator] #next(result_generator)
+
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+    return out_file
+
+
+
+with gr.Blocks() as demo:
+    with gr.Column():
+        with gr.Tab("Upload video"):
+            with gr.Row():
+                with gr.Column():
+                    video_input = gr.Video(source="upload", type="filepath", height=512)
+
+                    submit_pose_file = gr.Button("Make 2d pose estimation")
+                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
+                    submit_hand_file = gr.Button("Make 2d hand estimation")
+                    submit_detect_file = gr.Button("Detect and track objects")
+
+                video_output = gr.Video(height=512)
+
+        with gr.Tab("Record video with webcam"):
+            with gr.Row():
+                with gr.Column():
+                    webcam_input = gr.Video(source="webcam", height=512)
+
+                    submit_pose_web = gr.Button("Make 2d pose estimation")
+                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
+                    submit_hand_web = gr.Button("Make 2d hand estimation")
+                    submit_detect_web = gr.Button("Detect and track objects")
+
+                webcam_output = gr.Video(height=512)
+
+
+    # From file
+    submit_pose_file.click(fn=pose2d,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_pose3d_file.click(fn=pose3d,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_hand_file.click(fn=pose2dhand,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_detect_file.click(fn=show_tracking,
+        inputs= video_input,
+        outputs = video_output)
+
+    # Web
+    submit_pose_web.click(fn=pose2d,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_pose3d_web.click(fn=pose3d,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_hand_web.click(fn=pose2dhand,
+        inputs= video_input,
+        outputs = video_output)
+
+    submit_detect_web.click(fn=show_tracking,
+        inputs= video_input,
+        outputs = video_output)
+
+demo.launch()
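For orientation, the button wiring added to main.py follows the standard gr.Blocks event pattern. A minimal, self-contained sketch of that pattern, assuming gradio 3.x (where gr.Video still accepts source=...) and with echo() as a hypothetical stand-in for pose2d/pose3d/show_tracking:

import gradio as gr

def echo(video_path):
    # Placeholder callback: a real handler would run inference and return an output video path.
    return video_path

with gr.Blocks() as demo:
    with gr.Row():
        video_input = gr.Video(source="upload", type="filepath", height=512)
        video_output = gr.Video(height=512)
    run_button = gr.Button("Run")
    # Each Button.click binds one callback to input and output components.
    run_button.click(fn=echo, inputs=video_input, outputs=video_output)

if __name__ == "__main__":
    demo.launch()

This is why a single video_output component can display whichever estimation the user triggers: every button routes its result to the same output.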
main_blocks.py → main_webcamtest.py
RENAMED

@@ -1,5 +1,3 @@
-
-
 # Pose inferencing
 import mmpose
 from mmpose.apis import MMPoseInferencer
@@ -22,12 +20,14 @@ import cv2
 
 print("[INFO]: Imported modules!")
 human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand")
+hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
 human3d = MMPoseInferencer(pose3d="human3d")
 track_model = YOLO('yolov8n.pt') # Load an official Detect model
 
 # ultraltics
 
+# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+
 # Defining inferencer models to lookup in function
 inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}
 
@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):
 
    return annotated_frame
 
-def show_tracking(video_content):
+def show_tracking(video_content, vis_out_dir, model):
    video = cv2.VideoCapture(video_content)
 
    # Track
-    video_track = tracking(video_content, track_model.track)
+    video_track = tracking(video_content, model.track)
 
    # Prepare to save video
    #out_file = os.path.join(vis_out_dir, "track.mp4")
@@ -65,9 +65,11 @@ def show_tracking(video_content):
    # Go through frames and write them
    for frame_track in video_track:
        result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
+        out_track.write(result_track)
+
    print("[INFO] Done with frames")
    #print(type(result_pose)) numpy ndarray
-        out_track.write(result_track)
 
+
    out_track.release()
 
@@ -77,129 +79,112 @@ def show_tracking(video_content):
    return out_file
 
 
-def pose3d(video):
-    add_dir = str(uuid.uuid4())
-    #vidname = video.split("/")[-1]
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-    #full name = os.path.join(vis_out_dir, vidname)
-
-    result_generator = human3d(video,
-        vis_out_dir = vis_out_dir,
-        thickness=2,
-        rebase_keypoint_height=True,
-        device="cuda")
-
-    result = [result for result in result_generator] #next(result_generator)
-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
-
-    return out_file
-
-
-def pose2d(video):
-    add_dir = str(uuid.uuid4())
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-
-    result_generator = human(video,
-        vis_out_dir = vis_out_dir,
-        thickness=2,
-        rebase_keypoint_height=True,
-        device="cuda")
-
-    result = [result for result in result_generator] #next(result_generator)
-
-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
-
-    return out_file
-
-
-def pose2dhand(video):
-    add_dir = str(uuid.uuid4())
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-    vis_out_dir = str(uuid.uuid4())
-
-    result_generator = hand(video,
-        vis_out_dir = vis_out_dir,
-        thickness=2,
-        rebase_keypoint_height=True,
-        device="cuda")
-
-    result = [result for result in result_generator] #next(result_generator)
-
-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
-
-    return out_file
-
-
-
-with gr.Blocks() as demo:
-    with gr.Column():
-        with gr.Tab("Upload video"):
-            with gr.Row():
-                with gr.Column():
-                    video_input = gr.Video(source="upload", type="filepath", height=512)
-
-                    submit_pose_file = gr.Button("Make 2d pose estimation")
-                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
-                    submit_hand_file = gr.Button("Make 2d hand estimation")
-                    submit_detect_file = gr.Button("Detect and track objects")
-
-                video_output = gr.Video(height=512)
-
-        with gr.Tab("Record video with webcam"):
-            with gr.Row():
-                with gr.Column():
-                    webcam_input = gr.Video(source="webcam", height=512)
-
-                    submit_pose_web = gr.Button("Make 2d pose estimation")
-                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
-                    submit_hand_web = gr.Button("Make 2d hand estimation")
-                    submit_detect_web = gr.Button("Detect and track objects")
-
-                webcam_output = gr.Video(height=512)
-
-
-    # From file
-    submit_pose_file.click(fn=pose2d,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_pose3d_file.click(fn=pose3d,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_hand_file.click(fn=pose2dhand,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_detect_file.click(fn=show_tracking,
-        inputs= video_input,
-        outputs = video_output)
-
-    # Web
-    submit_pose_web.click(fn=pose2d,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_pose3d_web.click(fn=pose3d,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_hand_web.click(fn=pose2dhand,
-        inputs= video_input,
-        outputs = video_output)
-
-    submit_detect_web.click(fn=show_tracking,
-        inputs= video_input,
-        outputs = video_output)
-
-demo.launch()
+def poses(inferencer, video, vis_out_dir, kpt_thr):
+    print("[INFO] VIDEO INPUT: ", video)
+    result_generator = inferencer(video,
+        vis_out_dir = vis_out_dir,
+        return_vis=True,
+        thickness=2,
+        rebase_keypoint_height=True,
+        #kpt_thr=kpt_thr,
+        device="cuda"
+        )
+
+    result = [result for result in result_generator] #next(result_generator)
+
+    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
+
+    return out_file
+
+def infer(video, check, kpt_thr, webcam=True):
+    print("[INFO] VIDEO INPUT: ", video)
+
+    # Selecting the specific inferencer
+    out_files=[]
+
+    for i in check:
+        # Create out directory
+        vis_out_dir = str(uuid.uuid4())
+        inferencer = inferencers[i] # 'hand', 'human , device='cuda'
+
+        if i == "Detect and track":
+            #continue
+            trackfile = show_tracking(video, vis_out_dir, inferencer)
+
+        else:
+            if webcam==True:
+                print("WEBCAM")
+                add_dir = str(uuid.uuid4())
+                vidname = video.split("/")[-1]
+                vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+                out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
+                fullname = os.path.join(vis_out_dir_web, vidname)
+                #if i == "Estimate human 3d poses":
+                #    fullname = fullname[:-4]+"mp4" #Change to .mp4
+                #    out_files.append(fullname)
+                #else:
+                out_files.append(fullname)
+
+            else:
+                out_files.extend(out_file)
+
+    print(out_files)
+
+    return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]
+
+def run():
+    #https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
+    check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+    check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+
+    description = """
+    \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
+    """
+
+    # Insert slider with kpt_thr
+    web_kpthr = gr.Slider(0, 1, value=0.3)
+    file_kpthr = gr.Slider(0, 1, value=0.3)
+
+    webcam = gr.Interface(
+        fn=infer,
+        inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+        title = 'Tracking and pose estimation',
+        description = description,
+        allow_flagging=False
+    )
+
+    file = gr.Interface(
+        infer,
+        inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
+        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+        title = 'Tracking and pose estimation',
+        description = description,
+        allow_flagging=False
+    )
+
+    demo = gr.TabbedInterface(
+        interface_list=[file, webcam],
+        tab_names=["From a File", "From your Webcam"]
+    )
+
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
+
+if __name__ == "__main__":
+    run()
+
+# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
+# motionbert_ft_h36m-d80af323_20230531.pth
+# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
+# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
+# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
+# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
+# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
+# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py
+
+
+# 00000.mp4
+# 000000.mp4
+
+
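For reference, the pose2d/pose3d/pose2dhand helpers in main.py and the poses() helper in main_webcamtest.py all follow the same MMPoseInferencer call pattern: call the inferencer with a video path and a vis_out_dir, drain the returned generator, then glob the output directory for whatever was rendered. A minimal sketch under stated assumptions (mmpose 1.x inferencer API; "sample.mp4" and "vis_results" are placeholder paths; device set to "cpu" here rather than the "cuda" used in the Space):

import glob
import os

from mmpose.apis import MMPoseInferencer

# Build the 2d human inferencer once; device is given at construction time here.
inferencer = MMPoseInferencer("human", device="cpu")

# Run inference on a (placeholder) video; visualizations are written to vis_out_dir.
result_generator = inferencer("sample.mp4", vis_out_dir="vis_results", thickness=2)
results = [r for r in result_generator]  # the generator must be consumed to process all frames

# Collect whatever the inferencer rendered, as pose2d() does with glob.
out_files = glob.glob(os.path.join("vis_results", "*"))
print(out_files)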