Spaces:

NN-BRD
/

MMpose

Build error

App Files Files Community

MMpose / main.py

xmrt

no kpt_thr

e207c04 over 1 year ago

raw

history blame

5.52 kB

	# Pose inferencing
	import mmpose
	from mmpose.apis import MMPoseInferencer

	# Ultralytics
	from ultralytics import YOLO
	import torch

	# Gradio
	import gradio as gr

	# System and files
	import os
	import glob
	import uuid

	# Image manipulation
	import numpy as np
	import cv2

	print("[INFO]: Imported modules!")

	# MMPose inferencers, one per pose-estimation task offered in the UI.
	human = MMPoseInferencer("human")
	# NOTE: kpt_thr (float) is the threshold to visualize the keypoints; it defaults to 0.3.
	hand = MMPoseInferencer("hand")
	human3d = MMPoseInferencer(pose3d="human3d")

	# Ultralytics: load an official Detect model.
	track_model = YOLO('yolov8n.pt')

	# Maps each method label shown in the UI to the model that `infer` looks up for it.
	inferencers = {
	    "Estimate human 2d poses": human,
	    "Estimate human 2d hand poses": hand,
	    "Estimate human 3d poses": human3d,
	    "Detect and track": track_model,
	}

	print("[INFO]: Downloaded models!")

	def tracking(video, model, boxes=True):
	    """Call ``model`` on ``video`` and return its result unchanged.

	    Args:
	        video: Input forwarded as the first positional argument of ``model``.
	        model: Callable accepting ``(video, boxes=...)``; ``show_tracking``
	            passes a YOLO model's ``track`` method here.
	        boxes: Forwarded to ``model`` as the ``boxes`` keyword argument.
	            See https://docs.ultralytics.com/modes/predict/

	    Returns:
	        Whatever ``model`` returns.
	    """
	    for message in ("[INFO] Loading model...", "[INFO] Starting tracking!"):
	        print(message)

	    return model(video, boxes=boxes)

	def show_tracking(video_content, vis_out_dir, model):
	    """Track objects in a video and write the annotated frames to ``track.mp4``.

	    Args:
	        video_content: Path of the input video. It is opened with
	            ``cv2.VideoCapture`` to read the frame rate and is also handed
	            to the tracker.
	        vis_out_dir: Currently unused; the output is always written to
	            ``track.mp4`` in the working directory.
	        model: Object exposing a ``track`` callable (in this file, the
	            ultralytics ``YOLO`` model).

	    Returns:
	        The path of the written video, ``"track.mp4"``.

	    Raises:
	        ValueError: If the tracker returned no frames.
	    """
	    # The capture is only needed for the frame rate, so release it right
	    # away instead of holding it open (and leaking it if tracking fails).
	    video = cv2.VideoCapture(video_content)
	    try:
	        fps = video.get(cv2.CAP_PROP_FPS)
	    finally:
	        video.release()

	    # OpenCV reports 0 when the backend cannot determine the frame rate;
	    # a writer opened with that value does not produce a usable file.
	    if not fps > 0:
	        fps = 30.0

	    # Track
	    video_track = tracking(video_content, model.track)
	    if not video_track:
	        raise ValueError(f"Tracking produced no frames for {video_content!r}")

	    # Prepare to save video
	    out_file = "track.mp4"
	    print("[INFO]: TRACK", out_file)

	    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video

	    # Read the frame size from the per-frame result itself. Indexing the
	    # result (``video_track[0][0]``) selects its first detection and fails
	    # on a frame that has none.
	    height, width, _ = video_track[0].orig_img.shape
	    size = (width, height)

	    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)
	    try:
	        # Plot the whole per-frame result so every detection is drawn and
	        # frames without detections are still written.
	        for frame_track in video_track:
	            out_track.write(frame_track.plot())  # plot() gives a BGR numpy array
	    finally:
	        # Always finalise the file, even if plotting or writing fails.
	        out_track.release()

	    print("[INFO] Done with frames")

	    cv2.destroyAllWindows()  # Closing window

	    return out_file


	def poses(inferencer, video, vis_out_dir):
	    """Run a pose inferencer on ``video`` and list the MP4 files it produced.

	    Args:
	        inferencer: Callable taking the video plus the keyword arguments
	            used below and returning an iterable of results (in this file,
	            an ``MMPoseInferencer``).
	        video: Input video forwarded to ``inferencer``.
	        vis_out_dir: Directory handed to ``inferencer`` for its
	            visualisations and then searched for ``*.mp4`` files.

	    Returns:
	        A sorted list of the ``*.mp4`` paths found in ``vis_out_dir``
	        (empty when nothing was written).
	    """
	    result_generator = inferencer(
	        video,
	        vis_out_dir=vis_out_dir,
	        return_vis=True,
	        thickness=2,
	        rebase_keypoint_height=True,
	        device="cuda",
	    )

	    # The results themselves are not used; the iterable only has to be
	    # exhausted so that inference runs to completion. Draining it without
	    # storing avoids keeping every result of the video in memory at once.
	    for _ in result_generator:
	        pass

	    # glob order is arbitrary; sort so callers that pick files by position
	    # get a stable result.
	    return sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))

	def infer(video, check):
	    """Run the selected methods on ``video`` and return the videos to display.

	    Args:
	        video: Input video handed to each selected method.
	        check: Iterable of method labels (keys of ``inferencers``) chosen in
	            the UI; ``None`` or an empty selection runs nothing.

	    Returns:
	        A 4-tuple matching the four video outputs of the interface: the
	        tracking video first, then up to three pose-estimation videos in
	        selection order. Slots with no result are ``None``.

	    Raises:
	        KeyError: If a label in ``check`` is not a key of ``inferencers``.
	    """
	    pose_slots = 3  # the interface has one tracking output plus three pose outputs

	    track_file = None
	    pose_files = []

	    for method in check or []:
	        # Create out directory name, unique per method
	        vis_out_dir = str(uuid.uuid4())
	        inferencer = inferencers[method]

	        if method == "Detect and track":
	            # show_tracking returns a single path string, so it must not be
	            # unpacked as a sequence.
	            track_file = show_tracking(video, vis_out_dir, inferencer)
	        else:
	            pose_files.extend(poses(inferencer, video, vis_out_dir))

	        print([track_file, *pose_files])

	    # Pad or trim to the fixed number of pose outputs so that selecting
	    # fewer than all methods cannot index past the end of the list.
	    shown = (pose_files + [None] * pose_slots)[:pose_slots]

	    return (track_file, *shown)

	def run():
	    """Build the two-tab Gradio demo (file upload and webcam) and launch it.

	    Both tabs call ``infer`` with a video and the selected methods and show
	    four video outputs. The server listens on 0.0.0.0:7860.
	    """
	    # https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
	    methods = (
	        "Detect and track",
	        "Estimate human 2d poses",
	        "Estimate human 2d hand poses",
	        "Estimate human 3d poses",
	    )

	    def method_selector():
	        # Each tab gets its own checkbox group with the same options.
	        return gr.CheckboxGroup(
	            choices=list(methods),
	            label="Methods",
	            type="value",
	            info="Select the model(s) you want",
	        )

	    def result_players():
	        # One tracking video followed by three pose videos.
	        return [
	            gr.Video(format='mp4'),
	            gr.PlayableVideo(),
	            gr.PlayableVideo(),
	            gr.PlayableVideo(),
	        ]

	    check_web = method_selector()
	    check_file = method_selector()

	    # Insert slider with kpt_thr

	    webcam = gr.Interface(
	        infer,
	        inputs=[gr.Video(source="webcam", height=412), check_web],
	        outputs=result_players(),
	        title='Pose estimation',
	        description='Pose estimation on video',
	        allow_flagging=False,
	    )

	    file = gr.Interface(
	        fn=infer,
	        inputs=[gr.Video(source="upload", height=412), check_file],
	        outputs=result_players(),
	        allow_flagging=False,
	    )

	    tabs = [file, webcam]
	    tab_titles = ["From a File", "From your Webcam"]
	    demo = gr.TabbedInterface(interface_list=tabs, tab_names=tab_titles)

	    demo.launch(server_name="0.0.0.0", server_port=7860)


	# Start the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    run()

	# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
	# motionbert_ft_h36m-d80af323_20230531.pth
	# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
	# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
	# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
	# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
	# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
	# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py


	# 00000.mp4
	# 000000.mp4