# pose/app.py
# Run pose inference on two videos and use DTW to align and compare their pose keypoints.
from tools.inferencer import PoseInferencerV2
from tools.dtw import DTWForKeypoints
from tools.visualizer import FastVisualizer
from tools.utils import convert_video_to_playable_mp4
from pathlib import Path
from tqdm import tqdm
import mmengine
import numpy as np
import mmcv
import cv2
import gradio as gr
def concat(img1, img2, height=1080):
    h1, w1, _ = img1.shape
    h2, w2, _ = img2.shape
    # Scale both frames to the same target height so hconcat gets matching row counts
    scale1 = height / h1
    scale2 = height / h2
    img1 = cv2.resize(img1, (int(w1 * scale1), height))
    img2 = cv2.resize(img2, (int(w2 * scale2), height))
    # Concatenate the images horizontally
    image = cv2.hconcat([img1, img2])
    return image
def draw(vis: FastVisualizer, img, keypoint, box, oks, oks_unnorm,
draw_human_keypoints=True,
draw_score_bar=True):
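    """Render one frame: mark the detection box region, optionally draw the
    OKS score bar, and optionally draw the human keypoints."""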
vis.set_image(img)
vis.draw_non_transparent_area(box)
if draw_score_bar:
vis.draw_score_bar(oks)
if draw_human_keypoints:
vis.draw_human_keypoints(keypoint, oks_unnorm)
return vis.get_image()
def main(video1, video2, draw_human_keypoints,
progress=gr.Progress(track_tqdm=True)):
# build PoseInferencerV2
config = 'configs/mark2.py'
cfg = mmengine.Config.fromfile(config)
pose_inferencer = PoseInferencerV2(
cfg.det_cfg,
cfg.pose_cfg,
device='cpu')
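    # Read both videos; the output video reuses video 1's frame rate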
v1 = mmcv.VideoReader(video1)
v2 = mmcv.VideoReader(video2)
video_writer = None
all_det1, all_pose1 = pose_inferencer.inference_video(video1)
all_det2, all_pose2 = pose_inferencer.inference_video(video2)
    keypoints1 = np.stack([p.keypoints[0] for p in all_pose1])  # keep only the first (top) prediction per frame
    keypoints2 = np.stack([p.keypoints[0] for p in all_pose2])
boxes1 = np.stack([d.bboxes[0] for d in all_det1])
boxes2 = np.stack([d.bboxes[0] for d in all_det2])
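    # Align the two keypoint sequences with DTW: dtw_path is a list of matched
    # (i, j) frame-index pairs, and oks / oks_unnorm score each matched pair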
dtw_path, oks, oks_unnorm = DTWForKeypoints(keypoints1, keypoints2).get_dtw_path()
vis = FastVisualizer()
for i, j in tqdm(dtw_path, desc='Visualizing'):
frame1 = v1[i]
frame2 = v2[j]
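        # The OKS score bar is drawn only on the left (video 1) frame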
frame1_ = draw(vis, frame1.copy(), keypoints1[i], boxes1[i],
oks[i, j], oks_unnorm[i, j], draw_human_keypoints)
frame2_ = draw(vis, frame2.copy(), keypoints2[j], boxes2[j],
oks[i, j], oks_unnorm[i, j], draw_human_keypoints, draw_score_bar=False)
        # concatenate the two frames side by side
frame = concat(frame1_, frame2_)
# draw logo
vis.set_image(frame)
frame = vis.draw_logo().get_image()
# write video
w, h = frame.shape[1], frame.shape[0]
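        # Create the writer lazily, once the concatenated frame size is known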
if video_writer is None:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter('dtw_compare.mp4',
fourcc, v1.fps, (w, h))
video_writer.write(frame)
video_writer.release()
    # convert the raw mp4v output into a playable mp4 and return its absolute path
convert_video_to_playable_mp4('dtw_compare.mp4')
output = str(Path('dtw_compare.mp4').resolve())
return output
if __name__ == '__main__':
    inputs = [
        gr.Video(label="Input video 1"),
        gr.Video(label="Input video 2"),
        gr.Checkbox(label="Draw human keypoints"),
    ]
output = gr.Video(label="Output video")
demo = gr.Interface(fn=main, inputs=inputs, outputs=output,
allow_flagging='never').queue()
demo.launch()