Spaces:

Skywork
/

skyreels-a1-talking-head

Running on L40S

App Files Files Community

skyreels-a1-talking-head / eval /pose_score.py

multimodalart HF Staff

Upload 83 files

38e20ed verified 9 months ago

raw

history blame contribute delete

24.5 kB

	import torch
	from collections import OrderedDict
	import os
	import torch
	import torch.nn as nn
	import cv2
	import numpy
	import numpy as np
	import math
	import time
	from scipy.ndimage.filters import gaussian_filter
	import matplotlib.pyplot as plt
	import matplotlib
	import torch
	from torchvision import transforms


	def transfer(model, model_weights):
	    """Re-key checkpoint weights so they match ``model.state_dict()``.

	    For every key of the model's state dict, the first dot-separated
	    component is dropped and the remainder is used to look the value up in
	    ``model_weights``.

	    Args:
	        model: object exposing ``state_dict()``.
	        model_weights: mapping from un-prefixed parameter names to values.

	    Returns:
	        dict mapping each state-dict key of ``model`` to the matching value.

	    Raises:
	        KeyError: if a stripped name is missing from ``model_weights``.
	    """
	    return {
	        full_name: model_weights[full_name.partition('.')[2]]
	        for full_name in model.state_dict()
	    }

	def padRightDownCorner(img, stride, padValue):
	    """Pad an image at the bottom and right so both sides are multiples of ``stride``.

	    Args:
	        img: 3-D array indexed as (row, column, channel).
	        stride: positive integer the padded height and width must divide by.
	        padValue: constant written into the padded area.

	    Returns:
	        (img_padded, pad) where ``pad`` is ``[up, left, down, right]``; ``up``
	        and ``left`` are always 0.
	    """
	    h = img.shape[0]
	    w = img.shape[1]

	    # (-n) % stride is 0 when n is already a multiple, else the missing remainder.
	    pad = [0, 0, (-h) % stride, (-w) % stride]

	    # The fill template is taken from the LAST row/column. The previous
	    # ``[-2:-1]`` slice was empty for one-row / one-column images, which
	    # silently skipped the padding. ``* 0 + padValue`` keeps the dtype
	    # promotion of the original expression.
	    fill_row = img[-1:, :, :] * 0 + padValue
	    img_padded = np.concatenate((img, np.tile(fill_row, (pad[2], 1, 1))), axis=0)
	    fill_col = img_padded[:, -1:, :] * 0 + padValue
	    img_padded = np.concatenate((img_padded, np.tile(fill_col, (1, pad[3], 1))), axis=1)

	    return img_padded, pad

	def make_layers(block, no_relu_layers):
	    """Turn an ordered layer specification into an ``nn.Sequential``.

	    Args:
	        block: ordered mapping ``layer_name -> spec``. A name containing
	            ``'pool'`` is built as ``nn.MaxPool2d`` from
	            ``[kernel_size, stride, padding]``; any other name is built as
	            ``nn.Conv2d`` from
	            ``[in_channels, out_channels, kernel_size, stride, padding]``.
	        no_relu_layers: names of convolutions that must NOT be followed by
	            a ReLU.

	    Returns:
	        nn.Sequential whose sub-modules keep the names from ``block``; each
	        added activation is named ``'relu_' + layer_name``.
	    """
	    named_modules = []
	    for name, spec in block.items():
	        if 'pool' in name:
	            pool = nn.MaxPool2d(kernel_size=spec[0], stride=spec[1], padding=spec[2])
	            named_modules.append((name, pool))
	            continue

	        conv = nn.Conv2d(
	            in_channels=spec[0],
	            out_channels=spec[1],
	            kernel_size=spec[2],
	            stride=spec[3],
	            padding=spec[4],
	        )
	        named_modules.append((name, conv))
	        if name not in no_relu_layers:
	            named_modules.append(('relu_' + name, nn.ReLU(inplace=True)))

	    return nn.Sequential(OrderedDict(named_modules))

	class bodypose_model(nn.Module):
	    """Six-stage, two-branch convolutional body-pose network.

	    ``model0`` is a shared feature extractor ending in 128 channels. Every
	    stage has an ``L1`` branch (38 output channels) and an ``L2`` branch
	    (19 output channels). Stages 2-6 take the concatenation of the previous
	    stage's two outputs and the shared features (38 + 19 + 128 = 185
	    channels).
	    """

	    def __init__(self):
	        super(bodypose_model, self).__init__()

	        # The last convolution of every stage/branch has no ReLU after it.
	        # The previous list named 'Mconv7_stage6_L1' twice and omitted
	        # 'Mconv7_stage6_L2', so the final L2 output was passed through an
	        # unintended ReLU. ReLU has no parameters, so checkpoint keys are
	        # unaffected by the correction.
	        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2']
	        for stage in range(2, 7):
	            no_relu_layers.append('Mconv7_stage%d_L1' % stage)
	            no_relu_layers.append('Mconv7_stage%d_L2' % stage)

	        # Shared feature extractor.
	        block0 = OrderedDict([
	            ('conv1_1', [3, 64, 3, 1, 1]),
	            ('conv1_2', [64, 64, 3, 1, 1]),
	            ('pool1_stage1', [2, 2, 0]),
	            ('conv2_1', [64, 128, 3, 1, 1]),
	            ('conv2_2', [128, 128, 3, 1, 1]),
	            ('pool2_stage1', [2, 2, 0]),
	            ('conv3_1', [128, 256, 3, 1, 1]),
	            ('conv3_2', [256, 256, 3, 1, 1]),
	            ('conv3_3', [256, 256, 3, 1, 1]),
	            ('conv3_4', [256, 256, 3, 1, 1]),
	            ('pool3_stage1', [2, 2, 0]),
	            ('conv4_1', [256, 512, 3, 1, 1]),
	            ('conv4_2', [512, 512, 3, 1, 1]),
	            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
	            ('conv4_4_CPM', [256, 128, 3, 1, 1]),
	        ])

	        def first_stage(branch, out_channels):
	            """Layer spec of stage 1 for branch ``L<branch>``."""
	            tag = 'L%d' % branch
	            return OrderedDict([
	                ('conv5_1_CPM_' + tag, [128, 128, 3, 1, 1]),
	                ('conv5_2_CPM_' + tag, [128, 128, 3, 1, 1]),
	                ('conv5_3_CPM_' + tag, [128, 128, 3, 1, 1]),
	                ('conv5_4_CPM_' + tag, [128, 512, 1, 1, 0]),
	                ('conv5_5_CPM_' + tag, [512, out_channels, 1, 1, 0]),
	            ])

	        def refine_stage(stage, branch, out_channels):
	            """Layer spec of stage 2-6 for branch ``L<branch>``."""
	            suffix = '_stage%d_L%d' % (stage, branch)
	            return OrderedDict([
	                ('Mconv1' + suffix, [185, 128, 7, 1, 3]),
	                ('Mconv2' + suffix, [128, 128, 7, 1, 3]),
	                ('Mconv3' + suffix, [128, 128, 7, 1, 3]),
	                ('Mconv4' + suffix, [128, 128, 7, 1, 3]),
	                ('Mconv5' + suffix, [128, 128, 7, 1, 3]),
	                ('Mconv6' + suffix, [128, 128, 1, 1, 0]),
	                ('Mconv7' + suffix, [128, out_channels, 1, 1, 0]),
	            ])

	        blocks = OrderedDict()
	        blocks['block1_1'] = first_stage(1, 38)
	        blocks['block1_2'] = first_stage(2, 19)

	        self.model0 = make_layers(block0, no_relu_layers)

	        # Stages 2 - 6
	        for i in range(2, 7):
	            blocks['block%d_1' % i] = refine_stage(i, 1, 38)
	            blocks['block%d_2' % i] = refine_stage(i, 2, 19)

	        # Layers are instantiated in insertion order (1_1, 1_2, 2_1, 2_2, ...).
	        for k in list(blocks.keys()):
	            blocks[k] = make_layers(blocks[k], no_relu_layers)

	        # Registration order is kept: all L1 branches first, then all L2.
	        for branch in (1, 2):
	            for stage in range(1, 7):
	                setattr(self, 'model%d_%d' % (stage, branch),
	                        blocks['block%d_%d' % (stage, branch)])

	    def forward(self, x):
	        """Run all six stages.

	        Args:
	            x: input batch with 3 channels (first convolution is 3 -> 64).

	        Returns:
	            Tuple ``(out6_1, out6_2)``: the stage-6 outputs of the L1 branch
	            (38 channels) and the L2 branch (19 channels).
	        """
	        features = self.model0(x)

	        out_l1 = self.model1_1(features)
	        out_l2 = self.model1_2(features)

	        for stage in range(2, 7):
	            # Every refinement stage sees both previous outputs plus the
	            # shared features.
	            stage_input = torch.cat([out_l1, out_l2, features], 1)
	            out_l1 = getattr(self, 'model%d_1' % stage)(stage_input)
	            out_l2 = getattr(self, 'model%d_2' % stage)(stage_input)

	        return out_l1, out_l2

	class handpose_model(nn.Module):
	    """Six-stage single-branch convolutional hand-pose network.

	    ``model1_0`` extracts 128-channel features, ``model1_1`` turns them into
	    a 22-channel stage-1 output, and each of ``model2`` .. ``model6``
	    refines the previous 22-channel output concatenated with the features
	    (22 + 128 = 150 input channels).
	    """

	    def __init__(self):
	        super(handpose_model, self).__init__()

	        # Final convolution of each stage: no ReLU appended.
	        skip_relu = ['conv6_2_CPM'] + ['Mconv7_stage%d' % s for s in range(2, 7)]

	        # stage 1: feature extractor
	        backbone_spec = OrderedDict([
	            ('conv1_1', [3, 64, 3, 1, 1]),
	            ('conv1_2', [64, 64, 3, 1, 1]),
	            ('pool1_stage1', [2, 2, 0]),
	            ('conv2_1', [64, 128, 3, 1, 1]),
	            ('conv2_2', [128, 128, 3, 1, 1]),
	            ('pool2_stage1', [2, 2, 0]),
	            ('conv3_1', [128, 256, 3, 1, 1]),
	            ('conv3_2', [256, 256, 3, 1, 1]),
	            ('conv3_3', [256, 256, 3, 1, 1]),
	            ('conv3_4', [256, 256, 3, 1, 1]),
	            ('pool3_stage1', [2, 2, 0]),
	            ('conv4_1', [256, 512, 3, 1, 1]),
	            ('conv4_2', [512, 512, 3, 1, 1]),
	            ('conv4_3', [512, 512, 3, 1, 1]),
	            ('conv4_4', [512, 512, 3, 1, 1]),
	            ('conv5_1', [512, 512, 3, 1, 1]),
	            ('conv5_2', [512, 512, 3, 1, 1]),
	            ('conv5_3_CPM', [512, 128, 3, 1, 1]),
	        ])

	        # stage 1: prediction head
	        head_spec = OrderedDict([
	            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
	            ('conv6_2_CPM', [512, 22, 1, 1, 0]),
	        ])

	        # Modules are created and registered in the order
	        # model1_0, model1_1, model2, ..., model6.
	        self.model1_0 = make_layers(backbone_spec, skip_relu)
	        self.model1_1 = make_layers(head_spec, skip_relu)

	        # stage 2-6
	        for stage in range(2, 7):
	            refine_spec = OrderedDict([
	                ('Mconv1_stage%d' % stage, [150, 128, 7, 1, 3]),
	                ('Mconv2_stage%d' % stage, [128, 128, 7, 1, 3]),
	                ('Mconv3_stage%d' % stage, [128, 128, 7, 1, 3]),
	                ('Mconv4_stage%d' % stage, [128, 128, 7, 1, 3]),
	                ('Mconv5_stage%d' % stage, [128, 128, 7, 1, 3]),
	                ('Mconv6_stage%d' % stage, [128, 128, 1, 1, 0]),
	                ('Mconv7_stage%d' % stage, [128, 22, 1, 1, 0]),
	            ])
	            setattr(self, 'model%d' % stage, make_layers(refine_spec, skip_relu))

	    def forward(self, x):
	        """Return the stage-6 output (22 channels) for input batch ``x``."""
	        features = self.model1_0(x)
	        prediction = self.model1_1(features)
	        for refine in (self.model2, self.model3, self.model4, self.model5, self.model6):
	            prediction = refine(torch.cat([prediction, features], 1))
	        return prediction

	class Body(object):
	    """Multi-person body keypoint estimator wrapping ``bodypose_model``.

	    Calling an instance on an image returns the detected keypoint candidates
	    and their grouping into persons.
	    """

	    def __init__(self, model_path):
	        """Build the network and load its weights.

	        Args:
	            model_path: path of a checkpoint readable by ``torch.load`` whose
	                keys are the model's state-dict keys without their first
	                dot-separated component (see ``transfer``).
	        """
	        self.model = bodypose_model()
	        if torch.cuda.is_available():
	            self.model = self.model.cuda()
	            print('cuda')
	        # map_location='cpu' lets a checkpoint saved from a GPU load on a
	        # CPU-only host; load_state_dict copies values onto the model's device.
	        checkpoint = torch.load(model_path, map_location='cpu')
	        model_dict = transfer(self.model, checkpoint)
	        self.model.load_state_dict(model_dict)
	        self.model.eval()

	    def __call__(self, oriImg):
	        """Detect body keypoints in one image.

	        Args:
	            oriImg: H x W x 3 image array (callers in this file pass BGR —
	                NOTE(review): confirm the expected channel order).

	        Returns:
	            (candidate, subset):
	            candidate: one row per detected peak: x, y, score, id.
	            subset: n x 20 array; columns 0-17 hold indices into
	                ``candidate`` (-1 when absent), column 18 the total score,
	                column 19 the number of parts.
	        """
	        # scale_search = [0.5, 1.0, 1.5, 2.0]
	        scale_search = [0.5]
	        boxsize = 368
	        stride = 8
	        padValue = 128
	        thre1 = 0.1   # minimum heatmap value for a peak
	        thre2 = 0.05  # minimum per-sample PAF score along a limb
	        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
	        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
	        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

	        for scale in multiplier:
	            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
	            imageToTest_padded, pad = padRightDownCorner(imageToTest, stride, padValue)
	            # HWC -> 1 x C x H x W, scaled to roughly [-0.5, 0.5).
	            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
	            im = np.ascontiguousarray(im)

	            data = torch.from_numpy(im).float()
	            if torch.cuda.is_available():
	                data = data.cuda()
	            with torch.no_grad():
	                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
	            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
	            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

	            # Upsample by the network stride, drop the padding, then resize
	            # back to the original resolution.
	            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # L2 output: heatmaps
	            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
	            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
	            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

	            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # L1 output: PAFs
	            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
	            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
	            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

	            # Plain running mean over scales. The earlier
	            # ``heatmap_avg += heatmap_avg + ...`` doubled the accumulator on
	            # every scale (harmless only while a single scale is used).
	            heatmap_avg += heatmap / len(multiplier)
	            paf_avg += paf / len(multiplier)

	        all_peaks = []
	        peak_counter = 0

	        for part in range(18):
	            map_ori = heatmap_avg[:, :, part]
	            one_heatmap = gaussian_filter(map_ori, sigma=3)

	            # Copies shifted by one pixel in each direction, used for the
	            # local-maximum test below.
	            map_left = np.zeros(one_heatmap.shape)
	            map_left[1:, :] = one_heatmap[:-1, :]
	            map_right = np.zeros(one_heatmap.shape)
	            map_right[:-1, :] = one_heatmap[1:, :]
	            map_up = np.zeros(one_heatmap.shape)
	            map_up[:, 1:] = one_heatmap[:, :-1]
	            map_down = np.zeros(one_heatmap.shape)
	            map_down[:, :-1] = one_heatmap[:, 1:]

	            peaks_binary = np.logical_and.reduce(
	                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
	            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
	            # The score is read from the unsmoothed map.
	            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
	            peak_id = range(peak_counter, peak_counter + len(peaks))
	            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

	            all_peaks.append(peaks_with_score_and_id)
	            peak_counter += len(peaks)

	        # find connection in the specified sequence, center 29 is in the position 15
	        # limbSeq holds 1-based part-index pairs.
	        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
	                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
	                   [1, 16], [16, 18], [3, 17], [6, 18]]
	        # the middle joints heatmap correspondence (offset by 19 into the PAF channels)
	        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
	                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
	                  [55, 56], [37, 38], [45, 46]]

	        connection_all = []
	        special_k = []
	        mid_num = 10  # samples taken along each candidate limb

	        for k in range(len(mapIdx)):
	            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
	            candA = all_peaks[limbSeq[k][0] - 1]
	            candB = all_peaks[limbSeq[k][1] - 1]
	            nA = len(candA)
	            nB = len(candB)
	            indexA, indexB = limbSeq[k]
	            if (nA != 0 and nB != 0):
	                connection_candidate = []
	                for i in range(nA):
	                    for j in range(nB):
	                        vec = np.subtract(candB[j][:2], candA[i][:2])
	                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
	                        norm = max(0.001, norm)  # avoid division by zero
	                        vec = np.divide(vec, norm)

	                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
	                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

	                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
	                                          for I in range(len(startend))])
	                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
	                                          for I in range(len(startend))])

	                        # Projection of the sampled field onto the limb direction.
	                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
	                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
	                            0.5 * oriImg.shape[0] / norm - 1, 0)
	                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
	                        criterion2 = score_with_dist_prior > 0
	                        if criterion1 and criterion2:
	                            connection_candidate.append(
	                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

	                # Greedy matching: best score first, each endpoint used once.
	                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
	                connection = np.zeros((0, 5))
	                for c in range(len(connection_candidate)):
	                    i, j, s = connection_candidate[c][0:3]
	                    if (i not in connection[:, 3] and j not in connection[:, 4]):
	                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
	                        if (len(connection) >= min(nA, nB)):
	                            break

	                connection_all.append(connection)
	            else:
	                special_k.append(k)
	                connection_all.append([])

	        # last number in each row is the total parts number of that person
	        # the second last number in each row is the score of the overall configuration
	        subset = -1 * np.ones((0, 20))
	        candidate = np.array([item for sublist in all_peaks for item in sublist])

	        for k in range(len(mapIdx)):
	            if k not in special_k:
	                partAs = connection_all[k][:, 0]
	                partBs = connection_all[k][:, 1]
	                indexA, indexB = np.array(limbSeq[k]) - 1

	                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
	                    found = 0
	                    subset_idx = [-1, -1]
	                    for j in range(len(subset)):  # 1:size(subset,1):
	                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
	                            subset_idx[found] = j
	                            found += 1

	                    if found == 1:
	                        j = subset_idx[0]
	                        if subset[j][indexB] != partBs[i]:
	                            subset[j][indexB] = partBs[i]
	                            subset[j][-1] += 1
	                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
	                    elif found == 2:  # if found 2 and disjoint, merge them
	                        j1, j2 = subset_idx
	                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
	                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
	                            subset[j1][:-2] += (subset[j2][:-2] + 1)
	                            subset[j1][-2:] += subset[j2][-2:]
	                            subset[j1][-2] += connection_all[k][i][2]
	                            subset = np.delete(subset, j2, 0)
	                        else:  # as like found == 1
	                            subset[j1][indexB] = partBs[i]
	                            subset[j1][-1] += 1
	                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

	                    # if find no partA in the subset, create a new subset
	                    elif not found and k < 17:
	                        row = -1 * np.ones(20)
	                        row[indexA] = partAs[i]
	                        row[indexB] = partBs[i]
	                        row[-1] = 2
	                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
	                        subset = np.vstack([subset, row])

	        # delete some rows of subset which has few parts occur
	        deleteIdx = []
	        for i in range(len(subset)):
	            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
	                deleteIdx.append(i)
	        subset = np.delete(subset, deleteIdx, axis=0)

	        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
	        # candidate: x, y, score, id
	        return candidate, subset



	def sample_video_frames(video_path,):
	    """Read every frame of a video and return them resized to 720 x 480.

	    Frames wider than 1024 pixels are first cropped to the columns from 1440
	    onward. NOTE(review): this crop looks specific to one input layout; a
	    frame whose width is between 1025 and 1440 becomes empty and the resize
	    fails — confirm the intended inputs.

	    Args:
	        video_path: path handed to ``cv2.VideoCapture``.

	    Returns:
	        list of frames as returned by ``cv2.resize``; frames that could not
	        be read are skipped.
	    """
	    cap = cv2.VideoCapture(video_path)
	    frames = []
	    try:
	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        # range() yields the same indices 0 .. total_frames - 1 as the former
	        # linspace call and is simply empty for a non-positive frame count.
	        for idx in range(total_frames):
	            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
	            ret, frame = cap.read()
	            if not ret:
	                continue
	            if frame.shape[1] > 1024:
	                frame = frame[:, 1440:, :]
	            frames.append(cv2.resize(frame, (720, 480)))
	    finally:
	        # Release the capture even when decoding or resizing raises.
	        cap.release()
	    return frames


	def process_image(pose_model, image_path):
	    """Run the pose model on one image and return its keypoint coordinates.

	    Args:
	        pose_model: callable taking a BGR image array and returning
	            ``(candidate, subset)``; only ``candidate`` is used here.
	        image_path: either a file path (str) or an RGB image as a
	            ``numpy.ndarray``.

	    Returns:
	        list of ``[x, y]`` pairs, one per row of ``candidate``.

	    Raises:
	        TypeError: if ``image_path`` is neither a str nor an ndarray.
	        OSError: if a path is given and the file cannot be read as an image.
	    """
	    if isinstance(image_path, str):
	        # The former code used ``Image.open`` although PIL is never imported
	        # in this file, so this branch always raised NameError. cv2 is
	        # already imported and decodes straight to the BGR layout the pose
	        # model is fed with.
	        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
	        if image_bgr is None:
	            raise OSError("cannot read image file: %s" % image_path)
	    elif isinstance(image_path, numpy.ndarray):
	        image_bgr = cv2.cvtColor(image_path, cv2.COLOR_RGB2BGR)
	    else:
	        raise TypeError("image_path should be a string path or a numpy.ndarray")

	    candidate, subset = pose_model(image_bgr)

	    return [[c[0], c[1]] for c in candidate]


	def process_video(video_path, pose_model):
	    """Estimate keypoints for every sampled frame of a video.

	    Args:
	        video_path: path passed to ``sample_video_frames``.
	        pose_model: pose estimator forwarded to ``process_image``.

	    Returns:
	        list with one ``process_image`` result per frame, in frame order.
	    """
	    frames_bgr = sample_video_frames(video_path,)
	    print(len(frames_bgr))
	    # process_image expects RGB arrays, so each BGR frame is converted first.
	    return [
	        process_image(pose_model, cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
	        for frame_bgr in frames_bgr
	    ]


	def calculate_l1_distance(list1, list2):
	    """
	    Compute the L1 distance between two lists of points.

	    Both inputs are converted to NumPy arrays and truncated along the first
	    axis to the shorter length; points are paired by position.
	    :return: sum of the L1 distances over all paired points
	    """
	    points_a = np.array(list1)
	    points_b = np.array(list2)

	    common = min(points_a.shape[0], points_b.shape[0])
	    # Two-axis slicing is kept on purpose: input that is not 2-D (for
	    # example an empty list) raises IndexError here.
	    points_a = points_a[:common, :]
	    points_b = points_b[:common, :]

	    # L1 distance of each pair, then the total over all pairs.
	    per_point = np.abs(points_a - points_b).sum(axis=1)
	    return per_point.sum()


	def calculate_pose(list1, list2):
	    """Average normalised nearest-frame pose distance from ``list1`` to ``list2``.

	    For every keypoint list in ``list1`` the smallest
	    ``calculate_l1_distance`` to any keypoint list in ``list2`` is taken and
	    divided by ``(480 + 720)`` and by 16. Entries whose normalised distance
	    exceeds 1 — including those for which no comparison succeeded — are
	    left out of the average.

	    Args:
	        list1: sequence of per-frame keypoint lists.
	        list2: sequence of per-frame keypoint lists to match against.

	    Returns:
	        Mean of the retained distances, or ``0.`` when none is retained.
	    """
	    distance_list = []
	    for kps1 in list1:
	        # Sentinel that still exceeds 1 after normalisation, so a frame with
	        # no successful comparison is dropped below.
	        min_dis = (480 + 720) * 17 + 1
	        for kps2 in list2:
	            try:
	                min_dis = min(min_dis, calculate_l1_distance(kps1, kps2))
	            except Exception:
	                # Best effort: pairs that cannot be compared (e.g. a frame
	                # without detections) are skipped. ``Exception`` rather than
	                # a bare ``except`` so Ctrl-C / SystemExit still propagate.
	                continue
	        min_dis = min_dis/(480+720)/16
	        if min_dis > 1:
	            continue
	        distance_list.append(min_dis)

	    if distance_list:
	        return sum(distance_list)/len(distance_list)
	    return 0.

	def main():
	    """Score pose agreement between generated videos and their driving videos.

	    Every file in ``data_path`` whose name contains ``"mp4"`` is paired with
	    a driving video in ``img_path`` whose name is derived from the file
	    name. For each pair the pose distance is printed, and finally the mean
	    of all distances above 0.0001.
	    """
	    body_estimation = Body('eval/pose/body_pose_model.pth')

	    data_path = "data/SkyActor"
	    # data_path = "data/LivePotraits"
	    # data_path = "data/Actor-One"
	    # data_path = "data/FollowYourEmoji"
	    img_path = "/maindata/data/shared/public/rui.wang/act_review/driving_video"
	    # When True the driving-video name is the part after the first '-'.
	    # NOTE(review): a file name without '-' raises IndexError in that mode.
	    pre_tag = True
	    mp4_list = os.listdir(data_path)
	    print(mp4_list)

	    img_list = []
	    video_list = []
	    for mp4 in mp4_list:
	        if "mp4" not in mp4:
	            continue
	        stem = mp4.split('.')[0]
	        if pre_tag:
	            png_path = stem.split('-')[1] + ".mp4"
	        elif "-" in mp4:
	            png_path = stem.split('-')[0] + ".mp4"
	        else:
	            png_path = stem.split('_')[0] + ".mp4"
	        img_list.append(os.path.join(img_path, png_path))
	        video_list.append(os.path.join(data_path, mp4))
	    print(img_list)
	    if video_list:
	        # Guarded: indexing an empty list used to raise IndexError.
	        print(video_list[0])

	    pd_list = []
	    for i, (driving_video, generated_video) in enumerate(zip(img_list, video_list)):
	        print("number: ", str(i), " total: ", len(img_list), data_path)

	        pose_1 = process_video(generated_video, body_estimation)
	        pose_2 = process_video(driving_video, body_estimation)

	        dis = calculate_pose(pose_1, pose_2)
	        print(dis)
	        if dis > 0.0001:
	            pd_list.append(dis)

	    if pd_list:
	        print("pose", sum(pd_list) / len(pd_list))
	    else:
	        # No usable score: report it instead of dividing by zero.
	        print("pose", "no valid scores")


	# Run only when executed as a script, so importing the module has no side effects.
	if __name__ == "__main__":
	    main()