# signbridge / gloss2pose.py
# (Hugging Face upload header: bipinkrish — "Upload 13 files", commit 29d72c3 verified)
import os
import re
from typing import List, Tuple
import numpy as np
import scipy.signal
from scipy.spatial.distance import cdist
from pose_format import Pose
from pose_format.utils.generic import reduce_holistic, correct_wrists, pose_normalization_info
from pose_format.numpy import NumPyPoseBody
from num2words import num2words
# concatenate
def normalize_pose(pose: Pose) -> Pose:
    """Normalize a pose using the standard normalization info derived from its header."""
    norm_info = pose_normalization_info(pose.header)
    return pose.normalize(norm_info)
def trim_pose(pose: Pose, start=True, end=True):
    """Trim frames at the start/end of a pose where no hand wrist is detected.

    Args:
        pose: the pose to trim; its body data/confidence are sliced in place.
        start: if True, drop leading frames before the first hand detection.
        end: if True, drop trailing frames after the last hand detection.

    Returns:
        The same Pose object, trimmed.
    """
    if len(pose.body.data) == 0:
        return pose

    wrist_indexes = [
        pose.header._get_point_index('LEFT_HAND_LANDMARKS', 'WRIST'),
        pose.header._get_point_index('RIGHT_HAND_LANDMARKS', 'WRIST')
    ]
    # A frame "has a hand" when either wrist has non-zero confidence.
    either_hand = pose.body.confidence[:, 0, wrist_indexes].sum(axis=1) > 0

    # Guard: on an all-False array np.argmax returns 0 for both directions,
    # which would mis-trim the pose. If no hand is ever visible, leave it alone.
    if not either_hand.any():
        return pose

    first_non_zero_index = np.argmax(either_hand) if start else 0
    # NOTE(review): this end index is the position of the LAST hand-visible
    # frame, and the slice below excludes it — possibly an intentional trim of
    # the transition frame; confirm against upstream before "fixing".
    last_non_zero_index = (
        len(either_hand) - np.argmax(either_hand[::-1]) - 1) if end else len(either_hand)

    pose.body.data = pose.body.data[first_non_zero_index:last_non_zero_index]
    pose.body.confidence = pose.body.confidence[first_non_zero_index:last_non_zero_index]
    return pose
def concatenate_poses(poses: List[Pose]) -> Pose:
    """Join several poses into one smooth, pixel-scaled pose sequence.

    Each pose is reduced to the holistic subset and normalized, then trimmed
    so hands-invisible frames are removed at the join boundaries. The poses
    are smoothly concatenated, the wrists corrected, and the result shifted
    and scaled into a 512-based coordinate space.
    """
    # Reduce and normalize every pose before joining.
    prepared = [normalize_pose(reduce_holistic(p)) for p in poses]

    # Trim hand-invisible frames, but keep the very start of the first
    # pose and the very end of the last one.
    last = len(prepared) - 1
    prepared = [trim_pose(p, i > 0, i < last) for i, p in enumerate(prepared)]

    # Smoothly join the segments, then correct the wrists on the combined
    # sequence (must happen after smoothing).
    merged = smooth_concatenate_poses(prepared)
    merged = correct_wrists(merged)

    # Shift coordinates into positive space and scale to pixels.
    new_width = 512
    shift = 1.25
    offset = np.full(shape=(merged.body.data.shape[-1],), fill_value=shift, dtype=np.float32)
    merged.body.data = (merged.body.data + offset) * new_width
    merged.header.dimensions.height = merged.header.dimensions.width = int(new_width * shift * 2)
    return merged
# lookup
class PoseLookup:
    """Dictionary-based lookup of pre-recorded .pose files for glosses.

    The vocabulary is read from ``<directory>/words.txt``; each known word
    maps to ``<directory>/<language>/<word>.pose``.
    """

    def __init__(self, directory: str, language: str):
        """Load the gloss vocabulary.

        Args:
            directory: root directory containing words.txt and the
                per-language folders of .pose files.
            language: name of the sub-directory holding the .pose files.
        """
        with open(os.path.join(directory, 'words.txt'), mode='r', encoding='utf-8') as f:
            words = f.readlines()
        self.glosses = set(word.replace("\n", "") for word in words)
        self.directory = directory
        self.language = language

    def read_pose(self, pose_path: str):
        """Read and deserialize ``<directory>/<language>/<pose_path>.pose``."""
        pose_path = os.path.join(
            self.directory, self.language, pose_path + ".pose")
        with open(pose_path, "rb") as f:
            return Pose.read(f.read())

    def lookup(self, word: str) -> Pose:
        """Return the pose for a word, or None if the word is unknown."""
        word = word.lower().strip()
        if word in self.glosses:
            return self.read_pose(word)
        # Explicit None for unknown words (was an implicit fall-through).
        return None

    def lookup_sequence(self, glosses: List[str]) -> Tuple[List[Pose], List[str]]:
        """Map glosses to poses; unknown words fall back to fingerspelling.

        Returns:
            A pair (poses, words) where words[i] is the gloss (or single
            character) that produced poses[i].
        """
        poses: List[Pose] = []
        words: List[str] = []
        for gloss in glosses:
            pose = self.lookup(gloss)
            # Compare against None explicitly: truthiness of a Pose object
            # is not guaranteed (a falsy-but-valid pose would be dropped).
            if pose is not None:
                poses.append(pose)
                words.append(gloss)
            else:
                # Unknown word: spell it out character by character.
                for char in gloss:
                    pose = self.lookup(char)
                    if pose is not None:
                        poses.append(pose)
                        words.append(char)
        return poses, words

    def gloss_to_pose(self, glosses: List[str]) -> Tuple[Pose, List[str]]:
        """Transform a list of glosses into a single concatenated pose.

        Returns:
            (pose, words) on success, or (None, None) when nothing matched.
        """
        poses, words = self.lookup_sequence(glosses)
        if poses:
            # Concatenate the poses to create a single pose
            return concatenate_poses(poses), words
        return None, None
# smoothing
def pose_savgol_filter(pose: Pose):
    """Smooth every non-face landmark trajectory with a Savitzky-Golay filter.

    A window of 3 frames and polynomial order 1 is applied independently to
    each (point, dimension) series of the first person. Smoothing the face
    does not result in a good result, so face landmarks are skipped.

    If we want this to be faster, here is a possible solution:
    https://stackoverflow.com/questions/75221888/fast-savgol-filter-on-3d-tensor/75406720#75406720
    """
    [face_component] = [c for c in pose.header.components if c.name == 'FACE_LANDMARKS']
    # Inclusive range of face point indices. The previous exclusive end
    # accidentally smoothed the LAST face landmark; the +1 fixes that.
    face_range = range(
        pose.header._get_point_index(
            'FACE_LANDMARKS', face_component.points[0]),
        pose.header._get_point_index(
            'FACE_LANDMARKS', face_component.points[-1]) + 1,
    )
    _, _, points, dims = pose.body.data.shape
    for p in range(points):
        if p in face_range:
            continue  # leave face landmarks untouched
        for d in range(dims):
            pose.body.data[:, 0, p, d] = scipy.signal.savgol_filter(
                pose.body.data[:, 0, p, d], 3, 1)
    return pose
def create_padding(time: float, example: Pose) -> NumPyPoseBody:
    """Build an all-zero pose body lasting `time` seconds.

    The frame geometry (people / points / dims) is copied from `example`;
    both data and confidence are zero, so later interpolation treats the
    padding as missing frames to fill in.
    """
    fps = example.body.fps
    n_frames = int(time * fps)
    _, people, points, dims = example.body.data.shape
    return NumPyPoseBody(
        fps=fps,
        data=np.zeros(shape=(n_frames, people, points, dims)),
        confidence=np.zeros(shape=(n_frames, people, points)),
    )
def s_concatenate_poses(poses: List[Pose], padding: NumPyPoseBody, interpolation='linear') -> Pose:
    """Concatenate pose bodies with zero-confidence padding between them.

    The padding block is appended after every pose except the last; the
    combined body is then interpolated so the padded gaps are filled.

    NOTE: mutates the bodies of all but the last input pose in place.
    """
    # Append the padding frames to every pose but the final one.
    for p in poses[:-1]:
        p.body.data = np.concatenate((p.body.data, padding.data))
        p.body.confidence = np.concatenate((p.body.confidence, padding.confidence))

    # Stack everything into one body and interpolate across the gaps.
    stacked_data = np.concatenate([p.body.data for p in poses])
    stacked_conf = np.concatenate([p.body.confidence for p in poses])
    merged_body = NumPyPoseBody(fps=poses[0].body.fps, data=stacked_data, confidence=stacked_conf)
    merged_body = merged_body.interpolate(kind=interpolation)

    return Pose(header=poses[0].header, body=merged_body)
def find_best_connection_point(pose1: Pose, pose2: Pose, window=0.3):
    """Find the pair of frames at which two poses connect most seamlessly.

    Compares the last `window` fraction of pose1's frames against the first
    `window` fraction of pose2's frames and picks the pair with the smallest
    euclidean distance between the flattened frame vectors.

    Args:
        pose1: the preceding pose.
        pose2: the following pose.
        window: fraction of each pose's frames to consider.

    Returns:
        (index into pose1 of the best cut frame,
         index into pose2 of the best start frame)
    """
    # Clamp to at least one frame per side: int() truncation would give a
    # zero-size window for short poses and make argmin fail on an empty
    # distance matrix.
    p1_size = max(1, int(len(pose1.body.data) * window))
    p2_size = max(1, int(len(pose2.body.data) * window))
    last_data = pose1.body.data[len(pose1.body.data) - p1_size:]
    first_data = pose2.body.data[:p2_size]

    # Flatten each frame so frames become points in a vector space.
    last_vectors = last_data.reshape(len(last_data), -1)
    first_vectors = first_data.reshape(len(first_data), -1)

    distances_matrix = cdist(last_vectors, first_vectors, 'euclidean')
    min_index = np.unravel_index(
        np.argmin(distances_matrix, axis=None), distances_matrix.shape)
    last_index = len(pose1.body.data) - p1_size + min_index[0]
    return last_index, min_index[1]
def smooth_concatenate_poses(poses: List[Pose], padding=0.20) -> Pose:
    """Concatenate poses at their best-matching frames and smooth the result.

    For each adjacent pair, the closest pair of frames (near the end of one
    pose and the start of the next) becomes the join point; the segments are
    then padded, concatenated with interpolation, and Savitzky-Golay
    filtered.

    Args:
        poses: poses to join (their bodies are sliced in place).
        padding: seconds of zero-confidence padding inserted between poses.
    """
    if len(poses) == 1:
        return poses[0]

    cut_start = 0
    final_idx = len(poses) - 1
    for idx, current in enumerate(poses):
        if idx == final_idx:
            cut_end, following_start = len(current.body.data), None
        else:
            # The next pose is still un-sliced here, so the comparison
            # window covers its original first frames.
            cut_end, following_start = find_best_connection_point(current, poses[idx + 1])
        current.body = current.body[cut_start:cut_end]
        cut_start = following_start

    pad_body = create_padding(padding, poses[0])
    combined = s_concatenate_poses(poses, pad_body)
    return pose_savgol_filter(combined)
# utils
def scale_down(pose: Pose, value: int = 256):
    """Scale a pose in place so its width becomes (approximately) `value`.

    The height and the body data are divided by the same factor so the
    aspect ratio and proportions are preserved.
    """
    dims = pose.header.dimensions
    factor = dims.width / value
    dims.width = int(dims.width / factor)
    dims.height = int(dims.height / factor)
    pose.body.data = pose.body.data / factor
def scale_up(pose: Pose, value: int = 2):
    """Multiply a pose's dimensions and body data in place by `value`."""
    dims = pose.header.dimensions
    dims.width *= value
    dims.height *= value
    pose.body.data *= value
def prepare_glosses(sentence: str) -> List[str]:
    """Tokenize a sentence into lowercase gloss tokens, expanding numbers.

    Alphanumeric tokens are extracted from the lowercased sentence; any
    all-digit token is replaced by its spelled-out words (e.g. "21" ->
    ["twenty-one"] via num2words, split on spaces).

    Args:
        sentence: the free-text sentence to tokenize.

    Returns:
        The list of gloss tokens in original order.
    """
    tokens: List[str] = re.findall(r'\b[a-zA-Z0-9]+\b', sentence.lower())
    # Build a new list instead of splicing into the list being iterated
    # (the original mutated `glosses` inside its own enumerate loop).
    glosses: List[str] = []
    for token in tokens:
        if token.isdigit():
            glosses.extend(num2words(int(token)).split())
        else:
            glosses.append(token)
    return glosses