import ast
import random
from random import randrange
from typing import List

import numpy as np
import pandas as pd
import torch
import torch.utils.data as torch_data

from augmentations import *
from normalization.body_normalization import BODY_IDENTIFIERS
from normalization.hand_normalization import HAND_IDENTIFIERS
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict

# Duplicate the hand identifiers so that every hand landmark exists once per hand ("_0" and "_1")
HAND_IDENTIFIERS = [identifier + "_0" for identifier in HAND_IDENTIFIERS] + [identifier + "_1" for identifier in HAND_IDENTIFIERS]

DEFAULT_AUGMENTATIONS_CONFIG = {
    "rotate-angle": 13,
    "perspective-transform-ratio": 0.1,
    "squeeze-ratio": 0.15,
    "arm-joint-rotate-angle": 4,
    "arm-joint-rotate-probability": 0.3
}


def load_dataset(file_location: str):

    # Load the dataset CSV file
    df = pd.read_csv(file_location, encoding="utf-8")

    # TO BE DELETED
    df.columns = [item.replace("_Left_", "_0_").replace("_Right_", "_1_") for item in list(df.columns)]
    if "neck_X" not in df.columns:
        df["neck_X"] = [0 for _ in range(df.shape[0])]
        df["neck_Y"] = [0 for _ in range(df.shape[0])]

    # TEMP: labels are shifted by one here and shifted back in __getitem__
    labels = [label + 1 for label in df["labels"].to_list()]
    data = []

    # Each row holds one recording; every landmark column stores a per-frame list of coordinates
    for row_index, row in df.iterrows():
        current_row = np.empty(shape=(len(ast.literal_eval(row["leftEar_X"])), len(BODY_IDENTIFIERS + HAND_IDENTIFIERS), 2))
        for index, identifier in enumerate(BODY_IDENTIFIERS + HAND_IDENTIFIERS):
            current_row[:, index, 0] = ast.literal_eval(row[identifier + "_X"])
            current_row[:, index, 1] = ast.literal_eval(row[identifier + "_Y"])

        data.append(current_row)

    return data, labels


def tensor_to_dictionary(landmarks_tensor: torch.Tensor) -> dict:
    """Converts a (frames, landmarks, 2) tensor into a dictionary keyed by landmark identifier."""

    data_array = landmarks_tensor.numpy()
    output = {}

    for landmark_index, identifier in enumerate(BODY_IDENTIFIERS + HAND_IDENTIFIERS):
        output[identifier] = data_array[:, landmark_index]

    return output


def dictionary_to_tensor(landmarks_dict: dict) -> torch.Tensor:
    """Converts a landmark dictionary back into a (frames, landmarks, 2) tensor."""

    output = np.empty(shape=(len(landmarks_dict["leftEar"]), len(BODY_IDENTIFIERS + HAND_IDENTIFIERS), 2))

    for landmark_index, identifier in enumerate(BODY_IDENTIFIERS + HAND_IDENTIFIERS):
        output[:, landmark_index, 0] = [frame[0] for frame in landmarks_dict[identifier]]
        output[:, landmark_index, 1] = [frame[1] for frame in landmarks_dict[identifier]]

    return torch.from_numpy(output)


class CzechSLRDataset(torch_data.Dataset):
    """Object representation of the HPOES dataset for loading hand and body joint landmarks, utilizing Torch's
    built-in Dataset properties"""

    data: List[np.ndarray]
    labels: List[int]

    def __init__(self, dataset_filename: str, num_labels=5, transform=None, augmentations=False,
                 augmentations_prob=0.5, normalize=True, augmentations_config: dict = DEFAULT_AUGMENTATIONS_CONFIG):
        """
        Initiates the dataset with the data pre-loaded from the given CSV file.

        :param dataset_filename: Path to the CSV file
        :param num_labels: Number of classes in the dataset (default: 5)
        :param transform: Any data transformation to be applied (default: None)
        :param augmentations: Whether to apply random augmentations to the items (default: False)
        :param augmentations_prob: Probability of augmenting a given item (default: 0.5)
        :param normalize: Whether to normalize the body and hand landmarks (default: True)
        :param augmentations_config: Ranges and probabilities of the individual augmentations
            (default: DEFAULT_AUGMENTATIONS_CONFIG)
        """

        loaded_data = load_dataset(dataset_filename)
        data, labels = loaded_data[0], loaded_data[1]

        self.data = data
        self.labels = labels
        self.targets = list(labels)
        self.num_labels = num_labels
        self.transform = transform

        self.augmentations = augmentations
        self.augmentations_prob = augmentations_prob
        self.augmentations_config = augmentations_config
        self.normalize = normalize

    def __getitem__(self, idx):
        """
        Allocates, potentially transforms and returns the item at the desired index.

        :param idx: Index of the item
        :return: Tuple containing both the landmark data ("depth map") and the label
        """

        depth_map = torch.from_numpy(np.copy(self.data[idx]))
        label = torch.Tensor([self.labels[idx] - 1])

        depth_map = tensor_to_dictionary(depth_map)

        # Apply potential augmentations
        if self.augmentations and random.random() < self.augmentations_prob:

            selected_aug = randrange(4)

            if selected_aug == 0:
                depth_map = augment_rotate(depth_map, (-self.augmentations_config["rotate-angle"], self.augmentations_config["rotate-angle"]))

            if selected_aug == 1:
                depth_map = augment_shear(depth_map, "perspective", (0, self.augmentations_config["perspective-transform-ratio"]))

            if selected_aug == 2:
                depth_map = augment_shear(depth_map, "squeeze", (0, self.augmentations_config["squeeze-ratio"]))

            if selected_aug == 3:
                depth_map = augment_arm_joint_rotate(depth_map, self.augmentations_config["arm-joint-rotate-probability"], (-self.augmentations_config["arm-joint-rotate-angle"], self.augmentations_config["arm-joint-rotate-angle"]))

        if self.normalize:
            depth_map = normalize_single_body_dict(depth_map)
            depth_map = normalize_single_hand_dict(depth_map)

        depth_map = dictionary_to_tensor(depth_map)

        # Move the landmark position interval to improve performance
        depth_map = depth_map - 0.5

        if self.transform:
            depth_map = self.transform(depth_map)

        return depth_map, label

    def __len__(self):
        return len(self.labels)
if __name__ == "__main__":
pass
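    # Minimal usage sketch (illustrative only, not part of the original module): the CSV path
    # and label count below are hypothetical placeholders; an actual split file is expected to
    # follow the column layout consumed by load_dataset above.
    example_csv = "path/to/train_set.csv"  # hypothetical path
    dataset = CzechSLRDataset(example_csv, num_labels=100, augmentations=True)
    loader = torch_data.DataLoader(dataset, batch_size=1, shuffle=True)

    for depth_map, label in loader:
        # depth_map: (1, frames, landmarks, 2), label: (1, 1)
        print(depth_map.shape, label.shape)
        break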