Spaces:
Build error
Build error
import os | |
import pickle | |
from tqdm import tqdm | |
import shutil | |
import torch | |
import numpy as np | |
import librosa | |
import random | |
# Names of the speaker sub-directories (under ``data_root``) to process.
speakers = ["seth", "conan", "oliver", "chemistry"]

# Dataset root directory, relative to the current working directory.
data_root = "../ExpressiveWholeBodyDatasetv1.0/"

# Split name. NOTE(review): appears unused by the visible code -- confirm
# before removing.
split = "train"
def split_list(full_list, shuffle=False, ratio=0.2):
    """Split a list into three consecutive parts.

    The first part holds ``int(n * ratio)`` items, the second part holds the
    items up to index ``int(n * ratio * 2)``, and the third part holds the
    rest, where ``n`` is ``len(full_list)``.

    Args:
        full_list: The items to split. It is never modified; the function
            works on a shallow copy.
        shuffle: When true, the copy is shuffled with ``random.shuffle``
            before it is sliced.
        ratio: Fraction of the items that goes into each of the first two
            parts.

    Returns:
        A 3-tuple ``(part_0, part_1, part_2)`` of lists. When the input is
        empty or too small for the first two parts to hold anything
        (``int(n * ratio * 2) < 1``), the first two parts are empty and the
        third holds every item in its original order.
    """
    n_total = len(full_list)
    offset_0 = int(n_total * ratio)
    offset_1 = int(n_total * ratio * 2)

    # Copy so that shuffling does not reorder the caller's list.
    items = list(full_list)

    if n_total == 0 or offset_1 < 1:
        # Always return three parts so callers can unpack unconditionally.
        return [], [], items

    if shuffle:
        random.shuffle(items)

    return items[:offset_0], items[offset_0:offset_1], items[offset_1:]
def moveto(list, file):
    """Move each directory in ``list`` into a sibling folder named ``file``.

    A path ``<parent>/<name>`` is relocated to ``<parent>/<file>/<name>``.
    Each directory is copied to its destination first and the source is
    removed only afterwards.

    Args:
        list: Iterable of directory paths to move. NOTE: the name shadows the
            builtin ``list``; it is kept so existing callers are unaffected.
        file: Name of the folder, created next to each source directory, that
            receives it (the script passes 'train', 'test' or 'val').

    Returns:
        None.

    Raises:
        OSError: Propagated from ``shutil.copytree`` / ``shutil.rmtree``, for
            example when the destination already exists.
    """
    for src in list:
        # os.path.split honours the platform separator and keeps the root of
        # absolute paths, unlike splitting on a literal '/'.
        parent, name = os.path.split(src)
        dst = os.path.join(parent, file, name)

        # Copy into the new directory ...
        shutil.copytree(src, dst)
        # ... then delete the original copy.
        shutil.rmtree(src)
    return None
def read_pkl(data):
    """Flag a loaded motion record as unusable.

    The pose arrays and the expression array are concatenated along axis 1
    and the result is checked for length and NaN values.

    Args:
        data: Mapping with the keys 'betas', 'jaw_pose', 'leye_pose',
            'reye_pose', 'global_orient', 'body_pose_axis', 'left_hand_pose',
            'right_hand_pose' and 'expression'. Presumably each value has one
            row per frame -- TODO confirm against the dataset.

    Returns:
        1 if the concatenated array has fewer than 90 rows or contains a NaN,
        otherwise 0.
    """
    # 'betas' is not part of the check; the lookup is kept so that a record
    # without this key still raises KeyError.
    np.array(data['betas'])

    pose_keys = (
        'jaw_pose',
        'leye_pose',
        'reye_pose',
        'global_orient',
        'body_pose_axis',
        'left_hand_pose',
        'right_hand_pose',
    )
    parts = []
    for name in pose_keys:
        arr = np.array(data[name])
        if name == 'global_orient':
            # Drop size-1 axes so it can be concatenated with the others.
            arr = arr.squeeze()
        parts.append(arr)

    pose = np.concatenate(parts, axis=1)
    features = np.concatenate((pose, np.array(data['expression'])), axis=1)

    # Length is tested first; the NaN scan only runs on long-enough records.
    if features.shape[0] < 90:
        return 1
    if torch.isnan(torch.from_numpy(features)).sum() > 0:
        return 1
    return 0
# Pass 1: validate every sequence directory and delete the ones that fail.
# WARNING: destructive -- a failing sequence is removed with shutil.rmtree.
# Expected layout: <data_root>/<speaker>/<video>/<seq>/<seq>.wav and <seq>.pkl
for speaker_name in speakers:
    speaker_root = os.path.join(data_root, speaker_name)
    videos = os.listdir(speaker_root)
    print(videos)

    # haode counts sequences that were kept, huaide counts deleted ones.
    haode = huaide = 0
    total_seqs = []

    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
        vid_pth = os.path.join(speaker_root, vid)
        try:
            seqs = os.listdir(vid_pth)
        except OSError:
            # Not a listable directory (e.g. a stray file): nothing to check.
            continue

        for s in seqs:
            if s in ('train', 'val', 'test'):
                # Split folders created by the splitting step of this script.
                # They hold sequences, not audio/motion files, so validating
                # them as a sequence would delete the whole split.
                continue

            seq_root = os.path.join(vid_pth, s)  # corresponds to clip******
            total_seqs.append(seq_root)

            # Delete sequences whose audio is missing or cannot be read.
            audio_fname = os.path.join(seq_root, '%s.wav' % (s))
            audio_ok = os.path.isfile(audio_fname)
            if audio_ok:
                try:
                    librosa.load(audio_fname)
                except Exception:
                    # ``Exception`` rather than a bare ``except`` so that
                    # Ctrl+C aborts the run instead of deleting this clip.
                    audio_ok = False
            if not audio_ok:
                shutil.rmtree(seq_root)
                huaide = huaide + 1
                continue

            # Delete sequences whose motion file cannot be opened.
            motion_fname = os.path.join(seq_root, '%s.pkl' % (s))
            try:
                # Read-only mode: the file is never written to, and 'rb+'
                # would reject (and thus delete) read-only files.
                motion_file = open(motion_fname, 'rb')
            except OSError:
                shutil.rmtree(seq_root)
                huaide = huaide + 1
                continue

            # NOTE(security): pickle.load can execute arbitrary code; only
            # run this on dataset files from a trusted source.
            with motion_file:
                data = pickle.load(motion_file)

            # read_pkl returns 1 for an unusable record.
            if read_pkl(data) == 1:
                shutil.rmtree(seq_root)
                huaide = huaide + 1
                continue

            haode = haode + 1

    print("huaide:{}, haode:{}, total_seqs:{}".format(huaide, haode, len(total_seqs)))
# Pass 2: for each speaker, collect the sequence directories and distribute
# them over the 'test', 'val' and 'train' folders of their video directory.
for speaker_name in speakers:
    speaker_root = os.path.join(data_root, speaker_name)
    videos = os.listdir(speaker_root)
    print(videos)

    total_seqs = []
    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
        vid_pth = os.path.join(speaker_root, vid)
        try:
            seqs = os.listdir(vid_pth)
        except OSError:
            # Not a listable directory (e.g. a stray file): skip it.
            continue
        # Skip split folders left by an earlier run; moving e.g. 'train' into
        # 'train/train' would copy the folder into itself and then remove it.
        total_seqs.extend(
            os.path.join(vid_pth, s)
            for s in seqs
            if s not in ('train', 'val', 'test')
        )

    print("total_seqs:{}".format(len(total_seqs)))

    # Split the dataset: 10% test, 10% val, remainder train, shuffled.
    # NOTE(review): the split is done per speaker because total_seqs is
    # rebuilt for every speaker. The shuffle is unseeded, so the assignment
    # differs between runs.
    test_list, val_list, train_list = split_list(total_seqs, True, 0.1)
    print(len(test_list), len(val_list), len(train_list))

    moveto(train_list, 'train')
    moveto(test_list, 'test')
    moveto(val_list, 'val')