File size: 1,271 Bytes
df07554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Flat configuration for a LipNet run: every setting is a module-level
# constant. Derived paths interpolate `data_type` and `lsr2_dir`, so those two
# are assigned before anything that uses them.

# Run identity and reproducibility.
run_name = 'phonemes-lrs2'
dataset = 'LRS2'
data_type = 'LRS2_CTC2'
random_seed = 0
# NOTE(review): kept as a string; presumably a device index - confirm against
# the code that reads it.
gpu = '0'

# List files and checkpoint prefix, all keyed on data_type.
train_list = f'data/{data_type}_train.txt'
val_list = f'data/{data_type}_val.txt'
save_prefix = f'weights/LipNet_{data_type}'

# Optimisation and loop settings.
batch_size = 32
base_lr = 8e-5
num_workers = 8
max_epoch = 10000
is_optimize = True
# NOTE(review): display / test_step look like reporting and evaluation
# intervals; their unit (iterations vs. epochs) is not visible from here.
display = 10
test_step = 1000

# NOTE(review): presumably padded sequence lengths for video frames and
# text labels - confirm with the dataset loader.
vid_padding = 100
txt_padding = 200

# NOTE(review): names suggest model-architecture switches; verify in the
# model code before changing them.
pre_gru_repeats = 1
frame_doubling = False

# Root of the local LRS2 copy. This is a machine-specific absolute path; the
# commented-out line is an alternative location.
lsr2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2'
# lsr2_dir = '/media/milselarch/47FC4BC577667AAD/LRS2'

# Data locations. Active values point into the LRS2 tree; each commented-out
# line directly below keeps the alternative GRID path. video_path, anno_path
# and audio_dir have no LRS2 variant here and still carry GRID-style values.
video_path = ''
anno_path = 'GRID_align_txt'
audio_dir = 'lip/GRID_wavs'
video_dir = f'{lsr2_dir}/lrs2_v1/main'
# video_dir = 'lip/GRID'
alignments_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main'
# alignments_dir = 'lip/GRID_aligns'
phonemes_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_phonemes'
# phonemes_dir = 'lip/GRID_phonemes'
crop_images_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_images'
# crop_images_dir = 'lip/GRID_lips'
# images_dir is an alias of crop_images_dir.
images_dir = crop_images_dir

# Video loading switches.
cache_videos = False
use_lip_crops = True

# Character sets LipNet maps to. Options named by the original note:
# 'letters', 'phonemes', 'lrs2_text' and 'cmu_phonemes'.
# NOTE(review): text_char_map is spelled 'lsr2_text' while the option list
# says 'lrs2_text' - confirm which spelling the consumer expects.
text_char_map = 'lsr2_text'
char_map = 'cmu_phonemes'

# No `weights` value is defined in this section; the checkpoint path below is
# kept commented out.
# weights = 'weights/phoneme-231201-0052/I198000-L00048-W00018-C00005.pt'