# Module-level run settings, one assignment per line.
# NOTE(review): this file was reconstructed from a single collapsed line;
# each `# name = value` fragment was treated as one commented-out alternative
# and the assignment following it as live -- confirm against the original.

# --- Run / device -----------------------------------------------------------
gpu = '0'
random_seed = 0
run_name = 'phonemes-lrs2'

# --- Dataset lists (both derived from data_type) ----------------------------
data_type = 'LRS2_CTC2'
video_path = ''
train_list = f'data/{data_type}_train.txt'
val_list = f'data/{data_type}_val.txt'
anno_path = 'GRID_align_txt'

# --- Batching / padding -----------------------------------------------------
vid_padding = 100
txt_padding = 200
batch_size = 32
num_workers = 8

# --- Optimisation schedule --------------------------------------------------
base_lr = 8e-5
max_epoch = 10000
display = 10
test_step = 1000
is_optimize = True

# Prefix for saved weights; embeds data_type.
save_prefix = f'weights/LipNet_{data_type}'

# --- Dataset root -----------------------------------------------------------
# Machine-specific absolute path; every f'{lsr2_dir}/...' path below is built
# from it.
lsr2_dir = '/home/milselarch/projects/SUTD/50-035/LRS2'
# lsr2_dir = '/media/milselarch/47FC4BC577667AAD/LRS2'

# --- Model variants ---------------------------------------------------------
pre_gru_repeats = 1
frame_doubling = False

# --- Data directories -------------------------------------------------------
# The commented-out 'lip/GRID*' values are the alternatives kept from the
# original file.
video_dir = f'{lsr2_dir}/lrs2_v1/main'
# video_dir = 'lip/GRID'
audio_dir = 'lip/GRID_wavs'
alignments_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main'
# alignments_dir = 'lip/GRID_aligns'
crop_images_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_images'
# crop_images_dir = 'lip/GRID_lips'
# images_dir is an alias of crop_images_dir.
images_dir = crop_images_dir
dataset = 'LRS2'
# phonemes_dir = 'lip/GRID_phonemes'
phonemes_dir = f'{lsr2_dir}/lrs2_v1/mvlrs_v1/main_phonemes'

cache_videos = False
use_lip_crops = True

# --- Character maps ---------------------------------------------------------
# What character set to have LipNet map to.
# Options right now are 'letters' and 'phonemes',
# 'lrs2_text' and 'cmu_phonemes'.
# NOTE(review): the value below is spelled 'lsr2_text' while the option list
# above says 'lrs2_text' -- left unchanged; verify which spelling the consumer
# of text_char_map expects.
text_char_map = 'lsr2_text'
char_map = 'cmu_phonemes'

# weights = 'weights/phoneme-231201-0052/I198000-L00048-W00018-C00005.pt'