# Experiment configuration: task S2G on the wlasl300 dataset, RGB input stream.
# Nesting was reconstructed from a flattened copy (indentation lost, table-cell
# residue at line ends); keys, values and key order are unchanged.
# NOTE(review): placement of `momentum` (under optimization) and of
# `freq`/`unit` (under validation) follows semantic grouping — confirm against
# the code that reads this file.

data:
  cache_dir: data/external/huggingface
  input_streams:
    - rgb
  level: word
  max_sent_length: 400
  name: wlasl300
  num_proc: 2
  subset: rgb_videos
  transform_cfg:
    cent_crop_size: 224
    color_jitter_threshold: 0.2
    rand_crop_aspect_ratio_max: 1.3
    rand_crop_aspect_ratio_min: 0.75
    rand_crop_bottom_area: 0.7
    rand_crop_size: 224
    rand_crop_threshold: 0.7
    scale_size: 224
    temporal_augmentation:
      tmax: 1.5
      tmin: 0.5
  txt_lowercase: true

model:
  RecognitionNetwork:
    GlossTokenizer:
      gloss2id_file: pretrained/mBart_en/gloss2ids.pkl
    s3d:
      freeze_block: 1
      pretrained_ckpt: pretrained/s3ds_actioncls
      use_block: 4
    visual_head:
      ff_kernelsize:
        - 3
        - 3
      ff_size: 2048
      hidden_size: 512
      input_size: 832
      pe: true

task: S2G

testing:
  cfg:
    recognition:
      beam_size: 5

training:
  batch_size: 3
  from_best: true
  from_ckpt: true
  keep_last_ckpts: 5
  model_dir: experiments/outputs/SingleStream/wlasl300_s2g
  optimization:
    # NOTE(review): `betas` is typically an Adam-family setting; with
    # `optimizer: sgd` it is presumably ignored — confirm in the trainer.
    betas:
      - 0.9
      - 0.998
    learning_rate:
      default: 0.002
    momentum: 0.9
    optimizer: sgd
    scheduler: cosineannealing
    # t_max equals total_epoch (50) below.
    t_max: 50
    weight_decay: 0.001
  overwrite: false
  shuffle: true
  total_epoch: 50
  validation:
    cfg:
      recognition:
        # Validation uses beam_size 2; testing (above) uses 5.
        beam_size: 2
    freq: 1
    unit: epoch