# Page header captured along with this file (not part of the configuration):
#   tanthinhdt - "Upload vsl_s2g-2024-03-04_14-11-05/vsl_s2g.yaml with huggingface_hub"
#   commit 0aaa74a (verified), 1.29 kB
# Task selector for this experiment configuration.
# NOTE(review): "S2G" presumably means sign-to-gloss - confirm with the consumer.
task: S2G
# Dataset selection, text preprocessing and video transform settings.
data:
  name: vsl
  subset: rgb_videos
  num_proc: 4
  cache_dir: data/external/huggingface
  input_streams:
    - rgb
  level: word  # word or char
  txt_lowercase: true
  max_sent_length: 400
  # Crop / scale / color-jitter parameters.
  # NOTE(review): units and exact semantics (e.g. whether the *_threshold
  # values are probabilities) are defined by the consuming transform code -
  # confirm there.
  transform_cfg:
    rand_crop_size: 224
    rand_crop_threshold: 0.5
    rand_crop_bottom_area: 0.7
    rand_crop_aspect_ratio_min: 0.75
    rand_crop_aspect_ratio_max: 1.3
    cent_crop_size: 224
    scale_size: 224
    color_jitter_threshold: 0.2
# Recognition decoding settings under the `testing` section.
testing:
  cfg:
    recognition:
      beam_size: 5
# Training run settings: output location, batching, checkpoint retention,
# validation schedule and optimizer / LR-scheduler hyper-parameters.
training:
  overwrite: false
  model_dir: experiments/outputs/SingleStream/vsl_s2g
  shuffle: true
  batch_size: 4
  total_epoch: 100
  keep_last_ckpts: 5
  # Validation cadence (`freq` counted in `unit`) and its own decoding config.
  validation:
    unit: epoch
    freq: 1
    cfg:
      recognition:
        beam_size: 2
  optimization:
    learning_rate:
      default: 1.0e-3
    optimizer: sgd
    weight_decay: 1.0e-4
    momentum: 0.9
    # NOTE(review): `betas` is an Adam-style hyper-parameter; with
    # `optimizer: sgd` it is presumably ignored - confirm with the consumer.
    betas:
      - 0.9
      - 0.998
    scheduler: cosineannealing
    # NOTE(review): t_max (50) is half of total_epoch (100); confirm the cosine
    # schedule is meant to reach its minimum at the halfway point.
    t_max: 50
# Model definition: a single RecognitionNetwork with a gloss tokenizer,
# an `s3d` section and a `visual_head` section.
model:
  RecognitionNetwork:
    GlossTokenizer:
      gloss2id_file: pretrained/mBart_vi/gloss2ids.pkl
    s3d:
      pretrained_ckpt: pretrained/s3ds_actioncls
      use_block: 4
      freeze_block: 1
    visual_head:
      # NOTE(review): input_size presumably has to match the feature width
      # produced by the s3d section at `use_block` - confirm with the model code.
      input_size: 832
      hidden_size: 512
      ff_size: 2048
      pe: true
      ff_kernelsize:
        - 3
        - 3