# Viewer metadata carried over with this copy (not part of the configuration):
# file size 1,374 bytes; identifier 6228935.
# Nesting restored from a whitespace-flattened copy; sibling keys keep their
# original (alphabetical) order and every value is unchanged.
data:
  cache_dir: data/external/huggingface
  input_streams:
    - rgb
  level: word
  max_sent_length: 400
  name: wlasl300
  num_proc: 2
  subset: rgb_videos
  transform_cfg:
    cent_crop_size: 224
    color_jitter_threshold: 0.2
    rand_crop_aspect_ratio_max: 1.3
    rand_crop_aspect_ratio_min: 0.75
    rand_crop_bottom_area: 0.7
    rand_crop_size: 224
    rand_crop_threshold: 0.7
    scale_size: 224
    temporal_augmentation:
      tmax: 1.5
      tmin: 0.5
  # NOTE(review): the flattened source does not show whether txt_lowercase
  # belongs here or inside transform_cfg; it is placed at the data level as a
  # text option rather than an image transform. Confirm against the consumer.
  txt_lowercase: true
# Nesting restored from a whitespace-flattened copy; values are unchanged.
# NOTE(review): parent/child placement is inferred from key names and the
# alphabetical sibling order; verify against the code that reads this file.
model:
  RecognitionNetwork:
    GlossTokenizer:
      gloss2id_file: pretrained/mBart_en/gloss2ids.pkl
    s3d:
      freeze_block: 1
      pretrained_ckpt: pretrained/s3ds_actioncls
      use_block: 4
    visual_head:
      ff_kernelsize:
        - 3
        - 3
      ff_size: 2048
      hidden_size: 512
      input_size: 832
      pe: true
task: S2G

# Nesting restored so that cfg/recognition/beam_size belong to `testing` and
# no longer collide with the identically named keys of the validation
# settings further down the file.
testing:
  cfg:
    recognition:
      beam_size: 5
# Nesting restored from a whitespace-flattened copy; sibling keys keep their
# original (alphabetical) order and every value is unchanged. The stray `|`
# that followed the last key was viewer residue and has been dropped.
training:
  batch_size: 3
  from_best: true
  from_ckpt: true
  keep_last_ckpts: 5
  model_dir: experiments/outputs/SingleStream/wlasl300_s2g
  optimization:
    # NOTE(review): betas is kept as found although optimizer is sgd; confirm
    # whether the consumer still reads it for this optimizer.
    betas:
      - 0.9
      - 0.998
    learning_rate:
      default: 0.002
    # NOTE(review): momentum could also sort under learning_rate; it is placed
    # beside optimizer as an optimizer-level setting. Confirm with the consumer.
    momentum: 0.9
    optimizer: sgd
    scheduler: cosineannealing
    t_max: 50
    weight_decay: 0.001
  overwrite: false
  shuffle: true
  total_epoch: 50
  validation:
    cfg:
      recognition:
        beam_size: 2
    # NOTE(review): freq/unit are placed on validation, not inside recognition;
    # the flattened source allows either. Confirm with the consumer.
    freq: 1
    unit: epoch