vsltranslation
/

SingleStream

tanthinhdt committed on Mar 4

Commit

7382540

•

1 Parent(s): f1b22b0

Upload vsl_s2g-2024-03-04_08-52-53/vsl_s2g.yaml with huggingface_hub

Files changed (1) hide show

vsl_s2g-2024-03-04_08-52-53/vsl_s2g.yaml ADDED Viewed

+task: S2G  # presumably sign-to-gloss recognition (model section defines a GlossTokenizer) - confirm
+data:  # dataset selection, text handling and video transform settings
+  name: vsl
+  subset: rgb_videos
+  num_proc: 24  # presumably the number of worker processes for dataset loading - confirm
+  cache_dir: data/external/huggingface  # relative path; resolved against the working directory
+  input_streams:  # single entry here: rgb only
+    - rgb
+  level: word  # word or char
+  txt_lowercase: true
+  max_sent_length: 400  # NOTE(review): unit (tokens vs. frames) is not visible here - confirm
+  transform_cfg:
+    rand_crop_size: 224  # same value as cent_crop_size and scale_size below
+    rand_crop_threshold: 0.7
+    rand_crop_bottom_area: 0.7
+    rand_crop_aspect_ratio_min: 0.75
+    rand_crop_aspect_ratio_max: 1.3
+    cent_crop_size: 224
+    scale_size: 224
+    color_jitter_threshold: 0.2
+    temporal_augmentation:  # presumably min/max temporal rescaling factors - confirm
+      tmin: 0.5
+      tmax: 1.5
+testing:  # settings applied at test time
+  cfg:
+    recognition:
+      beam_size: 5  # larger than training.validation.cfg.recognition.beam_size (2)
+training:  # run control, periodic validation and optimizer settings
+  overwrite: false
+  model_dir: experiments/outputs/SingleStream/vsl_s2g  # relative output directory for this run
+  shuffle: true
+  batch_size: 4
+  total_epoch: 100
+  keep_last_ckpts: 5
+  validation:
+    unit: epoch
+    freq: 1  # presumably "every 1 <unit>", i.e. once per epoch - confirm
+    cfg:
+      recognition:
+        beam_size: 2  # smaller than testing.cfg.recognition.beam_size (5)
+  optimization:
+    learning_rate:
+      default: 4.0e-3
+    optimizer: sgd
+    weight_decay: 1.0e-3
+    momentum: 0.9
+    betas:  # NOTE(review): Adam-style parameter; confirm whether it is read when optimizer is sgd
+      - 0.9
+      - 0.998
+    scheduler: cosineannealing
+    t_max: 50  # NOTE(review): half of total_epoch (100) - confirm this is intended
+model:  # network architecture and pretrained asset paths
+  RecognitionNetwork:
+    GlossTokenizer:
+      gloss2id_file: pretrained/mBart_vi/gloss2ids.pkl  # NOTE(review): .pkl - load only from a trusted source
+    s3d:
+      pretrained_ckpt: pretrained/s3ds_actioncls  # relative path to the pretrained checkpoint
+      use_block: 4
+      freeze_block: 1
+    visual_head:
+      input_size: 832  # NOTE(review): presumably must match the s3d feature width at use_block 4 - confirm
+      hidden_size: 512
+      ff_size: 2048
+      pe: true  # canonical lowercase boolean, matching the other booleans in this file
+      ff_kernelsize:
+        - 3
+        - 3