sanchit-gandhi (HF staff) committed
Commit 572827c
Parent: 6f60663
Files changed (1):
  1. wer-sweep.yaml +9 -48
wer-sweep.yaml CHANGED
@@ -13,53 +13,24 @@ metric:
   goal: minimize
   name: eval/wer
 parameters:
-  activation_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   dataset_cache_dir:
     value: /home/sanchitgandhi/cache/huggingface/datasets
   dataset_config_name:
     value: clean
   dataset_name:
     value: librispeech_asr
-  decoder_activation_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  decoder_attention_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  decoder_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   eval_split_name:
     value: validation
   eval_steps:
     value: 500
-  feat_proj_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   generation_max_length:
     value: 40
   generation_num_beams:
     value: 1
   gradient_accumulation_steps:
-    values:
-      - 2
-      - 4
-      - 8
-  hidden_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  layerdrop:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
+    value: 1
+  gradient_checkpointing:
+    value: True
   learning_rate:
     distribution: log_uniform
     max: -6.9
@@ -67,31 +38,21 @@ parameters:
   length_column_name:
     value: input_length
   logging_steps:
-    value: 10
+    value: 25
   max_duration_in_seconds:
-    value: 10
-  max_grad_norm:
-    distribution: log_uniform
-    max: 0.0
-    min: -2.3
+    value: 20
   max_target_length:
-    value: 64
-  mixed_precision:
-    values:
-      - True
-      - False
+    value: 128
   model_name_or_path:
     value: ./
   num_train_epochs:
-    value: 10
+    value: 3
   output_dir:
     value: ./output_dir
   per_device_eval_batch_size:
     value: 2
   per_device_train_batch_size:
-    values:
-      - 1
-      - 2
+    value: 1
   preprocessing_num_workers:
     value: 16
   text_column_name:
@@ -101,4 +62,4 @@ parameters:
   warmup_steps:
     value: 500
 program: run_flax_speech_recognition_seq2seq.py
-project: flax-wav2vec2-2-bart-large
+project: flax-wav2vec2-2-bart-large-checkpointing-scan
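In plain terms, the commit narrows the sweep: the dropout, layerdrop, max_grad_norm and mixed_precision search dimensions are removed, gradient_accumulation_steps and per_device_train_batch_size are pinned to single values, gradient_checkpointing is switched on, several fixed settings (logging_steps, max_duration_in_seconds, max_target_length, num_train_epochs) are updated, and runs now log to the flax-wav2vec2-2-bart-large-checkpointing-scan project. Among the settings visible in the diff, learning_rate is the only hyperparameter still being searched.

For reading the remaining bounds: W&B's log_uniform distribution samples a value whose natural logarithm is uniform between min and max, so the YAML bounds map back to ordinary values via exp(). The snippet below is a quick sanity check of that interpretation and is not part of the commit; the exact sampling semantics are an assumption based on the W&B sweep documentation.

import math

# Assumption: log_uniform draws X with ln(X) ~ Uniform(min, max),
# so the effective search range is [exp(min), exp(max)].
def implied_range(log_min: float, log_max: float) -> tuple[float, float]:
    return math.exp(log_min), math.exp(log_max)

print(math.exp(-6.9))             # ~1.0e-3: the learning_rate upper bound kept by this commit
print(implied_range(-3.4, -1.2))  # ~0.033 to ~0.30: the dropout range removed by this commit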
 
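The commit only touches the sweep configuration; as a rough illustration of how a file like this is typically consumed, here is a minimal sketch using the W&B Python API. The file name, entity and exact workflow are assumptions for illustration and are not part of the commit (the equivalent CLI flow is wandb sweep wer-sweep.yaml followed by wandb agent <entity>/<project>/<sweep_id>).

import yaml
import wandb

# Load the sweep definition checked in by this commit (path is assumed).
with open("wer-sweep.yaml") as f:
    sweep_config = yaml.safe_load(f)

# Register the sweep; after this change the project key in the YAML is
# flax-wav2vec2-2-bart-large-checkpointing-scan.
sweep_id = wandb.sweep(sweep=sweep_config, project=sweep_config["project"])
print(f"created sweep {sweep_id}")

# An agent then repeatedly samples a parameter set from the sweep and launches
# run_flax_speech_recognition_seq2seq.py with the chosen arguments,
# e.g. from a shell: wandb agent <entity>/<project>/<sweep_id>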