sanchit-gandhi (HF staff) committed
Commit 572827c
Parent: 6f60663
Files changed (1):
  1. wer-sweep.yaml +9 -48
wer-sweep.yaml CHANGED
@@ -13,53 +13,24 @@ metric:
   goal: minimize
   name: eval/wer
 parameters:
-  activation_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   dataset_cache_dir:
     value: /home/sanchitgandhi/cache/huggingface/datasets
   dataset_config_name:
     value: clean
   dataset_name:
     value: librispeech_asr
-  decoder_activation_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  decoder_attention_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  decoder_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   eval_split_name:
     value: validation
   eval_steps:
     value: 500
-  feat_proj_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
   generation_max_length:
     value: 40
   generation_num_beams:
     value: 1
   gradient_accumulation_steps:
-    values:
-      - 2
-      - 4
-      - 8
-  hidden_dropout:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
-  layerdrop:
-    distribution: log_uniform
-    max: -1.2
-    min: -3.4
+    value: 1
+  gradient_checkpointing:
+    value: True
   learning_rate:
     distribution: log_uniform
     max: -6.9
@@ -67,31 +38,21 @@ parameters:
   length_column_name:
     value: input_length
   logging_steps:
-    value: 10
+    value: 25
   max_duration_in_seconds:
-    value: 10
-  max_grad_norm:
-    distribution: log_uniform
-    max: 0.0
-    min: -2.3
+    value: 20
   max_target_length:
-    value: 64
-  mixed_precision:
-    values:
-      - True
-      - False
+    value: 128
   model_name_or_path:
     value: ./
   num_train_epochs:
-    value: 10
+    value: 3
   output_dir:
     value: ./output_dir
   per_device_eval_batch_size:
     value: 2
   per_device_train_batch_size:
-    values:
-      - 1
-      - 2
+    value: 1
   preprocessing_num_workers:
     value: 16
   text_column_name:
@@ -101,4 +62,4 @@ parameters:
   warmup_steps:
     value: 500
 program: run_flax_speech_recognition_seq2seq.py
-project: flax-wav2vec2-2-bart-large
+project: flax-wav2vec2-2-bart-large-checkpointing-scan
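In plain terms, the commit narrows the sweep: the dropout, layerdrop, max_grad_norm and mixed_precision search dimensions are removed, gradient_accumulation_steps and per_device_train_batch_size are pinned to single values, gradient_checkpointing is switched on, several fixed settings (logging_steps, max_duration_in_seconds, max_target_length, num_train_epochs) are updated, and runs now log to the flax-wav2vec2-2-bart-large-checkpointing-scan project. Among the settings visible in the diff, learning_rate is the only hyperparameter still being searched.

For reading the remaining bounds: W&B's log_uniform distribution samples a value whose natural logarithm is uniform between min and max, so the YAML bounds map back to ordinary values via exp(). The snippet below is a quick sanity check of that interpretation and is not part of the commit; the exact sampling semantics are an assumption based on the W&B sweep documentation.

import math

# Assumption: log_uniform draws X with ln(X) ~ Uniform(min, max),
# so the effective search range is [exp(min), exp(max)].
def implied_range(log_min: float, log_max: float) -> tuple[float, float]:
    return math.exp(log_min), math.exp(log_max)

print(math.exp(-6.9))             # ~1.0e-3: the learning_rate upper bound kept by this commit
print(implied_range(-3.4, -1.2))  # ~0.033 to ~0.30: the dropout range removed by this commit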
 
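The commit only touches the sweep configuration; as a rough illustration of how a file like this is typically consumed, here is a minimal sketch using the W&B Python API. The file name, entity and exact workflow are assumptions for illustration and are not part of the commit (the equivalent CLI flow is wandb sweep wer-sweep.yaml followed by wandb agent <entity>/<project>/<sweep_id>).

import yaml
import wandb

# Load the sweep definition checked in by this commit (path is assumed).
with open("wer-sweep.yaml") as f:
    sweep_config = yaml.safe_load(f)

# Register the sweep; after this change the project key in the YAML is
# flax-wav2vec2-2-bart-large-checkpointing-scan.
sweep_id = wandb.sweep(sweep=sweep_config, project=sweep_config["project"])
print(f"created sweep {sweep_id}")

# An agent then repeatedly samples a parameter set from the sweep and launches
# run_flax_speech_recognition_seq2seq.py with the chosen arguments,
# e.g. from a shell: wandb agent <entity>/<project>/<sweep_id>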