Commit
•
572827c
1
Parent(s):
6f60663
up
Browse files- wer-sweep.yaml +9 -48
wer-sweep.yaml
CHANGED
@@ -13,53 +13,24 @@ metric:
|
|
13 |
goal: minimize
|
14 |
name: eval/wer
|
15 |
parameters:
|
16 |
-
activation_dropout:
|
17 |
-
distribution: log_uniform
|
18 |
-
max: -1.2
|
19 |
-
min: -3.4
|
20 |
dataset_cache_dir:
|
21 |
value: /home/sanchitgandhi/cache/huggingface/datasets
|
22 |
dataset_config_name:
|
23 |
value: clean
|
24 |
dataset_name:
|
25 |
value: librispeech_asr
|
26 |
-
decoder_activation_dropout:
|
27 |
-
distribution: log_uniform
|
28 |
-
max: -1.2
|
29 |
-
min: -3.4
|
30 |
-
decoder_attention_dropout:
|
31 |
-
distribution: log_uniform
|
32 |
-
max: -1.2
|
33 |
-
min: -3.4
|
34 |
-
decoder_dropout:
|
35 |
-
distribution: log_uniform
|
36 |
-
max: -1.2
|
37 |
-
min: -3.4
|
38 |
eval_split_name:
|
39 |
value: validation
|
40 |
eval_steps:
|
41 |
value: 500
|
42 |
-
feat_proj_dropout:
|
43 |
-
distribution: log_uniform
|
44 |
-
max: -1.2
|
45 |
-
min: -3.4
|
46 |
generation_max_length:
|
47 |
value: 40
|
48 |
generation_num_beams:
|
49 |
value: 1
|
50 |
gradient_accumulation_steps:
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
- 8
|
55 |
-
hidden_dropout:
|
56 |
-
distribution: log_uniform
|
57 |
-
max: -1.2
|
58 |
-
min: -3.4
|
59 |
-
layerdrop:
|
60 |
-
distribution: log_uniform
|
61 |
-
max: -1.2
|
62 |
-
min: -3.4
|
63 |
learning_rate:
|
64 |
distribution: log_uniform
|
65 |
max: -6.9
|
@@ -67,31 +38,21 @@ parameters:
|
|
67 |
length_column_name:
|
68 |
value: input_length
|
69 |
logging_steps:
|
70 |
-
value:
|
71 |
max_duration_in_seconds:
|
72 |
-
value:
|
73 |
-
max_grad_norm:
|
74 |
-
distribution: log_uniform
|
75 |
-
max: 0.0
|
76 |
-
min: -2.3
|
77 |
max_target_length:
|
78 |
-
value:
|
79 |
-
mixed_precision:
|
80 |
-
values:
|
81 |
-
- True
|
82 |
-
- False
|
83 |
model_name_or_path:
|
84 |
value: ./
|
85 |
num_train_epochs:
|
86 |
-
value:
|
87 |
output_dir:
|
88 |
value: ./output_dir
|
89 |
per_device_eval_batch_size:
|
90 |
value: 2
|
91 |
per_device_train_batch_size:
|
92 |
-
|
93 |
-
- 1
|
94 |
-
- 2
|
95 |
preprocessing_num_workers:
|
96 |
value: 16
|
97 |
text_column_name:
|
@@ -101,4 +62,4 @@ parameters:
|
|
101 |
warmup_steps:
|
102 |
value: 500
|
103 |
program: run_flax_speech_recognition_seq2seq.py
|
104 |
-
project: flax-wav2vec2-2-bart-large
|
|
|
13 |
goal: minimize
|
14 |
name: eval/wer
|
15 |
parameters:
|
|
|
|
|
|
|
|
|
16 |
dataset_cache_dir:
|
17 |
value: /home/sanchitgandhi/cache/huggingface/datasets
|
18 |
dataset_config_name:
|
19 |
value: clean
|
20 |
dataset_name:
|
21 |
value: librispeech_asr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
eval_split_name:
|
23 |
value: validation
|
24 |
eval_steps:
|
25 |
value: 500
|
|
|
|
|
|
|
|
|
26 |
generation_max_length:
|
27 |
value: 40
|
28 |
generation_num_beams:
|
29 |
value: 1
|
30 |
gradient_accumulation_steps:
|
31 |
+
value: 1
|
32 |
+
gradient_checkpointing:
|
33 |
+
value: True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
learning_rate:
|
35 |
distribution: log_uniform
|
36 |
max: -6.9
|
|
|
38 |
length_column_name:
|
39 |
value: input_length
|
40 |
logging_steps:
|
41 |
+
value: 25
|
42 |
max_duration_in_seconds:
|
43 |
+
value: 20
|
|
|
|
|
|
|
|
|
44 |
max_target_length:
|
45 |
+
value: 128
|
|
|
|
|
|
|
|
|
46 |
model_name_or_path:
|
47 |
value: ./
|
48 |
num_train_epochs:
|
49 |
+
value: 3
|
50 |
output_dir:
|
51 |
value: ./output_dir
|
52 |
per_device_eval_batch_size:
|
53 |
value: 2
|
54 |
per_device_train_batch_size:
|
55 |
+
value: 1
|
|
|
|
|
56 |
preprocessing_num_workers:
|
57 |
value: 16
|
58 |
text_column_name:
|
|
|
62 |
warmup_steps:
|
63 |
value: 500
|
64 |
program: run_flax_speech_recognition_seq2seq.py
|
65 |
+
project: flax-wav2vec2-2-bart-large-checkpointing-scan
|