jangmin committed on
Commit
853ba7b
1 Parent(s): ffc59f1

upload model files

Files changed (37)
  1. checkpoint-17550/config.json +50 -0
  2. checkpoint-17550/generation_config.json +221 -0
  3. checkpoint-17550/optimizer.pt +3 -0
  4. checkpoint-17550/preprocessor_config.json +14 -0
  5. checkpoint-17550/pytorch_model.bin +3 -0
  6. checkpoint-17550/rng_state.pth +3 -0
  7. checkpoint-17550/scaler.pt +3 -0
  8. checkpoint-17550/scheduler.pt +3 -0
  9. checkpoint-17550/trainer_state.json +556 -0
  10. checkpoint-17550/training_args.bin +3 -0
  11. checkpoint-26325/config.json +50 -0
  12. checkpoint-26325/generation_config.json +221 -0
  13. checkpoint-26325/optimizer.pt +3 -0
  14. checkpoint-26325/preprocessor_config.json +14 -0
  15. checkpoint-26325/pytorch_model.bin +3 -0
  16. checkpoint-26325/rng_state.pth +3 -0
  17. checkpoint-26325/scaler.pt +3 -0
  18. checkpoint-26325/scheduler.pt +3 -0
  19. checkpoint-26325/trainer_state.json +829 -0
  20. checkpoint-26325/training_args.bin +3 -0
  21. checkpoint-8775/config.json +50 -0
  22. checkpoint-8775/generation_config.json +221 -0
  23. checkpoint-8775/optimizer.pt +3 -0
  24. checkpoint-8775/preprocessor_config.json +14 -0
  25. checkpoint-8775/pytorch_model.bin +3 -0
  26. checkpoint-8775/rng_state.pth +3 -0
  27. checkpoint-8775/scaler.pt +3 -0
  28. checkpoint-8775/scheduler.pt +3 -0
  29. checkpoint-8775/trainer_state.json +283 -0
  30. checkpoint-8775/training_args.bin +3 -0
  31. config.json +50 -0
  32. generation_config.json +221 -0
  33. preprocessor_config.json +14 -0
  34. pytorch_model.bin +3 -0
  35. runs/Jun01_09-47-23_50c558b7cdc4/1685612863.6569612/events.out.tfevents.1685612863.50c558b7cdc4.1505.1 +3 -0
  36. runs/Jun01_09-47-23_50c558b7cdc4/events.out.tfevents.1685612863.50c558b7cdc4.1505.0 +3 -0
  37. training_args.bin +3 -0
checkpoint-17550/config.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "_name_or_path": "openai/whisper-medium",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1024,
17
+ "decoder_attention_heads": 16,
18
+ "decoder_ffn_dim": 4096,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 24,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 24,
27
+ "eos_token_id": 50257,
28
+ "forced_decoder_ids": null,
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "mask_feature_length": 10,
32
+ "mask_feature_min_masks": 0,
33
+ "mask_feature_prob": 0.0,
34
+ "mask_time_length": 10,
35
+ "mask_time_min_masks": 2,
36
+ "mask_time_prob": 0.05,
37
+ "max_length": 448,
38
+ "max_source_positions": 1500,
39
+ "max_target_positions": 448,
40
+ "model_type": "whisper",
41
+ "num_hidden_layers": 24,
42
+ "num_mel_bins": 80,
43
+ "pad_token_id": 50257,
44
+ "scale_embedding": false,
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.28.0.dev0",
47
+ "use_cache": false,
48
+ "use_weighted_layer_sum": false,
49
+ "vocab_size": 51865
50
+ }
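
The config above is a standard WhisperForConditionalGeneration configuration derived from openai/whisper-medium (24 encoder/decoder layers, d_model 1024, 80 mel bins, vocab_size 51865). As an illustration only, not part of this commit, a checkpoint directory like this one can typically be loaded with transformers; the local path below is a placeholder assumption, and the processor is taken from the base model because the checkpoint folders do not include tokenizer files.

```python
# Sketch (assumption): loading a checkpoint directory such as checkpoint-17550/
# with Hugging Face transformers. The local path is a placeholder.
from transformers import WhisperForConditionalGeneration, WhisperProcessor

checkpoint_dir = "./checkpoint-17550"  # hypothetical local copy of this checkpoint

# config.json and pytorch_model.bin are read from the checkpoint directory
model = WhisperForConditionalGeneration.from_pretrained(checkpoint_dir)

# tokenizer files are not part of this commit, so the processor comes from the base model
processor = WhisperProcessor.from_pretrained("openai/whisper-medium")

model.eval()
print(model.config.d_model, model.config.num_mel_bins)  # 1024, 80
```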
checkpoint-17550/generation_config.json ADDED
@@ -0,0 +1,221 @@
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50257
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50258,
8
+ "eos_token_id": 50257,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ null
13
+ ],
14
+ [
15
+ 2,
16
+ 50359
17
+ ]
18
+ ],
19
+ "is_multilingual": true,
20
+ "lang_to_id": {
21
+ "<|af|>": 50327,
22
+ "<|am|>": 50334,
23
+ "<|ar|>": 50272,
24
+ "<|as|>": 50350,
25
+ "<|az|>": 50304,
26
+ "<|ba|>": 50355,
27
+ "<|be|>": 50330,
28
+ "<|bg|>": 50292,
29
+ "<|bn|>": 50302,
30
+ "<|bo|>": 50347,
31
+ "<|br|>": 50309,
32
+ "<|bs|>": 50315,
33
+ "<|ca|>": 50270,
34
+ "<|cs|>": 50283,
35
+ "<|cy|>": 50297,
36
+ "<|da|>": 50285,
37
+ "<|de|>": 50261,
38
+ "<|el|>": 50281,
39
+ "<|en|>": 50259,
40
+ "<|es|>": 50262,
41
+ "<|et|>": 50307,
42
+ "<|eu|>": 50310,
43
+ "<|fa|>": 50300,
44
+ "<|fi|>": 50277,
45
+ "<|fo|>": 50338,
46
+ "<|fr|>": 50265,
47
+ "<|gl|>": 50319,
48
+ "<|gu|>": 50333,
49
+ "<|haw|>": 50352,
50
+ "<|ha|>": 50354,
51
+ "<|he|>": 50279,
52
+ "<|hi|>": 50276,
53
+ "<|hr|>": 50291,
54
+ "<|ht|>": 50339,
55
+ "<|hu|>": 50286,
56
+ "<|hy|>": 50312,
57
+ "<|id|>": 50275,
58
+ "<|is|>": 50311,
59
+ "<|it|>": 50274,
60
+ "<|ja|>": 50266,
61
+ "<|jw|>": 50356,
62
+ "<|ka|>": 50329,
63
+ "<|kk|>": 50316,
64
+ "<|km|>": 50323,
65
+ "<|kn|>": 50306,
66
+ "<|ko|>": 50264,
67
+ "<|la|>": 50294,
68
+ "<|lb|>": 50345,
69
+ "<|ln|>": 50353,
70
+ "<|lo|>": 50336,
71
+ "<|lt|>": 50293,
72
+ "<|lv|>": 50301,
73
+ "<|mg|>": 50349,
74
+ "<|mi|>": 50295,
75
+ "<|mk|>": 50308,
76
+ "<|ml|>": 50296,
77
+ "<|mn|>": 50314,
78
+ "<|mr|>": 50320,
79
+ "<|ms|>": 50282,
80
+ "<|mt|>": 50343,
81
+ "<|my|>": 50346,
82
+ "<|ne|>": 50313,
83
+ "<|nl|>": 50271,
84
+ "<|nn|>": 50342,
85
+ "<|no|>": 50288,
86
+ "<|oc|>": 50328,
87
+ "<|pa|>": 50321,
88
+ "<|pl|>": 50269,
89
+ "<|ps|>": 50340,
90
+ "<|pt|>": 50267,
91
+ "<|ro|>": 50284,
92
+ "<|ru|>": 50263,
93
+ "<|sa|>": 50344,
94
+ "<|sd|>": 50332,
95
+ "<|si|>": 50322,
96
+ "<|sk|>": 50298,
97
+ "<|sl|>": 50305,
98
+ "<|sn|>": 50324,
99
+ "<|so|>": 50326,
100
+ "<|sq|>": 50317,
101
+ "<|sr|>": 50303,
102
+ "<|su|>": 50357,
103
+ "<|sv|>": 50273,
104
+ "<|sw|>": 50318,
105
+ "<|ta|>": 50287,
106
+ "<|te|>": 50299,
107
+ "<|tg|>": 50331,
108
+ "<|th|>": 50289,
109
+ "<|tk|>": 50341,
110
+ "<|tl|>": 50348,
111
+ "<|tr|>": 50268,
112
+ "<|tt|>": 50351,
113
+ "<|uk|>": 50280,
114
+ "<|ur|>": 50290,
115
+ "<|uz|>": 50337,
116
+ "<|vi|>": 50278,
117
+ "<|yi|>": 50335,
118
+ "<|yo|>": 50325,
119
+ "<|zh|>": 50260
120
+ },
121
+ "max_initial_timestamp_index": 1,
122
+ "max_length": 448,
123
+ "no_timestamps_token_id": 50363,
124
+ "pad_token_id": 50257,
125
+ "return_timestamps": false,
126
+ "suppress_tokens": [
127
+ 1,
128
+ 2,
129
+ 7,
130
+ 8,
131
+ 9,
132
+ 10,
133
+ 14,
134
+ 25,
135
+ 26,
136
+ 27,
137
+ 28,
138
+ 29,
139
+ 31,
140
+ 58,
141
+ 59,
142
+ 60,
143
+ 61,
144
+ 62,
145
+ 63,
146
+ 90,
147
+ 91,
148
+ 92,
149
+ 93,
150
+ 359,
151
+ 503,
152
+ 522,
153
+ 542,
154
+ 873,
155
+ 893,
156
+ 902,
157
+ 918,
158
+ 922,
159
+ 931,
160
+ 1350,
161
+ 1853,
162
+ 1982,
163
+ 2460,
164
+ 2627,
165
+ 3246,
166
+ 3253,
167
+ 3268,
168
+ 3536,
169
+ 3846,
170
+ 3961,
171
+ 4183,
172
+ 4667,
173
+ 6585,
174
+ 6647,
175
+ 7273,
176
+ 9061,
177
+ 9383,
178
+ 10428,
179
+ 10929,
180
+ 11938,
181
+ 12033,
182
+ 12331,
183
+ 12562,
184
+ 13793,
185
+ 14157,
186
+ 14635,
187
+ 15265,
188
+ 15618,
189
+ 16553,
190
+ 16604,
191
+ 18362,
192
+ 18956,
193
+ 20075,
194
+ 21675,
195
+ 22520,
196
+ 26130,
197
+ 26161,
198
+ 26435,
199
+ 28279,
200
+ 29464,
201
+ 31650,
202
+ 32302,
203
+ 32470,
204
+ 36865,
205
+ 42863,
206
+ 47425,
207
+ 49870,
208
+ 50254,
209
+ 50258,
210
+ 50358,
211
+ 50359,
212
+ 50360,
213
+ 50361,
214
+ 50362
215
+ ],
216
+ "task_to_id": {
217
+ "transcribe": 50359,
218
+ "translate": 50358
219
+ },
220
+ "transformers_version": "4.28.0.dev0"
221
+ }
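
generation_config.json carries the multilingual token maps (lang_to_id, task_to_id), the suppressed-token list, and the default forced_decoder_ids. As a hedged illustration, not taken from this commit, pinning the language and task at inference time with transformers usually looks roughly like the sketch below; the Korean/transcribe choice is an assumption made only for the example.

```python
# Sketch (assumption): building forced decoder ids that mirror the
# lang_to_id / task_to_id maps stored in generation_config.json.
from transformers import WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-medium")

# Produces pairs such as [(1, <|ko|> id), (2, <|transcribe|> id), (3, <|notimestamps|> id)]
forced_ids = processor.get_decoder_prompt_ids(language="korean", task="transcribe")
print(forced_ids)

# At inference (model and input_features as in the earlier sketch):
# predicted_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
```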
checkpoint-17550/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fddd964872da6b774692472cb2845c06d37db92a702ce0e3d45505d73b358c50
3
+ size 6111428695
checkpoint-17550/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
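
preprocessor_config.json describes the WhisperFeatureExtractor: 30-second chunks at 16 kHz (480,000 samples), 80-dimensional log-mel features with a 400-point FFT and hop length 160, giving 3,000 frames per chunk. A minimal sketch of how such a preprocessor is typically applied, assuming a 16 kHz mono waveform held in a NumPy array (the silent placeholder audio is an assumption):

```python
# Sketch (assumption): turning a 16 kHz waveform into Whisper log-mel features
# using the settings recorded in preprocessor_config.json.
import numpy as np
from transformers import WhisperFeatureExtractor

feature_extractor = WhisperFeatureExtractor.from_pretrained("openai/whisper-medium")

waveform = np.zeros(16000 * 5, dtype=np.float32)  # placeholder: 5 s of silence at 16 kHz
inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")

# Padded to chunk_length (30 s): (batch, feature_size, nb_max_frames) = (1, 80, 3000)
print(inputs.input_features.shape)
```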
checkpoint-17550/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72bdd2fbdc26ec63fc7590a91f72153ae9ffe57903cd8680492a964528cde1cd
3
+ size 3055754841
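
Like the other binary artifacts in this commit, pytorch_model.bin is stored as a Git LFS pointer: the repository tracks only the sha256 oid and the byte size, while the roughly 3.06 GB weight file lives in LFS storage. A small, self-contained sketch (standard library only; the local filename is an assumption) of checking a downloaded file against such a pointer:

```python
# Sketch: verify a locally downloaded file against the sha256/size in a Git LFS pointer.
import hashlib
import os

expected_sha256 = "72bdd2fbdc26ec63fc7590a91f72153ae9ffe57903cd8680492a964528cde1cd"
expected_size = 3055754841
path = "pytorch_model.bin"  # assumed local copy of the LFS object

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_sha256, "sha256 mismatch"
print("LFS object matches the pointer")
```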
checkpoint-17550/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cad02a949bde7c7cd5892608eebfb4156e51a091e1e1dfb29bd0570be71173
3
+ size 14511
checkpoint-17550/scaler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a607fd57d596582a1196e150246a8da0e98285bc5bf5d84b4e39cb5235b8a4
3
+ size 557
checkpoint-17550/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:192d70b48d743164ef8b157734c06f5b7be25c38053a034e4b75090e580e7b38
3
+ size 627
checkpoint-17550/trainer_state.json ADDED
@@ -0,0 +1,556 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 17550,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 9.962631077216398e-06,
13
+ "loss": 0.1958,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 9.886367969494757e-06,
19
+ "loss": 0.1228,
20
+ "step": 400
21
+ },
22
+ {
23
+ "epoch": 0.07,
24
+ "learning_rate": 9.810104861773118e-06,
25
+ "loss": 0.1051,
26
+ "step": 600
27
+ },
28
+ {
29
+ "epoch": 0.09,
30
+ "learning_rate": 9.733841754051478e-06,
31
+ "loss": 0.0977,
32
+ "step": 800
33
+ },
34
+ {
35
+ "epoch": 0.11,
36
+ "learning_rate": 9.657578646329838e-06,
37
+ "loss": 0.0891,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 0.14,
42
+ "learning_rate": 9.581315538608199e-06,
43
+ "loss": 0.0827,
44
+ "step": 1200
45
+ },
46
+ {
47
+ "epoch": 0.16,
48
+ "learning_rate": 9.50505243088656e-06,
49
+ "loss": 0.0822,
50
+ "step": 1400
51
+ },
52
+ {
53
+ "epoch": 0.18,
54
+ "learning_rate": 9.42878932316492e-06,
55
+ "loss": 0.0796,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 0.21,
60
+ "learning_rate": 9.352526215443281e-06,
61
+ "loss": 0.0777,
62
+ "step": 1800
63
+ },
64
+ {
65
+ "epoch": 0.23,
66
+ "learning_rate": 9.27626310772164e-06,
67
+ "loss": 0.0769,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 0.25,
72
+ "learning_rate": 9.200000000000002e-06,
73
+ "loss": 0.0736,
74
+ "step": 2200
75
+ },
76
+ {
77
+ "epoch": 0.27,
78
+ "learning_rate": 9.123736892278361e-06,
79
+ "loss": 0.0721,
80
+ "step": 2400
81
+ },
82
+ {
83
+ "epoch": 0.3,
84
+ "learning_rate": 9.047473784556723e-06,
85
+ "loss": 0.0748,
86
+ "step": 2600
87
+ },
88
+ {
89
+ "epoch": 0.32,
90
+ "learning_rate": 8.971210676835082e-06,
91
+ "loss": 0.0736,
92
+ "step": 2800
93
+ },
94
+ {
95
+ "epoch": 0.34,
96
+ "learning_rate": 8.894947569113442e-06,
97
+ "loss": 0.0714,
98
+ "step": 3000
99
+ },
100
+ {
101
+ "epoch": 0.36,
102
+ "learning_rate": 8.818684461391803e-06,
103
+ "loss": 0.0718,
104
+ "step": 3200
105
+ },
106
+ {
107
+ "epoch": 0.39,
108
+ "learning_rate": 8.742421353670163e-06,
109
+ "loss": 0.0729,
110
+ "step": 3400
111
+ },
112
+ {
113
+ "epoch": 0.41,
114
+ "learning_rate": 8.666158245948522e-06,
115
+ "loss": 0.0711,
116
+ "step": 3600
117
+ },
118
+ {
119
+ "epoch": 0.43,
120
+ "learning_rate": 8.589895138226883e-06,
121
+ "loss": 0.0663,
122
+ "step": 3800
123
+ },
124
+ {
125
+ "epoch": 0.46,
126
+ "learning_rate": 8.513632030505243e-06,
127
+ "loss": 0.0675,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 0.48,
132
+ "learning_rate": 8.437368922783604e-06,
133
+ "loss": 0.0689,
134
+ "step": 4200
135
+ },
136
+ {
137
+ "epoch": 0.5,
138
+ "learning_rate": 8.361105815061964e-06,
139
+ "loss": 0.0685,
140
+ "step": 4400
141
+ },
142
+ {
143
+ "epoch": 0.52,
144
+ "learning_rate": 8.284842707340325e-06,
145
+ "loss": 0.0663,
146
+ "step": 4600
147
+ },
148
+ {
149
+ "epoch": 0.55,
150
+ "learning_rate": 8.208579599618686e-06,
151
+ "loss": 0.0651,
152
+ "step": 4800
153
+ },
154
+ {
155
+ "epoch": 0.57,
156
+ "learning_rate": 8.132316491897046e-06,
157
+ "loss": 0.0652,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 0.59,
162
+ "learning_rate": 8.056053384175407e-06,
163
+ "loss": 0.0653,
164
+ "step": 5200
165
+ },
166
+ {
167
+ "epoch": 0.62,
168
+ "learning_rate": 7.979790276453767e-06,
169
+ "loss": 0.0656,
170
+ "step": 5400
171
+ },
172
+ {
173
+ "epoch": 0.64,
174
+ "learning_rate": 7.903527168732126e-06,
175
+ "loss": 0.0635,
176
+ "step": 5600
177
+ },
178
+ {
179
+ "epoch": 0.66,
180
+ "learning_rate": 7.827264061010486e-06,
181
+ "loss": 0.061,
182
+ "step": 5800
183
+ },
184
+ {
185
+ "epoch": 0.68,
186
+ "learning_rate": 7.751000953288847e-06,
187
+ "loss": 0.0637,
188
+ "step": 6000
189
+ },
190
+ {
191
+ "epoch": 0.71,
192
+ "learning_rate": 7.674737845567207e-06,
193
+ "loss": 0.0621,
194
+ "step": 6200
195
+ },
196
+ {
197
+ "epoch": 0.73,
198
+ "learning_rate": 7.598856053384176e-06,
199
+ "loss": 0.0657,
200
+ "step": 6400
201
+ },
202
+ {
203
+ "epoch": 0.75,
204
+ "learning_rate": 7.522592945662537e-06,
205
+ "loss": 0.0618,
206
+ "step": 6600
207
+ },
208
+ {
209
+ "epoch": 0.77,
210
+ "learning_rate": 7.446329837940897e-06,
211
+ "loss": 0.0617,
212
+ "step": 6800
213
+ },
214
+ {
215
+ "epoch": 0.8,
216
+ "learning_rate": 7.370066730219257e-06,
217
+ "loss": 0.0623,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 0.82,
222
+ "learning_rate": 7.293803622497617e-06,
223
+ "loss": 0.0581,
224
+ "step": 7200
225
+ },
226
+ {
227
+ "epoch": 0.84,
228
+ "learning_rate": 7.217540514775978e-06,
229
+ "loss": 0.0612,
230
+ "step": 7400
231
+ },
232
+ {
233
+ "epoch": 0.87,
234
+ "learning_rate": 7.141277407054338e-06,
235
+ "loss": 0.0584,
236
+ "step": 7600
237
+ },
238
+ {
239
+ "epoch": 0.89,
240
+ "learning_rate": 7.065014299332699e-06,
241
+ "loss": 0.0595,
242
+ "step": 7800
243
+ },
244
+ {
245
+ "epoch": 0.91,
246
+ "learning_rate": 6.9887511916110584e-06,
247
+ "loss": 0.0605,
248
+ "step": 8000
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 6.912488083889419e-06,
253
+ "loss": 0.0584,
254
+ "step": 8200
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 6.836606291706388e-06,
259
+ "loss": 0.06,
260
+ "step": 8400
261
+ },
262
+ {
263
+ "epoch": 0.98,
264
+ "learning_rate": 6.760343183984748e-06,
265
+ "loss": 0.0589,
266
+ "step": 8600
267
+ },
268
+ {
269
+ "epoch": 1.0,
270
+ "eval_loss": 0.12254729866981506,
271
+ "eval_runtime": 5547.0518,
272
+ "eval_samples_per_second": 6.063,
273
+ "eval_steps_per_second": 0.189,
274
+ "eval_wer": 0.06041116269525739,
275
+ "step": 8775
276
+ },
277
+ {
278
+ "epoch": 1.0,
279
+ "learning_rate": 6.684080076263109e-06,
280
+ "loss": 0.0547,
281
+ "step": 8800
282
+ },
283
+ {
284
+ "epoch": 1.03,
285
+ "learning_rate": 6.6078169685414686e-06,
286
+ "loss": 0.0363,
287
+ "step": 9000
288
+ },
289
+ {
290
+ "epoch": 1.05,
291
+ "learning_rate": 6.531553860819829e-06,
292
+ "loss": 0.0357,
293
+ "step": 9200
294
+ },
295
+ {
296
+ "epoch": 1.07,
297
+ "learning_rate": 6.4552907530981886e-06,
298
+ "loss": 0.0375,
299
+ "step": 9400
300
+ },
301
+ {
302
+ "epoch": 1.09,
303
+ "learning_rate": 6.37902764537655e-06,
304
+ "loss": 0.0347,
305
+ "step": 9600
306
+ },
307
+ {
308
+ "epoch": 1.12,
309
+ "learning_rate": 6.302764537654909e-06,
310
+ "loss": 0.0347,
311
+ "step": 9800
312
+ },
313
+ {
314
+ "epoch": 1.14,
315
+ "learning_rate": 6.226501429933271e-06,
316
+ "loss": 0.0342,
317
+ "step": 10000
318
+ },
319
+ {
320
+ "epoch": 1.16,
321
+ "learning_rate": 6.15023832221163e-06,
322
+ "loss": 0.0358,
323
+ "step": 10200
324
+ },
325
+ {
326
+ "epoch": 1.19,
327
+ "learning_rate": 6.073975214489991e-06,
328
+ "loss": 0.0344,
329
+ "step": 10400
330
+ },
331
+ {
332
+ "epoch": 1.21,
333
+ "learning_rate": 5.997712106768351e-06,
334
+ "loss": 0.0333,
335
+ "step": 10600
336
+ },
337
+ {
338
+ "epoch": 1.23,
339
+ "learning_rate": 5.9218303145853195e-06,
340
+ "loss": 0.0357,
341
+ "step": 10800
342
+ },
343
+ {
344
+ "epoch": 1.25,
345
+ "learning_rate": 5.845567206863681e-06,
346
+ "loss": 0.0355,
347
+ "step": 11000
348
+ },
349
+ {
350
+ "epoch": 1.28,
351
+ "learning_rate": 5.76930409914204e-06,
352
+ "loss": 0.0348,
353
+ "step": 11200
354
+ },
355
+ {
356
+ "epoch": 1.3,
357
+ "learning_rate": 5.693040991420401e-06,
358
+ "loss": 0.0339,
359
+ "step": 11400
360
+ },
361
+ {
362
+ "epoch": 1.32,
363
+ "learning_rate": 5.61677788369876e-06,
364
+ "loss": 0.0344,
365
+ "step": 11600
366
+ },
367
+ {
368
+ "epoch": 1.34,
369
+ "learning_rate": 5.540514775977122e-06,
370
+ "loss": 0.0345,
371
+ "step": 11800
372
+ },
373
+ {
374
+ "epoch": 1.37,
375
+ "learning_rate": 5.464251668255481e-06,
376
+ "loss": 0.0329,
377
+ "step": 12000
378
+ },
379
+ {
380
+ "epoch": 1.39,
381
+ "learning_rate": 5.3879885605338424e-06,
382
+ "loss": 0.0334,
383
+ "step": 12200
384
+ },
385
+ {
386
+ "epoch": 1.41,
387
+ "learning_rate": 5.311725452812202e-06,
388
+ "loss": 0.0355,
389
+ "step": 12400
390
+ },
391
+ {
392
+ "epoch": 1.44,
393
+ "learning_rate": 5.235462345090562e-06,
394
+ "loss": 0.0338,
395
+ "step": 12600
396
+ },
397
+ {
398
+ "epoch": 1.46,
399
+ "learning_rate": 5.159199237368924e-06,
400
+ "loss": 0.0339,
401
+ "step": 12800
402
+ },
403
+ {
404
+ "epoch": 1.48,
405
+ "learning_rate": 5.082936129647283e-06,
406
+ "loss": 0.0357,
407
+ "step": 13000
408
+ },
409
+ {
410
+ "epoch": 1.5,
411
+ "learning_rate": 5.0066730219256445e-06,
412
+ "loss": 0.0323,
413
+ "step": 13200
414
+ },
415
+ {
416
+ "epoch": 1.53,
417
+ "learning_rate": 4.930409914204004e-06,
418
+ "loss": 0.034,
419
+ "step": 13400
420
+ },
421
+ {
422
+ "epoch": 1.55,
423
+ "learning_rate": 4.8541468064823645e-06,
424
+ "loss": 0.0333,
425
+ "step": 13600
426
+ },
427
+ {
428
+ "epoch": 1.57,
429
+ "learning_rate": 4.777883698760725e-06,
430
+ "loss": 0.034,
431
+ "step": 13800
432
+ },
433
+ {
434
+ "epoch": 1.6,
435
+ "learning_rate": 4.701620591039085e-06,
436
+ "loss": 0.033,
437
+ "step": 14000
438
+ },
439
+ {
440
+ "epoch": 1.62,
441
+ "learning_rate": 4.625357483317446e-06,
442
+ "loss": 0.0359,
443
+ "step": 14200
444
+ },
445
+ {
446
+ "epoch": 1.64,
447
+ "learning_rate": 4.549475691134414e-06,
448
+ "loss": 0.0339,
449
+ "step": 14400
450
+ },
451
+ {
452
+ "epoch": 1.66,
453
+ "learning_rate": 4.473212583412775e-06,
454
+ "loss": 0.0343,
455
+ "step": 14600
456
+ },
457
+ {
458
+ "epoch": 1.69,
459
+ "learning_rate": 4.396949475691134e-06,
460
+ "loss": 0.0335,
461
+ "step": 14800
462
+ },
463
+ {
464
+ "epoch": 1.71,
465
+ "learning_rate": 4.320686367969495e-06,
466
+ "loss": 0.0327,
467
+ "step": 15000
468
+ },
469
+ {
470
+ "epoch": 1.73,
471
+ "learning_rate": 4.244423260247855e-06,
472
+ "loss": 0.033,
473
+ "step": 15200
474
+ },
475
+ {
476
+ "epoch": 1.75,
477
+ "learning_rate": 4.1681601525262155e-06,
478
+ "loss": 0.0339,
479
+ "step": 15400
480
+ },
481
+ {
482
+ "epoch": 1.78,
483
+ "learning_rate": 4.091897044804577e-06,
484
+ "loss": 0.0334,
485
+ "step": 15600
486
+ },
487
+ {
488
+ "epoch": 1.8,
489
+ "learning_rate": 4.015633937082936e-06,
490
+ "loss": 0.0306,
491
+ "step": 15800
492
+ },
493
+ {
494
+ "epoch": 1.82,
495
+ "learning_rate": 3.939370829361297e-06,
496
+ "loss": 0.0315,
497
+ "step": 16000
498
+ },
499
+ {
500
+ "epoch": 1.85,
501
+ "learning_rate": 3.863107721639657e-06,
502
+ "loss": 0.0326,
503
+ "step": 16200
504
+ },
505
+ {
506
+ "epoch": 1.87,
507
+ "learning_rate": 3.7868446139180175e-06,
508
+ "loss": 0.0309,
509
+ "step": 16400
510
+ },
511
+ {
512
+ "epoch": 1.89,
513
+ "learning_rate": 3.710581506196378e-06,
514
+ "loss": 0.0321,
515
+ "step": 16600
516
+ },
517
+ {
518
+ "epoch": 1.91,
519
+ "learning_rate": 3.6343183984747384e-06,
520
+ "loss": 0.033,
521
+ "step": 16800
522
+ },
523
+ {
524
+ "epoch": 1.94,
525
+ "learning_rate": 3.5580552907530984e-06,
526
+ "loss": 0.0338,
527
+ "step": 17000
528
+ },
529
+ {
530
+ "epoch": 1.96,
531
+ "learning_rate": 3.4817921830314588e-06,
532
+ "loss": 0.0319,
533
+ "step": 17200
534
+ },
535
+ {
536
+ "epoch": 1.98,
537
+ "learning_rate": 3.405529075309819e-06,
538
+ "loss": 0.031,
539
+ "step": 17400
540
+ },
541
+ {
542
+ "epoch": 2.0,
543
+ "eval_loss": 0.11860840022563934,
544
+ "eval_runtime": 5539.7182,
545
+ "eval_samples_per_second": 6.071,
546
+ "eval_steps_per_second": 0.19,
547
+ "eval_wer": 0.056729379878763,
548
+ "step": 17550
549
+ }
550
+ ],
551
+ "max_steps": 26325,
552
+ "num_train_epochs": 3,
553
+ "total_flos": 4.298318392216781e+20,
554
+ "trial_name": null,
555
+ "trial_params": null
556
+ }
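
trainer_state.json records the Trainer's log history: training loss and learning rate every 200 steps, plus evaluation loss and WER at each epoch boundary (eval_wer ≈ 0.0604 after epoch 1 and ≈ 0.0567 after epoch 2 for this checkpoint). A minimal sketch, assuming the file is available locally, of pulling those evaluation rows back out:

```python
# Sketch: extract the per-epoch evaluation entries from a trainer_state.json.
import json

with open("checkpoint-17550/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

eval_rows = [entry for entry in state["log_history"] if "eval_wer" in entry]
for row in eval_rows:
    print(f"epoch {row['epoch']}: eval_loss={row['eval_loss']:.4f}, "
          f"eval_wer={row['eval_wer'] * 100:.2f}%")
# e.g. epoch 1.0: eval_loss=0.1225, eval_wer=6.04%
#      epoch 2.0: eval_loss=0.1186, eval_wer=5.67%
```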
checkpoint-17550/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4ebd7db27b4b10b645b0710064ae069d9dfa7caee40828967e6a896c7c4a9d
3
+ size 3771
checkpoint-26325/config.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "_name_or_path": "openai/whisper-medium",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1024,
17
+ "decoder_attention_heads": 16,
18
+ "decoder_ffn_dim": 4096,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 24,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 24,
27
+ "eos_token_id": 50257,
28
+ "forced_decoder_ids": null,
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "mask_feature_length": 10,
32
+ "mask_feature_min_masks": 0,
33
+ "mask_feature_prob": 0.0,
34
+ "mask_time_length": 10,
35
+ "mask_time_min_masks": 2,
36
+ "mask_time_prob": 0.05,
37
+ "max_length": 448,
38
+ "max_source_positions": 1500,
39
+ "max_target_positions": 448,
40
+ "model_type": "whisper",
41
+ "num_hidden_layers": 24,
42
+ "num_mel_bins": 80,
43
+ "pad_token_id": 50257,
44
+ "scale_embedding": false,
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.28.0.dev0",
47
+ "use_cache": false,
48
+ "use_weighted_layer_sum": false,
49
+ "vocab_size": 51865
50
+ }
checkpoint-26325/generation_config.json ADDED
@@ -0,0 +1,221 @@
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50257
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50258,
8
+ "eos_token_id": 50257,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ null
13
+ ],
14
+ [
15
+ 2,
16
+ 50359
17
+ ]
18
+ ],
19
+ "is_multilingual": true,
20
+ "lang_to_id": {
21
+ "<|af|>": 50327,
22
+ "<|am|>": 50334,
23
+ "<|ar|>": 50272,
24
+ "<|as|>": 50350,
25
+ "<|az|>": 50304,
26
+ "<|ba|>": 50355,
27
+ "<|be|>": 50330,
28
+ "<|bg|>": 50292,
29
+ "<|bn|>": 50302,
30
+ "<|bo|>": 50347,
31
+ "<|br|>": 50309,
32
+ "<|bs|>": 50315,
33
+ "<|ca|>": 50270,
34
+ "<|cs|>": 50283,
35
+ "<|cy|>": 50297,
36
+ "<|da|>": 50285,
37
+ "<|de|>": 50261,
38
+ "<|el|>": 50281,
39
+ "<|en|>": 50259,
40
+ "<|es|>": 50262,
41
+ "<|et|>": 50307,
42
+ "<|eu|>": 50310,
43
+ "<|fa|>": 50300,
44
+ "<|fi|>": 50277,
45
+ "<|fo|>": 50338,
46
+ "<|fr|>": 50265,
47
+ "<|gl|>": 50319,
48
+ "<|gu|>": 50333,
49
+ "<|haw|>": 50352,
50
+ "<|ha|>": 50354,
51
+ "<|he|>": 50279,
52
+ "<|hi|>": 50276,
53
+ "<|hr|>": 50291,
54
+ "<|ht|>": 50339,
55
+ "<|hu|>": 50286,
56
+ "<|hy|>": 50312,
57
+ "<|id|>": 50275,
58
+ "<|is|>": 50311,
59
+ "<|it|>": 50274,
60
+ "<|ja|>": 50266,
61
+ "<|jw|>": 50356,
62
+ "<|ka|>": 50329,
63
+ "<|kk|>": 50316,
64
+ "<|km|>": 50323,
65
+ "<|kn|>": 50306,
66
+ "<|ko|>": 50264,
67
+ "<|la|>": 50294,
68
+ "<|lb|>": 50345,
69
+ "<|ln|>": 50353,
70
+ "<|lo|>": 50336,
71
+ "<|lt|>": 50293,
72
+ "<|lv|>": 50301,
73
+ "<|mg|>": 50349,
74
+ "<|mi|>": 50295,
75
+ "<|mk|>": 50308,
76
+ "<|ml|>": 50296,
77
+ "<|mn|>": 50314,
78
+ "<|mr|>": 50320,
79
+ "<|ms|>": 50282,
80
+ "<|mt|>": 50343,
81
+ "<|my|>": 50346,
82
+ "<|ne|>": 50313,
83
+ "<|nl|>": 50271,
84
+ "<|nn|>": 50342,
85
+ "<|no|>": 50288,
86
+ "<|oc|>": 50328,
87
+ "<|pa|>": 50321,
88
+ "<|pl|>": 50269,
89
+ "<|ps|>": 50340,
90
+ "<|pt|>": 50267,
91
+ "<|ro|>": 50284,
92
+ "<|ru|>": 50263,
93
+ "<|sa|>": 50344,
94
+ "<|sd|>": 50332,
95
+ "<|si|>": 50322,
96
+ "<|sk|>": 50298,
97
+ "<|sl|>": 50305,
98
+ "<|sn|>": 50324,
99
+ "<|so|>": 50326,
100
+ "<|sq|>": 50317,
101
+ "<|sr|>": 50303,
102
+ "<|su|>": 50357,
103
+ "<|sv|>": 50273,
104
+ "<|sw|>": 50318,
105
+ "<|ta|>": 50287,
106
+ "<|te|>": 50299,
107
+ "<|tg|>": 50331,
108
+ "<|th|>": 50289,
109
+ "<|tk|>": 50341,
110
+ "<|tl|>": 50348,
111
+ "<|tr|>": 50268,
112
+ "<|tt|>": 50351,
113
+ "<|uk|>": 50280,
114
+ "<|ur|>": 50290,
115
+ "<|uz|>": 50337,
116
+ "<|vi|>": 50278,
117
+ "<|yi|>": 50335,
118
+ "<|yo|>": 50325,
119
+ "<|zh|>": 50260
120
+ },
121
+ "max_initial_timestamp_index": 1,
122
+ "max_length": 448,
123
+ "no_timestamps_token_id": 50363,
124
+ "pad_token_id": 50257,
125
+ "return_timestamps": false,
126
+ "suppress_tokens": [
127
+ 1,
128
+ 2,
129
+ 7,
130
+ 8,
131
+ 9,
132
+ 10,
133
+ 14,
134
+ 25,
135
+ 26,
136
+ 27,
137
+ 28,
138
+ 29,
139
+ 31,
140
+ 58,
141
+ 59,
142
+ 60,
143
+ 61,
144
+ 62,
145
+ 63,
146
+ 90,
147
+ 91,
148
+ 92,
149
+ 93,
150
+ 359,
151
+ 503,
152
+ 522,
153
+ 542,
154
+ 873,
155
+ 893,
156
+ 902,
157
+ 918,
158
+ 922,
159
+ 931,
160
+ 1350,
161
+ 1853,
162
+ 1982,
163
+ 2460,
164
+ 2627,
165
+ 3246,
166
+ 3253,
167
+ 3268,
168
+ 3536,
169
+ 3846,
170
+ 3961,
171
+ 4183,
172
+ 4667,
173
+ 6585,
174
+ 6647,
175
+ 7273,
176
+ 9061,
177
+ 9383,
178
+ 10428,
179
+ 10929,
180
+ 11938,
181
+ 12033,
182
+ 12331,
183
+ 12562,
184
+ 13793,
185
+ 14157,
186
+ 14635,
187
+ 15265,
188
+ 15618,
189
+ 16553,
190
+ 16604,
191
+ 18362,
192
+ 18956,
193
+ 20075,
194
+ 21675,
195
+ 22520,
196
+ 26130,
197
+ 26161,
198
+ 26435,
199
+ 28279,
200
+ 29464,
201
+ 31650,
202
+ 32302,
203
+ 32470,
204
+ 36865,
205
+ 42863,
206
+ 47425,
207
+ 49870,
208
+ 50254,
209
+ 50258,
210
+ 50358,
211
+ 50359,
212
+ 50360,
213
+ 50361,
214
+ 50362
215
+ ],
216
+ "task_to_id": {
217
+ "transcribe": 50359,
218
+ "translate": 50358
219
+ },
220
+ "transformers_version": "4.28.0.dev0"
221
+ }
checkpoint-26325/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c8d38b37d54a3faa187715cb760189ac0ecb8f92c43cf73667f9b15a1386e5
3
+ size 6111428695
checkpoint-26325/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoint-26325/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b4805f3b36d5d4addebb0a95d8e7bd2556e30e4101757b18ef74210bbbd9ad
3
+ size 3055754841
checkpoint-26325/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d75b09de0dd7793a3154f38806084ec9d7ab6fd9f85b13391ebf48dd7b8083ca
3
+ size 14575
checkpoint-26325/scaler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:501ccca2e3bd6639af46bface336f198c3660f95c06f4eb059f15e50de6d2a38
3
+ size 557
checkpoint-26325/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83aefff223857ac9944297f37d671ca469b223d8cbd562549afd7649af46c6ff
3
+ size 627
checkpoint-26325/trainer_state.json ADDED
@@ -0,0 +1,829 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 26325,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 9.962631077216398e-06,
13
+ "loss": 0.1958,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 9.886367969494757e-06,
19
+ "loss": 0.1228,
20
+ "step": 400
21
+ },
22
+ {
23
+ "epoch": 0.07,
24
+ "learning_rate": 9.810104861773118e-06,
25
+ "loss": 0.1051,
26
+ "step": 600
27
+ },
28
+ {
29
+ "epoch": 0.09,
30
+ "learning_rate": 9.733841754051478e-06,
31
+ "loss": 0.0977,
32
+ "step": 800
33
+ },
34
+ {
35
+ "epoch": 0.11,
36
+ "learning_rate": 9.657578646329838e-06,
37
+ "loss": 0.0891,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 0.14,
42
+ "learning_rate": 9.581315538608199e-06,
43
+ "loss": 0.0827,
44
+ "step": 1200
45
+ },
46
+ {
47
+ "epoch": 0.16,
48
+ "learning_rate": 9.50505243088656e-06,
49
+ "loss": 0.0822,
50
+ "step": 1400
51
+ },
52
+ {
53
+ "epoch": 0.18,
54
+ "learning_rate": 9.42878932316492e-06,
55
+ "loss": 0.0796,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 0.21,
60
+ "learning_rate": 9.352526215443281e-06,
61
+ "loss": 0.0777,
62
+ "step": 1800
63
+ },
64
+ {
65
+ "epoch": 0.23,
66
+ "learning_rate": 9.27626310772164e-06,
67
+ "loss": 0.0769,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 0.25,
72
+ "learning_rate": 9.200000000000002e-06,
73
+ "loss": 0.0736,
74
+ "step": 2200
75
+ },
76
+ {
77
+ "epoch": 0.27,
78
+ "learning_rate": 9.123736892278361e-06,
79
+ "loss": 0.0721,
80
+ "step": 2400
81
+ },
82
+ {
83
+ "epoch": 0.3,
84
+ "learning_rate": 9.047473784556723e-06,
85
+ "loss": 0.0748,
86
+ "step": 2600
87
+ },
88
+ {
89
+ "epoch": 0.32,
90
+ "learning_rate": 8.971210676835082e-06,
91
+ "loss": 0.0736,
92
+ "step": 2800
93
+ },
94
+ {
95
+ "epoch": 0.34,
96
+ "learning_rate": 8.894947569113442e-06,
97
+ "loss": 0.0714,
98
+ "step": 3000
99
+ },
100
+ {
101
+ "epoch": 0.36,
102
+ "learning_rate": 8.818684461391803e-06,
103
+ "loss": 0.0718,
104
+ "step": 3200
105
+ },
106
+ {
107
+ "epoch": 0.39,
108
+ "learning_rate": 8.742421353670163e-06,
109
+ "loss": 0.0729,
110
+ "step": 3400
111
+ },
112
+ {
113
+ "epoch": 0.41,
114
+ "learning_rate": 8.666158245948522e-06,
115
+ "loss": 0.0711,
116
+ "step": 3600
117
+ },
118
+ {
119
+ "epoch": 0.43,
120
+ "learning_rate": 8.589895138226883e-06,
121
+ "loss": 0.0663,
122
+ "step": 3800
123
+ },
124
+ {
125
+ "epoch": 0.46,
126
+ "learning_rate": 8.513632030505243e-06,
127
+ "loss": 0.0675,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 0.48,
132
+ "learning_rate": 8.437368922783604e-06,
133
+ "loss": 0.0689,
134
+ "step": 4200
135
+ },
136
+ {
137
+ "epoch": 0.5,
138
+ "learning_rate": 8.361105815061964e-06,
139
+ "loss": 0.0685,
140
+ "step": 4400
141
+ },
142
+ {
143
+ "epoch": 0.52,
144
+ "learning_rate": 8.284842707340325e-06,
145
+ "loss": 0.0663,
146
+ "step": 4600
147
+ },
148
+ {
149
+ "epoch": 0.55,
150
+ "learning_rate": 8.208579599618686e-06,
151
+ "loss": 0.0651,
152
+ "step": 4800
153
+ },
154
+ {
155
+ "epoch": 0.57,
156
+ "learning_rate": 8.132316491897046e-06,
157
+ "loss": 0.0652,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 0.59,
162
+ "learning_rate": 8.056053384175407e-06,
163
+ "loss": 0.0653,
164
+ "step": 5200
165
+ },
166
+ {
167
+ "epoch": 0.62,
168
+ "learning_rate": 7.979790276453767e-06,
169
+ "loss": 0.0656,
170
+ "step": 5400
171
+ },
172
+ {
173
+ "epoch": 0.64,
174
+ "learning_rate": 7.903527168732126e-06,
175
+ "loss": 0.0635,
176
+ "step": 5600
177
+ },
178
+ {
179
+ "epoch": 0.66,
180
+ "learning_rate": 7.827264061010486e-06,
181
+ "loss": 0.061,
182
+ "step": 5800
183
+ },
184
+ {
185
+ "epoch": 0.68,
186
+ "learning_rate": 7.751000953288847e-06,
187
+ "loss": 0.0637,
188
+ "step": 6000
189
+ },
190
+ {
191
+ "epoch": 0.71,
192
+ "learning_rate": 7.674737845567207e-06,
193
+ "loss": 0.0621,
194
+ "step": 6200
195
+ },
196
+ {
197
+ "epoch": 0.73,
198
+ "learning_rate": 7.598856053384176e-06,
199
+ "loss": 0.0657,
200
+ "step": 6400
201
+ },
202
+ {
203
+ "epoch": 0.75,
204
+ "learning_rate": 7.522592945662537e-06,
205
+ "loss": 0.0618,
206
+ "step": 6600
207
+ },
208
+ {
209
+ "epoch": 0.77,
210
+ "learning_rate": 7.446329837940897e-06,
211
+ "loss": 0.0617,
212
+ "step": 6800
213
+ },
214
+ {
215
+ "epoch": 0.8,
216
+ "learning_rate": 7.370066730219257e-06,
217
+ "loss": 0.0623,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 0.82,
222
+ "learning_rate": 7.293803622497617e-06,
223
+ "loss": 0.0581,
224
+ "step": 7200
225
+ },
226
+ {
227
+ "epoch": 0.84,
228
+ "learning_rate": 7.217540514775978e-06,
229
+ "loss": 0.0612,
230
+ "step": 7400
231
+ },
232
+ {
233
+ "epoch": 0.87,
234
+ "learning_rate": 7.141277407054338e-06,
235
+ "loss": 0.0584,
236
+ "step": 7600
237
+ },
238
+ {
239
+ "epoch": 0.89,
240
+ "learning_rate": 7.065014299332699e-06,
241
+ "loss": 0.0595,
242
+ "step": 7800
243
+ },
244
+ {
245
+ "epoch": 0.91,
246
+ "learning_rate": 6.9887511916110584e-06,
247
+ "loss": 0.0605,
248
+ "step": 8000
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 6.912488083889419e-06,
253
+ "loss": 0.0584,
254
+ "step": 8200
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 6.836606291706388e-06,
259
+ "loss": 0.06,
260
+ "step": 8400
261
+ },
262
+ {
263
+ "epoch": 0.98,
264
+ "learning_rate": 6.760343183984748e-06,
265
+ "loss": 0.0589,
266
+ "step": 8600
267
+ },
268
+ {
269
+ "epoch": 1.0,
270
+ "eval_loss": 0.12254729866981506,
271
+ "eval_runtime": 5547.0518,
272
+ "eval_samples_per_second": 6.063,
273
+ "eval_steps_per_second": 0.189,
274
+ "eval_wer": 0.06041116269525739,
275
+ "step": 8775
276
+ },
277
+ {
278
+ "epoch": 1.0,
279
+ "learning_rate": 6.684080076263109e-06,
280
+ "loss": 0.0547,
281
+ "step": 8800
282
+ },
283
+ {
284
+ "epoch": 1.03,
285
+ "learning_rate": 6.6078169685414686e-06,
286
+ "loss": 0.0363,
287
+ "step": 9000
288
+ },
289
+ {
290
+ "epoch": 1.05,
291
+ "learning_rate": 6.531553860819829e-06,
292
+ "loss": 0.0357,
293
+ "step": 9200
294
+ },
295
+ {
296
+ "epoch": 1.07,
297
+ "learning_rate": 6.4552907530981886e-06,
298
+ "loss": 0.0375,
299
+ "step": 9400
300
+ },
301
+ {
302
+ "epoch": 1.09,
303
+ "learning_rate": 6.37902764537655e-06,
304
+ "loss": 0.0347,
305
+ "step": 9600
306
+ },
307
+ {
308
+ "epoch": 1.12,
309
+ "learning_rate": 6.302764537654909e-06,
310
+ "loss": 0.0347,
311
+ "step": 9800
312
+ },
313
+ {
314
+ "epoch": 1.14,
315
+ "learning_rate": 6.226501429933271e-06,
316
+ "loss": 0.0342,
317
+ "step": 10000
318
+ },
319
+ {
320
+ "epoch": 1.16,
321
+ "learning_rate": 6.15023832221163e-06,
322
+ "loss": 0.0358,
323
+ "step": 10200
324
+ },
325
+ {
326
+ "epoch": 1.19,
327
+ "learning_rate": 6.073975214489991e-06,
328
+ "loss": 0.0344,
329
+ "step": 10400
330
+ },
331
+ {
332
+ "epoch": 1.21,
333
+ "learning_rate": 5.997712106768351e-06,
334
+ "loss": 0.0333,
335
+ "step": 10600
336
+ },
337
+ {
338
+ "epoch": 1.23,
339
+ "learning_rate": 5.9218303145853195e-06,
340
+ "loss": 0.0357,
341
+ "step": 10800
342
+ },
343
+ {
344
+ "epoch": 1.25,
345
+ "learning_rate": 5.845567206863681e-06,
346
+ "loss": 0.0355,
347
+ "step": 11000
348
+ },
349
+ {
350
+ "epoch": 1.28,
351
+ "learning_rate": 5.76930409914204e-06,
352
+ "loss": 0.0348,
353
+ "step": 11200
354
+ },
355
+ {
356
+ "epoch": 1.3,
357
+ "learning_rate": 5.693040991420401e-06,
358
+ "loss": 0.0339,
359
+ "step": 11400
360
+ },
361
+ {
362
+ "epoch": 1.32,
363
+ "learning_rate": 5.61677788369876e-06,
364
+ "loss": 0.0344,
365
+ "step": 11600
366
+ },
367
+ {
368
+ "epoch": 1.34,
369
+ "learning_rate": 5.540514775977122e-06,
370
+ "loss": 0.0345,
371
+ "step": 11800
372
+ },
373
+ {
374
+ "epoch": 1.37,
375
+ "learning_rate": 5.464251668255481e-06,
376
+ "loss": 0.0329,
377
+ "step": 12000
378
+ },
379
+ {
380
+ "epoch": 1.39,
381
+ "learning_rate": 5.3879885605338424e-06,
382
+ "loss": 0.0334,
383
+ "step": 12200
384
+ },
385
+ {
386
+ "epoch": 1.41,
387
+ "learning_rate": 5.311725452812202e-06,
388
+ "loss": 0.0355,
389
+ "step": 12400
390
+ },
391
+ {
392
+ "epoch": 1.44,
393
+ "learning_rate": 5.235462345090562e-06,
394
+ "loss": 0.0338,
395
+ "step": 12600
396
+ },
397
+ {
398
+ "epoch": 1.46,
399
+ "learning_rate": 5.159199237368924e-06,
400
+ "loss": 0.0339,
401
+ "step": 12800
402
+ },
403
+ {
404
+ "epoch": 1.48,
405
+ "learning_rate": 5.082936129647283e-06,
406
+ "loss": 0.0357,
407
+ "step": 13000
408
+ },
409
+ {
410
+ "epoch": 1.5,
411
+ "learning_rate": 5.0066730219256445e-06,
412
+ "loss": 0.0323,
413
+ "step": 13200
414
+ },
415
+ {
416
+ "epoch": 1.53,
417
+ "learning_rate": 4.930409914204004e-06,
418
+ "loss": 0.034,
419
+ "step": 13400
420
+ },
421
+ {
422
+ "epoch": 1.55,
423
+ "learning_rate": 4.8541468064823645e-06,
424
+ "loss": 0.0333,
425
+ "step": 13600
426
+ },
427
+ {
428
+ "epoch": 1.57,
429
+ "learning_rate": 4.777883698760725e-06,
430
+ "loss": 0.034,
431
+ "step": 13800
432
+ },
433
+ {
434
+ "epoch": 1.6,
435
+ "learning_rate": 4.701620591039085e-06,
436
+ "loss": 0.033,
437
+ "step": 14000
438
+ },
439
+ {
440
+ "epoch": 1.62,
441
+ "learning_rate": 4.625357483317446e-06,
442
+ "loss": 0.0359,
443
+ "step": 14200
444
+ },
445
+ {
446
+ "epoch": 1.64,
447
+ "learning_rate": 4.549475691134414e-06,
448
+ "loss": 0.0339,
449
+ "step": 14400
450
+ },
451
+ {
452
+ "epoch": 1.66,
453
+ "learning_rate": 4.473212583412775e-06,
454
+ "loss": 0.0343,
455
+ "step": 14600
456
+ },
457
+ {
458
+ "epoch": 1.69,
459
+ "learning_rate": 4.396949475691134e-06,
460
+ "loss": 0.0335,
461
+ "step": 14800
462
+ },
463
+ {
464
+ "epoch": 1.71,
465
+ "learning_rate": 4.320686367969495e-06,
466
+ "loss": 0.0327,
467
+ "step": 15000
468
+ },
469
+ {
470
+ "epoch": 1.73,
471
+ "learning_rate": 4.244423260247855e-06,
472
+ "loss": 0.033,
473
+ "step": 15200
474
+ },
475
+ {
476
+ "epoch": 1.75,
477
+ "learning_rate": 4.1681601525262155e-06,
478
+ "loss": 0.0339,
479
+ "step": 15400
480
+ },
481
+ {
482
+ "epoch": 1.78,
483
+ "learning_rate": 4.091897044804577e-06,
484
+ "loss": 0.0334,
485
+ "step": 15600
486
+ },
487
+ {
488
+ "epoch": 1.8,
489
+ "learning_rate": 4.015633937082936e-06,
490
+ "loss": 0.0306,
491
+ "step": 15800
492
+ },
493
+ {
494
+ "epoch": 1.82,
495
+ "learning_rate": 3.939370829361297e-06,
496
+ "loss": 0.0315,
497
+ "step": 16000
498
+ },
499
+ {
500
+ "epoch": 1.85,
501
+ "learning_rate": 3.863107721639657e-06,
502
+ "loss": 0.0326,
503
+ "step": 16200
504
+ },
505
+ {
506
+ "epoch": 1.87,
507
+ "learning_rate": 3.7868446139180175e-06,
508
+ "loss": 0.0309,
509
+ "step": 16400
510
+ },
511
+ {
512
+ "epoch": 1.89,
513
+ "learning_rate": 3.710581506196378e-06,
514
+ "loss": 0.0321,
515
+ "step": 16600
516
+ },
517
+ {
518
+ "epoch": 1.91,
519
+ "learning_rate": 3.6343183984747384e-06,
520
+ "loss": 0.033,
521
+ "step": 16800
522
+ },
523
+ {
524
+ "epoch": 1.94,
525
+ "learning_rate": 3.5580552907530984e-06,
526
+ "loss": 0.0338,
527
+ "step": 17000
528
+ },
529
+ {
530
+ "epoch": 1.96,
531
+ "learning_rate": 3.4817921830314588e-06,
532
+ "loss": 0.0319,
533
+ "step": 17200
534
+ },
535
+ {
536
+ "epoch": 1.98,
537
+ "learning_rate": 3.405529075309819e-06,
538
+ "loss": 0.031,
539
+ "step": 17400
540
+ },
541
+ {
542
+ "epoch": 2.0,
543
+ "eval_loss": 0.11860840022563934,
544
+ "eval_runtime": 5539.7182,
545
+ "eval_samples_per_second": 6.071,
546
+ "eval_steps_per_second": 0.19,
547
+ "eval_wer": 0.056729379878763,
548
+ "step": 17550
549
+ },
550
+ {
551
+ "epoch": 2.01,
552
+ "learning_rate": 3.3292659675881796e-06,
553
+ "loss": 0.0288,
554
+ "step": 17600
555
+ },
556
+ {
557
+ "epoch": 2.03,
558
+ "learning_rate": 3.2530028598665396e-06,
559
+ "loss": 0.0175,
560
+ "step": 17800
561
+ },
562
+ {
563
+ "epoch": 2.05,
564
+ "learning_rate": 3.1767397521449e-06,
565
+ "loss": 0.0172,
566
+ "step": 18000
567
+ },
568
+ {
569
+ "epoch": 2.07,
570
+ "learning_rate": 3.1004766444232604e-06,
571
+ "loss": 0.0168,
572
+ "step": 18200
573
+ },
574
+ {
575
+ "epoch": 2.1,
576
+ "learning_rate": 3.024213536701621e-06,
577
+ "loss": 0.0158,
578
+ "step": 18400
579
+ },
580
+ {
581
+ "epoch": 2.12,
582
+ "learning_rate": 2.9483317445185893e-06,
583
+ "loss": 0.0169,
584
+ "step": 18600
585
+ },
586
+ {
587
+ "epoch": 2.14,
588
+ "learning_rate": 2.8720686367969497e-06,
589
+ "loss": 0.0174,
590
+ "step": 18800
591
+ },
592
+ {
593
+ "epoch": 2.17,
594
+ "learning_rate": 2.79580552907531e-06,
595
+ "loss": 0.0171,
596
+ "step": 19000
597
+ },
598
+ {
599
+ "epoch": 2.19,
600
+ "learning_rate": 2.71954242135367e-06,
601
+ "loss": 0.0169,
602
+ "step": 19200
603
+ },
604
+ {
605
+ "epoch": 2.21,
606
+ "learning_rate": 2.643660629170639e-06,
607
+ "loss": 0.0181,
608
+ "step": 19400
609
+ },
610
+ {
611
+ "epoch": 2.23,
612
+ "learning_rate": 2.5673975214489994e-06,
613
+ "loss": 0.0166,
614
+ "step": 19600
615
+ },
616
+ {
617
+ "epoch": 2.26,
618
+ "learning_rate": 2.4911344137273594e-06,
619
+ "loss": 0.0159,
620
+ "step": 19800
621
+ },
622
+ {
623
+ "epoch": 2.28,
624
+ "learning_rate": 2.41487130600572e-06,
625
+ "loss": 0.0174,
626
+ "step": 20000
627
+ },
628
+ {
629
+ "epoch": 2.3,
630
+ "learning_rate": 2.3386081982840803e-06,
631
+ "loss": 0.0168,
632
+ "step": 20200
633
+ },
634
+ {
635
+ "epoch": 2.32,
636
+ "learning_rate": 2.2623450905624407e-06,
637
+ "loss": 0.0167,
638
+ "step": 20400
639
+ },
640
+ {
641
+ "epoch": 2.35,
642
+ "learning_rate": 2.1860819828408007e-06,
643
+ "loss": 0.0166,
644
+ "step": 20600
645
+ },
646
+ {
647
+ "epoch": 2.37,
648
+ "learning_rate": 2.109818875119161e-06,
649
+ "loss": 0.0166,
650
+ "step": 20800
651
+ },
652
+ {
653
+ "epoch": 2.39,
654
+ "learning_rate": 2.033555767397522e-06,
655
+ "loss": 0.016,
656
+ "step": 21000
657
+ },
658
+ {
659
+ "epoch": 2.42,
660
+ "learning_rate": 1.957292659675882e-06,
661
+ "loss": 0.0162,
662
+ "step": 21200
663
+ },
664
+ {
665
+ "epoch": 2.44,
666
+ "learning_rate": 1.8810295519542423e-06,
667
+ "loss": 0.0159,
668
+ "step": 21400
669
+ },
670
+ {
671
+ "epoch": 2.46,
672
+ "learning_rate": 1.8047664442326028e-06,
673
+ "loss": 0.0156,
674
+ "step": 21600
675
+ },
676
+ {
677
+ "epoch": 2.48,
678
+ "learning_rate": 1.728503336510963e-06,
679
+ "loss": 0.0168,
680
+ "step": 21800
681
+ },
682
+ {
683
+ "epoch": 2.51,
684
+ "learning_rate": 1.6522402287893232e-06,
685
+ "loss": 0.0167,
686
+ "step": 22000
687
+ },
688
+ {
689
+ "epoch": 2.53,
690
+ "learning_rate": 1.5759771210676836e-06,
691
+ "loss": 0.0163,
692
+ "step": 22200
693
+ },
694
+ {
695
+ "epoch": 2.55,
696
+ "learning_rate": 1.4997140133460438e-06,
697
+ "loss": 0.0161,
698
+ "step": 22400
699
+ },
700
+ {
701
+ "epoch": 2.58,
702
+ "learning_rate": 1.4234509056244042e-06,
703
+ "loss": 0.0162,
704
+ "step": 22600
705
+ },
706
+ {
707
+ "epoch": 2.6,
708
+ "learning_rate": 1.3471877979027648e-06,
709
+ "loss": 0.015,
710
+ "step": 22800
711
+ },
712
+ {
713
+ "epoch": 2.62,
714
+ "learning_rate": 1.270924690181125e-06,
715
+ "loss": 0.0157,
716
+ "step": 23000
717
+ },
718
+ {
719
+ "epoch": 2.64,
720
+ "learning_rate": 1.1946615824594855e-06,
721
+ "loss": 0.0157,
722
+ "step": 23200
723
+ },
724
+ {
725
+ "epoch": 2.67,
726
+ "learning_rate": 1.1183984747378457e-06,
727
+ "loss": 0.0153,
728
+ "step": 23400
729
+ },
730
+ {
731
+ "epoch": 2.69,
732
+ "learning_rate": 1.042135367016206e-06,
733
+ "loss": 0.0149,
734
+ "step": 23600
735
+ },
736
+ {
737
+ "epoch": 2.71,
738
+ "learning_rate": 9.662535748331745e-07,
739
+ "loss": 0.0157,
740
+ "step": 23800
741
+ },
742
+ {
743
+ "epoch": 2.74,
744
+ "learning_rate": 8.899904671115349e-07,
745
+ "loss": 0.0144,
746
+ "step": 24000
747
+ },
748
+ {
749
+ "epoch": 2.76,
750
+ "learning_rate": 8.137273593898952e-07,
751
+ "loss": 0.0164,
752
+ "step": 24200
753
+ },
754
+ {
755
+ "epoch": 2.78,
756
+ "learning_rate": 7.374642516682555e-07,
757
+ "loss": 0.0156,
758
+ "step": 24400
759
+ },
760
+ {
761
+ "epoch": 2.8,
762
+ "learning_rate": 6.612011439466159e-07,
763
+ "loss": 0.0158,
764
+ "step": 24600
765
+ },
766
+ {
767
+ "epoch": 2.83,
768
+ "learning_rate": 5.849380362249762e-07,
769
+ "loss": 0.0152,
770
+ "step": 24800
771
+ },
772
+ {
773
+ "epoch": 2.85,
774
+ "learning_rate": 5.086749285033365e-07,
775
+ "loss": 0.0149,
776
+ "step": 25000
777
+ },
778
+ {
779
+ "epoch": 2.87,
780
+ "learning_rate": 4.324118207816969e-07,
781
+ "loss": 0.0155,
782
+ "step": 25200
783
+ },
784
+ {
785
+ "epoch": 2.89,
786
+ "learning_rate": 3.5614871306005724e-07,
787
+ "loss": 0.0164,
788
+ "step": 25400
789
+ },
790
+ {
791
+ "epoch": 2.92,
792
+ "learning_rate": 2.7988560533841755e-07,
793
+ "loss": 0.0147,
794
+ "step": 25600
795
+ },
796
+ {
797
+ "epoch": 2.94,
798
+ "learning_rate": 2.0362249761677792e-07,
799
+ "loss": 0.0143,
800
+ "step": 25800
801
+ },
802
+ {
803
+ "epoch": 2.96,
804
+ "learning_rate": 1.2774070543374643e-07,
805
+ "loss": 0.0145,
806
+ "step": 26000
807
+ },
808
+ {
809
+ "epoch": 2.99,
810
+ "learning_rate": 5.147759771210677e-08,
811
+ "loss": 0.0148,
812
+ "step": 26200
813
+ },
814
+ {
815
+ "epoch": 3.0,
816
+ "eval_loss": 0.12545564770698547,
817
+ "eval_runtime": 5578.7222,
818
+ "eval_samples_per_second": 6.029,
819
+ "eval_steps_per_second": 0.188,
820
+ "eval_wer": 0.05513910619779243,
821
+ "step": 26325
822
+ }
823
+ ],
824
+ "max_steps": 26325,
825
+ "num_train_epochs": 3,
826
+ "total_flos": 6.447477588325171e+20,
827
+ "trial_name": null,
828
+ "trial_params": null
829
+ }
checkpoint-26325/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4ebd7db27b4b10b645b0710064ae069d9dfa7caee40828967e6a896c7c4a9d
3
+ size 3771
checkpoint-8775/config.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "_name_or_path": "openai/whisper-medium",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1024,
17
+ "decoder_attention_heads": 16,
18
+ "decoder_ffn_dim": 4096,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 24,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 24,
27
+ "eos_token_id": 50257,
28
+ "forced_decoder_ids": null,
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "mask_feature_length": 10,
32
+ "mask_feature_min_masks": 0,
33
+ "mask_feature_prob": 0.0,
34
+ "mask_time_length": 10,
35
+ "mask_time_min_masks": 2,
36
+ "mask_time_prob": 0.05,
37
+ "max_length": 448,
38
+ "max_source_positions": 1500,
39
+ "max_target_positions": 448,
40
+ "model_type": "whisper",
41
+ "num_hidden_layers": 24,
42
+ "num_mel_bins": 80,
43
+ "pad_token_id": 50257,
44
+ "scale_embedding": false,
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.28.0.dev0",
47
+ "use_cache": false,
48
+ "use_weighted_layer_sum": false,
49
+ "vocab_size": 51865
50
+ }
checkpoint-8775/generation_config.json ADDED
@@ -0,0 +1,221 @@
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50257
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50258,
8
+ "eos_token_id": 50257,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ null
13
+ ],
14
+ [
15
+ 2,
16
+ 50359
17
+ ]
18
+ ],
19
+ "is_multilingual": true,
20
+ "lang_to_id": {
21
+ "<|af|>": 50327,
22
+ "<|am|>": 50334,
23
+ "<|ar|>": 50272,
24
+ "<|as|>": 50350,
25
+ "<|az|>": 50304,
26
+ "<|ba|>": 50355,
27
+ "<|be|>": 50330,
28
+ "<|bg|>": 50292,
29
+ "<|bn|>": 50302,
30
+ "<|bo|>": 50347,
31
+ "<|br|>": 50309,
32
+ "<|bs|>": 50315,
33
+ "<|ca|>": 50270,
34
+ "<|cs|>": 50283,
35
+ "<|cy|>": 50297,
36
+ "<|da|>": 50285,
37
+ "<|de|>": 50261,
38
+ "<|el|>": 50281,
39
+ "<|en|>": 50259,
40
+ "<|es|>": 50262,
41
+ "<|et|>": 50307,
42
+ "<|eu|>": 50310,
43
+ "<|fa|>": 50300,
44
+ "<|fi|>": 50277,
45
+ "<|fo|>": 50338,
46
+ "<|fr|>": 50265,
47
+ "<|gl|>": 50319,
48
+ "<|gu|>": 50333,
49
+ "<|haw|>": 50352,
50
+ "<|ha|>": 50354,
51
+ "<|he|>": 50279,
52
+ "<|hi|>": 50276,
53
+ "<|hr|>": 50291,
54
+ "<|ht|>": 50339,
55
+ "<|hu|>": 50286,
56
+ "<|hy|>": 50312,
57
+ "<|id|>": 50275,
58
+ "<|is|>": 50311,
59
+ "<|it|>": 50274,
60
+ "<|ja|>": 50266,
61
+ "<|jw|>": 50356,
62
+ "<|ka|>": 50329,
63
+ "<|kk|>": 50316,
64
+ "<|km|>": 50323,
65
+ "<|kn|>": 50306,
66
+ "<|ko|>": 50264,
67
+ "<|la|>": 50294,
68
+ "<|lb|>": 50345,
69
+ "<|ln|>": 50353,
70
+ "<|lo|>": 50336,
71
+ "<|lt|>": 50293,
72
+ "<|lv|>": 50301,
73
+ "<|mg|>": 50349,
74
+ "<|mi|>": 50295,
75
+ "<|mk|>": 50308,
76
+ "<|ml|>": 50296,
77
+ "<|mn|>": 50314,
78
+ "<|mr|>": 50320,
79
+ "<|ms|>": 50282,
80
+ "<|mt|>": 50343,
81
+ "<|my|>": 50346,
82
+ "<|ne|>": 50313,
83
+ "<|nl|>": 50271,
84
+ "<|nn|>": 50342,
85
+ "<|no|>": 50288,
86
+ "<|oc|>": 50328,
87
+ "<|pa|>": 50321,
88
+ "<|pl|>": 50269,
89
+ "<|ps|>": 50340,
90
+ "<|pt|>": 50267,
91
+ "<|ro|>": 50284,
92
+ "<|ru|>": 50263,
93
+ "<|sa|>": 50344,
94
+ "<|sd|>": 50332,
95
+ "<|si|>": 50322,
96
+ "<|sk|>": 50298,
97
+ "<|sl|>": 50305,
98
+ "<|sn|>": 50324,
99
+ "<|so|>": 50326,
100
+ "<|sq|>": 50317,
101
+ "<|sr|>": 50303,
102
+ "<|su|>": 50357,
103
+ "<|sv|>": 50273,
104
+ "<|sw|>": 50318,
105
+ "<|ta|>": 50287,
106
+ "<|te|>": 50299,
107
+ "<|tg|>": 50331,
108
+ "<|th|>": 50289,
109
+ "<|tk|>": 50341,
110
+ "<|tl|>": 50348,
111
+ "<|tr|>": 50268,
112
+ "<|tt|>": 50351,
113
+ "<|uk|>": 50280,
114
+ "<|ur|>": 50290,
115
+ "<|uz|>": 50337,
116
+ "<|vi|>": 50278,
117
+ "<|yi|>": 50335,
118
+ "<|yo|>": 50325,
119
+ "<|zh|>": 50260
120
+ },
121
+ "max_initial_timestamp_index": 1,
122
+ "max_length": 448,
123
+ "no_timestamps_token_id": 50363,
124
+ "pad_token_id": 50257,
125
+ "return_timestamps": false,
126
+ "suppress_tokens": [
127
+ 1,
128
+ 2,
129
+ 7,
130
+ 8,
131
+ 9,
132
+ 10,
133
+ 14,
134
+ 25,
135
+ 26,
136
+ 27,
137
+ 28,
138
+ 29,
139
+ 31,
140
+ 58,
141
+ 59,
142
+ 60,
143
+ 61,
144
+ 62,
145
+ 63,
146
+ 90,
147
+ 91,
148
+ 92,
149
+ 93,
150
+ 359,
151
+ 503,
152
+ 522,
153
+ 542,
154
+ 873,
155
+ 893,
156
+ 902,
157
+ 918,
158
+ 922,
159
+ 931,
160
+ 1350,
161
+ 1853,
162
+ 1982,
163
+ 2460,
164
+ 2627,
165
+ 3246,
166
+ 3253,
167
+ 3268,
168
+ 3536,
169
+ 3846,
170
+ 3961,
171
+ 4183,
172
+ 4667,
173
+ 6585,
174
+ 6647,
175
+ 7273,
176
+ 9061,
177
+ 9383,
178
+ 10428,
179
+ 10929,
180
+ 11938,
181
+ 12033,
182
+ 12331,
183
+ 12562,
184
+ 13793,
185
+ 14157,
186
+ 14635,
187
+ 15265,
188
+ 15618,
189
+ 16553,
190
+ 16604,
191
+ 18362,
192
+ 18956,
193
+ 20075,
194
+ 21675,
195
+ 22520,
196
+ 26130,
197
+ 26161,
198
+ 26435,
199
+ 28279,
200
+ 29464,
201
+ 31650,
202
+ 32302,
203
+ 32470,
204
+ 36865,
205
+ 42863,
206
+ 47425,
207
+ 49870,
208
+ 50254,
209
+ 50258,
210
+ 50358,
211
+ 50359,
212
+ 50360,
213
+ 50361,
214
+ 50362
215
+ ],
216
+ "task_to_id": {
217
+ "transcribe": 50359,
218
+ "translate": 50358
219
+ },
220
+ "transformers_version": "4.28.0.dev0"
221
+ }
checkpoint-8775/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9fbb7a8e62e7817ce8abe423f6716e1d1d09da8a1512cea8394f7851ddf98c
3
+ size 6111428695
checkpoint-8775/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoint-8775/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac8d89161f6c8aeaeb90e9664d4d9ea738f3ef50715a8a8a6f7fb549c46e26a
3
+ size 3055754841
checkpoint-8775/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7791687854887995a2c6d857f2f461b01cdbe591c138b81bf2640c6ad0348bff
3
+ size 14575
checkpoint-8775/scaler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1b38fe92077f5aecf64bde325c48e0edc30faae44751436bafa1fb95c2687c8
3
+ size 557
checkpoint-8775/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e2cf90eca69726f8dda840469a0c3e7821cb49a58fad8f9b6dd90c500b14277
3
+ size 627
checkpoint-8775/trainer_state.json ADDED
@@ -0,0 +1,283 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 8775,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 9.962631077216398e-06,
13
+ "loss": 0.1958,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 9.886367969494757e-06,
19
+ "loss": 0.1228,
20
+ "step": 400
21
+ },
22
+ {
23
+ "epoch": 0.07,
24
+ "learning_rate": 9.810104861773118e-06,
25
+ "loss": 0.1051,
26
+ "step": 600
27
+ },
28
+ {
29
+ "epoch": 0.09,
30
+ "learning_rate": 9.733841754051478e-06,
31
+ "loss": 0.0977,
32
+ "step": 800
33
+ },
34
+ {
35
+ "epoch": 0.11,
36
+ "learning_rate": 9.657578646329838e-06,
37
+ "loss": 0.0891,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 0.14,
42
+ "learning_rate": 9.581315538608199e-06,
43
+ "loss": 0.0827,
44
+ "step": 1200
45
+ },
46
+ {
47
+ "epoch": 0.16,
48
+ "learning_rate": 9.50505243088656e-06,
49
+ "loss": 0.0822,
50
+ "step": 1400
51
+ },
52
+ {
53
+ "epoch": 0.18,
54
+ "learning_rate": 9.42878932316492e-06,
55
+ "loss": 0.0796,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 0.21,
60
+ "learning_rate": 9.352526215443281e-06,
61
+ "loss": 0.0777,
62
+ "step": 1800
63
+ },
64
+ {
65
+ "epoch": 0.23,
66
+ "learning_rate": 9.27626310772164e-06,
67
+ "loss": 0.0769,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 0.25,
72
+ "learning_rate": 9.200000000000002e-06,
73
+ "loss": 0.0736,
74
+ "step": 2200
75
+ },
76
+ {
77
+ "epoch": 0.27,
78
+ "learning_rate": 9.123736892278361e-06,
79
+ "loss": 0.0721,
80
+ "step": 2400
81
+ },
82
+ {
83
+ "epoch": 0.3,
84
+ "learning_rate": 9.047473784556723e-06,
85
+ "loss": 0.0748,
86
+ "step": 2600
87
+ },
88
+ {
89
+ "epoch": 0.32,
90
+ "learning_rate": 8.971210676835082e-06,
91
+ "loss": 0.0736,
92
+ "step": 2800
93
+ },
94
+ {
95
+ "epoch": 0.34,
96
+ "learning_rate": 8.894947569113442e-06,
97
+ "loss": 0.0714,
98
+ "step": 3000
99
+ },
100
+ {
101
+ "epoch": 0.36,
102
+ "learning_rate": 8.818684461391803e-06,
103
+ "loss": 0.0718,
104
+ "step": 3200
105
+ },
106
+ {
107
+ "epoch": 0.39,
108
+ "learning_rate": 8.742421353670163e-06,
109
+ "loss": 0.0729,
110
+ "step": 3400
111
+ },
112
+ {
113
+ "epoch": 0.41,
114
+ "learning_rate": 8.666158245948522e-06,
115
+ "loss": 0.0711,
116
+ "step": 3600
117
+ },
118
+ {
119
+ "epoch": 0.43,
120
+ "learning_rate": 8.589895138226883e-06,
121
+ "loss": 0.0663,
122
+ "step": 3800
123
+ },
124
+ {
125
+ "epoch": 0.46,
126
+ "learning_rate": 8.513632030505243e-06,
127
+ "loss": 0.0675,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 0.48,
132
+ "learning_rate": 8.437368922783604e-06,
133
+ "loss": 0.0689,
134
+ "step": 4200
135
+ },
136
+ {
137
+ "epoch": 0.5,
138
+ "learning_rate": 8.361105815061964e-06,
139
+ "loss": 0.0685,
140
+ "step": 4400
141
+ },
142
+ {
143
+ "epoch": 0.52,
144
+ "learning_rate": 8.284842707340325e-06,
145
+ "loss": 0.0663,
146
+ "step": 4600
147
+ },
148
+ {
149
+ "epoch": 0.55,
150
+ "learning_rate": 8.208579599618686e-06,
151
+ "loss": 0.0651,
152
+ "step": 4800
153
+ },
154
+ {
155
+ "epoch": 0.57,
156
+ "learning_rate": 8.132316491897046e-06,
157
+ "loss": 0.0652,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 0.59,
162
+ "learning_rate": 8.056053384175407e-06,
163
+ "loss": 0.0653,
164
+ "step": 5200
165
+ },
166
+ {
167
+ "epoch": 0.62,
168
+ "learning_rate": 7.979790276453767e-06,
169
+ "loss": 0.0656,
170
+ "step": 5400
171
+ },
172
+ {
173
+ "epoch": 0.64,
174
+ "learning_rate": 7.903527168732126e-06,
175
+ "loss": 0.0635,
176
+ "step": 5600
177
+ },
178
+ {
179
+ "epoch": 0.66,
180
+ "learning_rate": 7.827264061010486e-06,
181
+ "loss": 0.061,
182
+ "step": 5800
183
+ },
184
+ {
185
+ "epoch": 0.68,
186
+ "learning_rate": 7.751000953288847e-06,
187
+ "loss": 0.0637,
188
+ "step": 6000
189
+ },
190
+ {
191
+ "epoch": 0.71,
192
+ "learning_rate": 7.674737845567207e-06,
193
+ "loss": 0.0621,
194
+ "step": 6200
195
+ },
196
+ {
197
+ "epoch": 0.73,
198
+ "learning_rate": 7.598856053384176e-06,
199
+ "loss": 0.0657,
200
+ "step": 6400
201
+ },
202
+ {
203
+ "epoch": 0.75,
204
+ "learning_rate": 7.522592945662537e-06,
205
+ "loss": 0.0618,
206
+ "step": 6600
207
+ },
208
+ {
209
+ "epoch": 0.77,
210
+ "learning_rate": 7.446329837940897e-06,
211
+ "loss": 0.0617,
212
+ "step": 6800
213
+ },
214
+ {
215
+ "epoch": 0.8,
216
+ "learning_rate": 7.370066730219257e-06,
217
+ "loss": 0.0623,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 0.82,
222
+ "learning_rate": 7.293803622497617e-06,
223
+ "loss": 0.0581,
224
+ "step": 7200
225
+ },
226
+ {
227
+ "epoch": 0.84,
228
+ "learning_rate": 7.217540514775978e-06,
229
+ "loss": 0.0612,
230
+ "step": 7400
231
+ },
232
+ {
233
+ "epoch": 0.87,
234
+ "learning_rate": 7.141277407054338e-06,
235
+ "loss": 0.0584,
236
+ "step": 7600
237
+ },
238
+ {
239
+ "epoch": 0.89,
240
+ "learning_rate": 7.065014299332699e-06,
241
+ "loss": 0.0595,
242
+ "step": 7800
243
+ },
244
+ {
245
+ "epoch": 0.91,
246
+ "learning_rate": 6.9887511916110584e-06,
247
+ "loss": 0.0605,
248
+ "step": 8000
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 6.912488083889419e-06,
253
+ "loss": 0.0584,
254
+ "step": 8200
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 6.836606291706388e-06,
259
+ "loss": 0.06,
260
+ "step": 8400
261
+ },
262
+ {
263
+ "epoch": 0.98,
264
+ "learning_rate": 6.760343183984748e-06,
265
+ "loss": 0.0589,
266
+ "step": 8600
267
+ },
268
+ {
269
+ "epoch": 1.0,
270
+ "eval_loss": 0.12254729866981506,
271
+ "eval_runtime": 5547.0518,
272
+ "eval_samples_per_second": 6.063,
273
+ "eval_steps_per_second": 0.189,
274
+ "eval_wer": 0.06041116269525739,
275
+ "step": 8775
276
+ }
277
+ ],
278
+ "max_steps": 26325,
279
+ "num_train_epochs": 3,
280
+ "total_flos": 2.1491591961083904e+20,
281
+ "trial_name": null,
282
+ "trial_params": null
283
+ }
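This trainer state logs the training loss every 200 steps and one end-of-epoch evaluation: at step 8775 of 26325 (epoch 1 of 3) the eval WER is 0.0604, i.e. about 6.04%. A short sketch (plain `json`, reading the file from a local clone) for pulling those curves out of the checkpoint:

```python
import json

with open("checkpoint-8775/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_wer".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [e for e in state["log_history"] if "eval_wer" in e]

print(f"last logged training loss: {train_log[-1][1]} at step {train_log[-1][0]}")
for e in evals:
    print(f"epoch {e['epoch']}: eval_loss={e['eval_loss']:.4f}, WER={100 * e['eval_wer']:.2f}%")
```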
checkpoint-8775/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e4ebd7db27b4b10b645b0710064ae069d9dfa7caee40828967e6a896c7c4a9d
+ size 3771
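`training_args.bin` is a pickled `Seq2SeqTrainingArguments` object rather than a tensor file, so inspecting it needs `transformers` importable at load time. A hedged sketch of how it is commonly inspected (attribute names are standard TrainingArguments fields, not values shown in this diff):

```python
import torch

# Recent torch releases may require weights_only=False here, since this is an arbitrary pickle.
args = torch.load("checkpoint-8775/training_args.bin")

print(type(args).__name__)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```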
config.json ADDED
@@ -0,0 +1,50 @@
+ {
+ "_name_or_path": "openai/whisper-medium",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "apply_spec_augment": false,
+ "architectures": [
+ "WhisperForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "classifier_proj_size": 256,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 24,
+ "decoder_start_token_id": 50258,
+ "dropout": 0.0,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 24,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": null,
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "max_length": 448,
+ "max_source_positions": 1500,
+ "max_target_positions": 448,
+ "model_type": "whisper",
+ "num_hidden_layers": 24,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.28.0.dev0",
+ "use_cache": false,
+ "use_weighted_layer_sum": false,
+ "vocab_size": 51865
+ }
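The exported `config.json` keeps the unchanged whisper-medium architecture (24 encoder + 24 decoder layers, d_model 1024, 16 heads, vocab 51865); only the weights were fine-tuned. A minimal sketch (assuming the `transformers` config API, run from a local clone) for loading the definition and instantiating a model with this shape:

```python
from transformers import WhisperConfig, WhisperForConditionalGeneration

# Reads the architecture definition exported above.
config = WhisperConfig.from_json_file("config.json")
print(config.encoder_layers, config.decoder_layers, config.d_model, config.vocab_size)

# Builds a randomly initialised model of this shape; the trained weights live in pytorch_model.bin.
model = WhisperForConditionalGeneration(config)
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.0f}M parameters")
```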
generation_config.json ADDED
@@ -0,0 +1,221 @@
+ {
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "decoder_start_token_id": 50258,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": [
+ [
+ 1,
+ null
+ ],
+ [
+ 2,
+ 50359
+ ]
+ ],
+ "is_multilingual": true,
+ "lang_to_id": {
+ "<|af|>": 50327,
+ "<|am|>": 50334,
+ "<|ar|>": 50272,
+ "<|as|>": 50350,
+ "<|az|>": 50304,
+ "<|ba|>": 50355,
+ "<|be|>": 50330,
+ "<|bg|>": 50292,
+ "<|bn|>": 50302,
+ "<|bo|>": 50347,
+ "<|br|>": 50309,
+ "<|bs|>": 50315,
+ "<|ca|>": 50270,
+ "<|cs|>": 50283,
+ "<|cy|>": 50297,
+ "<|da|>": 50285,
+ "<|de|>": 50261,
+ "<|el|>": 50281,
+ "<|en|>": 50259,
+ "<|es|>": 50262,
+ "<|et|>": 50307,
+ "<|eu|>": 50310,
+ "<|fa|>": 50300,
+ "<|fi|>": 50277,
+ "<|fo|>": 50338,
+ "<|fr|>": 50265,
+ "<|gl|>": 50319,
+ "<|gu|>": 50333,
+ "<|haw|>": 50352,
+ "<|ha|>": 50354,
+ "<|he|>": 50279,
+ "<|hi|>": 50276,
+ "<|hr|>": 50291,
+ "<|ht|>": 50339,
+ "<|hu|>": 50286,
+ "<|hy|>": 50312,
+ "<|id|>": 50275,
+ "<|is|>": 50311,
+ "<|it|>": 50274,
+ "<|ja|>": 50266,
+ "<|jw|>": 50356,
+ "<|ka|>": 50329,
+ "<|kk|>": 50316,
+ "<|km|>": 50323,
+ "<|kn|>": 50306,
+ "<|ko|>": 50264,
+ "<|la|>": 50294,
+ "<|lb|>": 50345,
+ "<|ln|>": 50353,
+ "<|lo|>": 50336,
+ "<|lt|>": 50293,
+ "<|lv|>": 50301,
+ "<|mg|>": 50349,
+ "<|mi|>": 50295,
+ "<|mk|>": 50308,
+ "<|ml|>": 50296,
+ "<|mn|>": 50314,
+ "<|mr|>": 50320,
+ "<|ms|>": 50282,
+ "<|mt|>": 50343,
+ "<|my|>": 50346,
+ "<|ne|>": 50313,
+ "<|nl|>": 50271,
+ "<|nn|>": 50342,
+ "<|no|>": 50288,
+ "<|oc|>": 50328,
+ "<|pa|>": 50321,
+ "<|pl|>": 50269,
+ "<|ps|>": 50340,
+ "<|pt|>": 50267,
+ "<|ro|>": 50284,
+ "<|ru|>": 50263,
+ "<|sa|>": 50344,
+ "<|sd|>": 50332,
+ "<|si|>": 50322,
+ "<|sk|>": 50298,
+ "<|sl|>": 50305,
+ "<|sn|>": 50324,
+ "<|so|>": 50326,
+ "<|sq|>": 50317,
+ "<|sr|>": 50303,
+ "<|su|>": 50357,
+ "<|sv|>": 50273,
+ "<|sw|>": 50318,
+ "<|ta|>": 50287,
+ "<|te|>": 50299,
+ "<|tg|>": 50331,
+ "<|th|>": 50289,
+ "<|tk|>": 50341,
+ "<|tl|>": 50348,
+ "<|tr|>": 50268,
+ "<|tt|>": 50351,
+ "<|uk|>": 50280,
+ "<|ur|>": 50290,
+ "<|uz|>": 50337,
+ "<|vi|>": 50278,
+ "<|yi|>": 50335,
+ "<|yo|>": 50325,
+ "<|zh|>": 50260
+ },
+ "max_initial_timestamp_index": 1,
+ "max_length": 448,
+ "no_timestamps_token_id": 50363,
+ "pad_token_id": 50257,
+ "return_timestamps": false,
+ "suppress_tokens": [
+ 1,
+ 2,
+ 7,
+ 8,
+ 9,
+ 10,
+ 14,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 31,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 90,
+ 91,
+ 92,
+ 93,
+ 359,
+ 503,
+ 522,
+ 542,
+ 873,
+ 893,
+ 902,
+ 918,
+ 922,
+ 931,
+ 1350,
+ 1853,
+ 1982,
+ 2460,
+ 2627,
+ 3246,
+ 3253,
+ 3268,
+ 3536,
+ 3846,
+ 3961,
+ 4183,
+ 4667,
+ 6585,
+ 6647,
+ 7273,
+ 9061,
+ 9383,
+ 10428,
+ 10929,
+ 11938,
+ 12033,
+ 12331,
+ 12562,
+ 13793,
+ 14157,
+ 14635,
+ 15265,
+ 15618,
+ 16553,
+ 16604,
+ 18362,
+ 18956,
+ 20075,
+ 21675,
+ 22520,
+ 26130,
+ 26161,
+ 26435,
+ 28279,
+ 29464,
+ 31650,
+ 32302,
+ 32470,
+ 36865,
+ 42863,
+ 47425,
+ 49870,
+ 50254,
+ 50258,
+ 50358,
+ 50359,
+ 50360,
+ 50361,
+ 50362
+ ],
+ "task_to_id": {
+ "transcribe": 50359,
+ "translate": 50358
+ },
+ "transformers_version": "4.28.0.dev0"
+ }
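`generation_config.json` pins the multilingual token maps (`lang_to_id`, `task_to_id`), the suppress lists, and `max_length` 448; the `[1, null]` slot in `forced_decoder_ids` leaves the language token open so it can be set per call. A hedged sketch of how those IDs are typically filled in with `WhisperProcessor.get_decoder_prompt_ids` (Korean is used purely as an illustration; `input_features` is assumed to come from the feature extractor shown earlier):

```python
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
model = WhisperForConditionalGeneration.from_pretrained(".")  # the exported directory, as an example

# Fills the open language slot and the task slot (50359 = <|transcribe|>, matching task_to_id above).
prompt_ids = processor.get_decoder_prompt_ids(language="korean", task="transcribe")
print(prompt_ids)  # e.g. [(1, 50264), (2, 50359), (3, 50363)]

# predicted_ids = model.generate(input_features, forced_decoder_ids=prompt_ids, max_length=448)
```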
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "chunk_length": 30,
+ "feature_extractor_type": "WhisperFeatureExtractor",
+ "feature_size": 80,
+ "hop_length": 160,
+ "n_fft": 400,
+ "n_samples": 480000,
+ "nb_max_frames": 3000,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "processor_class": "WhisperProcessor",
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3b4805f3b36d5d4addebb0a95d8e7bd2556e30e4101757b18ef74210bbbd9ad
+ size 3055754841
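The top-level `pytorch_model.bin` (about 3.06 GB of float32 weights, the same size as the checkpoint copies) is the final fine-tuned model. A sketch of end-to-end use through the ASR pipeline; the repo id and audio file below are placeholders, since the actual Hub path is not shown in this diff:

```python
from transformers import pipeline

# "jangmin/whisper-medium-finetuned" is a hypothetical repo id; substitute the real one for this upload.
asr = pipeline(
    "automatic-speech-recognition",
    model="jangmin/whisper-medium-finetuned",
    chunk_length_s=30,
)

# "sample.wav" stands in for any audio file; the pipeline resamples to 16 kHz as needed.
print(asr("sample.wav")["text"])
```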
runs/Jun01_09-47-23_50c558b7cdc4/1685612863.6569612/events.out.tfevents.1685612863.50c558b7cdc4.1505.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5827c464162d853653f1352f005002e3e72c1a66dde9209e0be2797e5dac1fc4
+ size 6111
runs/Jun01_09-47-23_50c558b7cdc4/events.out.tfevents.1685612863.50c558b7cdc4.1505.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dea86d2b7553dade5e16c15d1b862958498625be9acc46a86533638b5984a8e7
+ size 26794
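The two `events.out.tfevents` files under `runs/` are TensorBoard logs written by the Trainer (one for scalars, one for the hyperparameter dump). Besides `tensorboard --logdir runs`, they can be read programmatically; a sketch assuming the TensorBoard event-accumulator API, with tag names that depend on the Trainer version:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Jun01_09-47-23_50c558b7cdc4")
acc.Reload()

# Tag names are not shown in this diff; typical Trainer tags look like "train/loss" or "eval/wer".
print(acc.Tags()["scalars"])

# for event in acc.Scalars("train/loss"):
#     print(event.step, event.value)
```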
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e4ebd7db27b4b10b645b0710064ae069d9dfa7caee40828967e6a896c7c4a9d
+ size 3771