jstoone committed on
Commit
b6d23f6
1 Parent(s): 58656ba

Training in progress, step 1000

Browse files
checkpoint-1000/added_tokens.json CHANGED
@@ -17,7 +17,6 @@
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
20
- "<|endoftext|>": 50257,
21
  "<|en|>": 50259,
22
  "<|es|>": 50262,
23
  "<|et|>": 50307,
@@ -30,6 +29,7 @@
30
  "<|gu|>": 50333,
31
  "<|haw|>": 50352,
32
  "<|ha|>": 50354,
 
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
@@ -38,7 +38,6 @@
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
41
- "<|iw|>": 50279,
42
  "<|ja|>": 50266,
43
  "<|jw|>": 50356,
44
  "<|ka|>": 50329,
 
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
 
20
  "<|en|>": 50259,
21
  "<|es|>": 50262,
22
  "<|et|>": 50307,
 
29
  "<|gu|>": 50333,
30
  "<|haw|>": 50352,
31
  "<|ha|>": 50354,
32
+ "<|he|>": 50279,
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
 
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
 
41
  "<|ja|>": 50266,
42
  "<|jw|>": 50356,
43
  "<|ka|>": 50329,
checkpoint-1000/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-small",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50257
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 768,
15
  "decoder_attention_heads": 12,
16
  "decoder_ffn_dim": 3072,
@@ -23,20 +25,128 @@
23
  "encoder_layerdrop": 0.0,
24
  "encoder_layers": 12,
25
  "eos_token_id": 50257,
26
- "forced_decoder_ids": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "init_std": 0.02,
28
  "is_encoder_decoder": true,
 
 
 
 
 
 
29
  "max_length": 448,
30
  "max_source_positions": 1500,
31
  "max_target_positions": 448,
 
32
  "model_type": "whisper",
33
  "num_hidden_layers": 12,
34
  "num_mel_bins": 80,
35
  "pad_token_id": 50257,
36
  "scale_embedding": false,
37
- "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "torch_dtype": "float32",
39
- "transformers_version": "4.26.0.dev0",
40
  "use_cache": false,
 
41
  "vocab_size": 51865
42
  }
 
2
  "_name_or_path": "openai/whisper-small",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
 
12
  50257
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 768,
17
  "decoder_attention_heads": 12,
18
  "decoder_ffn_dim": 3072,
 
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 12,
27
  "eos_token_id": 50257,
28
+ "forced_decoder_ids": [
29
+ [
30
+ 1,
31
+ 50259
32
+ ],
33
+ [
34
+ 2,
35
+ 50359
36
+ ],
37
+ [
38
+ 3,
39
+ 50363
40
+ ]
41
+ ],
42
  "init_std": 0.02,
43
  "is_encoder_decoder": true,
44
+ "mask_feature_length": 10,
45
+ "mask_feature_min_masks": 0,
46
+ "mask_feature_prob": 0.0,
47
+ "mask_time_length": 10,
48
+ "mask_time_min_masks": 2,
49
+ "mask_time_prob": 0.05,
50
  "max_length": 448,
51
  "max_source_positions": 1500,
52
  "max_target_positions": 448,
53
+ "median_filter_width": 7,
54
  "model_type": "whisper",
55
  "num_hidden_layers": 12,
56
  "num_mel_bins": 80,
57
  "pad_token_id": 50257,
58
  "scale_embedding": false,
59
+ "suppress_tokens": [
60
+ 1,
61
+ 2,
62
+ 7,
63
+ 8,
64
+ 9,
65
+ 10,
66
+ 14,
67
+ 25,
68
+ 26,
69
+ 27,
70
+ 28,
71
+ 29,
72
+ 31,
73
+ 58,
74
+ 59,
75
+ 60,
76
+ 61,
77
+ 62,
78
+ 63,
79
+ 90,
80
+ 91,
81
+ 92,
82
+ 93,
83
+ 359,
84
+ 503,
85
+ 522,
86
+ 542,
87
+ 873,
88
+ 893,
89
+ 902,
90
+ 918,
91
+ 922,
92
+ 931,
93
+ 1350,
94
+ 1853,
95
+ 1982,
96
+ 2460,
97
+ 2627,
98
+ 3246,
99
+ 3253,
100
+ 3268,
101
+ 3536,
102
+ 3846,
103
+ 3961,
104
+ 4183,
105
+ 4667,
106
+ 6585,
107
+ 6647,
108
+ 7273,
109
+ 9061,
110
+ 9383,
111
+ 10428,
112
+ 10929,
113
+ 11938,
114
+ 12033,
115
+ 12331,
116
+ 12562,
117
+ 13793,
118
+ 14157,
119
+ 14635,
120
+ 15265,
121
+ 15618,
122
+ 16553,
123
+ 16604,
124
+ 18362,
125
+ 18956,
126
+ 20075,
127
+ 21675,
128
+ 22520,
129
+ 26130,
130
+ 26161,
131
+ 26435,
132
+ 28279,
133
+ 29464,
134
+ 31650,
135
+ 32302,
136
+ 32470,
137
+ 36865,
138
+ 42863,
139
+ 47425,
140
+ 49870,
141
+ 50254,
142
+ 50258,
143
+ 50360,
144
+ 50361,
145
+ 50362
146
+ ],
147
  "torch_dtype": "float32",
148
+ "transformers_version": "4.32.0.dev0",
149
  "use_cache": false,
150
+ "use_weighted_layer_sum": false,
151
  "vocab_size": 51865
152
  }
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e6fe0c6f0eac88f1cc16f01d87deb654bb38ec599d516fbb65887d3c7024429
3
- size 1934161093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ada4ddfb2e769624d4488e914698e699c49a0c17440ca6153e345382f5571ba1
3
+ size 1934280442
checkpoint-1000/preprocessor_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8d9ba95b4975f67a26eebc5d8be7611e4bd0310df8e42d82b8ae604043c15c7
3
- size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c11a28c6e2cecb8650e35cb93c3e324fe2dd8de15cb39c730a62fa71dabf43f
3
+ size 967102729
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d7481ac0ac8b8ec3ef783909326ed9f9d334d4d8de966e3338d7afdfb0de1ba
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee9453d879144260ca40e3cf2f6584e6740a68d1b433953f5d5afba4db36af9
3
  size 14575
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39bbcd98761ccd19ac42eeb53aab539f456575da1fc475983f8133e637dfe1ba
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01a343d67d13eb9987e91240fc97feb943af5b06c8ae7ac1339f4e27b194544
3
  size 627
checkpoint-1000/special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
- "<|iw|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
@@ -124,7 +124,7 @@
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
- "content": "",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
 
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
+ "<|he|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
 
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
+ "content": "<|endoftext|>",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
checkpoint-1000/tokenizer_config.json CHANGED
@@ -9,6 +9,7 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
 
12
  "eos_token": {
13
  "__type": "AddedToken",
14
  "content": "<|endoftext|>",
@@ -19,15 +20,13 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-small",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
26
- "special_tokens_map_file": null,
27
  "tokenizer_class": "WhisperTokenizer",
28
  "unk_token": {
29
  "__type": "AddedToken",
30
- "content": "",
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "clean_up_tokenization_spaces": true,
13
  "eos_token": {
14
  "__type": "AddedToken",
15
  "content": "<|endoftext|>",
 
20
  },
21
  "errors": "replace",
22
  "model_max_length": 1024,
 
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
26
  "tokenizer_class": "WhisperTokenizer",
27
  "unk_token": {
28
  "__type": "AddedToken",
29
+ "content": "<|endoftext|>",
30
  "lstrip": false,
31
  "normalized": true,
32
  "rstrip": false,
checkpoint-1000/trainer_state.json CHANGED
@@ -1,265 +1,276 @@
1
  {
2
- "best_metric": 197.54339821146766,
3
- "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 15.002,
5
  "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
12
- "learning_rate": 4.2e-09,
13
- "loss": 4.0163,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 0.01,
18
- "learning_rate": 9.2e-09,
19
- "loss": 3.9266,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 1.0,
24
- "learning_rate": 1.4199999999999998e-08,
25
- "loss": 3.8847,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 1.01,
30
- "learning_rate": 1.92e-08,
31
- "loss": 3.9306,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 1.01,
36
- "learning_rate": 2.4199999999999998e-08,
37
- "loss": 3.9051,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 2.0,
42
- "learning_rate": 2.9199999999999997e-08,
43
- "loss": 3.8691,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 2.01,
48
- "learning_rate": 3.42e-08,
49
- "loss": 3.7792,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 3.0,
54
- "learning_rate": 3.92e-08,
55
- "loss": 3.7109,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 3.01,
60
- "learning_rate": 4.42e-08,
61
- "loss": 3.6114,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 3.01,
66
- "learning_rate": 4.92e-08,
67
- "loss": 3.475,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 4.0,
72
- "learning_rate": 5.42e-08,
73
- "loss": 3.342,
74
  "step": 275
75
  },
76
  {
77
- "epoch": 4.01,
78
- "learning_rate": 5.9199999999999994e-08,
79
- "loss": 3.1764,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 4.01,
84
- "learning_rate": 6.419999999999999e-08,
85
- "loss": 3.0853,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 5.0,
90
- "learning_rate": 6.92e-08,
91
- "loss": 2.9605,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 5.01,
96
- "learning_rate": 7.419999999999999e-08,
97
- "loss": 2.8524,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 6.0,
102
- "learning_rate": 7.92e-08,
103
- "loss": 2.7282,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 6.01,
108
- "learning_rate": 8.42e-08,
109
- "loss": 2.6852,
110
  "step": 425
111
  },
112
  {
113
- "epoch": 6.01,
114
- "learning_rate": 8.919999999999999e-08,
115
- "loss": 2.5189,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 7.0,
120
- "learning_rate": 9.42e-08,
121
- "loss": 2.4286,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 7.01,
126
- "learning_rate": 9.919999999999999e-08,
127
- "loss": 2.3425,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 7.01,
132
- "learning_rate": 9.953333333333332e-08,
133
- "loss": 2.2299,
 
 
 
 
 
 
 
 
 
 
134
  "step": 525
135
  },
136
  {
137
- "epoch": 8.0,
138
- "learning_rate": 9.897777777777777e-08,
139
- "loss": 2.1313,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 8.01,
144
- "learning_rate": 9.842222222222222e-08,
145
- "loss": 1.967,
146
  "step": 575
147
  },
148
  {
149
- "epoch": 9.0,
150
- "learning_rate": 9.786666666666667e-08,
151
- "loss": 1.8323,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 9.01,
156
- "learning_rate": 9.73111111111111e-08,
157
- "loss": 1.7521,
158
  "step": 625
159
  },
160
  {
161
- "epoch": 9.01,
162
- "learning_rate": 9.675555555555554e-08,
163
- "loss": 1.6346,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 10.0,
168
- "learning_rate": 9.619999999999999e-08,
169
- "loss": 1.5736,
170
  "step": 675
171
  },
172
  {
173
- "epoch": 10.01,
174
- "learning_rate": 9.564444444444444e-08,
175
- "loss": 1.5367,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 10.01,
180
- "learning_rate": 9.508888888888888e-08,
181
- "loss": 1.4935,
182
  "step": 725
183
  },
184
  {
185
- "epoch": 11.0,
186
- "learning_rate": 9.453333333333333e-08,
187
- "loss": 1.4747,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 11.01,
192
- "learning_rate": 9.397777777777778e-08,
193
- "loss": 1.4071,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 12.0,
198
- "learning_rate": 9.342222222222222e-08,
199
- "loss": 1.3692,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 12.01,
204
- "learning_rate": 9.286666666666666e-08,
205
- "loss": 1.383,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 12.01,
210
- "learning_rate": 9.231111111111111e-08,
211
- "loss": 1.3336,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 13.0,
216
- "learning_rate": 9.175555555555555e-08,
217
- "loss": 1.316,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 13.01,
222
- "learning_rate": 9.12e-08,
223
- "loss": 1.2831,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 14.0,
228
- "learning_rate": 9.064444444444444e-08,
229
- "loss": 1.2845,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 14.01,
234
- "learning_rate": 9.008888888888888e-08,
235
- "loss": 1.2834,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 14.01,
240
- "learning_rate": 8.953333333333333e-08,
241
- "loss": 1.2358,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 15.0,
246
- "learning_rate": 8.897777777777778e-08,
247
- "loss": 1.22,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 15.0,
252
- "eval_loss": 1.223775029182434,
253
- "eval_runtime": 3040.7291,
254
- "eval_samples_per_second": 0.701,
255
- "eval_steps_per_second": 0.022,
256
- "eval_wer": 197.54339821146766,
 
257
  "step": 1000
258
  }
259
  ],
260
- "max_steps": 5000,
261
- "num_train_epochs": 9223372036854775807,
262
- "total_flos": 1.84088626864128e+19,
263
  "trial_name": null,
264
  "trial_params": null
265
  }
 
1
  {
2
+ "best_metric": 291.28146333837094,
3
+ "best_model_checkpoint": "./whisper-small-da/checkpoint-1000",
4
+ "epoch": 3.215434083601286,
5
  "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.08,
12
+ "learning_rate": 4.4e-06,
13
+ "loss": 3.4178,
14
  "step": 25
15
  },
16
  {
17
+ "epoch": 0.16,
18
+ "learning_rate": 9.4e-06,
19
+ "loss": 1.5549,
20
  "step": 50
21
  },
22
  {
23
+ "epoch": 0.24,
24
+ "learning_rate": 9.944303797468356e-06,
25
+ "loss": 1.0382,
26
  "step": 75
27
  },
28
  {
29
+ "epoch": 0.32,
30
+ "learning_rate": 9.881012658227848e-06,
31
+ "loss": 0.8101,
32
  "step": 100
33
  },
34
  {
35
+ "epoch": 0.4,
36
+ "learning_rate": 9.817721518987343e-06,
37
+ "loss": 0.6829,
38
  "step": 125
39
  },
40
  {
41
+ "epoch": 0.48,
42
+ "learning_rate": 9.754430379746837e-06,
43
+ "loss": 0.5773,
44
  "step": 150
45
  },
46
  {
47
+ "epoch": 0.56,
48
+ "learning_rate": 9.69113924050633e-06,
49
+ "loss": 0.5259,
50
  "step": 175
51
  },
52
  {
53
+ "epoch": 0.64,
54
+ "learning_rate": 9.627848101265824e-06,
55
+ "loss": 0.5151,
56
  "step": 200
57
  },
58
  {
59
+ "epoch": 0.72,
60
+ "learning_rate": 9.564556962025318e-06,
61
+ "loss": 0.4695,
62
  "step": 225
63
  },
64
  {
65
+ "epoch": 0.8,
66
+ "learning_rate": 9.50126582278481e-06,
67
+ "loss": 0.4829,
68
  "step": 250
69
  },
70
  {
71
+ "epoch": 0.88,
72
+ "learning_rate": 9.437974683544305e-06,
73
+ "loss": 0.4487,
74
  "step": 275
75
  },
76
  {
77
+ "epoch": 0.96,
78
+ "learning_rate": 9.3746835443038e-06,
79
+ "loss": 0.4519,
80
  "step": 300
81
  },
82
  {
83
+ "epoch": 1.05,
84
+ "learning_rate": 9.311392405063292e-06,
85
+ "loss": 0.3214,
86
  "step": 325
87
  },
88
  {
89
+ "epoch": 1.13,
90
+ "learning_rate": 9.248101265822786e-06,
91
+ "loss": 0.2389,
92
  "step": 350
93
  },
94
  {
95
+ "epoch": 1.21,
96
+ "learning_rate": 9.18481012658228e-06,
97
+ "loss": 0.2392,
98
  "step": 375
99
  },
100
  {
101
+ "epoch": 1.29,
102
+ "learning_rate": 9.121518987341773e-06,
103
+ "loss": 0.232,
104
  "step": 400
105
  },
106
  {
107
+ "epoch": 1.37,
108
+ "learning_rate": 9.058227848101267e-06,
109
+ "loss": 0.228,
110
  "step": 425
111
  },
112
  {
113
+ "epoch": 1.45,
114
+ "learning_rate": 8.99493670886076e-06,
115
+ "loss": 0.2359,
116
  "step": 450
117
  },
118
  {
119
+ "epoch": 1.53,
120
+ "learning_rate": 8.931645569620254e-06,
121
+ "loss": 0.2212,
122
  "step": 475
123
  },
124
  {
125
+ "epoch": 1.61,
126
+ "learning_rate": 8.868354430379748e-06,
127
+ "loss": 0.2354,
128
  "step": 500
129
  },
130
  {
131
+ "epoch": 1.61,
132
+ "eval_loss": 0.47343894839286804,
133
+ "eval_runtime": 1085.9479,
134
+ "eval_samples_per_second": 1.989,
135
+ "eval_steps_per_second": 0.124,
136
+ "eval_wer": 302.73073114805356,
137
+ "eval_wer_ortho": 244.70214207536353,
138
+ "step": 500
139
+ },
140
+ {
141
+ "epoch": 1.69,
142
+ "learning_rate": 8.805063291139241e-06,
143
+ "loss": 0.2396,
144
  "step": 525
145
  },
146
  {
147
+ "epoch": 1.77,
148
+ "learning_rate": 8.741772151898735e-06,
149
+ "loss": 0.2306,
150
  "step": 550
151
  },
152
  {
153
+ "epoch": 1.85,
154
+ "learning_rate": 8.67848101265823e-06,
155
+ "loss": 0.2226,
156
  "step": 575
157
  },
158
  {
159
+ "epoch": 1.93,
160
+ "learning_rate": 8.615189873417722e-06,
161
+ "loss": 0.2193,
162
  "step": 600
163
  },
164
  {
165
+ "epoch": 2.01,
166
+ "learning_rate": 8.551898734177216e-06,
167
+ "loss": 0.207,
168
  "step": 625
169
  },
170
  {
171
+ "epoch": 2.09,
172
+ "learning_rate": 8.488607594936709e-06,
173
+ "loss": 0.1026,
174
  "step": 650
175
  },
176
  {
177
+ "epoch": 2.17,
178
+ "learning_rate": 8.425316455696203e-06,
179
+ "loss": 0.1064,
180
  "step": 675
181
  },
182
  {
183
+ "epoch": 2.25,
184
+ "learning_rate": 8.362025316455696e-06,
185
+ "loss": 0.1182,
186
  "step": 700
187
  },
188
  {
189
+ "epoch": 2.33,
190
+ "learning_rate": 8.29873417721519e-06,
191
+ "loss": 0.1093,
192
  "step": 725
193
  },
194
  {
195
+ "epoch": 2.41,
196
+ "learning_rate": 8.235443037974684e-06,
197
+ "loss": 0.1083,
198
  "step": 750
199
  },
200
  {
201
+ "epoch": 2.49,
202
+ "learning_rate": 8.172151898734177e-06,
203
+ "loss": 0.1056,
204
  "step": 775
205
  },
206
  {
207
+ "epoch": 2.57,
208
+ "learning_rate": 8.108860759493671e-06,
209
+ "loss": 0.1087,
210
  "step": 800
211
  },
212
  {
213
+ "epoch": 2.65,
214
+ "learning_rate": 8.045569620253165e-06,
215
+ "loss": 0.1136,
216
  "step": 825
217
  },
218
  {
219
+ "epoch": 2.73,
220
+ "learning_rate": 7.982278481012658e-06,
221
+ "loss": 0.1071,
222
  "step": 850
223
  },
224
  {
225
+ "epoch": 2.81,
226
+ "learning_rate": 7.918987341772152e-06,
227
+ "loss": 0.1132,
228
  "step": 875
229
  },
230
  {
231
+ "epoch": 2.89,
232
+ "learning_rate": 7.855696202531647e-06,
233
+ "loss": 0.0973,
234
  "step": 900
235
  },
236
  {
237
+ "epoch": 2.97,
238
+ "learning_rate": 7.79240506329114e-06,
239
+ "loss": 0.1058,
240
  "step": 925
241
  },
242
  {
243
+ "epoch": 3.05,
244
+ "learning_rate": 7.729113924050633e-06,
245
+ "loss": 0.0772,
246
  "step": 950
247
  },
248
  {
249
+ "epoch": 3.14,
250
+ "learning_rate": 7.665822784810128e-06,
251
+ "loss": 0.0521,
252
  "step": 975
253
  },
254
  {
255
+ "epoch": 3.22,
256
+ "learning_rate": 7.602531645569621e-06,
257
+ "loss": 0.0569,
258
  "step": 1000
259
  },
260
  {
261
+ "epoch": 3.22,
262
+ "eval_loss": 0.4948515295982361,
263
+ "eval_runtime": 1087.2449,
264
+ "eval_samples_per_second": 1.987,
265
+ "eval_steps_per_second": 0.124,
266
+ "eval_wer": 291.28146333837094,
267
+ "eval_wer_ortho": 196.40902694532755,
268
  "step": 1000
269
  }
270
  ],
271
+ "max_steps": 4000,
272
+ "num_train_epochs": 13,
273
+ "total_flos": 4.61044035551232e+18,
274
  "trial_name": null,
275
  "trial_params": null
276
  }
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263926ce703608f7dde9ae8aee16b9d7f67f2e4a55171927bcbcbfd10ecdc647
3
- size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae366487037b6d8e17543ba2ee751902a2bf1d5224020d70db520321aa2ff13e
3
+ size 4155
checkpoint-1000/vocab.json CHANGED
@@ -314,6 +314,7 @@
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
 
317
  "=": 28,
318
  "=\"": 13114,
319
  "=\"#": 34106,
 
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
317
+ "<|endoftext|>": 50257,
318
  "=": 28,
319
  "=\"": 13114,
320
  "=\"#": 34106,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:466f195c5feb6befd08449c9414d8144362e60cc9d660b93c7528174f6f48823
3
  size 967102729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c11a28c6e2cecb8650e35cb93c3e324fe2dd8de15cb39c730a62fa71dabf43f
3
  size 967102729
runs/Jul30_21-45-57_748a1c003589/events.out.tfevents.1690754286.748a1c003589.2285.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f48dd761216626b0d77f0c06cf316e673d65bc2bdddf6107bbc95073860d0fd
3
- size 9347
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468c9de3e7479fb2c93e4eefe9267fba52f90d95eecb1f985a9b58781f715552
3
+ size 12858