mprzibilla commited on
Commit
d1517a6
1 Parent(s): 0f000cc

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2fb35b64f668567811a213cdc1071780c91b6c53c54f8cd0099fa086852d65
3
- size 174443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d184fe2a459d72ab4f17f9800832f7189c8809e50bf6f25e22d726b15e12b61
3
+ size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823124d7b3c57a5d49f76380cfdef993242c2f09379aad159cd616aa33d53e83
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b8af0529522cbae8a484627f1931663045d8e1f9d51e2919c7c377f92e91cf
3
  size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78b452a196ff537133ad21db6358981ae79efa83bf1bb6fa65ea52d4f4f814fb
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c2d6b5f0154fde0c288bdb2ab6d96ec62bdc8d9fe5703db59a28c412b3261fa
3
  size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e27801f4fd62147e483469cdfce5fee96801e62edb4854e281b2150912ec391
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b63cb11ac01032c977cdc85c62fc3b92562156ab5c4471214784ecf2efc9205
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffef3b2283afa05a6f5f6370d44f20f10535bd3da41245a94669aed13c81edc8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e89163b4d12e6402024e5086a1eecd70185a21973d01ffc02723c8dcbaa443b9
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 10050,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,262 +10,24 @@
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 9.832425638877252e-05,
13
- "loss": 95.9188,
14
  "step": 670
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
- "eval_loss": 4.503777027130127,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
- "eval_runtime": 6.9965,
23
- "eval_samples_per_second": 25.727,
24
- "eval_steps_per_second": 3.287,
25
  "step": 670
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.130708001675745e-05,
30
- "loss": 3.6163,
31
- "step": 1340
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 1.0,
36
- "eval_loss": 3.9380106925964355,
37
- "eval_new_wer": 1.0,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 6.9825,
40
- "eval_samples_per_second": 25.779,
41
- "eval_steps_per_second": 3.294,
42
- "step": 1340
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.428990364474236e-05,
47
- "loss": 3.4075,
48
- "step": 2010
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 1.0,
53
- "eval_loss": 3.9024274349212646,
54
- "eval_new_wer": 1.0,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 7.1179,
57
- "eval_samples_per_second": 25.288,
58
- "eval_steps_per_second": 3.231,
59
- "step": 2010
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.727272727272727e-05,
64
- "loss": 3.3579,
65
- "step": 2680
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 1.0,
70
- "eval_loss": 3.84831166267395,
71
- "eval_new_wer": 1.0,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 7.0506,
74
- "eval_samples_per_second": 25.53,
75
- "eval_steps_per_second": 3.262,
76
- "step": 2680
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.02555509007122e-05,
81
- "loss": 3.322,
82
- "step": 3350
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 1.0,
87
- "eval_loss": 3.8365871906280518,
88
- "eval_new_wer": 1.0,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 7.7477,
91
- "eval_samples_per_second": 23.233,
92
- "eval_steps_per_second": 2.969,
93
- "step": 3350
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.323837452869711e-05,
98
- "loss": 3.2953,
99
- "step": 4020
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 1.0,
104
- "eval_loss": 3.838519334793091,
105
- "eval_new_wer": 1.0,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 8.1306,
108
- "eval_samples_per_second": 22.139,
109
- "eval_steps_per_second": 2.829,
110
- "step": 4020
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.622119815668203e-05,
115
- "loss": 3.2672,
116
- "step": 4690
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 1.0,
121
- "eval_loss": 3.821115016937256,
122
- "eval_new_wer": 1.0,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 7.1147,
125
- "eval_samples_per_second": 25.3,
126
- "eval_steps_per_second": 3.233,
127
- "step": 4690
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.920402178466695e-05,
132
- "loss": 3.2428,
133
- "step": 5360
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 1.0,
138
- "eval_loss": 3.810203790664673,
139
- "eval_new_wer": 1.0,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 8.283,
142
- "eval_samples_per_second": 21.731,
143
- "eval_steps_per_second": 2.777,
144
- "step": 5360
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.2186845412651864e-05,
149
- "loss": 3.219,
150
- "step": 6030
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 1.0,
155
- "eval_loss": 3.7927558422088623,
156
- "eval_new_wer": 1.0,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 8.3214,
159
- "eval_samples_per_second": 21.631,
160
- "eval_steps_per_second": 2.764,
161
- "step": 6030
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.516966904063678e-05,
166
- "loss": 3.1953,
167
- "step": 6700
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 1.0,
172
- "eval_loss": 3.765852212905884,
173
- "eval_new_wer": 1.0,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 7.1004,
176
- "eval_samples_per_second": 25.351,
177
- "eval_steps_per_second": 3.239,
178
- "step": 6700
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.8152492668621706e-05,
183
- "loss": 3.1763,
184
- "step": 7370
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 1.0,
189
- "eval_loss": 3.7410292625427246,
190
- "eval_new_wer": 1.0,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 7.0826,
193
- "eval_samples_per_second": 25.414,
194
- "eval_steps_per_second": 3.247,
195
- "step": 7370
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.113531629660662e-05,
200
- "loss": 3.1599,
201
- "step": 8040
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 1.0,
206
- "eval_loss": 3.718080759048462,
207
- "eval_new_wer": 1.0,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 7.1262,
210
- "eval_samples_per_second": 25.259,
211
- "eval_steps_per_second": 3.228,
212
- "step": 8040
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4118139924591539e-05,
217
- "loss": 3.1486,
218
- "step": 8710
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 1.0,
223
- "eval_loss": 3.698702096939087,
224
- "eval_new_wer": 1.0,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 7.1837,
227
- "eval_samples_per_second": 25.057,
228
- "eval_steps_per_second": 3.202,
229
- "step": 8710
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.100963552576456e-06,
234
- "loss": 3.1406,
235
- "step": 9380
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 1.0,
240
- "eval_loss": 3.6906392574310303,
241
- "eval_new_wer": 1.0,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 6.667,
244
- "eval_samples_per_second": 26.999,
245
- "eval_steps_per_second": 3.45,
246
- "step": 9380
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 8.378718056137412e-08,
251
- "loss": 3.1362,
252
- "step": 10050
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 1.0,
257
- "eval_loss": 3.688339948654175,
258
- "eval_new_wer": 1.0,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 6.6264,
261
- "eval_samples_per_second": 27.164,
262
- "eval_steps_per_second": 3.471,
263
- "step": 10050
264
  }
265
  ],
266
  "max_steps": 10050,
267
  "num_train_epochs": 15,
268
- "total_flos": 7.632809641470351e+18,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 670,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 9.832425638877252e-05,
13
+ "loss": 16.2547,
14
  "step": 670
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
+ "eval_loss": 3.4289255142211914,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
+ "eval_runtime": 6.8022,
23
+ "eval_samples_per_second": 26.462,
24
+ "eval_steps_per_second": 3.381,
25
  "step": 670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 10050,
29
  "num_train_epochs": 15,
30
+ "total_flos": 4.9786204745604096e+17,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e1ecc5fa26f638726cb7298e2320d46fa3abc13a5d3f54489ef31ef42a5be
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f57580858b49a35243099dbdd8cae4d75b5c98e2ac927e2270c781b5291665
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823124d7b3c57a5d49f76380cfdef993242c2f09379aad159cd616aa33d53e83
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b8af0529522cbae8a484627f1931663045d8e1f9d51e2919c7c377f92e91cf
3
  size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e1ecc5fa26f638726cb7298e2320d46fa3abc13a5d3f54489ef31ef42a5be
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f57580858b49a35243099dbdd8cae4d75b5c98e2ac927e2270c781b5291665
3
  size 3387