jimregan committed on
Commit
9506fb5
1 Parent(s): d568a81
Files changed (3) hide show
  1. optimizer.pt +0 -3
  2. scheduler.pt +0 -3
  3. trainer_state.json +0 -394
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e0fadf6bcb1f30106e97b1487a9ce13a59d541a1de164dec82e44118301582a
3
- size 2490372359
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:803dc2c7dbe95fa79644abcf677114107a5f278f5f4d0b7535a7d91cd4fff228
3
- size 623
 
 
 
trainer_state.json DELETED
@@ -1,394 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 11.297071129707113,
5
- "global_step": 10800,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.42,
12
- "learning_rate": 0.00023999999999999998,
13
- "loss": 6.6124,
14
- "step": 400
15
- },
16
- {
17
- "epoch": 0.42,
18
- "eval_loss": 3.268874168395996,
19
- "eval_runtime": 47.7642,
20
- "eval_samples_per_second": 10.594,
21
- "eval_wer": 0.9499561018437226,
22
- "step": 400
23
- },
24
- {
25
- "epoch": 0.84,
26
- "learning_rate": 0.0002924547283702213,
27
- "loss": Infinity,
28
- "step": 800
29
- },
30
- {
31
- "epoch": 0.84,
32
- "eval_loss": 1.5890493392944336,
33
- "eval_runtime": 46.9733,
34
- "eval_samples_per_second": 10.772,
35
- "eval_wer": 0.9242025168276266,
36
- "step": 800
37
- },
38
- {
39
- "epoch": 1.26,
40
- "learning_rate": 0.0002823943661971831,
41
- "loss": NaN,
42
- "step": 1200
43
- },
44
- {
45
- "epoch": 1.26,
46
- "eval_loss": 1.399864673614502,
47
- "eval_runtime": 49.6014,
48
- "eval_samples_per_second": 10.201,
49
- "eval_wer": 0.8653789874158618,
50
- "step": 1200
51
- },
52
- {
53
- "epoch": 1.67,
54
- "learning_rate": 0.00027233400402414484,
55
- "loss": NaN,
56
- "step": 1600
57
- },
58
- {
59
- "epoch": 1.67,
60
- "eval_loss": 1.1861594915390015,
61
- "eval_runtime": 45.83,
62
- "eval_samples_per_second": 11.041,
63
- "eval_wer": 0.8627450980392157,
64
- "step": 1600
65
- },
66
- {
67
- "epoch": 2.09,
68
- "learning_rate": 0.0002622736418511066,
69
- "loss": NaN,
70
- "step": 2000
71
- },
72
- {
73
- "epoch": 2.09,
74
- "eval_loss": 1.1273273229599,
75
- "eval_runtime": 47.5676,
76
- "eval_samples_per_second": 10.638,
77
- "eval_wer": 0.8124085455077553,
78
- "step": 2000
79
- },
80
- {
81
- "epoch": 2.51,
82
- "learning_rate": 0.0002522132796780684,
83
- "loss": NaN,
84
- "step": 2400
85
- },
86
- {
87
- "epoch": 2.51,
88
- "eval_loss": 1.0673868656158447,
89
- "eval_runtime": 46.2685,
90
- "eval_samples_per_second": 10.936,
91
- "eval_wer": 0.8141644717588528,
92
- "step": 2400
93
- },
94
- {
95
- "epoch": 2.93,
96
- "learning_rate": 0.00024215291750503016,
97
- "loss": NaN,
98
- "step": 2800
99
- },
100
- {
101
- "epoch": 2.93,
102
- "eval_loss": 1.012987732887268,
103
- "eval_runtime": 46.0398,
104
- "eval_samples_per_second": 10.991,
105
- "eval_wer": 0.8112379280070237,
106
- "step": 2800
107
- },
108
- {
109
- "epoch": 3.35,
110
- "learning_rate": 0.00023209255533199194,
111
- "loss": NaN,
112
- "step": 3200
113
- },
114
- {
115
- "epoch": 3.35,
116
- "eval_loss": 1.0372620820999146,
117
- "eval_runtime": 47.1969,
118
- "eval_samples_per_second": 10.721,
119
- "eval_wer": 0.7860696517412935,
120
- "step": 3200
121
- },
122
- {
123
- "epoch": 3.77,
124
- "learning_rate": 0.00022203219315895372,
125
- "loss": NaN,
126
- "step": 3600
127
- },
128
- {
129
- "epoch": 3.77,
130
- "eval_loss": 0.9511893391609192,
131
- "eval_runtime": 46.2728,
132
- "eval_samples_per_second": 10.935,
133
- "eval_wer": 0.7960199004975125,
134
- "step": 3600
135
- },
136
- {
137
- "epoch": 4.18,
138
- "learning_rate": 0.00021197183098591548,
139
- "loss": NaN,
140
- "step": 4000
141
- },
142
- {
143
- "epoch": 4.18,
144
- "eval_loss": 0.9626357555389404,
145
- "eval_runtime": 46.2615,
146
- "eval_samples_per_second": 10.938,
147
- "eval_wer": 0.7573895229733685,
148
- "step": 4000
149
- },
150
- {
151
- "epoch": 4.6,
152
- "learning_rate": 0.00020191146881287726,
153
- "loss": NaN,
154
- "step": 4400
155
- },
156
- {
157
- "epoch": 4.6,
158
- "eval_loss": 0.9149179458618164,
159
- "eval_runtime": 47.2032,
160
- "eval_samples_per_second": 10.72,
161
- "eval_wer": 0.755340942347088,
162
- "step": 4400
163
- },
164
- {
165
- "epoch": 5.02,
166
- "learning_rate": 0.000191851106639839,
167
- "loss": NaN,
168
- "step": 4800
169
- },
170
- {
171
- "epoch": 5.02,
172
- "eval_loss": 0.945462703704834,
173
- "eval_runtime": 47.6471,
174
- "eval_samples_per_second": 10.62,
175
- "eval_wer": 0.7515364354697103,
176
- "step": 4800
177
- },
178
- {
179
- "epoch": 5.44,
180
- "learning_rate": 0.0001817907444668008,
181
- "loss": NaN,
182
- "step": 5200
183
- },
184
- {
185
- "epoch": 5.44,
186
- "eval_loss": 0.9152739644050598,
187
- "eval_runtime": 48.1688,
188
- "eval_samples_per_second": 10.505,
189
- "eval_wer": 0.746268656716418,
190
- "step": 5200
191
- },
192
- {
193
- "epoch": 5.86,
194
- "learning_rate": 0.00017173038229376258,
195
- "loss": NaN,
196
- "step": 5600
197
- },
198
- {
199
- "epoch": 5.86,
200
- "eval_loss": 0.9364785552024841,
201
- "eval_runtime": 47.6477,
202
- "eval_samples_per_second": 10.62,
203
- "eval_wer": 0.7260755048287972,
204
- "step": 5600
205
- },
206
- {
207
- "epoch": 6.28,
208
- "learning_rate": 0.00016167002012072433,
209
- "loss": NaN,
210
- "step": 6000
211
- },
212
- {
213
- "epoch": 6.28,
214
- "eval_loss": 0.9098660349845886,
215
- "eval_runtime": 46.4104,
216
- "eval_samples_per_second": 10.903,
217
- "eval_wer": 0.7380743342112964,
218
- "step": 6000
219
- },
220
- {
221
- "epoch": 6.69,
222
- "learning_rate": 0.0001516096579476861,
223
- "loss": NaN,
224
- "step": 6400
225
- },
226
- {
227
- "epoch": 6.69,
228
- "eval_loss": 0.9048272371292114,
229
- "eval_runtime": 46.2425,
230
- "eval_samples_per_second": 10.942,
231
- "eval_wer": 0.7205150717003219,
232
- "step": 6400
233
- },
234
- {
235
- "epoch": 7.11,
236
- "learning_rate": 0.00014154929577464787,
237
- "loss": NaN,
238
- "step": 6800
239
- },
240
- {
241
- "epoch": 7.11,
242
- "eval_loss": 0.8889923691749573,
243
- "eval_runtime": 46.6074,
244
- "eval_samples_per_second": 10.857,
245
- "eval_wer": 0.7172958735733099,
246
- "step": 6800
247
- },
248
- {
249
- "epoch": 7.53,
250
- "learning_rate": 0.00013148893360160965,
251
- "loss": NaN,
252
- "step": 7200
253
- },
254
- {
255
- "epoch": 7.53,
256
- "eval_loss": 0.870766818523407,
257
- "eval_runtime": 47.1858,
258
- "eval_samples_per_second": 10.724,
259
- "eval_wer": 0.7091015510681885,
260
- "step": 7200
261
- },
262
- {
263
- "epoch": 7.95,
264
- "learning_rate": 0.00012142857142857142,
265
- "loss": NaN,
266
- "step": 7600
267
- },
268
- {
269
- "epoch": 7.95,
270
- "eval_loss": 0.8733641505241394,
271
- "eval_runtime": 47.6751,
272
- "eval_samples_per_second": 10.614,
273
- "eval_wer": 0.6971027216856892,
274
- "step": 7600
275
- },
276
- {
277
- "epoch": 8.37,
278
- "learning_rate": 0.00011136820925553318,
279
- "loss": NaN,
280
- "step": 8000
281
- },
282
- {
283
- "epoch": 8.37,
284
- "eval_loss": 0.88031005859375,
285
- "eval_runtime": 47.4892,
286
- "eval_samples_per_second": 10.655,
287
- "eval_wer": 0.6947614866842259,
288
- "step": 8000
289
- },
290
- {
291
- "epoch": 8.79,
292
- "learning_rate": 0.00010130784708249495,
293
- "loss": NaN,
294
- "step": 8400
295
- },
296
- {
297
- "epoch": 8.79,
298
- "eval_loss": 0.8849018216133118,
299
- "eval_runtime": 47.5503,
300
- "eval_samples_per_second": 10.641,
301
- "eval_wer": 0.6930055604331284,
302
- "step": 8400
303
- },
304
- {
305
- "epoch": 9.21,
306
- "learning_rate": 9.124748490945673e-05,
307
- "loss": NaN,
308
- "step": 8800
309
- },
310
- {
311
- "epoch": 9.21,
312
- "eval_loss": 0.8564967513084412,
313
- "eval_runtime": 46.9348,
314
- "eval_samples_per_second": 10.781,
315
- "eval_wer": 0.6903716710564823,
316
- "step": 8800
317
- },
318
- {
319
- "epoch": 9.62,
320
- "learning_rate": 8.11871227364185e-05,
321
- "loss": NaN,
322
- "step": 9200
323
- },
324
- {
325
- "epoch": 9.62,
326
- "eval_loss": 0.875277042388916,
327
- "eval_runtime": 46.126,
328
- "eval_samples_per_second": 10.97,
329
- "eval_wer": 0.6871524729294703,
330
- "step": 9200
331
- },
332
- {
333
- "epoch": 10.04,
334
- "learning_rate": 7.112676056338028e-05,
335
- "loss": NaN,
336
- "step": 9600
337
- },
338
- {
339
- "epoch": 10.04,
340
- "eval_loss": 0.836927056312561,
341
- "eval_runtime": 47.5639,
342
- "eval_samples_per_second": 10.638,
343
- "eval_wer": 0.6877377816798361,
344
- "step": 9600
345
- },
346
- {
347
- "epoch": 10.46,
348
- "learning_rate": 6.106639839034204e-05,
349
- "loss": NaN,
350
- "step": 10000
351
- },
352
- {
353
- "epoch": 10.46,
354
- "eval_loss": 0.8403338193893433,
355
- "eval_runtime": 47.7824,
356
- "eval_samples_per_second": 10.59,
357
- "eval_wer": 0.6798361135498976,
358
- "step": 10000
359
- },
360
- {
361
- "epoch": 10.88,
362
- "learning_rate": 5.100603621730382e-05,
363
- "loss": NaN,
364
- "step": 10400
365
- },
366
- {
367
- "epoch": 10.88,
368
- "eval_loss": 0.8528462648391724,
369
- "eval_runtime": 47.7751,
370
- "eval_samples_per_second": 10.591,
371
- "eval_wer": 0.6865671641791045,
372
- "step": 10400
373
- },
374
- {
375
- "epoch": 11.3,
376
- "learning_rate": 4.094567404426559e-05,
377
- "loss": NaN,
378
- "step": 10800
379
- },
380
- {
381
- "epoch": 11.3,
382
- "eval_loss": 0.8435601592063904,
383
- "eval_runtime": 47.6913,
384
- "eval_samples_per_second": 10.61,
385
- "eval_wer": 0.6777875329236172,
386
- "step": 10800
387
- }
388
- ],
389
- "max_steps": 12428,
390
- "num_train_epochs": 13,
391
- "total_flos": 2.5666394845611536e+19,
392
- "trial_name": null,
393
- "trial_params": null
394
- }