IanA commited on
Commit
03aff71
·
1 Parent(s): 15e6c30

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -532
trainer_state.json DELETED
@@ -1,532 +0,0 @@
1
- {
2
- "best_metric": 0.440588116645813,
3
- "best_model_checkpoint": "./results/checkpoint-1500",
4
- "epoch": 1.9920318725099602,
5
- "global_step": 3000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "eval_loss": 0.4968358278274536,
13
- "eval_runtime": 22.8103,
14
- "eval_samples_per_second": 29.373,
15
- "eval_steps_per_second": 29.373,
16
- "step": 50
17
- },
18
- {
19
- "epoch": 0.07,
20
- "eval_loss": 0.4962530732154846,
21
- "eval_runtime": 22.8045,
22
- "eval_samples_per_second": 29.38,
23
- "eval_steps_per_second": 29.38,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 0.1,
28
- "eval_loss": 0.5020118951797485,
29
- "eval_runtime": 22.8025,
30
- "eval_samples_per_second": 29.383,
31
- "eval_steps_per_second": 29.383,
32
- "step": 150
33
- },
34
- {
35
- "epoch": 0.13,
36
- "eval_loss": 0.4915091395378113,
37
- "eval_runtime": 22.7983,
38
- "eval_samples_per_second": 29.388,
39
- "eval_steps_per_second": 29.388,
40
- "step": 200
41
- },
42
- {
43
- "epoch": 0.17,
44
- "eval_loss": 0.48744314908981323,
45
- "eval_runtime": 22.7937,
46
- "eval_samples_per_second": 29.394,
47
- "eval_steps_per_second": 29.394,
48
- "step": 250
49
- },
50
- {
51
- "epoch": 0.2,
52
- "eval_loss": 0.48627975583076477,
53
- "eval_runtime": 22.8,
54
- "eval_samples_per_second": 29.386,
55
- "eval_steps_per_second": 29.386,
56
- "step": 300
57
- },
58
- {
59
- "epoch": 0.23,
60
- "eval_loss": 0.47990691661834717,
61
- "eval_runtime": 22.793,
62
- "eval_samples_per_second": 29.395,
63
- "eval_steps_per_second": 29.395,
64
- "step": 350
65
- },
66
- {
67
- "epoch": 0.27,
68
- "eval_loss": 0.47654837369918823,
69
- "eval_runtime": 22.799,
70
- "eval_samples_per_second": 29.387,
71
- "eval_steps_per_second": 29.387,
72
- "step": 400
73
- },
74
- {
75
- "epoch": 0.3,
76
- "eval_loss": 0.475556343793869,
77
- "eval_runtime": 22.8029,
78
- "eval_samples_per_second": 29.382,
79
- "eval_steps_per_second": 29.382,
80
- "step": 450
81
- },
82
- {
83
- "epoch": 0.33,
84
- "learning_rate": 4.732839838492598e-05,
85
- "loss": 0.5362,
86
- "step": 500
87
- },
88
- {
89
- "epoch": 0.33,
90
- "eval_loss": 0.47196289896965027,
91
- "eval_runtime": 22.7979,
92
- "eval_samples_per_second": 29.389,
93
- "eval_steps_per_second": 29.389,
94
- "step": 500
95
- },
96
- {
97
- "epoch": 0.37,
98
- "eval_loss": 0.470073401927948,
99
- "eval_runtime": 22.8065,
100
- "eval_samples_per_second": 29.378,
101
- "eval_steps_per_second": 29.378,
102
- "step": 550
103
- },
104
- {
105
- "epoch": 0.4,
106
- "eval_loss": 0.4680205285549164,
107
- "eval_runtime": 22.7971,
108
- "eval_samples_per_second": 29.39,
109
- "eval_steps_per_second": 29.39,
110
- "step": 600
111
- },
112
- {
113
- "epoch": 0.43,
114
- "eval_loss": 0.46594592928886414,
115
- "eval_runtime": 22.7927,
116
- "eval_samples_per_second": 29.395,
117
- "eval_steps_per_second": 29.395,
118
- "step": 650
119
- },
120
- {
121
- "epoch": 0.46,
122
- "eval_loss": 0.463152676820755,
123
- "eval_runtime": 22.7891,
124
- "eval_samples_per_second": 29.4,
125
- "eval_steps_per_second": 29.4,
126
- "step": 700
127
- },
128
- {
129
- "epoch": 0.5,
130
- "eval_loss": 0.4629766345024109,
131
- "eval_runtime": 22.7924,
132
- "eval_samples_per_second": 29.396,
133
- "eval_steps_per_second": 29.396,
134
- "step": 750
135
- },
136
- {
137
- "epoch": 0.53,
138
- "eval_loss": 0.45898592472076416,
139
- "eval_runtime": 22.7951,
140
- "eval_samples_per_second": 29.392,
141
- "eval_steps_per_second": 29.392,
142
- "step": 800
143
- },
144
- {
145
- "epoch": 0.56,
146
- "eval_loss": 0.4574301540851593,
147
- "eval_runtime": 22.7963,
148
- "eval_samples_per_second": 29.391,
149
- "eval_steps_per_second": 29.391,
150
- "step": 850
151
- },
152
- {
153
- "epoch": 0.6,
154
- "eval_loss": 0.4551514983177185,
155
- "eval_runtime": 22.7965,
156
- "eval_samples_per_second": 29.39,
157
- "eval_steps_per_second": 29.39,
158
- "step": 900
159
- },
160
- {
161
- "epoch": 0.63,
162
- "eval_loss": 0.4532181918621063,
163
- "eval_runtime": 22.7913,
164
- "eval_samples_per_second": 29.397,
165
- "eval_steps_per_second": 29.397,
166
- "step": 950
167
- },
168
- {
169
- "epoch": 0.66,
170
- "learning_rate": 4.396366083445492e-05,
171
- "loss": 0.4595,
172
- "step": 1000
173
- },
174
- {
175
- "epoch": 0.66,
176
- "eval_loss": 0.4534400999546051,
177
- "eval_runtime": 22.7971,
178
- "eval_samples_per_second": 29.39,
179
- "eval_steps_per_second": 29.39,
180
- "step": 1000
181
- },
182
- {
183
- "epoch": 0.7,
184
- "eval_loss": 0.4516228139400482,
185
- "eval_runtime": 22.8066,
186
- "eval_samples_per_second": 29.378,
187
- "eval_steps_per_second": 29.378,
188
- "step": 1050
189
- },
190
- {
191
- "epoch": 0.73,
192
- "eval_loss": 0.4516217112541199,
193
- "eval_runtime": 22.7934,
194
- "eval_samples_per_second": 29.394,
195
- "eval_steps_per_second": 29.394,
196
- "step": 1100
197
- },
198
- {
199
- "epoch": 0.76,
200
- "eval_loss": 0.4490440785884857,
201
- "eval_runtime": 22.7982,
202
- "eval_samples_per_second": 29.388,
203
- "eval_steps_per_second": 29.388,
204
- "step": 1150
205
- },
206
- {
207
- "epoch": 0.8,
208
- "eval_loss": 0.44825297594070435,
209
- "eval_runtime": 22.8047,
210
- "eval_samples_per_second": 29.38,
211
- "eval_steps_per_second": 29.38,
212
- "step": 1200
213
- },
214
- {
215
- "epoch": 0.83,
216
- "eval_loss": 0.44741538166999817,
217
- "eval_runtime": 22.8005,
218
- "eval_samples_per_second": 29.385,
219
- "eval_steps_per_second": 29.385,
220
- "step": 1250
221
- },
222
- {
223
- "epoch": 0.86,
224
- "eval_loss": 0.44648081064224243,
225
- "eval_runtime": 22.7982,
226
- "eval_samples_per_second": 29.388,
227
- "eval_steps_per_second": 29.388,
228
- "step": 1300
229
- },
230
- {
231
- "epoch": 0.9,
232
- "eval_loss": 0.44202762842178345,
233
- "eval_runtime": 22.7988,
234
- "eval_samples_per_second": 29.388,
235
- "eval_steps_per_second": 29.388,
236
- "step": 1350
237
- },
238
- {
239
- "epoch": 0.93,
240
- "eval_loss": 0.442158043384552,
241
- "eval_runtime": 22.7966,
242
- "eval_samples_per_second": 29.39,
243
- "eval_steps_per_second": 29.39,
244
- "step": 1400
245
- },
246
- {
247
- "epoch": 0.96,
248
- "eval_loss": 0.44166651368141174,
249
- "eval_runtime": 22.7983,
250
- "eval_samples_per_second": 29.388,
251
- "eval_steps_per_second": 29.388,
252
- "step": 1450
253
- },
254
- {
255
- "epoch": 1.0,
256
- "learning_rate": 4.0598923283983853e-05,
257
- "loss": 0.4383,
258
- "step": 1500
259
- },
260
- {
261
- "epoch": 1.0,
262
- "eval_loss": 0.440588116645813,
263
- "eval_runtime": 22.7995,
264
- "eval_samples_per_second": 29.387,
265
- "eval_steps_per_second": 29.387,
266
- "step": 1500
267
- },
268
- {
269
- "epoch": 1.03,
270
- "eval_loss": 0.45270833373069763,
271
- "eval_runtime": 22.8208,
272
- "eval_samples_per_second": 29.359,
273
- "eval_steps_per_second": 29.359,
274
- "step": 1550
275
- },
276
- {
277
- "epoch": 1.06,
278
- "eval_loss": 0.45460081100463867,
279
- "eval_runtime": 22.7988,
280
- "eval_samples_per_second": 29.388,
281
- "eval_steps_per_second": 29.388,
282
- "step": 1600
283
- },
284
- {
285
- "epoch": 1.1,
286
- "eval_loss": 0.4546465575695038,
287
- "eval_runtime": 22.8024,
288
- "eval_samples_per_second": 29.383,
289
- "eval_steps_per_second": 29.383,
290
- "step": 1650
291
- },
292
- {
293
- "epoch": 1.13,
294
- "eval_loss": 0.45767539739608765,
295
- "eval_runtime": 22.8006,
296
- "eval_samples_per_second": 29.385,
297
- "eval_steps_per_second": 29.385,
298
- "step": 1700
299
- },
300
- {
301
- "epoch": 1.16,
302
- "eval_loss": 0.4575343728065491,
303
- "eval_runtime": 22.8053,
304
- "eval_samples_per_second": 29.379,
305
- "eval_steps_per_second": 29.379,
306
- "step": 1750
307
- },
308
- {
309
- "epoch": 1.2,
310
- "eval_loss": 0.4562443494796753,
311
- "eval_runtime": 22.7983,
312
- "eval_samples_per_second": 29.388,
313
- "eval_steps_per_second": 29.388,
314
- "step": 1800
315
- },
316
- {
317
- "epoch": 1.23,
318
- "eval_loss": 0.4556874632835388,
319
- "eval_runtime": 22.7968,
320
- "eval_samples_per_second": 29.39,
321
- "eval_steps_per_second": 29.39,
322
- "step": 1850
323
- },
324
- {
325
- "epoch": 1.26,
326
- "eval_loss": 0.4551508128643036,
327
- "eval_runtime": 22.8001,
328
- "eval_samples_per_second": 29.386,
329
- "eval_steps_per_second": 29.386,
330
- "step": 1900
331
- },
332
- {
333
- "epoch": 1.29,
334
- "eval_loss": 0.4562254846096039,
335
- "eval_runtime": 22.7972,
336
- "eval_samples_per_second": 29.39,
337
- "eval_steps_per_second": 29.39,
338
- "step": 1950
339
- },
340
- {
341
- "epoch": 1.33,
342
- "learning_rate": 3.723418573351279e-05,
343
- "loss": 0.2994,
344
- "step": 2000
345
- },
346
- {
347
- "epoch": 1.33,
348
- "eval_loss": 0.4507925510406494,
349
- "eval_runtime": 22.8043,
350
- "eval_samples_per_second": 29.38,
351
- "eval_steps_per_second": 29.38,
352
- "step": 2000
353
- },
354
- {
355
- "epoch": 1.36,
356
- "eval_loss": 0.45181065797805786,
357
- "eval_runtime": 22.8163,
358
- "eval_samples_per_second": 29.365,
359
- "eval_steps_per_second": 29.365,
360
- "step": 2050
361
- },
362
- {
363
- "epoch": 1.39,
364
- "eval_loss": 0.45148006081581116,
365
- "eval_runtime": 22.8045,
366
- "eval_samples_per_second": 29.38,
367
- "eval_steps_per_second": 29.38,
368
- "step": 2100
369
- },
370
- {
371
- "epoch": 1.43,
372
- "eval_loss": 0.45323121547698975,
373
- "eval_runtime": 22.805,
374
- "eval_samples_per_second": 29.38,
375
- "eval_steps_per_second": 29.38,
376
- "step": 2150
377
- },
378
- {
379
- "epoch": 1.46,
380
- "eval_loss": 0.45210888981819153,
381
- "eval_runtime": 22.8053,
382
- "eval_samples_per_second": 29.379,
383
- "eval_steps_per_second": 29.379,
384
- "step": 2200
385
- },
386
- {
387
- "epoch": 1.49,
388
- "eval_loss": 0.4499363601207733,
389
- "eval_runtime": 22.8014,
390
- "eval_samples_per_second": 29.384,
391
- "eval_steps_per_second": 29.384,
392
- "step": 2250
393
- },
394
- {
395
- "epoch": 1.53,
396
- "eval_loss": 0.4511328935623169,
397
- "eval_runtime": 22.8034,
398
- "eval_samples_per_second": 29.382,
399
- "eval_steps_per_second": 29.382,
400
- "step": 2300
401
- },
402
- {
403
- "epoch": 1.56,
404
- "eval_loss": 0.44887685775756836,
405
- "eval_runtime": 22.8034,
406
- "eval_samples_per_second": 29.382,
407
- "eval_steps_per_second": 29.382,
408
- "step": 2350
409
- },
410
- {
411
- "epoch": 1.59,
412
- "eval_loss": 0.4465619921684265,
413
- "eval_runtime": 22.8019,
414
- "eval_samples_per_second": 29.383,
415
- "eval_steps_per_second": 29.383,
416
- "step": 2400
417
- },
418
- {
419
- "epoch": 1.63,
420
- "eval_loss": 0.4489704966545105,
421
- "eval_runtime": 22.801,
422
- "eval_samples_per_second": 29.385,
423
- "eval_steps_per_second": 29.385,
424
- "step": 2450
425
- },
426
- {
427
- "epoch": 1.66,
428
- "learning_rate": 3.386944818304172e-05,
429
- "loss": 0.2946,
430
- "step": 2500
431
- },
432
- {
433
- "epoch": 1.66,
434
- "eval_loss": 0.4510194659233093,
435
- "eval_runtime": 22.7961,
436
- "eval_samples_per_second": 29.391,
437
- "eval_steps_per_second": 29.391,
438
- "step": 2500
439
- },
440
- {
441
- "epoch": 1.69,
442
- "eval_loss": 0.4499521851539612,
443
- "eval_runtime": 22.8095,
444
- "eval_samples_per_second": 29.374,
445
- "eval_steps_per_second": 29.374,
446
- "step": 2550
447
- },
448
- {
449
- "epoch": 1.73,
450
- "eval_loss": 0.44297194480895996,
451
- "eval_runtime": 22.8114,
452
- "eval_samples_per_second": 29.371,
453
- "eval_steps_per_second": 29.371,
454
- "step": 2600
455
- },
456
- {
457
- "epoch": 1.76,
458
- "eval_loss": 0.4441739618778229,
459
- "eval_runtime": 22.8127,
460
- "eval_samples_per_second": 29.37,
461
- "eval_steps_per_second": 29.37,
462
- "step": 2650
463
- },
464
- {
465
- "epoch": 1.79,
466
- "eval_loss": 0.44600966572761536,
467
- "eval_runtime": 22.8114,
468
- "eval_samples_per_second": 29.371,
469
- "eval_steps_per_second": 29.371,
470
- "step": 2700
471
- },
472
- {
473
- "epoch": 1.83,
474
- "eval_loss": 0.4461658000946045,
475
- "eval_runtime": 22.8112,
476
- "eval_samples_per_second": 29.372,
477
- "eval_steps_per_second": 29.372,
478
- "step": 2750
479
- },
480
- {
481
- "epoch": 1.86,
482
- "eval_loss": 0.4427674114704132,
483
- "eval_runtime": 22.8085,
484
- "eval_samples_per_second": 29.375,
485
- "eval_steps_per_second": 29.375,
486
- "step": 2800
487
- },
488
- {
489
- "epoch": 1.89,
490
- "eval_loss": 0.44410833716392517,
491
- "eval_runtime": 22.8091,
492
- "eval_samples_per_second": 29.374,
493
- "eval_steps_per_second": 29.374,
494
- "step": 2850
495
- },
496
- {
497
- "epoch": 1.93,
498
- "eval_loss": 0.4429542124271393,
499
- "eval_runtime": 22.8068,
500
- "eval_samples_per_second": 29.377,
501
- "eval_steps_per_second": 29.377,
502
- "step": 2900
503
- },
504
- {
505
- "epoch": 1.96,
506
- "eval_loss": 0.44093453884124756,
507
- "eval_runtime": 22.8017,
508
- "eval_samples_per_second": 29.384,
509
- "eval_steps_per_second": 29.384,
510
- "step": 2950
511
- },
512
- {
513
- "epoch": 1.99,
514
- "learning_rate": 3.050471063257066e-05,
515
- "loss": 0.2961,
516
- "step": 3000
517
- },
518
- {
519
- "epoch": 1.99,
520
- "eval_loss": 0.44067710638046265,
521
- "eval_runtime": 22.8039,
522
- "eval_samples_per_second": 29.381,
523
- "eval_steps_per_second": 29.381,
524
- "step": 3000
525
- }
526
- ],
527
- "max_steps": 7530,
528
- "num_train_epochs": 5,
529
- "total_flos": 5.2553298345984e+16,
530
- "trial_name": null,
531
- "trial_params": null
532
- }