Hossein Ghorbanfekr committed on
Commit
8e06946
1 Parent(s): c953183

removed training files

Browse files
Files changed (3) hide show
  1. optimizer.pt +0 -3
  2. scheduler.pt +0 -3
  3. trainer_state.json +0 -719
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc38eb5cfa7c84be805ac8c79b8b60660b46dc0d382a7d55fdb108077a2cc07e
3
- size 873520581
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a65bcf181720c4bb1063c4bcf5f9008975b112f9a87b7b1edff2a7384fade15c
3
- size 627
 
 
 
 
trainer_state.json DELETED
@@ -1,719 +0,0 @@
1
- {
2
- "best_metric": 0.675777792930603,
3
- "best_model_checkpoint": "trainer/checkpoint-372428",
4
- "epoch": 50.0,
5
- "eval_steps": 500,
6
- "global_step": 396200,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "learning_rate": 2e-05,
14
- "loss": 2.2513,
15
- "step": 7924
16
- },
17
- {
18
- "epoch": 1.0,
19
- "eval_loss": 1.2939835786819458,
20
- "eval_runtime": 37.6044,
21
- "eval_samples_per_second": 749.168,
22
- "eval_steps_per_second": 23.428,
23
- "step": 7924
24
- },
25
- {
26
- "epoch": 2.0,
27
- "learning_rate": 4e-05,
28
- "loss": 1.3752,
29
- "step": 15848
30
- },
31
- {
32
- "epoch": 2.0,
33
- "eval_loss": 1.1416432857513428,
34
- "eval_runtime": 37.8169,
35
- "eval_samples_per_second": 744.958,
36
- "eval_steps_per_second": 23.296,
37
- "step": 15848
38
- },
39
- {
40
- "epoch": 3.0,
41
- "learning_rate": 4.9473684210526315e-05,
42
- "loss": 1.2436,
43
- "step": 23772
44
- },
45
- {
46
- "epoch": 3.0,
47
- "eval_loss": 1.0676215887069702,
48
- "eval_runtime": 37.7184,
49
- "eval_samples_per_second": 746.904,
50
- "eval_steps_per_second": 23.357,
51
- "step": 23772
52
- },
53
- {
54
- "epoch": 4.0,
55
- "learning_rate": 4.842105263157895e-05,
56
- "loss": 1.1458,
57
- "step": 31696
58
- },
59
- {
60
- "epoch": 4.0,
61
- "eval_loss": 0.9978280067443848,
62
- "eval_runtime": 36.3705,
63
- "eval_samples_per_second": 774.584,
64
- "eval_steps_per_second": 24.223,
65
- "step": 31696
66
- },
67
- {
68
- "epoch": 5.0,
69
- "learning_rate": 4.736842105263158e-05,
70
- "loss": 1.0841,
71
- "step": 39620
72
- },
73
- {
74
- "epoch": 5.0,
75
- "eval_loss": 0.9508912563323975,
76
- "eval_runtime": 36.5102,
77
- "eval_samples_per_second": 771.62,
78
- "eval_steps_per_second": 24.13,
79
- "step": 39620
80
- },
81
- {
82
- "epoch": 6.0,
83
- "learning_rate": 4.6315789473684214e-05,
84
- "loss": 1.0386,
85
- "step": 47544
86
- },
87
- {
88
- "epoch": 6.0,
89
- "eval_loss": 0.9356514811515808,
90
- "eval_runtime": 36.323,
91
- "eval_samples_per_second": 775.598,
92
- "eval_steps_per_second": 24.255,
93
- "step": 47544
94
- },
95
- {
96
- "epoch": 7.0,
97
- "learning_rate": 4.5263157894736846e-05,
98
- "loss": 0.9989,
99
- "step": 55468
100
- },
101
- {
102
- "epoch": 7.0,
103
- "eval_loss": 0.9180737137794495,
104
- "eval_runtime": 36.4427,
105
- "eval_samples_per_second": 773.05,
106
- "eval_steps_per_second": 24.175,
107
- "step": 55468
108
- },
109
- {
110
- "epoch": 8.0,
111
- "learning_rate": 4.421052631578947e-05,
112
- "loss": 0.9686,
113
- "step": 63392
114
- },
115
- {
116
- "epoch": 8.0,
117
- "eval_loss": 0.8952454328536987,
118
- "eval_runtime": 36.1822,
119
- "eval_samples_per_second": 778.615,
120
- "eval_steps_per_second": 24.349,
121
- "step": 63392
122
- },
123
- {
124
- "epoch": 9.0,
125
- "learning_rate": 4.3157894736842105e-05,
126
- "loss": 0.9426,
127
- "step": 71316
128
- },
129
- {
130
- "epoch": 9.0,
131
- "eval_loss": 0.8876378536224365,
132
- "eval_runtime": 36.5182,
133
- "eval_samples_per_second": 771.451,
134
- "eval_steps_per_second": 24.125,
135
- "step": 71316
136
- },
137
- {
138
- "epoch": 10.0,
139
- "learning_rate": 4.210526315789474e-05,
140
- "loss": 0.9198,
141
- "step": 79240
142
- },
143
- {
144
- "epoch": 10.0,
145
- "eval_loss": 0.8818822503089905,
146
- "eval_runtime": 36.3065,
147
- "eval_samples_per_second": 775.949,
148
- "eval_steps_per_second": 24.266,
149
- "step": 79240
150
- },
151
- {
152
- "epoch": 11.0,
153
- "learning_rate": 4.105263157894737e-05,
154
- "loss": 0.9053,
155
- "step": 87164
156
- },
157
- {
158
- "epoch": 11.0,
159
- "eval_loss": 0.8349147439002991,
160
- "eval_runtime": 36.3771,
161
- "eval_samples_per_second": 774.444,
162
- "eval_steps_per_second": 24.219,
163
- "step": 87164
164
- },
165
- {
166
- "epoch": 12.0,
167
- "learning_rate": 4e-05,
168
- "loss": 0.8881,
169
- "step": 95088
170
- },
171
- {
172
- "epoch": 12.0,
173
- "eval_loss": 0.8407663702964783,
174
- "eval_runtime": 36.4047,
175
- "eval_samples_per_second": 773.857,
176
- "eval_steps_per_second": 24.2,
177
- "step": 95088
178
- },
179
- {
180
- "epoch": 13.0,
181
- "learning_rate": 3.894736842105263e-05,
182
- "loss": 0.8704,
183
- "step": 103012
184
- },
185
- {
186
- "epoch": 13.0,
187
- "eval_loss": 0.8339666128158569,
188
- "eval_runtime": 36.314,
189
- "eval_samples_per_second": 775.79,
190
- "eval_steps_per_second": 24.261,
191
- "step": 103012
192
- },
193
- {
194
- "epoch": 14.0,
195
- "learning_rate": 3.789473684210527e-05,
196
- "loss": 0.8533,
197
- "step": 110936
198
- },
199
- {
200
- "epoch": 14.0,
201
- "eval_loss": 0.8264057636260986,
202
- "eval_runtime": 36.2648,
203
- "eval_samples_per_second": 776.841,
204
- "eval_steps_per_second": 24.294,
205
- "step": 110936
206
- },
207
- {
208
- "epoch": 15.0,
209
- "learning_rate": 3.6842105263157895e-05,
210
- "loss": 0.8418,
211
- "step": 118860
212
- },
213
- {
214
- "epoch": 15.0,
215
- "eval_loss": 0.8100990653038025,
216
- "eval_runtime": 36.5073,
217
- "eval_samples_per_second": 771.682,
218
- "eval_steps_per_second": 24.132,
219
- "step": 118860
220
- },
221
- {
222
- "epoch": 16.0,
223
- "learning_rate": 3.578947368421053e-05,
224
- "loss": 0.8307,
225
- "step": 126784
226
- },
227
- {
228
- "epoch": 16.0,
229
- "eval_loss": 0.8106787800788879,
230
- "eval_runtime": 36.2767,
231
- "eval_samples_per_second": 776.586,
232
- "eval_steps_per_second": 24.286,
233
- "step": 126784
234
- },
235
- {
236
- "epoch": 17.0,
237
- "learning_rate": 3.473684210526316e-05,
238
- "loss": 0.815,
239
- "step": 134708
240
- },
241
- {
242
- "epoch": 17.0,
243
- "eval_loss": 0.7991083264350891,
244
- "eval_runtime": 36.2711,
245
- "eval_samples_per_second": 776.707,
246
- "eval_steps_per_second": 24.289,
247
- "step": 134708
248
- },
249
- {
250
- "epoch": 18.0,
251
- "learning_rate": 3.368421052631579e-05,
252
- "loss": 0.8015,
253
- "step": 142632
254
- },
255
- {
256
- "epoch": 18.0,
257
- "eval_loss": 0.7952774167060852,
258
- "eval_runtime": 37.5556,
259
- "eval_samples_per_second": 750.142,
260
- "eval_steps_per_second": 23.459,
261
- "step": 142632
262
- },
263
- {
264
- "epoch": 19.0,
265
- "learning_rate": 3.2631578947368426e-05,
266
- "loss": 0.7894,
267
- "step": 150556
268
- },
269
- {
270
- "epoch": 19.0,
271
- "eval_loss": 0.7720882296562195,
272
- "eval_runtime": 36.9729,
273
- "eval_samples_per_second": 761.964,
274
- "eval_steps_per_second": 23.828,
275
- "step": 150556
276
- },
277
- {
278
- "epoch": 20.0,
279
- "learning_rate": 3.157894736842105e-05,
280
- "loss": 0.7789,
281
- "step": 158480
282
- },
283
- {
284
- "epoch": 20.0,
285
- "eval_loss": 0.7802249789237976,
286
- "eval_runtime": 37.1298,
287
- "eval_samples_per_second": 758.744,
288
- "eval_steps_per_second": 23.728,
289
- "step": 158480
290
- },
291
- {
292
- "epoch": 21.0,
293
- "learning_rate": 3.0526315789473684e-05,
294
- "loss": 0.7678,
295
- "step": 166404
296
- },
297
- {
298
- "epoch": 21.0,
299
- "eval_loss": 0.7610885500907898,
300
- "eval_runtime": 36.2573,
301
- "eval_samples_per_second": 777.002,
302
- "eval_steps_per_second": 24.299,
303
- "step": 166404
304
- },
305
- {
306
- "epoch": 22.0,
307
- "learning_rate": 2.9473684210526314e-05,
308
- "loss": 0.7534,
309
- "step": 174328
310
- },
311
- {
312
- "epoch": 22.0,
313
- "eval_loss": 0.782088577747345,
314
- "eval_runtime": 37.4893,
315
- "eval_samples_per_second": 751.467,
316
- "eval_steps_per_second": 23.5,
317
- "step": 174328
318
- },
319
- {
320
- "epoch": 23.0,
321
- "learning_rate": 2.842105263157895e-05,
322
- "loss": 0.7502,
323
- "step": 182252
324
- },
325
- {
326
- "epoch": 23.0,
327
- "eval_loss": 0.7673630714416504,
328
- "eval_runtime": 36.6312,
329
- "eval_samples_per_second": 769.07,
330
- "eval_steps_per_second": 24.051,
331
- "step": 182252
332
- },
333
- {
334
- "epoch": 24.0,
335
- "learning_rate": 2.7368421052631583e-05,
336
- "loss": 0.7345,
337
- "step": 190176
338
- },
339
- {
340
- "epoch": 24.0,
341
- "eval_loss": 0.7627705335617065,
342
- "eval_runtime": 37.0149,
343
- "eval_samples_per_second": 761.099,
344
- "eval_steps_per_second": 23.801,
345
- "step": 190176
346
- },
347
- {
348
- "epoch": 25.0,
349
- "learning_rate": 2.6315789473684212e-05,
350
- "loss": 0.7264,
351
- "step": 198100
352
- },
353
- {
354
- "epoch": 25.0,
355
- "eval_loss": 0.7561437487602234,
356
- "eval_runtime": 37.2677,
357
- "eval_samples_per_second": 755.937,
358
- "eval_steps_per_second": 23.64,
359
- "step": 198100
360
- },
361
- {
362
- "epoch": 26.0,
363
- "learning_rate": 2.5263157894736845e-05,
364
- "loss": 0.7142,
365
- "step": 206024
366
- },
367
- {
368
- "epoch": 26.0,
369
- "eval_loss": 0.7509896159172058,
370
- "eval_runtime": 37.3161,
371
- "eval_samples_per_second": 754.956,
372
- "eval_steps_per_second": 23.609,
373
- "step": 206024
374
- },
375
- {
376
- "epoch": 27.0,
377
- "learning_rate": 2.4210526315789474e-05,
378
- "loss": 0.7018,
379
- "step": 213948
380
- },
381
- {
382
- "epoch": 27.0,
383
- "eval_loss": 0.7464780807495117,
384
- "eval_runtime": 36.7538,
385
- "eval_samples_per_second": 766.505,
386
- "eval_steps_per_second": 23.97,
387
- "step": 213948
388
- },
389
- {
390
- "epoch": 28.0,
391
- "learning_rate": 2.3157894736842107e-05,
392
- "loss": 0.6897,
393
- "step": 221872
394
- },
395
- {
396
- "epoch": 28.0,
397
- "eval_loss": 0.7344001531600952,
398
- "eval_runtime": 37.0652,
399
- "eval_samples_per_second": 760.066,
400
- "eval_steps_per_second": 23.769,
401
- "step": 221872
402
- },
403
- {
404
- "epoch": 29.0,
405
- "learning_rate": 2.2105263157894736e-05,
406
- "loss": 0.682,
407
- "step": 229796
408
- },
409
- {
410
- "epoch": 29.0,
411
- "eval_loss": 0.7429642081260681,
412
- "eval_runtime": 37.2732,
413
- "eval_samples_per_second": 755.824,
414
- "eval_steps_per_second": 23.636,
415
- "step": 229796
416
- },
417
- {
418
- "epoch": 30.0,
419
- "learning_rate": 2.105263157894737e-05,
420
- "loss": 0.6754,
421
- "step": 237720
422
- },
423
- {
424
- "epoch": 30.0,
425
- "eval_loss": 0.7481978535652161,
426
- "eval_runtime": 37.0222,
427
- "eval_samples_per_second": 760.949,
428
- "eval_steps_per_second": 23.797,
429
- "step": 237720
430
- },
431
- {
432
- "epoch": 31.0,
433
- "learning_rate": 2e-05,
434
- "loss": 0.6679,
435
- "step": 245644
436
- },
437
- {
438
- "epoch": 31.0,
439
- "eval_loss": 0.7224923968315125,
440
- "eval_runtime": 37.2439,
441
- "eval_samples_per_second": 756.419,
442
- "eval_steps_per_second": 23.655,
443
- "step": 245644
444
- },
445
- {
446
- "epoch": 32.0,
447
- "learning_rate": 1.8947368421052634e-05,
448
- "loss": 0.6566,
449
- "step": 253568
450
- },
451
- {
452
- "epoch": 32.0,
453
- "eval_loss": 0.715844452381134,
454
- "eval_runtime": 36.5457,
455
- "eval_samples_per_second": 770.871,
456
- "eval_steps_per_second": 24.107,
457
- "step": 253568
458
- },
459
- {
460
- "epoch": 33.0,
461
- "learning_rate": 1.7894736842105264e-05,
462
- "loss": 0.6492,
463
- "step": 261492
464
- },
465
- {
466
- "epoch": 33.0,
467
- "eval_loss": 0.7234057188034058,
468
- "eval_runtime": 36.3822,
469
- "eval_samples_per_second": 774.335,
470
- "eval_steps_per_second": 24.215,
471
- "step": 261492
472
- },
473
- {
474
- "epoch": 34.0,
475
- "learning_rate": 1.6842105263157896e-05,
476
- "loss": 0.642,
477
- "step": 269416
478
- },
479
- {
480
- "epoch": 34.0,
481
- "eval_loss": 0.7132413983345032,
482
- "eval_runtime": 36.3793,
483
- "eval_samples_per_second": 774.396,
484
- "eval_steps_per_second": 24.217,
485
- "step": 269416
486
- },
487
- {
488
- "epoch": 35.0,
489
- "learning_rate": 1.5789473684210526e-05,
490
- "loss": 0.6342,
491
- "step": 277340
492
- },
493
- {
494
- "epoch": 35.0,
495
- "eval_loss": 0.7007443904876709,
496
- "eval_runtime": 36.3032,
497
- "eval_samples_per_second": 776.02,
498
- "eval_steps_per_second": 24.268,
499
- "step": 277340
500
- },
501
- {
502
- "epoch": 36.0,
503
- "learning_rate": 1.4736842105263157e-05,
504
- "loss": 0.6236,
505
- "step": 285264
506
- },
507
- {
508
- "epoch": 36.0,
509
- "eval_loss": 0.69706791639328,
510
- "eval_runtime": 37.0755,
511
- "eval_samples_per_second": 759.854,
512
- "eval_steps_per_second": 23.762,
513
- "step": 285264
514
- },
515
- {
516
- "epoch": 37.0,
517
- "learning_rate": 1.3684210526315791e-05,
518
- "loss": 0.6146,
519
- "step": 293188
520
- },
521
- {
522
- "epoch": 37.0,
523
- "eval_loss": 0.6900755167007446,
524
- "eval_runtime": 36.5007,
525
- "eval_samples_per_second": 771.822,
526
- "eval_steps_per_second": 24.137,
527
- "step": 293188
528
- },
529
- {
530
- "epoch": 38.0,
531
- "learning_rate": 1.2631578947368422e-05,
532
- "loss": 0.6087,
533
- "step": 301112
534
- },
535
- {
536
- "epoch": 38.0,
537
- "eval_loss": 0.6962341666221619,
538
- "eval_runtime": 36.6631,
539
- "eval_samples_per_second": 768.402,
540
- "eval_steps_per_second": 24.03,
541
- "step": 301112
542
- },
543
- {
544
- "epoch": 39.0,
545
- "learning_rate": 1.1578947368421053e-05,
546
- "loss": 0.5989,
547
- "step": 309036
548
- },
549
- {
550
- "epoch": 39.0,
551
- "eval_loss": 0.7045713067054749,
552
- "eval_runtime": 36.6758,
553
- "eval_samples_per_second": 768.136,
554
- "eval_steps_per_second": 24.021,
555
- "step": 309036
556
- },
557
- {
558
- "epoch": 40.0,
559
- "learning_rate": 1.0526315789473684e-05,
560
- "loss": 0.5924,
561
- "step": 316960
562
- },
563
- {
564
- "epoch": 40.0,
565
- "eval_loss": 0.6984645128250122,
566
- "eval_runtime": 36.7394,
567
- "eval_samples_per_second": 766.807,
568
- "eval_steps_per_second": 23.98,
569
- "step": 316960
570
- },
571
- {
572
- "epoch": 41.0,
573
- "learning_rate": 9.473684210526317e-06,
574
- "loss": 0.5827,
575
- "step": 324884
576
- },
577
- {
578
- "epoch": 41.0,
579
- "eval_loss": 0.6994604468345642,
580
- "eval_runtime": 36.8305,
581
- "eval_samples_per_second": 764.91,
582
- "eval_steps_per_second": 23.92,
583
- "step": 324884
584
- },
585
- {
586
- "epoch": 42.0,
587
- "learning_rate": 8.421052631578948e-06,
588
- "loss": 0.5731,
589
- "step": 332808
590
- },
591
- {
592
- "epoch": 42.0,
593
- "eval_loss": 0.6827645301818848,
594
- "eval_runtime": 36.7809,
595
- "eval_samples_per_second": 765.941,
596
- "eval_steps_per_second": 23.953,
597
- "step": 332808
598
- },
599
- {
600
- "epoch": 43.0,
601
- "learning_rate": 7.3684210526315784e-06,
602
- "loss": 0.5718,
603
- "step": 340732
604
- },
605
- {
606
- "epoch": 43.0,
607
- "eval_loss": 0.7020975947380066,
608
- "eval_runtime": 36.6627,
609
- "eval_samples_per_second": 768.411,
610
- "eval_steps_per_second": 24.03,
611
- "step": 340732
612
- },
613
- {
614
- "epoch": 44.0,
615
- "learning_rate": 6.315789473684211e-06,
616
- "loss": 0.5663,
617
- "step": 348656
618
- },
619
- {
620
- "epoch": 44.0,
621
- "eval_loss": 0.6774910092353821,
622
- "eval_runtime": 36.8941,
623
- "eval_samples_per_second": 763.59,
624
- "eval_steps_per_second": 23.879,
625
- "step": 348656
626
- },
627
- {
628
- "epoch": 45.0,
629
- "learning_rate": 5.263157894736842e-06,
630
- "loss": 0.5575,
631
- "step": 356580
632
- },
633
- {
634
- "epoch": 45.0,
635
- "eval_loss": 0.683965265750885,
636
- "eval_runtime": 36.8405,
637
- "eval_samples_per_second": 764.703,
638
- "eval_steps_per_second": 23.914,
639
- "step": 356580
640
- },
641
- {
642
- "epoch": 46.0,
643
- "learning_rate": 4.210526315789474e-06,
644
- "loss": 0.5524,
645
- "step": 364504
646
- },
647
- {
648
- "epoch": 46.0,
649
- "eval_loss": 0.6812577247619629,
650
- "eval_runtime": 36.3196,
651
- "eval_samples_per_second": 775.669,
652
- "eval_steps_per_second": 24.257,
653
- "step": 364504
654
- },
655
- {
656
- "epoch": 47.0,
657
- "learning_rate": 3.1578947368421056e-06,
658
- "loss": 0.5499,
659
- "step": 372428
660
- },
661
- {
662
- "epoch": 47.0,
663
- "eval_loss": 0.675777792930603,
664
- "eval_runtime": 36.282,
665
- "eval_samples_per_second": 776.473,
666
- "eval_steps_per_second": 24.282,
667
- "step": 372428
668
- },
669
- {
670
- "epoch": 48.0,
671
- "learning_rate": 2.105263157894737e-06,
672
- "loss": 0.541,
673
- "step": 380352
674
- },
675
- {
676
- "epoch": 48.0,
677
- "eval_loss": 0.678913950920105,
678
- "eval_runtime": 36.4409,
679
- "eval_samples_per_second": 773.087,
680
- "eval_steps_per_second": 24.176,
681
- "step": 380352
682
- },
683
- {
684
- "epoch": 49.0,
685
- "learning_rate": 1.0526315789473685e-06,
686
- "loss": 0.5372,
687
- "step": 388276
688
- },
689
- {
690
- "epoch": 49.0,
691
- "eval_loss": 0.6796761155128479,
692
- "eval_runtime": 36.5663,
693
- "eval_samples_per_second": 770.435,
694
- "eval_steps_per_second": 24.093,
695
- "step": 388276
696
- },
697
- {
698
- "epoch": 50.0,
699
- "learning_rate": 0.0,
700
- "loss": 0.535,
701
- "step": 396200
702
- },
703
- {
704
- "epoch": 50.0,
705
- "eval_loss": 0.6791965365409851,
706
- "eval_runtime": 36.555,
707
- "eval_samples_per_second": 770.674,
708
- "eval_steps_per_second": 24.101,
709
- "step": 396200
710
- }
711
- ],
712
- "logging_steps": 500,
713
- "max_steps": 396200,
714
- "num_train_epochs": 50,
715
- "save_steps": 500,
716
- "total_flos": 7.079826371258392e+17,
717
- "trial_name": null,
718
- "trial_params": null
719
- }