longcld committed
Commit c2b786e
1 Parent(s): 8cb881f

revert checkpoint 8500

Files changed (5)
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +213 -3
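
The five files above are the standard per-checkpoint artifacts written by the Hugging Face transformers Trainer: model weights (pytorch_model.bin), optimizer and LR-scheduler state (optimizer.pt, scheduler.pt), RNG state (rng_state.pth), and the training log/state (trainer_state.json). After this commit, trainer_state.json reports global step 8500 (epoch ≈ 6.02). Below is a minimal sketch for inspecting that state at this revision, assuming huggingface_hub is installed; the repository id is not shown on this page and is left as a placeholder.

```python
import json

from huggingface_hub import hf_hub_download

REPO_ID = "<user>/<repo>"  # placeholder: the repository id is not part of this commit view

# Fetch trainer_state.json as of commit c2b786e and report where training stands.
path = hf_hub_download(repo_id=REPO_ID, filename="trainer_state.json", revision="c2b786e")
with open(path) as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # expected after this commit: 8500 6.024089983172439
```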
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdd143975ad3d690151a2d6876b52021dd76600fddcf276b2fcfaf6726c29735
+oid sha256:e3d699c309551e55aea38a0695d1193822a55b4313f94d57c6dec4e6a72980f0
 size 352532601
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:017d822db885d347639f43c348f2479cb57ca865ba3977e3a340468595ffbfef
+oid sha256:f21c54330bde69593dcf7dd9a914ed642190dcab5e32efc5d8e38b67978270ef
 size 688496379
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ebb5b2b28192f1263b7a83b5fd6852393d35c165b8351517043a7223b0c7ff6
+oid sha256:e0663d71adc38c87b4ce427f03d6b812499a5b8889126cd51ab1cc868860350d
 size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21eae3a4696247cc4c013194b7462574deabe979fe17917077937f09ff448918
+oid sha256:0c6747f392d5a349de5e04bff46be642d98f99cf1525c743a2c739cb62cce9c4
 size 623
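
All four binary files are tracked with Git LFS, so the diff shows only the pointer files: a spec line, the sha256 oid of the blob, and its size in bytes. Here is a small sketch for checking downloaded copies against the new pointers in this commit, assuming the files sit in the current directory.

```python
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# oid / size values copied from the "+" pointer lines above.
expected = {
    "optimizer.pt": ("e3d699c309551e55aea38a0695d1193822a55b4313f94d57c6dec4e6a72980f0", 352532601),
    "pytorch_model.bin": ("f21c54330bde69593dcf7dd9a914ed642190dcab5e32efc5d8e38b67978270ef", 688496379),
    "rng_state.pth": ("e0663d71adc38c87b4ce427f03d6b812499a5b8889126cd51ab1cc868860350d", 14503),
    "scheduler.pt": ("0c6747f392d5a349de5e04bff46be642d98f99cf1525c743a2c739cb62cce9c4", 623),
}

for name, (oid, size) in expected.items():
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    assert sha256_of(name) == oid, f"{name}: sha256 mismatch"
    print(f"{name}: OK")
```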
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.543441679213533,
-  "global_step": 5000,
+  "epoch": 6.024089983172439,
+  "global_step": 8500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -306,11 +306,221 @@
       "learning_rate": 6.456413890857548e-09,
       "loss": 1.9762,
       "step": 5000
+    },
+    {
+      "epoch": 3.61,
+      "learning_rate": 6.385542168674698e-09,
+      "loss": 2.0763,
+      "step": 5100
+    },
+    {
+      "epoch": 3.69,
+      "learning_rate": 6.31467044649185e-09,
+      "loss": 2.065,
+      "step": 5200
+    },
+    {
+      "epoch": 3.76,
+      "learning_rate": 6.243798724309001e-09,
+      "loss": 2.06,
+      "step": 5300
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 6.172927002126152e-09,
+      "loss": 2.0403,
+      "step": 5400
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 6.102055279943303e-09,
+      "loss": 2.0715,
+      "step": 5500
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 6.031183557760453e-09,
+      "loss": 2.0762,
+      "step": 5600
+    },
+    {
+      "epoch": 4.04,
+      "learning_rate": 5.960311835577604e-09,
+      "loss": 2.0365,
+      "step": 5700
+    },
+    {
+      "epoch": 4.11,
+      "learning_rate": 5.889440113394756e-09,
+      "loss": 2.0154,
+      "step": 5800
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 5.818568391211907e-09,
+      "loss": 2.0166,
+      "step": 5900
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 5.747696669029058e-09,
+      "loss": 2.0339,
+      "step": 6000
+    },
+    {
+      "epoch": 4.32,
+      "learning_rate": 5.676824946846208e-09,
+      "loss": 2.0451,
+      "step": 6100
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 5.605953224663359e-09,
+      "loss": 2.023,
+      "step": 6200
+    },
+    {
+      "epoch": 4.46,
+      "learning_rate": 5.53508150248051e-09,
+      "loss": 2.0222,
+      "step": 6300
+    },
+    {
+      "epoch": 4.54,
+      "learning_rate": 5.464209780297661e-09,
+      "loss": 2.0037,
+      "step": 6400
+    },
+    {
+      "epoch": 4.61,
+      "learning_rate": 5.393338058114813e-09,
+      "loss": 2.0116,
+      "step": 6500
+    },
+    {
+      "epoch": 4.68,
+      "learning_rate": 5.322466335931963e-09,
+      "loss": 2.0236,
+      "step": 6600
+    },
+    {
+      "epoch": 4.75,
+      "learning_rate": 5.251594613749114e-09,
+      "loss": 2.0302,
+      "step": 6700
+    },
+    {
+      "epoch": 4.82,
+      "learning_rate": 5.180722891566265e-09,
+      "loss": 2.0236,
+      "step": 6800
+    },
+    {
+      "epoch": 4.89,
+      "learning_rate": 5.109851169383416e-09,
+      "loss": 2.015,
+      "step": 6900
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 5.038979447200567e-09,
+      "loss": 2.0037,
+      "step": 7000
+    },
+    {
+      "epoch": 5.03,
+      "learning_rate": 4.9681077250177184e-09,
+      "loss": 2.0319,
+      "step": 7100
+    },
+    {
+      "epoch": 5.1,
+      "learning_rate": 4.897236002834869e-09,
+      "loss": 2.0271,
+      "step": 7200
+    },
+    {
+      "epoch": 5.17,
+      "learning_rate": 4.82636428065202e-09,
+      "loss": 2.0337,
+      "step": 7300
+    },
+    {
+      "epoch": 5.24,
+      "learning_rate": 4.755492558469171e-09,
+      "loss": 2.0305,
+      "step": 7400
+    },
+    {
+      "epoch": 5.32,
+      "learning_rate": 4.684620836286322e-09,
+      "loss": 2.0261,
+      "step": 7500
+    },
+    {
+      "epoch": 5.39,
+      "learning_rate": 4.613749114103473e-09,
+      "loss": 2.0234,
+      "step": 7600
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 4.542877391920624e-09,
+      "loss": 2.0209,
+      "step": 7700
+    },
+    {
+      "epoch": 5.53,
+      "learning_rate": 4.472005669737775e-09,
+      "loss": 2.0166,
+      "step": 7800
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 4.401133947554926e-09,
+      "loss": 2.0268,
+      "step": 7900
+    },
+    {
+      "epoch": 5.67,
+      "learning_rate": 4.330262225372077e-09,
+      "loss": 2.0199,
+      "step": 8000
+    },
+    {
+      "epoch": 5.74,
+      "learning_rate": 4.259390503189228e-09,
+      "loss": 2.005,
+      "step": 8100
+    },
+    {
+      "epoch": 5.81,
+      "learning_rate": 4.188518781006379e-09,
+      "loss": 2.0096,
+      "step": 8200
+    },
+    {
+      "epoch": 5.88,
+      "learning_rate": 4.117647058823529e-09,
+      "loss": 2.0239,
+      "step": 8300
+    },
+    {
+      "epoch": 5.95,
+      "learning_rate": 4.046775336640681e-09,
+      "loss": 2.0096,
+      "step": 8400
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 3.975903614457831e-09,
+      "loss": 2.0432,
+      "step": 8500
     }
   ],
   "max_steps": 14110,
   "num_train_epochs": 10,
-  "total_flos": 3.580791724112394e+17,
+  "total_flos": 6.087535537968384e+17,
   "trial_name": null,
   "trial_params": null
 }
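
The learning rates logged above are consistent with a linear decay from a base LR of about 1e-08 down to 0 at max_steps = 14110: for example 1e-08 × (14110 − 8500) / 14110 = 3.975903614457831e-09, which matches the value logged at step 8500. Below is a sketch that rechecks every logged entry, assuming a local copy of trainer_state.json; the base LR of 1e-08 is inferred from the logs, not stated anywhere in this diff.

```python
import json
import math

BASE_LR = 1e-08  # inferred: every logged learning_rate matches this base with linear decay

with open("trainer_state.json") as f:  # assumed local copy of the file above
    state = json.load(f)

max_steps = state["max_steps"]  # 14110

for entry in state["log_history"]:
    if "learning_rate" not in entry:
        continue
    expected = BASE_LR * (max_steps - entry["step"]) / max_steps
    assert math.isclose(expected, entry["learning_rate"], rel_tol=1e-6), entry

print(f"all logged learning rates follow a linear decay to 0 at step {max_steps}")
```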