JulienRPA committed on
Commit
5714c5e
1 Parent(s): d05232a

Training in progress, step 9000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4144fbcdc5d4f3fafbf6afc510ff856f789e4829a4858bcd16d4d390e9e5f2d
3
  size 2023671531
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68eb91b9bf98a03eaaadcb8bd0b58614a7003d8477069f541ff537e0ec0bc34b
3
  size 2023671531
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e042bccb9e210670a39bb71562238d3440188919639fe843a2f55fef029e0ed
3
  size 1014236857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a9f23b83b30496ac553a4c926b305dc1444595fad21ac2439436b871476ca3
3
  size 1014236857
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7436c395c71fe4a126e1e8460c76c5d99ba4173ae247ae8f8b8372d61aabdb5
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a515c4e1bc2c8452db42ea9dca43bd2d9ef7f8fe92b3a49a30af214963f24ac
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ace18575c64fb9061a4bdb4187294f04e31e9a65a2e4da680ca78aeef9f963e2
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2fc1f775554587532b2ba4f009351aacf9c868152217b1b6eb9954f3a42aa4b
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.1928721174004195,
5
- "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -399,11 +399,202 @@
399
  "eval_samples_per_second": 2.827,
400
  "eval_steps_per_second": 0.354,
401
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  }
403
  ],
404
  "max_steps": 11448,
405
  "num_train_epochs": 8,
406
- "total_flos": 3958825611574680.0,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.289308176100629,
5
+ "global_step": 9000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
399
  "eval_samples_per_second": 2.827,
400
  "eval_steps_per_second": 0.354,
401
  "step": 6000
402
+ },
403
+ {
404
+ "epoch": 4.26,
405
+ "learning_rate": 2.9883772910147524e-05,
406
+ "loss": 1.5864,
407
+ "step": 6100
408
+ },
409
+ {
410
+ "epoch": 4.33,
411
+ "learning_rate": 2.9324988824318283e-05,
412
+ "loss": 1.5608,
413
+ "step": 6200
414
+ },
415
+ {
416
+ "epoch": 4.4,
417
+ "learning_rate": 2.876620473848905e-05,
418
+ "loss": 1.5144,
419
+ "step": 6300
420
+ },
421
+ {
422
+ "epoch": 4.47,
423
+ "learning_rate": 2.8207420652659816e-05,
424
+ "loss": 1.4582,
425
+ "step": 6400
426
+ },
427
+ {
428
+ "epoch": 4.54,
429
+ "learning_rate": 2.7648636566830576e-05,
430
+ "loss": 1.4793,
431
+ "step": 6500
432
+ },
433
+ {
434
+ "epoch": 4.61,
435
+ "learning_rate": 2.7089852481001342e-05,
436
+ "loss": 1.472,
437
+ "step": 6600
438
+ },
439
+ {
440
+ "epoch": 4.68,
441
+ "learning_rate": 2.653106839517211e-05,
442
+ "loss": 1.4424,
443
+ "step": 6700
444
+ },
445
+ {
446
+ "epoch": 4.75,
447
+ "learning_rate": 2.597228430934287e-05,
448
+ "loss": 1.3779,
449
+ "step": 6800
450
+ },
451
+ {
452
+ "epoch": 4.82,
453
+ "learning_rate": 2.5413500223513638e-05,
454
+ "loss": 1.3611,
455
+ "step": 6900
456
+ },
457
+ {
458
+ "epoch": 4.89,
459
+ "learning_rate": 2.48547161376844e-05,
460
+ "loss": 1.3311,
461
+ "step": 7000
462
+ },
463
+ {
464
+ "epoch": 4.96,
465
+ "learning_rate": 2.4295932051855164e-05,
466
+ "loss": 1.3164,
467
+ "step": 7100
468
+ },
469
+ {
470
+ "epoch": 5.03,
471
+ "learning_rate": 2.373714796602593e-05,
472
+ "loss": 1.2119,
473
+ "step": 7200
474
+ },
475
+ {
476
+ "epoch": 5.1,
477
+ "learning_rate": 2.3178363880196693e-05,
478
+ "loss": 1.1122,
479
+ "step": 7300
480
+ },
481
+ {
482
+ "epoch": 5.17,
483
+ "learning_rate": 2.2619579794367456e-05,
484
+ "loss": 1.1198,
485
+ "step": 7400
486
+ },
487
+ {
488
+ "epoch": 5.24,
489
+ "learning_rate": 2.206079570853822e-05,
490
+ "loss": 1.0416,
491
+ "step": 7500
492
+ },
493
+ {
494
+ "epoch": 5.31,
495
+ "learning_rate": 2.1502011622708985e-05,
496
+ "loss": 1.1042,
497
+ "step": 7600
498
+ },
499
+ {
500
+ "epoch": 5.38,
501
+ "learning_rate": 2.0943227536879752e-05,
502
+ "loss": 1.0715,
503
+ "step": 7700
504
+ },
505
+ {
506
+ "epoch": 5.45,
507
+ "learning_rate": 2.0384443451050515e-05,
508
+ "loss": 1.0815,
509
+ "step": 7800
510
+ },
511
+ {
512
+ "epoch": 5.52,
513
+ "learning_rate": 1.982565936522128e-05,
514
+ "loss": 1.0445,
515
+ "step": 7900
516
+ },
517
+ {
518
+ "epoch": 5.59,
519
+ "learning_rate": 1.9266875279392044e-05,
520
+ "loss": 1.0512,
521
+ "step": 8000
522
+ },
523
+ {
524
+ "epoch": 5.59,
525
+ "eval_bleu": 71.1272,
526
+ "eval_em": 0.0241,
527
+ "eval_gen_len": 46.0672,
528
+ "eval_loss": 1.2382431030273438,
529
+ "eval_runtime": 358.1089,
530
+ "eval_samples_per_second": 3.365,
531
+ "eval_steps_per_second": 0.422,
532
+ "step": 8000
533
+ },
534
+ {
535
+ "epoch": 5.66,
536
+ "learning_rate": 1.8708091193562807e-05,
537
+ "loss": 1.0201,
538
+ "step": 8100
539
+ },
540
+ {
541
+ "epoch": 5.73,
542
+ "learning_rate": 1.8149307107733573e-05,
543
+ "loss": 1.0377,
544
+ "step": 8200
545
+ },
546
+ {
547
+ "epoch": 5.8,
548
+ "learning_rate": 1.7590523021904336e-05,
549
+ "loss": 0.986,
550
+ "step": 8300
551
+ },
552
+ {
553
+ "epoch": 5.87,
554
+ "learning_rate": 1.70317389360751e-05,
555
+ "loss": 1.0244,
556
+ "step": 8400
557
+ },
558
+ {
559
+ "epoch": 5.94,
560
+ "learning_rate": 1.6472954850245866e-05,
561
+ "loss": 0.9654,
562
+ "step": 8500
563
+ },
564
+ {
565
+ "epoch": 6.01,
566
+ "learning_rate": 1.5914170764416632e-05,
567
+ "loss": 0.9454,
568
+ "step": 8600
569
+ },
570
+ {
571
+ "epoch": 6.08,
572
+ "learning_rate": 1.5355386678587395e-05,
573
+ "loss": 0.8179,
574
+ "step": 8700
575
+ },
576
+ {
577
+ "epoch": 6.15,
578
+ "learning_rate": 1.479660259275816e-05,
579
+ "loss": 0.8433,
580
+ "step": 8800
581
+ },
582
+ {
583
+ "epoch": 6.22,
584
+ "learning_rate": 1.4237818506928924e-05,
585
+ "loss": 0.8235,
586
+ "step": 8900
587
+ },
588
+ {
589
+ "epoch": 6.29,
590
+ "learning_rate": 1.3679034421099687e-05,
591
+ "loss": 0.832,
592
+ "step": 9000
593
  }
594
  ],
595
  "max_steps": 11448,
596
  "num_train_epochs": 8,
597
+ "total_flos": 5913945388013520.0,
598
  "trial_name": null,
599
  "trial_params": null
600
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e042bccb9e210670a39bb71562238d3440188919639fe843a2f55fef029e0ed
3
  size 1014236857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a9f23b83b30496ac553a4c926b305dc1444595fad21ac2439436b871476ca3
3
  size 1014236857
runs/May31_16-24-16_71176b7c2bb4/events.out.tfevents.1685551038.71176b7c2bb4.5217.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfb70b57b2babcef0706723afa3bd21e04e88364a0d1cc6d98d2b82193854754
3
- size 10573
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aaf6bccdfb83457b4668835611bb29989514e5d51caead87baa97fe237bc968
3
+ size 15699