hariniiiiiiiiii commited on
Commit
9429c9d
1 Parent(s): 5575e14

Training in progress, step 1000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c36d72b05089e24fbdb20f58edda2d589f745b59929423cb750fe3542e13898
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6d34c98f3791a3faa787960bb3a83c840754af10c0771225b4641fc4876571
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e65a378301a7811cf0ef09053f95a65dcc2ca531c1d476902b59315ede5aeab0
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05ed8aadfcb52667b608ea047bd58bd244bcded7029940cddd4dbb9db150031
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c62b4adb9e39700227a9dfe709b9be3404fb7da10290b22d1a55586d48740a30
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2ff9b22bc08f364a63197a3c58a1819f82aa700f010edb44f336a90cc9fb87
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee5d8512ea69f36ac3d76c8a2d1063766890ab26719024996676d7b75548920c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7ec99b1bee8f2349cfd0142e944266b1486c4d9544af390e2e3f4a57486848
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9863148810257675,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -366,11 +366,371 @@
366
  "eval_samples_per_second": 0.237,
367
  "eval_steps_per_second": 0.237,
368
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
  ],
371
  "max_steps": 3542,
372
  "num_train_epochs": 7,
373
- "total_flos": 1.2123850515499008e+16,
374
  "trial_name": null,
375
  "trial_params": null
376
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9744791024534583,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
366
  "eval_samples_per_second": 0.237,
367
  "eval_steps_per_second": 0.237,
368
  "step": 500
369
+ },
370
+ {
371
+ "epoch": 1.01,
372
+ "learning_rate": 0.00043916570104287375,
373
+ "loss": 0.279,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 1.03,
378
+ "learning_rate": 0.00043771726535341834,
379
+ "loss": 0.272,
380
+ "step": 520
381
+ },
382
+ {
383
+ "epoch": 1.05,
384
+ "learning_rate": 0.00043626882966396293,
385
+ "loss": 0.2272,
386
+ "step": 530
387
+ },
388
+ {
389
+ "epoch": 1.07,
390
+ "learning_rate": 0.0004348203939745076,
391
+ "loss": 0.2495,
392
+ "step": 540
393
+ },
394
+ {
395
+ "epoch": 1.09,
396
+ "learning_rate": 0.00043337195828505216,
397
+ "loss": 0.1965,
398
+ "step": 550
399
+ },
400
+ {
401
+ "epoch": 1.11,
402
+ "learning_rate": 0.00043192352259559675,
403
+ "loss": 0.2364,
404
+ "step": 560
405
+ },
406
+ {
407
+ "epoch": 1.13,
408
+ "learning_rate": 0.0004304750869061414,
409
+ "loss": 0.2478,
410
+ "step": 570
411
+ },
412
+ {
413
+ "epoch": 1.15,
414
+ "learning_rate": 0.000429026651216686,
415
+ "loss": 0.2046,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 1.17,
420
+ "learning_rate": 0.0004275782155272306,
421
+ "loss": 0.2661,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 1.19,
426
+ "learning_rate": 0.0004261297798377752,
427
+ "loss": 0.2041,
428
+ "step": 600
429
+ },
430
+ {
431
+ "epoch": 1.19,
432
+ "eval_loss": 0.9551488757133484,
433
+ "eval_rouge1": 0.09038461538461538,
434
+ "eval_rouge2": 0.05051948051948052,
435
+ "eval_rougeL": 0.09679487179487178,
436
+ "eval_rougeLsum": 0.09871794871794871,
437
+ "eval_runtime": 89.0139,
438
+ "eval_samples_per_second": 0.225,
439
+ "eval_steps_per_second": 0.225,
440
+ "step": 600
441
+ },
442
+ {
443
+ "epoch": 1.21,
444
+ "learning_rate": 0.0004246813441483198,
445
+ "loss": 0.2816,
446
+ "step": 610
447
+ },
448
+ {
449
+ "epoch": 1.22,
450
+ "learning_rate": 0.0004232329084588644,
451
+ "loss": 0.1904,
452
+ "step": 620
453
+ },
454
+ {
455
+ "epoch": 1.24,
456
+ "learning_rate": 0.00042178447276940904,
457
+ "loss": 0.21,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 1.26,
462
+ "learning_rate": 0.0004203360370799537,
463
+ "loss": 0.1662,
464
+ "step": 640
465
+ },
466
+ {
467
+ "epoch": 1.28,
468
+ "learning_rate": 0.0004188876013904983,
469
+ "loss": 0.3052,
470
+ "step": 650
471
+ },
472
+ {
473
+ "epoch": 1.3,
474
+ "learning_rate": 0.0004174391657010429,
475
+ "loss": 0.1744,
476
+ "step": 660
477
+ },
478
+ {
479
+ "epoch": 1.32,
480
+ "learning_rate": 0.0004159907300115875,
481
+ "loss": 0.288,
482
+ "step": 670
483
+ },
484
+ {
485
+ "epoch": 1.34,
486
+ "learning_rate": 0.0004145422943221321,
487
+ "loss": 0.2303,
488
+ "step": 680
489
+ },
490
+ {
491
+ "epoch": 1.36,
492
+ "learning_rate": 0.00041309385863267674,
493
+ "loss": 0.2866,
494
+ "step": 690
495
+ },
496
+ {
497
+ "epoch": 1.38,
498
+ "learning_rate": 0.00041164542294322133,
499
+ "loss": 0.238,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 1.38,
504
+ "eval_loss": 0.9423090219497681,
505
+ "eval_rouge1": 0.1,
506
+ "eval_rouge2": 0.07291666666666667,
507
+ "eval_rougeL": 0.1,
508
+ "eval_rougeLsum": 0.10333333333333335,
509
+ "eval_runtime": 88.0456,
510
+ "eval_samples_per_second": 0.227,
511
+ "eval_steps_per_second": 0.227,
512
+ "step": 700
513
+ },
514
+ {
515
+ "epoch": 1.4,
516
+ "learning_rate": 0.0004101969872537659,
517
+ "loss": 0.2342,
518
+ "step": 710
519
+ },
520
+ {
521
+ "epoch": 1.42,
522
+ "learning_rate": 0.00040874855156431057,
523
+ "loss": 0.2716,
524
+ "step": 720
525
+ },
526
+ {
527
+ "epoch": 1.44,
528
+ "learning_rate": 0.00040730011587485516,
529
+ "loss": 0.2453,
530
+ "step": 730
531
+ },
532
+ {
533
+ "epoch": 1.46,
534
+ "learning_rate": 0.00040585168018539974,
535
+ "loss": 0.2313,
536
+ "step": 740
537
+ },
538
+ {
539
+ "epoch": 1.48,
540
+ "learning_rate": 0.0004044032444959444,
541
+ "loss": 0.2306,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 1.5,
546
+ "learning_rate": 0.000402954808806489,
547
+ "loss": 0.1773,
548
+ "step": 760
549
+ },
550
+ {
551
+ "epoch": 1.52,
552
+ "learning_rate": 0.00040150637311703357,
553
+ "loss": 0.1957,
554
+ "step": 770
555
+ },
556
+ {
557
+ "epoch": 1.54,
558
+ "learning_rate": 0.0004000579374275782,
559
+ "loss": 0.2758,
560
+ "step": 780
561
+ },
562
+ {
563
+ "epoch": 1.56,
564
+ "learning_rate": 0.0003986095017381228,
565
+ "loss": 0.2649,
566
+ "step": 790
567
+ },
568
+ {
569
+ "epoch": 1.58,
570
+ "learning_rate": 0.0003971610660486675,
571
+ "loss": 0.275,
572
+ "step": 800
573
+ },
574
+ {
575
+ "epoch": 1.58,
576
+ "eval_loss": 0.9273136258125305,
577
+ "eval_rouge1": 0.14666666666666667,
578
+ "eval_rouge2": 0.10977272727272727,
579
+ "eval_rougeL": 0.15038461538461537,
580
+ "eval_rougeLsum": 0.15153846153846154,
581
+ "eval_runtime": 87.8017,
582
+ "eval_samples_per_second": 0.228,
583
+ "eval_steps_per_second": 0.228,
584
+ "step": 800
585
+ },
586
+ {
587
+ "epoch": 1.6,
588
+ "learning_rate": 0.0003957126303592121,
589
+ "loss": 0.2102,
590
+ "step": 810
591
+ },
592
+ {
593
+ "epoch": 1.62,
594
+ "learning_rate": 0.0003942641946697567,
595
+ "loss": 0.2146,
596
+ "step": 820
597
+ },
598
+ {
599
+ "epoch": 1.64,
600
+ "learning_rate": 0.0003928157589803013,
601
+ "loss": 0.1918,
602
+ "step": 830
603
+ },
604
+ {
605
+ "epoch": 1.66,
606
+ "learning_rate": 0.0003913673232908459,
607
+ "loss": 0.2512,
608
+ "step": 840
609
+ },
610
+ {
611
+ "epoch": 1.68,
612
+ "learning_rate": 0.0003899188876013905,
613
+ "loss": 0.2499,
614
+ "step": 850
615
+ },
616
+ {
617
+ "epoch": 1.7,
618
+ "learning_rate": 0.00038847045191193515,
619
+ "loss": 0.228,
620
+ "step": 860
621
+ },
622
+ {
623
+ "epoch": 1.72,
624
+ "learning_rate": 0.00038702201622247974,
625
+ "loss": 0.2507,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 1.74,
630
+ "learning_rate": 0.0003855735805330243,
631
+ "loss": 0.1735,
632
+ "step": 880
633
+ },
634
+ {
635
+ "epoch": 1.76,
636
+ "learning_rate": 0.00038412514484356897,
637
+ "loss": 0.2752,
638
+ "step": 890
639
+ },
640
+ {
641
+ "epoch": 1.78,
642
+ "learning_rate": 0.00038267670915411356,
643
+ "loss": 0.2379,
644
+ "step": 900
645
+ },
646
+ {
647
+ "epoch": 1.78,
648
+ "eval_loss": 0.9023244976997375,
649
+ "eval_rouge1": 0.1,
650
+ "eval_rouge2": 0.08333333333333333,
651
+ "eval_rougeL": 0.1,
652
+ "eval_rougeLsum": 0.1,
653
+ "eval_runtime": 80.7798,
654
+ "eval_samples_per_second": 0.248,
655
+ "eval_steps_per_second": 0.248,
656
+ "step": 900
657
+ },
658
+ {
659
+ "epoch": 1.8,
660
+ "learning_rate": 0.00038122827346465815,
661
+ "loss": 0.1993,
662
+ "step": 910
663
+ },
664
+ {
665
+ "epoch": 1.82,
666
+ "learning_rate": 0.0003797798377752028,
667
+ "loss": 0.2058,
668
+ "step": 920
669
+ },
670
+ {
671
+ "epoch": 1.84,
672
+ "learning_rate": 0.0003783314020857474,
673
+ "loss": 0.2675,
674
+ "step": 930
675
+ },
676
+ {
677
+ "epoch": 1.86,
678
+ "learning_rate": 0.00037688296639629197,
679
+ "loss": 0.1928,
680
+ "step": 940
681
+ },
682
+ {
683
+ "epoch": 1.88,
684
+ "learning_rate": 0.0003754345307068366,
685
+ "loss": 0.1903,
686
+ "step": 950
687
+ },
688
+ {
689
+ "epoch": 1.9,
690
+ "learning_rate": 0.00037398609501738126,
691
+ "loss": 0.1967,
692
+ "step": 960
693
+ },
694
+ {
695
+ "epoch": 1.92,
696
+ "learning_rate": 0.00037253765932792585,
697
+ "loss": 0.2044,
698
+ "step": 970
699
+ },
700
+ {
701
+ "epoch": 1.94,
702
+ "learning_rate": 0.0003710892236384705,
703
+ "loss": 0.2027,
704
+ "step": 980
705
+ },
706
+ {
707
+ "epoch": 1.95,
708
+ "learning_rate": 0.0003696407879490151,
709
+ "loss": 0.25,
710
+ "step": 990
711
+ },
712
+ {
713
+ "epoch": 1.97,
714
+ "learning_rate": 0.00036819235225955967,
715
+ "loss": 0.2896,
716
+ "step": 1000
717
+ },
718
+ {
719
+ "epoch": 1.97,
720
+ "eval_loss": 0.9184179306030273,
721
+ "eval_rouge1": 0.19,
722
+ "eval_rouge2": 0.1,
723
+ "eval_rougeL": 0.18893939393939393,
724
+ "eval_rougeLsum": 0.19848484848484846,
725
+ "eval_runtime": 81.9559,
726
+ "eval_samples_per_second": 0.244,
727
+ "eval_steps_per_second": 0.244,
728
+ "step": 1000
729
  }
730
  ],
731
  "max_steps": 3542,
732
  "num_train_epochs": 7,
733
+ "total_flos": 2.429607036460032e+16,
734
  "trial_name": null,
735
  "trial_params": null
736
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02a84220d3a59b7a986ddfb062b9ea9e9a45a0ff5e854f1a86b7727559daa3d2
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660f1225e692fbbda687422d3532879d3c116f23c4ac0ae767265d9fdf03511c
3
  size 3643
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e65a378301a7811cf0ef09053f95a65dcc2ca531c1d476902b59315ede5aeab0
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05ed8aadfcb52667b608ea047bd58bd244bcded7029940cddd4dbb9db150031
3
  size 2329702453
runs/Feb08_05-23-30_74bc69b4becb/1675839811.8360035/events.out.tfevents.1675839811.74bc69b4becb.290.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b19e19e49a5585b95a1cf2ea41121eed756dbef1dea86815786e8e86ad97ca
3
+ size 5952
runs/Feb08_05-23-30_74bc69b4becb/events.out.tfevents.1675836783.74bc69b4becb.290.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a4dba25a0bde9038c15a6af7ee58649a984a45ecfc3765c4e05acf774193a45
3
- size 10366
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af8ff3619c5e7959921946e1c8f07ab19f490618fe1eadf1ad8ab63dae6cadde
3
+ size 10523
runs/Feb08_05-23-30_74bc69b4becb/events.out.tfevents.1675839811.74bc69b4becb.290.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e57fd7a05f538e83f65e0f7f6d3626b5c64c0e3f74a9f494de3d08115edafc
3
+ size 4234
runs/Feb08_07-03-50_74bc69b4becb/1675839843.9712958/events.out.tfevents.1675839843.74bc69b4becb.290.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee98b68b79f929f357d2c12a4a37be65cdda2e91c16f092582b4b1f874266e74
3
+ size 5952
runs/Feb08_07-03-50_74bc69b4becb/events.out.tfevents.1675839843.74bc69b4becb.290.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07efd7031be8ab155496121fd04b911227f322b0793e4124f1b3bfb2e678b8b4
3
+ size 14454
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02a84220d3a59b7a986ddfb062b9ea9e9a45a0ff5e854f1a86b7727559daa3d2
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660f1225e692fbbda687422d3532879d3c116f23c4ac0ae767265d9fdf03511c
3
  size 3643