Xmm committed on
Commit
154a15f
1 Parent(s): 3d7fdee

Upload 11 files

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +555 -5
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18cec116365ef1b036f1549fe671ef38cd38cdd50e48ff000a37c0888048133b
3
  size 1110097420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e40555b51ab47cbd6a63d83168b6e5917f118d2889d40b4ce3e220871e6cd8
3
  size 1110097420
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fda91dd1702ad659189bcabcd800638aa8658f56db4cec71d731299787b65a0
3
  size 2220313146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdecd5992ef078b4985837f59de1928d94cf5014b3d11b76d337d7dd9c1d313e
3
  size 2220313146
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c0a7bb5c807ad5bc1c4dff28401d8ba88a8d5d80ef889d2be2ca17beb56b13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9b3283714e5042580832405e8bcbd71a95ba0aa2101a748c58943a7771454c
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16421db322c149675656b6ef816c336444bc6590a1774f47025ad6dacf9c462b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01948096ca09d6133b37c1d4832dd98f343fc0f51ab59a285b9840e309e1d5c
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.28671061992645264,
3
- "best_model_checkpoint": "./checkpoint-huawei-noah/checkpoint-20000",
4
- "epoch": 0.8854258898530193,
5
  "eval_steps": 1000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -447,6 +447,556 @@
447
  "eval_samples_per_second": 74.919,
448
  "eval_steps_per_second": 0.293,
449
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "logging_steps": 1000,
@@ -454,7 +1004,7 @@
454
  "num_input_tokens_seen": 0,
455
  "num_train_epochs": 7,
456
  "save_steps": 500,
457
- "total_flos": 6651862755777888.0,
458
  "train_batch_size": 8,
459
  "trial_name": null,
460
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.260405570268631,
3
+ "best_model_checkpoint": "./checkpoint-huawei-noah/checkpoint-45000",
4
+ "epoch": 1.9922082521692934,
5
  "eval_steps": 1000,
6
+ "global_step": 45000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
447
  "eval_samples_per_second": 74.919,
448
  "eval_steps_per_second": 0.293,
449
  "step": 20000
450
+ },
451
+ {
452
+ "epoch": 0.93,
453
+ "grad_norm": 37.97357177734375,
454
+ "learning_rate": 1.7343722330440944e-05,
455
+ "loss": 0.2782,
456
+ "step": 21000
457
+ },
458
+ {
459
+ "epoch": 0.93,
460
+ "eval_LOC_f1": 0.8330484744899815,
461
+ "eval_ORG_f1": 0.7210407632263661,
462
+ "eval_PER_f1": 0.8489616418275103,
463
+ "eval_loss": 0.30233901739120483,
464
+ "eval_overall_accuracy": 0.906348548032643,
465
+ "eval_overall_f1": 0.8060817413887736,
466
+ "eval_overall_precision": 0.7987835211557213,
467
+ "eval_overall_recall": 0.8135145541872421,
468
+ "eval_runtime": 903.2924,
469
+ "eval_samples_per_second": 72.734,
470
+ "eval_steps_per_second": 0.285,
471
+ "step": 21000
472
+ },
473
+ {
474
+ "epoch": 0.97,
475
+ "grad_norm": 3.23641037940979,
476
+ "learning_rate": 1.7217232917604798e-05,
477
+ "loss": 0.2913,
478
+ "step": 22000
479
+ },
480
+ {
481
+ "epoch": 0.97,
482
+ "eval_LOC_f1": 0.8284559448131656,
483
+ "eval_ORG_f1": 0.7247506440319704,
484
+ "eval_PER_f1": 0.8467821489168608,
485
+ "eval_loss": 0.2910088300704956,
486
+ "eval_overall_accuracy": 0.908466500939486,
487
+ "eval_overall_f1": 0.8031924311719724,
488
+ "eval_overall_precision": 0.7945208195637806,
489
+ "eval_overall_recall": 0.8120554200752534,
490
+ "eval_runtime": 930.8988,
491
+ "eval_samples_per_second": 70.577,
492
+ "eval_steps_per_second": 0.276,
493
+ "step": 22000
494
+ },
495
+ {
496
+ "epoch": 1.02,
497
+ "grad_norm": 51.232643127441406,
498
+ "learning_rate": 1.709074350476865e-05,
499
+ "loss": 0.254,
500
+ "step": 23000
501
+ },
502
+ {
503
+ "epoch": 1.02,
504
+ "eval_LOC_f1": 0.8326820729529502,
505
+ "eval_ORG_f1": 0.7281753443144438,
506
+ "eval_PER_f1": 0.8538723624698639,
507
+ "eval_loss": 0.3031412661075592,
508
+ "eval_overall_accuracy": 0.9094907050317503,
509
+ "eval_overall_f1": 0.8106029688042823,
510
+ "eval_overall_precision": 0.8094644661502189,
511
+ "eval_overall_recall": 0.8117446785514041,
512
+ "eval_runtime": 982.2729,
513
+ "eval_samples_per_second": 66.886,
514
+ "eval_steps_per_second": 0.262,
515
+ "step": 23000
516
+ },
517
+ {
518
+ "epoch": 1.06,
519
+ "grad_norm": 3.0182816982269287,
520
+ "learning_rate": 1.6964254091932504e-05,
521
+ "loss": 0.2412,
522
+ "step": 24000
523
+ },
524
+ {
525
+ "epoch": 1.06,
526
+ "eval_LOC_f1": 0.8337292382788848,
527
+ "eval_ORG_f1": 0.7265721539463927,
528
+ "eval_PER_f1": 0.8438998504510589,
529
+ "eval_loss": 0.2959749400615692,
530
+ "eval_overall_accuracy": 0.9087023207706061,
531
+ "eval_overall_f1": 0.8054477820887165,
532
+ "eval_overall_precision": 0.7949914132873621,
533
+ "eval_overall_recall": 0.8161828781420359,
534
+ "eval_runtime": 937.946,
535
+ "eval_samples_per_second": 70.047,
536
+ "eval_steps_per_second": 0.274,
537
+ "step": 24000
538
+ },
539
+ {
540
+ "epoch": 1.11,
541
+ "grad_norm": 4.651257038116455,
542
+ "learning_rate": 1.683776467909636e-05,
543
+ "loss": 0.2248,
544
+ "step": 25000
545
+ },
546
+ {
547
+ "epoch": 1.11,
548
+ "eval_LOC_f1": 0.8204892221350009,
549
+ "eval_ORG_f1": 0.7164497497985908,
550
+ "eval_PER_f1": 0.8366153573083787,
551
+ "eval_loss": 0.2870059013366699,
552
+ "eval_overall_accuracy": 0.9095747909232758,
553
+ "eval_overall_f1": 0.7954083144399056,
554
+ "eval_overall_precision": 0.7841744564646571,
555
+ "eval_overall_recall": 0.806968716434849,
556
+ "eval_runtime": 880.5612,
557
+ "eval_samples_per_second": 74.612,
558
+ "eval_steps_per_second": 0.292,
559
+ "step": 25000
560
+ },
561
+ {
562
+ "epoch": 1.15,
563
+ "grad_norm": 0.5376187562942505,
564
+ "learning_rate": 1.6711275266260215e-05,
565
+ "loss": 0.2367,
566
+ "step": 26000
567
+ },
568
+ {
569
+ "epoch": 1.15,
570
+ "eval_LOC_f1": 0.8274943290245922,
571
+ "eval_ORG_f1": 0.7181801646127961,
572
+ "eval_PER_f1": 0.8393770384236834,
573
+ "eval_loss": 0.3035840094089508,
574
+ "eval_overall_accuracy": 0.9099459068505344,
575
+ "eval_overall_f1": 0.7997951699758828,
576
+ "eval_overall_precision": 0.7826674511984585,
577
+ "eval_overall_recall": 0.8176892990076537,
578
+ "eval_runtime": 877.1859,
579
+ "eval_samples_per_second": 74.899,
580
+ "eval_steps_per_second": 0.293,
581
+ "step": 26000
582
+ },
583
+ {
584
+ "epoch": 1.2,
585
+ "grad_norm": 29.850025177001953,
586
+ "learning_rate": 1.658478585342407e-05,
587
+ "loss": 0.2259,
588
+ "step": 27000
589
+ },
590
+ {
591
+ "epoch": 1.2,
592
+ "eval_LOC_f1": 0.8368737846096861,
593
+ "eval_ORG_f1": 0.730566338210411,
594
+ "eval_PER_f1": 0.8548625950405009,
595
+ "eval_loss": 0.2981078624725342,
596
+ "eval_overall_accuracy": 0.912430550111398,
597
+ "eval_overall_f1": 0.8107802613802052,
598
+ "eval_overall_precision": 0.8028279082088811,
599
+ "eval_overall_recall": 0.8188917335999406,
600
+ "eval_runtime": 900.9915,
601
+ "eval_samples_per_second": 72.92,
602
+ "eval_steps_per_second": 0.285,
603
+ "step": 27000
604
+ },
605
+ {
606
+ "epoch": 1.24,
607
+ "grad_norm": 6.522253036499023,
608
+ "learning_rate": 1.6458296440587925e-05,
609
+ "loss": 0.2353,
610
+ "step": 28000
611
+ },
612
+ {
613
+ "epoch": 1.24,
614
+ "eval_LOC_f1": 0.8361146769362431,
615
+ "eval_ORG_f1": 0.7397288818401768,
616
+ "eval_PER_f1": 0.851691836373991,
617
+ "eval_loss": 0.2890784442424774,
618
+ "eval_overall_accuracy": 0.9118988490980682,
619
+ "eval_overall_f1": 0.813503140265178,
620
+ "eval_overall_precision": 0.8080863288253761,
621
+ "eval_overall_recall": 0.8189930623577175,
622
+ "eval_runtime": 898.5924,
623
+ "eval_samples_per_second": 73.114,
624
+ "eval_steps_per_second": 0.286,
625
+ "step": 28000
626
+ },
627
+ {
628
+ "epoch": 1.28,
629
+ "grad_norm": 18.48634910583496,
630
+ "learning_rate": 1.633180702775178e-05,
631
+ "loss": 0.231,
632
+ "step": 29000
633
+ },
634
+ {
635
+ "epoch": 1.28,
636
+ "eval_LOC_f1": 0.8399307496708805,
637
+ "eval_ORG_f1": 0.7353511607405231,
638
+ "eval_PER_f1": 0.8429051875514639,
639
+ "eval_loss": 0.29812344908714294,
640
+ "eval_overall_accuracy": 0.9100799384971765,
641
+ "eval_overall_f1": 0.8109618028412001,
642
+ "eval_overall_precision": 0.8090263071203351,
643
+ "eval_overall_recall": 0.8129065816405802,
644
+ "eval_runtime": 911.3101,
645
+ "eval_samples_per_second": 72.094,
646
+ "eval_steps_per_second": 0.282,
647
+ "step": 29000
648
+ },
649
+ {
650
+ "epoch": 1.33,
651
+ "grad_norm": 6.476167678833008,
652
+ "learning_rate": 1.6205317614915632e-05,
653
+ "loss": 0.2298,
654
+ "step": 30000
655
+ },
656
+ {
657
+ "epoch": 1.33,
658
+ "eval_LOC_f1": 0.8437642148074813,
659
+ "eval_ORG_f1": 0.741920341727885,
660
+ "eval_PER_f1": 0.845791168353266,
661
+ "eval_loss": 0.2789755165576935,
662
+ "eval_overall_accuracy": 0.9128231616800994,
663
+ "eval_overall_f1": 0.8156984934617233,
664
+ "eval_overall_precision": 0.8007835888891781,
665
+ "eval_overall_recall": 0.8311795342930293,
666
+ "eval_runtime": 950.013,
667
+ "eval_samples_per_second": 69.157,
668
+ "eval_steps_per_second": 0.271,
669
+ "step": 30000
670
+ },
671
+ {
672
+ "epoch": 1.37,
673
+ "grad_norm": 0.6922666430473328,
674
+ "learning_rate": 1.607882820207949e-05,
675
+ "loss": 0.2236,
676
+ "step": 31000
677
+ },
678
+ {
679
+ "epoch": 1.37,
680
+ "eval_LOC_f1": 0.8401312445122232,
681
+ "eval_ORG_f1": 0.7412946847115139,
682
+ "eval_PER_f1": 0.8599101069965396,
683
+ "eval_loss": 0.2861514985561371,
684
+ "eval_overall_accuracy": 0.9133093425115507,
685
+ "eval_overall_f1": 0.8168471254617229,
686
+ "eval_overall_precision": 0.8077931685921699,
687
+ "eval_overall_recall": 0.8261063411536617,
688
+ "eval_runtime": 964.6377,
689
+ "eval_samples_per_second": 68.108,
690
+ "eval_steps_per_second": 0.266,
691
+ "step": 31000
692
+ },
693
+ {
694
+ "epoch": 1.42,
695
+ "grad_norm": 10.913984298706055,
696
+ "learning_rate": 1.5952338789243342e-05,
697
+ "loss": 0.2164,
698
+ "step": 32000
699
+ },
700
+ {
701
+ "epoch": 1.42,
702
+ "eval_LOC_f1": 0.8450018789928598,
703
+ "eval_ORG_f1": 0.7453389102160086,
704
+ "eval_PER_f1": 0.8475419561015748,
705
+ "eval_loss": 0.29202836751937866,
706
+ "eval_overall_accuracy": 0.9133548626934291,
707
+ "eval_overall_f1": 0.8167286457267982,
708
+ "eval_overall_precision": 0.8108745156006552,
709
+ "eval_overall_recall": 0.8226679186397627,
710
+ "eval_runtime": 934.7918,
711
+ "eval_samples_per_second": 70.283,
712
+ "eval_steps_per_second": 0.275,
713
+ "step": 32000
714
+ },
715
+ {
716
+ "epoch": 1.46,
717
+ "grad_norm": 8.604541778564453,
718
+ "learning_rate": 1.5825849376407196e-05,
719
+ "loss": 0.2343,
720
+ "step": 33000
721
+ },
722
+ {
723
+ "epoch": 1.46,
724
+ "eval_LOC_f1": 0.8327052539148251,
725
+ "eval_ORG_f1": 0.7464142820374833,
726
+ "eval_PER_f1": 0.8571568569804591,
727
+ "eval_loss": 0.26980945467948914,
728
+ "eval_overall_accuracy": 0.915141529832157,
729
+ "eval_overall_f1": 0.8162628685387808,
730
+ "eval_overall_precision": 0.8141182004502234,
731
+ "eval_overall_recall": 0.818418866063648,
732
+ "eval_runtime": 878.6904,
733
+ "eval_samples_per_second": 74.77,
734
+ "eval_steps_per_second": 0.292,
735
+ "step": 33000
736
+ },
737
+ {
738
+ "epoch": 1.51,
739
+ "grad_norm": 20.011140823364258,
740
+ "learning_rate": 1.569935996357105e-05,
741
+ "loss": 0.2305,
742
+ "step": 34000
743
+ },
744
+ {
745
+ "epoch": 1.51,
746
+ "eval_LOC_f1": 0.8434370154154885,
747
+ "eval_ORG_f1": 0.7450794786844748,
748
+ "eval_PER_f1": 0.8598302131901996,
749
+ "eval_loss": 0.2736206650733948,
750
+ "eval_overall_accuracy": 0.9164160949247526,
751
+ "eval_overall_f1": 0.820116525352046,
752
+ "eval_overall_precision": 0.8215285544822911,
753
+ "eval_overall_recall": 0.818709341835942,
754
+ "eval_runtime": 878.0774,
755
+ "eval_samples_per_second": 74.823,
756
+ "eval_steps_per_second": 0.293,
757
+ "step": 34000
758
+ },
759
+ {
760
+ "epoch": 1.55,
761
+ "grad_norm": 0.8893330693244934,
762
+ "learning_rate": 1.5572870550734906e-05,
763
+ "loss": 0.218,
764
+ "step": 35000
765
+ },
766
+ {
767
+ "epoch": 1.55,
768
+ "eval_LOC_f1": 0.8372996858861737,
769
+ "eval_ORG_f1": 0.7351363688234623,
770
+ "eval_PER_f1": 0.8452893909397927,
771
+ "eval_loss": 0.277670681476593,
772
+ "eval_overall_accuracy": 0.9132897435443531,
773
+ "eval_overall_f1": 0.8105032765054125,
774
+ "eval_overall_precision": 0.7914432306117588,
775
+ "eval_overall_recall": 0.8305040092411827,
776
+ "eval_runtime": 878.1965,
777
+ "eval_samples_per_second": 74.812,
778
+ "eval_steps_per_second": 0.293,
779
+ "step": 35000
780
+ },
781
+ {
782
+ "epoch": 1.59,
783
+ "grad_norm": 2.807310104370117,
784
+ "learning_rate": 1.544638113789876e-05,
785
+ "loss": 0.2209,
786
+ "step": 36000
787
+ },
788
+ {
789
+ "epoch": 1.59,
790
+ "eval_LOC_f1": 0.8436019819082686,
791
+ "eval_ORG_f1": 0.7541017701160051,
792
+ "eval_PER_f1": 0.8559255699664113,
793
+ "eval_loss": 0.2975883483886719,
794
+ "eval_overall_accuracy": 0.9155777649084917,
795
+ "eval_overall_f1": 0.8217433690792348,
796
+ "eval_overall_precision": 0.8178831213153369,
797
+ "eval_overall_recall": 0.8256402288678876,
798
+ "eval_runtime": 887.7886,
799
+ "eval_samples_per_second": 74.004,
800
+ "eval_steps_per_second": 0.289,
801
+ "step": 36000
802
+ },
803
+ {
804
+ "epoch": 1.64,
805
+ "grad_norm": 0.7378529906272888,
806
+ "learning_rate": 1.5319891725062616e-05,
807
+ "loss": 0.2068,
808
+ "step": 37000
809
+ },
810
+ {
811
+ "epoch": 1.64,
812
+ "eval_LOC_f1": 0.8404596277816221,
813
+ "eval_ORG_f1": 0.7519756060658962,
814
+ "eval_PER_f1": 0.8650594959056045,
815
+ "eval_loss": 0.2906901240348816,
816
+ "eval_overall_accuracy": 0.9159109473508519,
817
+ "eval_overall_f1": 0.8231073274551537,
818
+ "eval_overall_precision": 0.8232630746670091,
819
+ "eval_overall_recall": 0.8229516391615384,
820
+ "eval_runtime": 914.9939,
821
+ "eval_samples_per_second": 71.804,
822
+ "eval_steps_per_second": 0.281,
823
+ "step": 37000
824
+ },
825
+ {
826
+ "epoch": 1.68,
827
+ "grad_norm": 4.775814056396484,
828
+ "learning_rate": 1.5193402312226468e-05,
829
+ "loss": 0.2222,
830
+ "step": 38000
831
+ },
832
+ {
833
+ "epoch": 1.68,
834
+ "eval_LOC_f1": 0.8486954241510423,
835
+ "eval_ORG_f1": 0.7403596163509645,
836
+ "eval_PER_f1": 0.8627163820626227,
837
+ "eval_loss": 0.2920599579811096,
838
+ "eval_overall_accuracy": 0.9144530370812459,
839
+ "eval_overall_f1": 0.8204945751023299,
840
+ "eval_overall_precision": 0.8079300635190885,
841
+ "eval_overall_recall": 0.8334560537177521,
842
+ "eval_runtime": 974.0136,
843
+ "eval_samples_per_second": 67.453,
844
+ "eval_steps_per_second": 0.264,
845
+ "step": 38000
846
+ },
847
+ {
848
+ "epoch": 1.73,
849
+ "grad_norm": 2.2256317138671875,
850
+ "learning_rate": 1.5066912899390323e-05,
851
+ "loss": 0.2328,
852
+ "step": 39000
853
+ },
854
+ {
855
+ "epoch": 1.73,
856
+ "eval_LOC_f1": 0.849334397801749,
857
+ "eval_ORG_f1": 0.753236617390506,
858
+ "eval_PER_f1": 0.8693573280340553,
859
+ "eval_loss": 0.29489845037460327,
860
+ "eval_overall_accuracy": 0.917294255100157,
861
+ "eval_overall_f1": 0.8276056778793333,
862
+ "eval_overall_precision": 0.8335114765330592,
863
+ "eval_overall_recall": 0.8217829808218438,
864
+ "eval_runtime": 944.9062,
865
+ "eval_samples_per_second": 69.531,
866
+ "eval_steps_per_second": 0.272,
867
+ "step": 39000
868
+ },
869
+ {
870
+ "epoch": 1.77,
871
+ "grad_norm": 16.9512996673584,
872
+ "learning_rate": 1.4940423486554176e-05,
873
+ "loss": 0.2229,
874
+ "step": 40000
875
+ },
876
+ {
877
+ "epoch": 1.77,
878
+ "eval_LOC_f1": 0.8445889009269291,
879
+ "eval_ORG_f1": 0.7452847675981278,
880
+ "eval_PER_f1": 0.8508496270046708,
881
+ "eval_loss": 0.27226653695106506,
882
+ "eval_overall_accuracy": 0.916396495957555,
883
+ "eval_overall_f1": 0.818113712374582,
884
+ "eval_overall_precision": 0.8101638106341121,
885
+ "eval_overall_recall": 0.8262211804124756,
886
+ "eval_runtime": 919.492,
887
+ "eval_samples_per_second": 71.452,
888
+ "eval_steps_per_second": 0.28,
889
+ "step": 40000
890
+ },
891
+ {
892
+ "epoch": 1.82,
893
+ "grad_norm": 0.6652330160140991,
894
+ "learning_rate": 1.481393407371803e-05,
895
+ "loss": 0.2219,
896
+ "step": 41000
897
+ },
898
+ {
899
+ "epoch": 1.82,
900
+ "eval_LOC_f1": 0.8483445744353834,
901
+ "eval_ORG_f1": 0.7490041659533185,
902
+ "eval_PER_f1": 0.865145374272123,
903
+ "eval_loss": 0.2795349359512329,
904
+ "eval_overall_accuracy": 0.9183601860258099,
905
+ "eval_overall_f1": 0.8247159453593771,
906
+ "eval_overall_precision": 0.8203393908609086,
907
+ "eval_overall_recall": 0.8291394486364527,
908
+ "eval_runtime": 878.0472,
909
+ "eval_samples_per_second": 74.825,
910
+ "eval_steps_per_second": 0.293,
911
+ "step": 41000
912
+ },
913
+ {
914
+ "epoch": 1.86,
915
+ "grad_norm": 8.537057876586914,
916
+ "learning_rate": 1.4687444660881885e-05,
917
+ "loss": 0.2265,
918
+ "step": 42000
919
+ },
920
+ {
921
+ "epoch": 1.86,
922
+ "eval_LOC_f1": 0.8431806420528559,
923
+ "eval_ORG_f1": 0.7481651106805237,
924
+ "eval_PER_f1": 0.8624314888139032,
925
+ "eval_loss": 0.2695271372795105,
926
+ "eval_overall_accuracy": 0.9176305986662586,
927
+ "eval_overall_f1": 0.8227747180347664,
928
+ "eval_overall_precision": 0.8127298369558054,
929
+ "eval_overall_recall": 0.8330710044381996,
930
+ "eval_runtime": 878.4759,
931
+ "eval_samples_per_second": 74.789,
932
+ "eval_steps_per_second": 0.293,
933
+ "step": 42000
934
+ },
935
+ {
936
+ "epoch": 1.9,
937
+ "grad_norm": 2.5510284900665283,
938
+ "learning_rate": 1.456095524804574e-05,
939
+ "loss": 0.208,
940
+ "step": 43000
941
+ },
942
+ {
943
+ "epoch": 1.9,
944
+ "eval_LOC_f1": 0.8455445895423215,
945
+ "eval_ORG_f1": 0.7595170903640558,
946
+ "eval_PER_f1": 0.8683249226114047,
947
+ "eval_loss": 0.291418194770813,
948
+ "eval_overall_accuracy": 0.9158768072144431,
949
+ "eval_overall_f1": 0.828613510075651,
950
+ "eval_overall_precision": 0.8212624496539643,
951
+ "eval_overall_recall": 0.8360973566704721,
952
+ "eval_runtime": 879.88,
953
+ "eval_samples_per_second": 74.669,
954
+ "eval_steps_per_second": 0.292,
955
+ "step": 43000
956
+ },
957
+ {
958
+ "epoch": 1.95,
959
+ "grad_norm": 40.04784393310547,
960
+ "learning_rate": 1.4434465835209595e-05,
961
+ "loss": 0.2201,
962
+ "step": 44000
963
+ },
964
+ {
965
+ "epoch": 1.95,
966
+ "eval_LOC_f1": 0.8562697361719467,
967
+ "eval_ORG_f1": 0.7598152424942263,
968
+ "eval_PER_f1": 0.8635555196248839,
969
+ "eval_loss": 0.26673147082328796,
970
+ "eval_overall_accuracy": 0.9210597857010994,
971
+ "eval_overall_f1": 0.8302701818813599,
972
+ "eval_overall_precision": 0.8374682152429387,
973
+ "eval_overall_recall": 0.8231948281802031,
974
+ "eval_runtime": 908.7842,
975
+ "eval_samples_per_second": 72.294,
976
+ "eval_steps_per_second": 0.283,
977
+ "step": 44000
978
+ },
979
+ {
980
+ "epoch": 1.99,
981
+ "grad_norm": 4.13014030456543,
982
+ "learning_rate": 1.4307976422373449e-05,
983
+ "loss": 0.2065,
984
+ "step": 45000
985
+ },
986
+ {
987
+ "epoch": 1.99,
988
+ "eval_LOC_f1": 0.850374667633551,
989
+ "eval_ORG_f1": 0.7572422253856926,
990
+ "eval_PER_f1": 0.8662861965717501,
991
+ "eval_loss": 0.260405570268631,
992
+ "eval_overall_accuracy": 0.9184101317809266,
993
+ "eval_overall_f1": 0.8283524878105291,
994
+ "eval_overall_precision": 0.835684527492472,
995
+ "eval_overall_recall": 0.821147987273108,
996
+ "eval_runtime": 930.7338,
997
+ "eval_samples_per_second": 70.589,
998
+ "eval_steps_per_second": 0.276,
999
+ "step": 45000
1000
  }
1001
  ],
1002
  "logging_steps": 1000,
 
1004
  "num_input_tokens_seen": 0,
1005
  "num_train_epochs": 7,
1006
  "save_steps": 500,
1007
+ "total_flos": 1.4980271080173432e+16,
1008
  "train_batch_size": 8,
1009
  "trial_name": null,
1010
  "trial_params": null