DuongTrongChi commited on
Commit
6860dc4
1 Parent(s): 5fc146d

Training in progress, step 580, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c114ae96d9d8cb96dbaa3cd6af73dd527250586aeb31e8856b53c540acef67d6
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6589280cb3cff27d0e3f809d9fe18d16f0e9a7c5605ca58835189f572dabff16
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac9691a0ad4d7be278c4991469273ca3ad54f22471fe44e8fcb314a30188d0d3
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0682c52911bfd561a6f52732b0960db62f88ed27fe66503c2c995a4eaf4cdaed
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a78f5c44175ab04d2074e288162c4abc93267ecb39ae1f22c8db10bc367ff930
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c64a34b4af24bd84d792037de6a5cdeb1e9758d386ffdf2a30823c12441032d4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8138069582686512,
5
  "eval_steps": 500,
6
- "global_step": 557,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3906,6 +3906,167 @@
3906
  "learning_rate": 4.349315068493151e-06,
3907
  "loss": 1.1756,
3908
  "step": 557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3909
  }
3910
  ],
3911
  "logging_steps": 1,
@@ -3925,7 +4086,7 @@
3925
  "attributes": {}
3926
  }
3927
  },
3928
- "total_flos": 6.292640189270753e+17,
3929
  "train_batch_size": 4,
3930
  "trial_name": null,
3931
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8474111953246279,
5
  "eval_steps": 500,
6
+ "global_step": 580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3906
  "learning_rate": 4.349315068493151e-06,
3907
  "loss": 1.1756,
3908
  "step": 557
3909
+ },
3910
+ {
3911
+ "epoch": 0.8152680120536937,
3912
+ "grad_norm": 0.11740544438362122,
3913
+ "learning_rate": 4.315068493150685e-06,
3914
+ "loss": 1.1753,
3915
+ "step": 558
3916
+ },
3917
+ {
3918
+ "epoch": 0.8167290658387362,
3919
+ "grad_norm": 0.11186040937900543,
3920
+ "learning_rate": 4.28082191780822e-06,
3921
+ "loss": 1.2328,
3922
+ "step": 559
3923
+ },
3924
+ {
3925
+ "epoch": 0.8181901196237786,
3926
+ "grad_norm": 0.11647879332304001,
3927
+ "learning_rate": 4.246575342465754e-06,
3928
+ "loss": 1.1962,
3929
+ "step": 560
3930
+ },
3931
+ {
3932
+ "epoch": 0.8196511734088211,
3933
+ "grad_norm": 0.12523581087589264,
3934
+ "learning_rate": 4.212328767123288e-06,
3935
+ "loss": 1.2818,
3936
+ "step": 561
3937
+ },
3938
+ {
3939
+ "epoch": 0.8211122271938636,
3940
+ "grad_norm": 0.11687058955430984,
3941
+ "learning_rate": 4.178082191780822e-06,
3942
+ "loss": 1.173,
3943
+ "step": 562
3944
+ },
3945
+ {
3946
+ "epoch": 0.822573280978906,
3947
+ "grad_norm": 0.1250499188899994,
3948
+ "learning_rate": 4.143835616438356e-06,
3949
+ "loss": 1.1234,
3950
+ "step": 563
3951
+ },
3952
+ {
3953
+ "epoch": 0.8240343347639485,
3954
+ "grad_norm": 0.11505109816789627,
3955
+ "learning_rate": 4.109589041095891e-06,
3956
+ "loss": 1.2452,
3957
+ "step": 564
3958
+ },
3959
+ {
3960
+ "epoch": 0.825495388548991,
3961
+ "grad_norm": 0.1190369576215744,
3962
+ "learning_rate": 4.075342465753426e-06,
3963
+ "loss": 1.149,
3964
+ "step": 565
3965
+ },
3966
+ {
3967
+ "epoch": 0.8269564423340334,
3968
+ "grad_norm": 0.12453046441078186,
3969
+ "learning_rate": 4.0410958904109595e-06,
3970
+ "loss": 1.2214,
3971
+ "step": 566
3972
+ },
3973
+ {
3974
+ "epoch": 0.8284174961190759,
3975
+ "grad_norm": 0.10606851428747177,
3976
+ "learning_rate": 4.006849315068493e-06,
3977
+ "loss": 1.1749,
3978
+ "step": 567
3979
+ },
3980
+ {
3981
+ "epoch": 0.8298785499041184,
3982
+ "grad_norm": 0.11873757094144821,
3983
+ "learning_rate": 3.972602739726027e-06,
3984
+ "loss": 1.2741,
3985
+ "step": 568
3986
+ },
3987
+ {
3988
+ "epoch": 0.8313396036891608,
3989
+ "grad_norm": 0.12206880748271942,
3990
+ "learning_rate": 3.938356164383562e-06,
3991
+ "loss": 1.2122,
3992
+ "step": 569
3993
+ },
3994
+ {
3995
+ "epoch": 0.8328006574742033,
3996
+ "grad_norm": 0.10448160022497177,
3997
+ "learning_rate": 3.904109589041096e-06,
3998
+ "loss": 1.2251,
3999
+ "step": 570
4000
+ },
4001
+ {
4002
+ "epoch": 0.8342617112592458,
4003
+ "grad_norm": 0.10980773717164993,
4004
+ "learning_rate": 3.869863013698631e-06,
4005
+ "loss": 1.1791,
4006
+ "step": 571
4007
+ },
4008
+ {
4009
+ "epoch": 0.8357227650442882,
4010
+ "grad_norm": 0.11824549734592438,
4011
+ "learning_rate": 3.8356164383561645e-06,
4012
+ "loss": 1.2319,
4013
+ "step": 572
4014
+ },
4015
+ {
4016
+ "epoch": 0.8371838188293307,
4017
+ "grad_norm": 0.1143551915884018,
4018
+ "learning_rate": 3.8013698630136993e-06,
4019
+ "loss": 1.16,
4020
+ "step": 573
4021
+ },
4022
+ {
4023
+ "epoch": 0.8386448726143731,
4024
+ "grad_norm": 0.11645519733428955,
4025
+ "learning_rate": 3.767123287671233e-06,
4026
+ "loss": 1.1278,
4027
+ "step": 574
4028
+ },
4029
+ {
4030
+ "epoch": 0.8401059263994156,
4031
+ "grad_norm": 0.12313269078731537,
4032
+ "learning_rate": 3.7328767123287675e-06,
4033
+ "loss": 1.1707,
4034
+ "step": 575
4035
+ },
4036
+ {
4037
+ "epoch": 0.8415669801844581,
4038
+ "grad_norm": 0.1056065782904625,
4039
+ "learning_rate": 3.6986301369863014e-06,
4040
+ "loss": 1.0892,
4041
+ "step": 576
4042
+ },
4043
+ {
4044
+ "epoch": 0.8430280339695005,
4045
+ "grad_norm": 0.10892536491155624,
4046
+ "learning_rate": 3.664383561643836e-06,
4047
+ "loss": 1.2527,
4048
+ "step": 577
4049
+ },
4050
+ {
4051
+ "epoch": 0.844489087754543,
4052
+ "grad_norm": 0.10961074382066727,
4053
+ "learning_rate": 3.6301369863013704e-06,
4054
+ "loss": 1.2338,
4055
+ "step": 578
4056
+ },
4057
+ {
4058
+ "epoch": 0.8459501415395855,
4059
+ "grad_norm": 0.1143115982413292,
4060
+ "learning_rate": 3.5958904109589043e-06,
4061
+ "loss": 1.2256,
4062
+ "step": 579
4063
+ },
4064
+ {
4065
+ "epoch": 0.8474111953246279,
4066
+ "grad_norm": 0.1371782124042511,
4067
+ "learning_rate": 3.5616438356164386e-06,
4068
+ "loss": 1.1487,
4069
+ "step": 580
4070
  }
4071
  ],
4072
  "logging_steps": 1,
 
4086
  "attributes": {}
4087
  }
4088
  },
4089
+ "total_flos": 6.557703270576169e+17,
4090
  "train_batch_size": 4,
4091
  "trial_name": null,
4092
  "trial_params": null