DuongTrongChi
committed on
Commit
•
6860dc4
1
Parent(s):
5fc146d
Training in progress, step 580, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6589280cb3cff27d0e3f809d9fe18d16f0e9a7c5605ca58835189f572dabff16
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0682c52911bfd561a6f52732b0960db62f88ed27fe66503c2c995a4eaf4cdaed
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c64a34b4af24bd84d792037de6a5cdeb1e9758d386ffdf2a30823c12441032d4
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step": 557,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3906,6 +3906,167 @@
|
|
3906 |
"learning_rate": 4.349315068493151e-06,
|
3907 |
"loss": 1.1756,
|
3908 |
"step": 557
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3909 |
}
|
3910 |
],
|
3911 |
"logging_steps": 1,
|
@@ -3925,7 +4086,7 @@
|
|
3925 |
"attributes": {}
|
3926 |
}
|
3927 |
},
|
3928 |
-
"total_flos": 6.
|
3929 |
"train_batch_size": 4,
|
3930 |
"trial_name": null,
|
3931 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8474111953246279,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 580,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3906 |
"learning_rate": 4.349315068493151e-06,
|
3907 |
"loss": 1.1756,
|
3908 |
"step": 557
|
3909 |
+
},
|
3910 |
+
{
|
3911 |
+
"epoch": 0.8152680120536937,
|
3912 |
+
"grad_norm": 0.11740544438362122,
|
3913 |
+
"learning_rate": 4.315068493150685e-06,
|
3914 |
+
"loss": 1.1753,
|
3915 |
+
"step": 558
|
3916 |
+
},
|
3917 |
+
{
|
3918 |
+
"epoch": 0.8167290658387362,
|
3919 |
+
"grad_norm": 0.11186040937900543,
|
3920 |
+
"learning_rate": 4.28082191780822e-06,
|
3921 |
+
"loss": 1.2328,
|
3922 |
+
"step": 559
|
3923 |
+
},
|
3924 |
+
{
|
3925 |
+
"epoch": 0.8181901196237786,
|
3926 |
+
"grad_norm": 0.11647879332304001,
|
3927 |
+
"learning_rate": 4.246575342465754e-06,
|
3928 |
+
"loss": 1.1962,
|
3929 |
+
"step": 560
|
3930 |
+
},
|
3931 |
+
{
|
3932 |
+
"epoch": 0.8196511734088211,
|
3933 |
+
"grad_norm": 0.12523581087589264,
|
3934 |
+
"learning_rate": 4.212328767123288e-06,
|
3935 |
+
"loss": 1.2818,
|
3936 |
+
"step": 561
|
3937 |
+
},
|
3938 |
+
{
|
3939 |
+
"epoch": 0.8211122271938636,
|
3940 |
+
"grad_norm": 0.11687058955430984,
|
3941 |
+
"learning_rate": 4.178082191780822e-06,
|
3942 |
+
"loss": 1.173,
|
3943 |
+
"step": 562
|
3944 |
+
},
|
3945 |
+
{
|
3946 |
+
"epoch": 0.822573280978906,
|
3947 |
+
"grad_norm": 0.1250499188899994,
|
3948 |
+
"learning_rate": 4.143835616438356e-06,
|
3949 |
+
"loss": 1.1234,
|
3950 |
+
"step": 563
|
3951 |
+
},
|
3952 |
+
{
|
3953 |
+
"epoch": 0.8240343347639485,
|
3954 |
+
"grad_norm": 0.11505109816789627,
|
3955 |
+
"learning_rate": 4.109589041095891e-06,
|
3956 |
+
"loss": 1.2452,
|
3957 |
+
"step": 564
|
3958 |
+
},
|
3959 |
+
{
|
3960 |
+
"epoch": 0.825495388548991,
|
3961 |
+
"grad_norm": 0.1190369576215744,
|
3962 |
+
"learning_rate": 4.075342465753426e-06,
|
3963 |
+
"loss": 1.149,
|
3964 |
+
"step": 565
|
3965 |
+
},
|
3966 |
+
{
|
3967 |
+
"epoch": 0.8269564423340334,
|
3968 |
+
"grad_norm": 0.12453046441078186,
|
3969 |
+
"learning_rate": 4.0410958904109595e-06,
|
3970 |
+
"loss": 1.2214,
|
3971 |
+
"step": 566
|
3972 |
+
},
|
3973 |
+
{
|
3974 |
+
"epoch": 0.8284174961190759,
|
3975 |
+
"grad_norm": 0.10606851428747177,
|
3976 |
+
"learning_rate": 4.006849315068493e-06,
|
3977 |
+
"loss": 1.1749,
|
3978 |
+
"step": 567
|
3979 |
+
},
|
3980 |
+
{
|
3981 |
+
"epoch": 0.8298785499041184,
|
3982 |
+
"grad_norm": 0.11873757094144821,
|
3983 |
+
"learning_rate": 3.972602739726027e-06,
|
3984 |
+
"loss": 1.2741,
|
3985 |
+
"step": 568
|
3986 |
+
},
|
3987 |
+
{
|
3988 |
+
"epoch": 0.8313396036891608,
|
3989 |
+
"grad_norm": 0.12206880748271942,
|
3990 |
+
"learning_rate": 3.938356164383562e-06,
|
3991 |
+
"loss": 1.2122,
|
3992 |
+
"step": 569
|
3993 |
+
},
|
3994 |
+
{
|
3995 |
+
"epoch": 0.8328006574742033,
|
3996 |
+
"grad_norm": 0.10448160022497177,
|
3997 |
+
"learning_rate": 3.904109589041096e-06,
|
3998 |
+
"loss": 1.2251,
|
3999 |
+
"step": 570
|
4000 |
+
},
|
4001 |
+
{
|
4002 |
+
"epoch": 0.8342617112592458,
|
4003 |
+
"grad_norm": 0.10980773717164993,
|
4004 |
+
"learning_rate": 3.869863013698631e-06,
|
4005 |
+
"loss": 1.1791,
|
4006 |
+
"step": 571
|
4007 |
+
},
|
4008 |
+
{
|
4009 |
+
"epoch": 0.8357227650442882,
|
4010 |
+
"grad_norm": 0.11824549734592438,
|
4011 |
+
"learning_rate": 3.8356164383561645e-06,
|
4012 |
+
"loss": 1.2319,
|
4013 |
+
"step": 572
|
4014 |
+
},
|
4015 |
+
{
|
4016 |
+
"epoch": 0.8371838188293307,
|
4017 |
+
"grad_norm": 0.1143551915884018,
|
4018 |
+
"learning_rate": 3.8013698630136993e-06,
|
4019 |
+
"loss": 1.16,
|
4020 |
+
"step": 573
|
4021 |
+
},
|
4022 |
+
{
|
4023 |
+
"epoch": 0.8386448726143731,
|
4024 |
+
"grad_norm": 0.11645519733428955,
|
4025 |
+
"learning_rate": 3.767123287671233e-06,
|
4026 |
+
"loss": 1.1278,
|
4027 |
+
"step": 574
|
4028 |
+
},
|
4029 |
+
{
|
4030 |
+
"epoch": 0.8401059263994156,
|
4031 |
+
"grad_norm": 0.12313269078731537,
|
4032 |
+
"learning_rate": 3.7328767123287675e-06,
|
4033 |
+
"loss": 1.1707,
|
4034 |
+
"step": 575
|
4035 |
+
},
|
4036 |
+
{
|
4037 |
+
"epoch": 0.8415669801844581,
|
4038 |
+
"grad_norm": 0.1056065782904625,
|
4039 |
+
"learning_rate": 3.6986301369863014e-06,
|
4040 |
+
"loss": 1.0892,
|
4041 |
+
"step": 576
|
4042 |
+
},
|
4043 |
+
{
|
4044 |
+
"epoch": 0.8430280339695005,
|
4045 |
+
"grad_norm": 0.10892536491155624,
|
4046 |
+
"learning_rate": 3.664383561643836e-06,
|
4047 |
+
"loss": 1.2527,
|
4048 |
+
"step": 577
|
4049 |
+
},
|
4050 |
+
{
|
4051 |
+
"epoch": 0.844489087754543,
|
4052 |
+
"grad_norm": 0.10961074382066727,
|
4053 |
+
"learning_rate": 3.6301369863013704e-06,
|
4054 |
+
"loss": 1.2338,
|
4055 |
+
"step": 578
|
4056 |
+
},
|
4057 |
+
{
|
4058 |
+
"epoch": 0.8459501415395855,
|
4059 |
+
"grad_norm": 0.1143115982413292,
|
4060 |
+
"learning_rate": 3.5958904109589043e-06,
|
4061 |
+
"loss": 1.2256,
|
4062 |
+
"step": 579
|
4063 |
+
},
|
4064 |
+
{
|
4065 |
+
"epoch": 0.8474111953246279,
|
4066 |
+
"grad_norm": 0.1371782124042511,
|
4067 |
+
"learning_rate": 3.5616438356164386e-06,
|
4068 |
+
"loss": 1.1487,
|
4069 |
+
"step": 580
|
4070 |
}
|
4071 |
],
|
4072 |
"logging_steps": 1,
|
|
|
4086 |
"attributes": {}
|
4087 |
}
|
4088 |
},
|
4089 |
+
"total_flos": 6.557703270576169e+17,
|
4090 |
"train_batch_size": 4,
|
4091 |
"trial_name": null,
|
4092 |
"trial_params": null
|