Training in progress, step 1716, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +116 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 159967880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12fb4cb11627ad284c1d37770a6aacfa220bc7356007fd91be56ae9b13cfa7e1
|
3 |
size 159967880
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 81735892
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d7084c316ee634e4748f581b2bd6c1841fa446bb7314925c6856b0c688d9d59
|
3 |
size 81735892
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfdfe6864f5def6fb115e8ca14ab15e350070dafd726a524cb7ea2d2792031bc
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddc0a7b6e0502913c27e2140efcb7b2ae87f20e67eabbf1a4c9eb14a1147911a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3972fa5d27ce600e42d2f7fa59c3c380c3f284988e6b7b9d744cfc80bdadb18e
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31938e2b87878021814bafc01027edb09402ab03f9b4018907cc3c40a6a45630
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2aa591540982b0ed0561ae043be4daaa0300d3947c5a8b6265d20fe06871fc8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11907,6 +11907,118 @@
|
|
11907 |
"learning_rate": 9.324009324009324e-06,
|
11908 |
"loss": 0.003,
|
11909 |
"step": 1700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11910 |
}
|
11911 |
],
|
11912 |
"logging_steps": 1,
|
@@ -11921,12 +12033,12 @@
|
|
11921 |
"should_evaluate": false,
|
11922 |
"should_log": false,
|
11923 |
"should_save": true,
|
11924 |
-
"should_training_stop":
|
11925 |
},
|
11926 |
"attributes": {}
|
11927 |
}
|
11928 |
},
|
11929 |
-
"total_flos": 7.
|
11930 |
"train_batch_size": 4,
|
11931 |
"trial_name": null,
|
11932 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1716,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11907 |
"learning_rate": 9.324009324009324e-06,
|
11908 |
"loss": 0.003,
|
11909 |
"step": 1700
|
11910 |
+
},
|
11911 |
+
{
|
11912 |
+
"epoch": 0.9912587412587412,
|
11913 |
+
"grad_norm": 0.0011362357763573527,
|
11914 |
+
"learning_rate": 8.741258741258741e-06,
|
11915 |
+
"loss": 0.0032,
|
11916 |
+
"step": 1701
|
11917 |
+
},
|
11918 |
+
{
|
11919 |
+
"epoch": 0.9918414918414918,
|
11920 |
+
"grad_norm": 0.0010037495521828532,
|
11921 |
+
"learning_rate": 8.158508158508159e-06,
|
11922 |
+
"loss": 0.0033,
|
11923 |
+
"step": 1702
|
11924 |
+
},
|
11925 |
+
{
|
11926 |
+
"epoch": 0.9924242424242424,
|
11927 |
+
"grad_norm": 0.0010420752223581076,
|
11928 |
+
"learning_rate": 7.5757575757575764e-06,
|
11929 |
+
"loss": 0.004,
|
11930 |
+
"step": 1703
|
11931 |
+
},
|
11932 |
+
{
|
11933 |
+
"epoch": 0.993006993006993,
|
11934 |
+
"grad_norm": 0.0010835586581379175,
|
11935 |
+
"learning_rate": 6.993006993006993e-06,
|
11936 |
+
"loss": 0.0033,
|
11937 |
+
"step": 1704
|
11938 |
+
},
|
11939 |
+
{
|
11940 |
+
"epoch": 0.9935897435897436,
|
11941 |
+
"grad_norm": 0.0009612834546715021,
|
11942 |
+
"learning_rate": 6.41025641025641e-06,
|
11943 |
+
"loss": 0.0046,
|
11944 |
+
"step": 1705
|
11945 |
+
},
|
11946 |
+
{
|
11947 |
+
"epoch": 0.9941724941724942,
|
11948 |
+
"grad_norm": 0.0007791322423145175,
|
11949 |
+
"learning_rate": 5.827505827505828e-06,
|
11950 |
+
"loss": 0.003,
|
11951 |
+
"step": 1706
|
11952 |
+
},
|
11953 |
+
{
|
11954 |
+
"epoch": 0.9947552447552448,
|
11955 |
+
"grad_norm": 0.0017918848898261786,
|
11956 |
+
"learning_rate": 5.244755244755245e-06,
|
11957 |
+
"loss": 0.0038,
|
11958 |
+
"step": 1707
|
11959 |
+
},
|
11960 |
+
{
|
11961 |
+
"epoch": 0.9953379953379954,
|
11962 |
+
"grad_norm": 0.0009001931175589561,
|
11963 |
+
"learning_rate": 4.662004662004662e-06,
|
11964 |
+
"loss": 0.0037,
|
11965 |
+
"step": 1708
|
11966 |
+
},
|
11967 |
+
{
|
11968 |
+
"epoch": 0.995920745920746,
|
11969 |
+
"grad_norm": 0.0008824478718452156,
|
11970 |
+
"learning_rate": 4.079254079254079e-06,
|
11971 |
+
"loss": 0.004,
|
11972 |
+
"step": 1709
|
11973 |
+
},
|
11974 |
+
{
|
11975 |
+
"epoch": 0.9965034965034965,
|
11976 |
+
"grad_norm": 0.0010619634995236993,
|
11977 |
+
"learning_rate": 3.4965034965034966e-06,
|
11978 |
+
"loss": 0.003,
|
11979 |
+
"step": 1710
|
11980 |
+
},
|
11981 |
+
{
|
11982 |
+
"epoch": 0.997086247086247,
|
11983 |
+
"grad_norm": 0.0007909830892458558,
|
11984 |
+
"learning_rate": 2.913752913752914e-06,
|
11985 |
+
"loss": 0.0023,
|
11986 |
+
"step": 1711
|
11987 |
+
},
|
11988 |
+
{
|
11989 |
+
"epoch": 0.9976689976689976,
|
11990 |
+
"grad_norm": 0.0019757202826440334,
|
11991 |
+
"learning_rate": 2.331002331002331e-06,
|
11992 |
+
"loss": 0.0065,
|
11993 |
+
"step": 1712
|
11994 |
+
},
|
11995 |
+
{
|
11996 |
+
"epoch": 0.9982517482517482,
|
11997 |
+
"grad_norm": 0.0008470152388326824,
|
11998 |
+
"learning_rate": 1.7482517482517483e-06,
|
11999 |
+
"loss": 0.0027,
|
12000 |
+
"step": 1713
|
12001 |
+
},
|
12002 |
+
{
|
12003 |
+
"epoch": 0.9988344988344988,
|
12004 |
+
"grad_norm": 0.0009145813528448343,
|
12005 |
+
"learning_rate": 1.1655011655011655e-06,
|
12006 |
+
"loss": 0.0035,
|
12007 |
+
"step": 1714
|
12008 |
+
},
|
12009 |
+
{
|
12010 |
+
"epoch": 0.9994172494172494,
|
12011 |
+
"grad_norm": 0.0007738119456917048,
|
12012 |
+
"learning_rate": 5.827505827505827e-07,
|
12013 |
+
"loss": 0.0038,
|
12014 |
+
"step": 1715
|
12015 |
+
},
|
12016 |
+
{
|
12017 |
+
"epoch": 1.0,
|
12018 |
+
"grad_norm": 0.0009543611668050289,
|
12019 |
+
"learning_rate": 0.0,
|
12020 |
+
"loss": 0.0042,
|
12021 |
+
"step": 1716
|
12022 |
}
|
12023 |
],
|
12024 |
"logging_steps": 1,
|
|
|
12033 |
"should_evaluate": false,
|
12034 |
"should_log": false,
|
12035 |
"should_save": true,
|
12036 |
+
"should_training_stop": true
|
12037 |
},
|
12038 |
"attributes": {}
|
12039 |
}
|
12040 |
},
|
12041 |
+
"total_flos": 7.176610110153897e+19,
|
12042 |
"train_batch_size": 4,
|
12043 |
"trial_name": null,
|
12044 |
"trial_params": null
|