Training in progress, step 150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73911112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fff58c85152d6f151d159e47e6812725e1c0f6bc6d2070b8b1f24148ae65360e
|
3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 37430836
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1921b53d958e83a91af2dba3747330b92c7f1a48510afb09346afb1e946d7685
|
3 |
size 37430836
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:449effa232fec37a400a7c449f0dc26f44d1af3ca0cc70a05e6be40519d8df93
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1043,6 +1043,20 @@
|
|
1043 |
"learning_rate": 0.00016363636363636366,
|
1044 |
"loss": 1.2176,
|
1045 |
"step": 148
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1046 |
}
|
1047 |
],
|
1048 |
"logging_steps": 1,
|
@@ -1062,7 +1076,7 @@
|
|
1062 |
"attributes": {}
|
1063 |
}
|
1064 |
},
|
1065 |
-
"total_flos": 1.
|
1066 |
"train_batch_size": 4,
|
1067 |
"trial_name": null,
|
1068 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.411522633744856,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1043 |
"learning_rate": 0.00016363636363636366,
|
1044 |
"loss": 1.2176,
|
1045 |
"step": 148
|
1046 |
+
},
|
1047 |
+
{
|
1048 |
+
"epoch": 0.40877914951989025,
|
1049 |
+
"grad_norm": 0.14986877143383026,
|
1050 |
+
"learning_rate": 0.0001628787878787879,
|
1051 |
+
"loss": 1.2373,
|
1052 |
+
"step": 149
|
1053 |
+
},
|
1054 |
+
{
|
1055 |
+
"epoch": 0.411522633744856,
|
1056 |
+
"grad_norm": 0.14896810054779053,
|
1057 |
+
"learning_rate": 0.00016212121212121213,
|
1058 |
+
"loss": 1.2124,
|
1059 |
+
"step": 150
|
1060 |
}
|
1061 |
],
|
1062 |
"logging_steps": 1,
|
|
|
1076 |
"attributes": {}
|
1077 |
}
|
1078 |
},
|
1079 |
+
"total_flos": 1.859117070211154e+17,
|
1080 |
"train_batch_size": 4,
|
1081 |
"trial_name": null,
|
1082 |
"trial_params": null
|