Training in progress, step 450, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c360e553d157936718cf6be990e147d21346274ac808e87443ceef418127fde4
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de4659248af871d8a5bc8618f52c5ea1baa9d060fad262290330351318b749ff
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:815ec9236e78d8ac2fa18b52a6a75f823894c470f485337750679c813cd4c0ba
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c794fb315dbb1d737ad184da5b30abdc3101c9a540dba62326f5d08191ea2bdb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3138,6 +3138,41 @@
|
|
3138 |
"learning_rate": 9.737129810366952e-05,
|
3139 |
"loss": 0.8703,
|
3140 |
"step": 445
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3141 |
}
|
3142 |
],
|
3143 |
"logging_steps": 1,
|
@@ -3157,7 +3192,7 @@
|
|
3157 |
"attributes": {}
|
3158 |
}
|
3159 |
},
|
3160 |
-
"total_flos":
|
3161 |
"train_batch_size": 4,
|
3162 |
"trial_name": null,
|
3163 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5823358136525396,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 450,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3138 |
"learning_rate": 9.737129810366952e-05,
|
3139 |
"loss": 0.8703,
|
3140 |
"step": 445
|
3141 |
+
},
|
3142 |
+
{
|
3143 |
+
"epoch": 0.5771594953089615,
|
3144 |
+
"grad_norm": 0.8351484537124634,
|
3145 |
+
"learning_rate": 9.735808998885915e-05,
|
3146 |
+
"loss": 0.9626,
|
3147 |
+
"step": 446
|
3148 |
+
},
|
3149 |
+
{
|
3150 |
+
"epoch": 0.578453574894856,
|
3151 |
+
"grad_norm": 0.860819399356842,
|
3152 |
+
"learning_rate": 9.734484967493282e-05,
|
3153 |
+
"loss": 0.9061,
|
3154 |
+
"step": 447
|
3155 |
+
},
|
3156 |
+
{
|
3157 |
+
"epoch": 0.5797476544807506,
|
3158 |
+
"grad_norm": 0.7928848266601562,
|
3159 |
+
"learning_rate": 9.733157717089277e-05,
|
3160 |
+
"loss": 0.9141,
|
3161 |
+
"step": 448
|
3162 |
+
},
|
3163 |
+
{
|
3164 |
+
"epoch": 0.581041734066645,
|
3165 |
+
"grad_norm": 0.9132540822029114,
|
3166 |
+
"learning_rate": 9.7318272485763e-05,
|
3167 |
+
"loss": 0.7613,
|
3168 |
+
"step": 449
|
3169 |
+
},
|
3170 |
+
{
|
3171 |
+
"epoch": 0.5823358136525396,
|
3172 |
+
"grad_norm": 0.7894258499145508,
|
3173 |
+
"learning_rate": 9.730493562858953e-05,
|
3174 |
+
"loss": 0.8234,
|
3175 |
+
"step": 450
|
3176 |
}
|
3177 |
],
|
3178 |
"logging_steps": 1,
|
|
|
3192 |
"attributes": {}
|
3193 |
}
|
3194 |
},
|
3195 |
+
"total_flos": 5.030717272621056e+17,
|
3196 |
"train_batch_size": 4,
|
3197 |
"trial_name": null,
|
3198 |
"trial_params": null
|