Training in progress, step 2476, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 147770496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e296b9bb163171f7d3292e7f975cd8851abb7f06f131d59f3fc27ddc0971e89
|
3 |
size 147770496
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 75455810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2409c85204b25b0c68db7e75cff61155bb985b9ed5e78015f6ae41839de13b08
|
3 |
size 75455810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:477febd7eab5cb9a16378af748559d02a17dca77e420484aefc4f371d4457c5a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c588c84c1abcb13cdaadd37cc81933f6bbcbd611f3b25ba28f6d4d519a813632
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17311,6 +17311,34 @@
|
|
17311 |
"learning_rate": 4.4965673749054474e-08,
|
17312 |
"loss": 1.3548,
|
17313 |
"step": 2472
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17314 |
}
|
17315 |
],
|
17316 |
"logging_steps": 1.0,
|
@@ -17330,7 +17358,7 @@
|
|
17330 |
"attributes": {}
|
17331 |
}
|
17332 |
},
|
17333 |
-
"total_flos": 2.
|
17334 |
"train_batch_size": 1,
|
17335 |
"trial_name": null,
|
17336 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9593863375960551,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2476,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17311 |
"learning_rate": 4.4965673749054474e-08,
|
17312 |
"loss": 1.3548,
|
17313 |
"step": 2472
|
17314 |
+
},
|
17315 |
+
{
|
17316 |
+
"epoch": 0.9582239147314394,
|
17317 |
+
"grad_norm": 0.17760907113552094,
|
17318 |
+
"learning_rate": 4.4138052797422225e-08,
|
17319 |
+
"loss": 1.3602,
|
17320 |
+
"step": 2473
|
17321 |
+
},
|
17322 |
+
{
|
17323 |
+
"epoch": 0.9586113890196446,
|
17324 |
+
"grad_norm": 0.1899784654378891,
|
17325 |
+
"learning_rate": 4.331808542797855e-08,
|
17326 |
+
"loss": 1.3683,
|
17327 |
+
"step": 2474
|
17328 |
+
},
|
17329 |
+
{
|
17330 |
+
"epoch": 0.9589988633078498,
|
17331 |
+
"grad_norm": 0.18728189170360565,
|
17332 |
+
"learning_rate": 4.2505772907038836e-08,
|
17333 |
+
"loss": 1.3418,
|
17334 |
+
"step": 2475
|
17335 |
+
},
|
17336 |
+
{
|
17337 |
+
"epoch": 0.9593863375960551,
|
17338 |
+
"grad_norm": 0.18516357243061066,
|
17339 |
+
"learning_rate": 4.170111648909736e-08,
|
17340 |
+
"loss": 1.3903,
|
17341 |
+
"step": 2476
|
17342 |
}
|
17343 |
],
|
17344 |
"logging_steps": 1.0,
|
|
|
17358 |
"attributes": {}
|
17359 |
}
|
17360 |
},
|
17361 |
+
"total_flos": 2.448841409909757e+18,
|
17362 |
"train_batch_size": 1,
|
17363 |
"trial_name": null,
|
17364 |
"trial_params": null
|