Training in progress, step 217, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73911112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a86b6df3e2bd04be35a5d8200190ba034db516cef958ead8265e932ca906205b
|
3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 37430836
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6803ce1d00c1e14e0ff8a69baa2dbfd9d9584e5f294187fee81afb8f3d39476
|
3 |
size 37430836
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1054bd185923ae37e2cfd534bc8fc8be8ffcde679164453af8b8b105ec92295a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1512,6 +1512,20 @@
|
|
1512 |
"learning_rate": 0.0001128787878787879,
|
1513 |
"loss": 1.1579,
|
1514 |
"step": 215
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1515 |
}
|
1516 |
],
|
1517 |
"logging_steps": 1,
|
@@ -1531,7 +1545,7 @@
|
|
1531 |
"attributes": {}
|
1532 |
}
|
1533 |
},
|
1534 |
-
"total_flos": 2.
|
1535 |
"train_batch_size": 4,
|
1536 |
"trial_name": null,
|
1537 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5953360768175583,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 217,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1512 |
"learning_rate": 0.0001128787878787879,
|
1513 |
"loss": 1.1579,
|
1514 |
"step": 215
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 0.5925925925925926,
|
1518 |
+
"grad_norm": 0.17414681613445282,
|
1519 |
+
"learning_rate": 0.00011212121212121212,
|
1520 |
+
"loss": 1.185,
|
1521 |
+
"step": 216
|
1522 |
+
},
|
1523 |
+
{
|
1524 |
+
"epoch": 0.5953360768175583,
|
1525 |
+
"grad_norm": 0.25959229469299316,
|
1526 |
+
"learning_rate": 0.00011136363636363636,
|
1527 |
+
"loss": 1.2697,
|
1528 |
+
"step": 217
|
1529 |
}
|
1530 |
],
|
1531 |
"logging_steps": 1,
|
|
|
1545 |
"attributes": {}
|
1546 |
}
|
1547 |
},
|
1548 |
+
"total_flos": 2.687823948889252e+17,
|
1549 |
"train_batch_size": 4,
|
1550 |
"trial_name": null,
|
1551 |
"trial_params": null
|