Training in progress, step 4720, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70430032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d1ab2ab8575c3321712c1a6bad2eba029eedcf542bf41c1ccaab7fa22efcbcb
|
| 3 |
size 70430032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 141058579
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b970592946f6915d514b0c5fde882560cf4b0d7152bf01c4bbd220428b47cf61
|
| 3 |
size 141058579
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9767e25e15d5c9765677c20f262aa1568a5646ba47fed1966038f40ce9201802
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7addd295ee72ea36828978c8aecf66577e94dd9559017f38aea68d35ed7e152
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4728,6 +4728,16 @@
|
|
| 4728 |
"mean_token_accuracy": 0.7781821310520172,
|
| 4729 |
"num_tokens": 21879326.0,
|
| 4730 |
"step": 4710
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4731 |
}
|
| 4732 |
],
|
| 4733 |
"logging_steps": 10,
|
|
@@ -4747,7 +4757,7 @@
|
|
| 4747 |
"attributes": {}
|
| 4748 |
}
|
| 4749 |
},
|
| 4750 |
-
"total_flos": 1.
|
| 4751 |
"train_batch_size": 4,
|
| 4752 |
"trial_name": null,
|
| 4753 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0068266666666668,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 4720,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4728 |
"mean_token_accuracy": 0.7781821310520172,
|
| 4729 |
"num_tokens": 21879326.0,
|
| 4730 |
"step": 4710
|
| 4731 |
+
},
|
| 4732 |
+
{
|
| 4733 |
+
"entropy": 0.9335578382015228,
|
| 4734 |
+
"epoch": 1.0068266666666668,
|
| 4735 |
+
"grad_norm": 0.2416774481534958,
|
| 4736 |
+
"learning_rate": 5.2058967697330784e-05,
|
| 4737 |
+
"loss": 0.9976616859436035,
|
| 4738 |
+
"mean_token_accuracy": 0.7626704692840576,
|
| 4739 |
+
"num_tokens": 21933750.0,
|
| 4740 |
+
"step": 4720
|
| 4741 |
}
|
| 4742 |
],
|
| 4743 |
"logging_steps": 10,
|
|
|
|
| 4757 |
"attributes": {}
|
| 4758 |
}
|
| 4759 |
},
|
| 4760 |
+
"total_flos": 1.0386211988394086e+17,
|
| 4761 |
"train_batch_size": 4,
|
| 4762 |
"trial_name": null,
|
| 4763 |
"trial_params": null
|