Training in progress, step 45, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4628218
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ba4e6f31201d085ea85e641dee81f15497cbec2c4bf4ac956f4b70c6557a22c
|
3 |
size 4628218
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cf420d826a76ebf3d18a2641574c572d67f70da8adcccd5dcab567f4430e22d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b665a2618254d0693455029a117143eb612684fd8f287fa7207d501afc785521
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 9,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -96,6 +96,28 @@
|
|
96 |
"eval_samples_per_second": 285.366,
|
97 |
"eval_steps_per_second": 35.78,
|
98 |
"step": 36
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
}
|
100 |
],
|
101 |
"logging_steps": 5,
|
@@ -115,7 +137,7 @@
|
|
115 |
"attributes": {}
|
116 |
}
|
117 |
},
|
118 |
-
"total_flos":
|
119 |
"train_batch_size": 8,
|
120 |
"trial_name": null,
|
121 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.08155867693701857,
|
5 |
"eval_steps": 9,
|
6 |
+
"global_step": 45,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
96 |
"eval_samples_per_second": 285.366,
|
97 |
"eval_steps_per_second": 35.78,
|
98 |
"step": 36
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 0.07249660172179428,
|
102 |
+
"grad_norm": NaN,
|
103 |
+
"learning_rate": 0.00012803300858899104,
|
104 |
+
"loss": 0.0,
|
105 |
+
"step": 40
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 0.08155867693701857,
|
109 |
+
"grad_norm": NaN,
|
110 |
+
"learning_rate": 0.00011666776747647015,
|
111 |
+
"loss": 0.0,
|
112 |
+
"step": 45
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 0.08155867693701857,
|
116 |
+
"eval_loss": NaN,
|
117 |
+
"eval_runtime": 6.8846,
|
118 |
+
"eval_samples_per_second": 284.982,
|
119 |
+
"eval_steps_per_second": 35.732,
|
120 |
+
"step": 45
|
121 |
}
|
122 |
],
|
123 |
"logging_steps": 5,
|
|
|
137 |
"attributes": {}
|
138 |
}
|
139 |
},
|
140 |
+
"total_flos": 808917663744000.0,
|
141 |
"train_batch_size": 8,
|
142 |
"trial_name": null,
|
143 |
"trial_params": null
|