Training in progress, step 5904, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 479769104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8b79b765163726683a5b97fb0c1abea490722bbbf1fc391f9b7a985bcdd16c3
|
3 |
size 479769104
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 240728404
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c4be8aec95d333750d7914e18caa878b08e168adf924ebf31647ab744346164
|
3 |
size 240728404
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d08e0bef61039a4f769cc5e4c3c08f715d445eab2242d23ea2e8a9e30cd2439d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -41223,6 +41223,118 @@
|
|
41223 |
"learning_rate": 6.921943831625517e-06,
|
41224 |
"loss": 1.087,
|
41225 |
"step": 5888
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41226 |
}
|
41227 |
],
|
41228 |
"logging_steps": 1,
|
@@ -41242,7 +41354,7 @@
|
|
41242 |
"attributes": {}
|
41243 |
}
|
41244 |
},
|
41245 |
-
"total_flos": 5.
|
41246 |
"train_batch_size": 4,
|
41247 |
"trial_name": null,
|
41248 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.38170048730816786,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5904,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
41223 |
"learning_rate": 6.921943831625517e-06,
|
41224 |
"loss": 1.087,
|
41225 |
"step": 5888
|
41226 |
+
},
|
41227 |
+
{
|
41228 |
+
"epoch": 0.38073071980992557,
|
41229 |
+
"grad_norm": 2.504150390625,
|
41230 |
+
"learning_rate": 6.92099674695922e-06,
|
41231 |
+
"loss": 1.1731,
|
41232 |
+
"step": 5889
|
41233 |
+
},
|
41234 |
+
{
|
41235 |
+
"epoch": 0.38079537097647503,
|
41236 |
+
"grad_norm": 2.604978322982788,
|
41237 |
+
"learning_rate": 6.920049581427559e-06,
|
41238 |
+
"loss": 1.0603,
|
41239 |
+
"step": 5890
|
41240 |
+
},
|
41241 |
+
{
|
41242 |
+
"epoch": 0.38086002214302456,
|
41243 |
+
"grad_norm": 2.4367499351501465,
|
41244 |
+
"learning_rate": 6.91910233507041e-06,
|
41245 |
+
"loss": 1.1082,
|
41246 |
+
"step": 5891
|
41247 |
+
},
|
41248 |
+
{
|
41249 |
+
"epoch": 0.380924673309574,
|
41250 |
+
"grad_norm": 2.471052885055542,
|
41251 |
+
"learning_rate": 6.918155007927646e-06,
|
41252 |
+
"loss": 1.164,
|
41253 |
+
"step": 5892
|
41254 |
+
},
|
41255 |
+
{
|
41256 |
+
"epoch": 0.3809893244761235,
|
41257 |
+
"grad_norm": 2.616631507873535,
|
41258 |
+
"learning_rate": 6.917207600039144e-06,
|
41259 |
+
"loss": 1.2193,
|
41260 |
+
"step": 5893
|
41261 |
+
},
|
41262 |
+
{
|
41263 |
+
"epoch": 0.381053975642673,
|
41264 |
+
"grad_norm": 2.8797757625579834,
|
41265 |
+
"learning_rate": 6.9162601114447855e-06,
|
41266 |
+
"loss": 1.1593,
|
41267 |
+
"step": 5894
|
41268 |
+
},
|
41269 |
+
{
|
41270 |
+
"epoch": 0.3811186268092225,
|
41271 |
+
"grad_norm": 2.8945086002349854,
|
41272 |
+
"learning_rate": 6.915312542184458e-06,
|
41273 |
+
"loss": 1.0885,
|
41274 |
+
"step": 5895
|
41275 |
+
},
|
41276 |
+
{
|
41277 |
+
"epoch": 0.38118327797577195,
|
41278 |
+
"grad_norm": 3.071239709854126,
|
41279 |
+
"learning_rate": 6.91436489229805e-06,
|
41280 |
+
"loss": 1.1257,
|
41281 |
+
"step": 5896
|
41282 |
+
},
|
41283 |
+
{
|
41284 |
+
"epoch": 0.3812479291423215,
|
41285 |
+
"grad_norm": 2.603156805038452,
|
41286 |
+
"learning_rate": 6.913417161825449e-06,
|
41287 |
+
"loss": 1.1818,
|
41288 |
+
"step": 5897
|
41289 |
+
},
|
41290 |
+
{
|
41291 |
+
"epoch": 0.38131258030887094,
|
41292 |
+
"grad_norm": 2.5029022693634033,
|
41293 |
+
"learning_rate": 6.912469350806554e-06,
|
41294 |
+
"loss": 1.0913,
|
41295 |
+
"step": 5898
|
41296 |
+
},
|
41297 |
+
{
|
41298 |
+
"epoch": 0.3813772314754204,
|
41299 |
+
"grad_norm": 2.4416632652282715,
|
41300 |
+
"learning_rate": 6.911521459281265e-06,
|
41301 |
+
"loss": 1.2458,
|
41302 |
+
"step": 5899
|
41303 |
+
},
|
41304 |
+
{
|
41305 |
+
"epoch": 0.38144188264196993,
|
41306 |
+
"grad_norm": 2.3060193061828613,
|
41307 |
+
"learning_rate": 6.910573487289479e-06,
|
41308 |
+
"loss": 1.0826,
|
41309 |
+
"step": 5900
|
41310 |
+
},
|
41311 |
+
{
|
41312 |
+
"epoch": 0.3815065338085194,
|
41313 |
+
"grad_norm": 2.7792911529541016,
|
41314 |
+
"learning_rate": 6.909625434871104e-06,
|
41315 |
+
"loss": 1.2411,
|
41316 |
+
"step": 5901
|
41317 |
+
},
|
41318 |
+
{
|
41319 |
+
"epoch": 0.38157118497506887,
|
41320 |
+
"grad_norm": 2.7977821826934814,
|
41321 |
+
"learning_rate": 6.90867730206605e-06,
|
41322 |
+
"loss": 1.0796,
|
41323 |
+
"step": 5902
|
41324 |
+
},
|
41325 |
+
{
|
41326 |
+
"epoch": 0.3816358361416184,
|
41327 |
+
"grad_norm": 2.568824529647827,
|
41328 |
+
"learning_rate": 6.907729088914228e-06,
|
41329 |
+
"loss": 1.1366,
|
41330 |
+
"step": 5903
|
41331 |
+
},
|
41332 |
+
{
|
41333 |
+
"epoch": 0.38170048730816786,
|
41334 |
+
"grad_norm": 2.810534954071045,
|
41335 |
+
"learning_rate": 6.906780795455553e-06,
|
41336 |
+
"loss": 1.2322,
|
41337 |
+
"step": 5904
|
41338 |
}
|
41339 |
],
|
41340 |
"logging_steps": 1,
|
|
|
41354 |
"attributes": {}
|
41355 |
}
|
41356 |
},
|
41357 |
+
"total_flos": 5.346276383970312e+18,
|
41358 |
"train_batch_size": 4,
|
41359 |
"trial_name": null,
|
41360 |
"trial_params": null
|