Training in progress, step 5550, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d491ec5c79e068346c07ac72051b147d92ec50b0b38fc5b4f05250ed8013d65
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4499a0d589d85027a908e783ca232b578c49e99556d4d108f652eaa7d4cd5da
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1be3d3b48fb8768a9f6d52575aecdf595860f5cf577f03b3acc8148f472cbae2
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a533c2b46d6abcf961f28fb57a403e6e075b91a6ddf8fd09d7df3b1d5f213cea
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -5227,6 +5227,151 @@
|
|
| 5227 |
"EMA_steps_per_second": 25.563,
|
| 5228 |
"epoch": 234.7826086956522,
|
| 5229 |
"step": 5400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5230 |
}
|
| 5231 |
],
|
| 5232 |
"logging_steps": 10,
|
|
@@ -5246,7 +5391,7 @@
|
|
| 5246 |
"attributes": {}
|
| 5247 |
}
|
| 5248 |
},
|
| 5249 |
-
"total_flos": 1.
|
| 5250 |
"train_batch_size": 4,
|
| 5251 |
"trial_name": null,
|
| 5252 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 241.30434782608697,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5550,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 5227 |
"EMA_steps_per_second": 25.563,
|
| 5228 |
"epoch": 234.7826086956522,
|
| 5229 |
"step": 5400
|
| 5230 |
+
},
|
| 5231 |
+
{
|
| 5232 |
+
"epoch": 235.2173913043478,
|
| 5233 |
+
"grad_norm": 2.2689285278320312,
|
| 5234 |
+
"learning_rate": 1.0855486739416115e-06,
|
| 5235 |
+
"loss": 0.2335,
|
| 5236 |
+
"step": 5410
|
| 5237 |
+
},
|
| 5238 |
+
{
|
| 5239 |
+
"epoch": 235.65217391304347,
|
| 5240 |
+
"grad_norm": 2.205592393875122,
|
| 5241 |
+
"learning_rate": 1.085547275540868e-06,
|
| 5242 |
+
"loss": 0.2066,
|
| 5243 |
+
"step": 5420
|
| 5244 |
+
},
|
| 5245 |
+
{
|
| 5246 |
+
"epoch": 236.08695652173913,
|
| 5247 |
+
"grad_norm": 2.256180763244629,
|
| 5248 |
+
"learning_rate": 1.0855456620030405e-06,
|
| 5249 |
+
"loss": 0.2442,
|
| 5250 |
+
"step": 5430
|
| 5251 |
+
},
|
| 5252 |
+
{
|
| 5253 |
+
"epoch": 236.52173913043478,
|
| 5254 |
+
"grad_norm": 2.169275999069214,
|
| 5255 |
+
"learning_rate": 1.0855438333287692e-06,
|
| 5256 |
+
"loss": 0.1983,
|
| 5257 |
+
"step": 5440
|
| 5258 |
+
},
|
| 5259 |
+
{
|
| 5260 |
+
"epoch": 236.95652173913044,
|
| 5261 |
+
"grad_norm": 2.1479029655456543,
|
| 5262 |
+
"learning_rate": 1.0855417895187786e-06,
|
| 5263 |
+
"loss": 0.2359,
|
| 5264 |
+
"step": 5450
|
| 5265 |
+
},
|
| 5266 |
+
{
|
| 5267 |
+
"epoch": 237.3913043478261,
|
| 5268 |
+
"grad_norm": 1.7530748844146729,
|
| 5269 |
+
"learning_rate": 1.0855395305738789e-06,
|
| 5270 |
+
"loss": 0.2375,
|
| 5271 |
+
"step": 5460
|
| 5272 |
+
},
|
| 5273 |
+
{
|
| 5274 |
+
"epoch": 237.82608695652175,
|
| 5275 |
+
"grad_norm": 1.93467116355896,
|
| 5276 |
+
"learning_rate": 1.0855370564949654e-06,
|
| 5277 |
+
"loss": 0.2229,
|
| 5278 |
+
"step": 5470
|
| 5279 |
+
},
|
| 5280 |
+
{
|
| 5281 |
+
"epoch": 238.2608695652174,
|
| 5282 |
+
"grad_norm": 3.3168399333953857,
|
| 5283 |
+
"learning_rate": 1.0855343672830188e-06,
|
| 5284 |
+
"loss": 0.2231,
|
| 5285 |
+
"step": 5480
|
| 5286 |
+
},
|
| 5287 |
+
{
|
| 5288 |
+
"epoch": 238.69565217391303,
|
| 5289 |
+
"grad_norm": 2.073918342590332,
|
| 5290 |
+
"learning_rate": 1.085531462939105e-06,
|
| 5291 |
+
"loss": 0.223,
|
| 5292 |
+
"step": 5490
|
| 5293 |
+
},
|
| 5294 |
+
{
|
| 5295 |
+
"epoch": 239.1304347826087,
|
| 5296 |
+
"grad_norm": 2.3649418354034424,
|
| 5297 |
+
"learning_rate": 1.085528343464375e-06,
|
| 5298 |
+
"loss": 0.2133,
|
| 5299 |
+
"step": 5500
|
| 5300 |
+
},
|
| 5301 |
+
{
|
| 5302 |
+
"epoch": 239.56521739130434,
|
| 5303 |
+
"grad_norm": 2.719287395477295,
|
| 5304 |
+
"learning_rate": 1.0855250088600655e-06,
|
| 5305 |
+
"loss": 0.2752,
|
| 5306 |
+
"step": 5510
|
| 5307 |
+
},
|
| 5308 |
+
{
|
| 5309 |
+
"epoch": 240.0,
|
| 5310 |
+
"grad_norm": 5.105301380157471,
|
| 5311 |
+
"learning_rate": 1.0855214591274984e-06,
|
| 5312 |
+
"loss": 0.1964,
|
| 5313 |
+
"step": 5520
|
| 5314 |
+
},
|
| 5315 |
+
{
|
| 5316 |
+
"epoch": 240.43478260869566,
|
| 5317 |
+
"grad_norm": 1.5361961126327515,
|
| 5318 |
+
"learning_rate": 1.0855176942680803e-06,
|
| 5319 |
+
"loss": 0.2309,
|
| 5320 |
+
"step": 5530
|
| 5321 |
+
},
|
| 5322 |
+
{
|
| 5323 |
+
"epoch": 240.8695652173913,
|
| 5324 |
+
"grad_norm": 2.835388660430908,
|
| 5325 |
+
"learning_rate": 1.0855137142833035e-06,
|
| 5326 |
+
"loss": 0.2229,
|
| 5327 |
+
"step": 5540
|
| 5328 |
+
},
|
| 5329 |
+
{
|
| 5330 |
+
"epoch": 241.30434782608697,
|
| 5331 |
+
"grad_norm": 2.0795018672943115,
|
| 5332 |
+
"learning_rate": 1.0855095191747456e-06,
|
| 5333 |
+
"loss": 0.2335,
|
| 5334 |
+
"step": 5550
|
| 5335 |
+
},
|
| 5336 |
+
{
|
| 5337 |
+
"epoch": 241.30434782608697,
|
| 5338 |
+
"eval_loss": 0.9892138242721558,
|
| 5339 |
+
"eval_runtime": 0.5423,
|
| 5340 |
+
"eval_samples_per_second": 18.441,
|
| 5341 |
+
"eval_steps_per_second": 18.441,
|
| 5342 |
+
"step": 5550
|
| 5343 |
+
},
|
| 5344 |
+
{
|
| 5345 |
+
"Start_State_loss": 0.8609819412231445,
|
| 5346 |
+
"Start_State_runtime": 0.3972,
|
| 5347 |
+
"Start_State_samples_per_second": 25.175,
|
| 5348 |
+
"Start_State_steps_per_second": 25.175,
|
| 5349 |
+
"epoch": 241.30434782608697,
|
| 5350 |
+
"step": 5550
|
| 5351 |
+
},
|
| 5352 |
+
{
|
| 5353 |
+
"Raw_Model_loss": 0.9892138242721558,
|
| 5354 |
+
"Raw_Model_runtime": 0.4138,
|
| 5355 |
+
"Raw_Model_samples_per_second": 24.169,
|
| 5356 |
+
"Raw_Model_steps_per_second": 24.169,
|
| 5357 |
+
"epoch": 241.30434782608697,
|
| 5358 |
+
"step": 5550
|
| 5359 |
+
},
|
| 5360 |
+
{
|
| 5361 |
+
"SWA_loss": 0.831312358379364,
|
| 5362 |
+
"SWA_runtime": 0.4189,
|
| 5363 |
+
"SWA_samples_per_second": 23.872,
|
| 5364 |
+
"SWA_steps_per_second": 23.872,
|
| 5365 |
+
"epoch": 241.30434782608697,
|
| 5366 |
+
"step": 5550
|
| 5367 |
+
},
|
| 5368 |
+
{
|
| 5369 |
+
"EMA_loss": 0.8599440455436707,
|
| 5370 |
+
"EMA_runtime": 0.4024,
|
| 5371 |
+
"EMA_samples_per_second": 24.854,
|
| 5372 |
+
"EMA_steps_per_second": 24.854,
|
| 5373 |
+
"epoch": 241.30434782608697,
|
| 5374 |
+
"step": 5550
|
| 5375 |
}
|
| 5376 |
],
|
| 5377 |
"logging_steps": 10,
|
|
|
|
| 5391 |
"attributes": {}
|
| 5392 |
}
|
| 5393 |
},
|
| 5394 |
+
"total_flos": 1.4269374356277658e+17,
|
| 5395 |
"train_batch_size": 4,
|
| 5396 |
"trial_name": null,
|
| 5397 |
"trial_params": null
|