Training in progress, step 1800000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84eeca699785d889add4fce9e83fcf219cc03b8c3e8612092092ba4f022e339b
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a47c42cd40edaf177247b0f81cc113941e45da543bcd8075122f86f8a439a53
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21643
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c22e615daa20a7523bf096df9dcc68366ed60a8151bafc863df6c6b53275a84a
|
3 |
size 21643
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4c724e259a52a66e7ae3019ca30f1baaafdcfcaf6dbe949cbda0206af52d55
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a24dd415d95b2d83e758fabab0d2c6d80262a248eda13bb423bd8c9ef9f0d1d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -13306,11 +13306,391 @@
|
|
13306 |
"eval_samples_per_second": 80.645,
|
13307 |
"eval_steps_per_second": 0.63,
|
13308 |
"step": 1750000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13309 |
}
|
13310 |
],
|
13311 |
"max_steps": 2000000,
|
13312 |
"num_train_epochs": 9223372036854775807,
|
13313 |
-
"total_flos": 1.
|
13314 |
"trial_name": null,
|
13315 |
"trial_params": null
|
13316 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.15,
|
5 |
+
"global_step": 1800000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
13306 |
"eval_samples_per_second": 80.645,
|
13307 |
"eval_steps_per_second": 0.63,
|
13308 |
"step": 1750000
|
13309 |
+
},
|
13310 |
+
{
|
13311 |
+
"epoch": 0.13,
|
13312 |
+
"learning_rate": 1.6540859441048118e-05,
|
13313 |
+
"loss": 0.4394,
|
13314 |
+
"step": 1751000
|
13315 |
+
},
|
13316 |
+
{
|
13317 |
+
"epoch": 0.13,
|
13318 |
+
"learning_rate": 1.6489389468730806e-05,
|
13319 |
+
"loss": 0.439,
|
13320 |
+
"step": 1752000
|
13321 |
+
},
|
13322 |
+
{
|
13323 |
+
"epoch": 0.13,
|
13324 |
+
"learning_rate": 1.6438215320582125e-05,
|
13325 |
+
"loss": 0.4382,
|
13326 |
+
"step": 1753000
|
13327 |
+
},
|
13328 |
+
{
|
13329 |
+
"epoch": 0.13,
|
13330 |
+
"learning_rate": 1.638713204573334e-05,
|
13331 |
+
"loss": 0.4381,
|
13332 |
+
"step": 1754000
|
13333 |
+
},
|
13334 |
+
{
|
13335 |
+
"epoch": 0.13,
|
13336 |
+
"learning_rate": 1.6336293339368757e-05,
|
13337 |
+
"loss": 0.4392,
|
13338 |
+
"step": 1755000
|
13339 |
+
},
|
13340 |
+
{
|
13341 |
+
"epoch": 0.13,
|
13342 |
+
"eval_loss": 0.420003205537796,
|
13343 |
+
"eval_runtime": 78.6855,
|
13344 |
+
"eval_samples_per_second": 81.336,
|
13345 |
+
"eval_steps_per_second": 0.635,
|
13346 |
+
"step": 1755000
|
13347 |
+
},
|
13348 |
+
{
|
13349 |
+
"epoch": 0.13,
|
13350 |
+
"learning_rate": 1.628559757718579e-05,
|
13351 |
+
"loss": 0.439,
|
13352 |
+
"step": 1756000
|
13353 |
+
},
|
13354 |
+
{
|
13355 |
+
"epoch": 0.13,
|
13356 |
+
"learning_rate": 1.6235095900122255e-05,
|
13357 |
+
"loss": 0.4383,
|
13358 |
+
"step": 1757000
|
13359 |
+
},
|
13360 |
+
{
|
13361 |
+
"epoch": 0.13,
|
13362 |
+
"learning_rate": 1.61848386723797e-05,
|
13363 |
+
"loss": 0.4386,
|
13364 |
+
"step": 1758000
|
13365 |
+
},
|
13366 |
+
{
|
13367 |
+
"epoch": 0.13,
|
13368 |
+
"learning_rate": 1.6134725432005385e-05,
|
13369 |
+
"loss": 0.4395,
|
13370 |
+
"step": 1759000
|
13371 |
+
},
|
13372 |
+
{
|
13373 |
+
"epoch": 0.13,
|
13374 |
+
"learning_rate": 1.6084856557803128e-05,
|
13375 |
+
"loss": 0.438,
|
13376 |
+
"step": 1760000
|
13377 |
+
},
|
13378 |
+
{
|
13379 |
+
"epoch": 0.13,
|
13380 |
+
"eval_loss": 0.4174318015575409,
|
13381 |
+
"eval_runtime": 76.8661,
|
13382 |
+
"eval_samples_per_second": 83.262,
|
13383 |
+
"eval_steps_per_second": 0.65,
|
13384 |
+
"step": 1760000
|
13385 |
+
},
|
13386 |
+
{
|
13387 |
+
"epoch": 0.13,
|
13388 |
+
"learning_rate": 1.6035132364121584e-05,
|
13389 |
+
"loss": 0.4388,
|
13390 |
+
"step": 1761000
|
13391 |
+
},
|
13392 |
+
{
|
13393 |
+
"epoch": 0.13,
|
13394 |
+
"learning_rate": 1.5985603018519935e-05,
|
13395 |
+
"loss": 0.4378,
|
13396 |
+
"step": 1762000
|
13397 |
+
},
|
13398 |
+
{
|
13399 |
+
"epoch": 0.13,
|
13400 |
+
"learning_rate": 1.5936317908767756e-05,
|
13401 |
+
"loss": 0.4368,
|
13402 |
+
"step": 1763000
|
13403 |
+
},
|
13404 |
+
{
|
13405 |
+
"epoch": 0.13,
|
13406 |
+
"learning_rate": 1.5887178516132736e-05,
|
13407 |
+
"loss": 0.4412,
|
13408 |
+
"step": 1764000
|
13409 |
+
},
|
13410 |
+
{
|
13411 |
+
"epoch": 0.13,
|
13412 |
+
"learning_rate": 1.5838283268763148e-05,
|
13413 |
+
"loss": 0.4383,
|
13414 |
+
"step": 1765000
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 0.13,
|
13418 |
+
"eval_loss": 0.41864004731178284,
|
13419 |
+
"eval_runtime": 86.8232,
|
13420 |
+
"eval_samples_per_second": 73.713,
|
13421 |
+
"eval_steps_per_second": 0.576,
|
13422 |
+
"step": 1765000
|
13423 |
+
},
|
13424 |
+
{
|
13425 |
+
"epoch": 0.13,
|
13426 |
+
"learning_rate": 1.5789583078410045e-05,
|
13427 |
+
"loss": 0.4389,
|
13428 |
+
"step": 1766000
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 0.13,
|
13432 |
+
"learning_rate": 1.574102963743466e-05,
|
13433 |
+
"loss": 0.439,
|
13434 |
+
"step": 1767000
|
13435 |
+
},
|
13436 |
+
{
|
13437 |
+
"epoch": 0.13,
|
13438 |
+
"learning_rate": 1.5692671940427092e-05,
|
13439 |
+
"loss": 0.4385,
|
13440 |
+
"step": 1768000
|
13441 |
+
},
|
13442 |
+
{
|
13443 |
+
"epoch": 0.13,
|
13444 |
+
"learning_rate": 1.5644510134693248e-05,
|
13445 |
+
"loss": 0.4384,
|
13446 |
+
"step": 1769000
|
13447 |
+
},
|
13448 |
+
{
|
13449 |
+
"epoch": 0.14,
|
13450 |
+
"learning_rate": 1.559654436694238e-05,
|
13451 |
+
"loss": 0.4392,
|
13452 |
+
"step": 1770000
|
13453 |
+
},
|
13454 |
+
{
|
13455 |
+
"epoch": 0.14,
|
13456 |
+
"eval_loss": 0.4172964096069336,
|
13457 |
+
"eval_runtime": 79.9939,
|
13458 |
+
"eval_samples_per_second": 80.006,
|
13459 |
+
"eval_steps_per_second": 0.625,
|
13460 |
+
"step": 1770000
|
13461 |
+
},
|
13462 |
+
{
|
13463 |
+
"epoch": 0.14,
|
13464 |
+
"learning_rate": 1.5548822454827717e-05,
|
13465 |
+
"loss": 0.4393,
|
13466 |
+
"step": 1771000
|
13467 |
+
},
|
13468 |
+
{
|
13469 |
+
"epoch": 0.14,
|
13470 |
+
"learning_rate": 1.5501249004379188e-05,
|
13471 |
+
"loss": 0.4392,
|
13472 |
+
"step": 1772000
|
13473 |
+
},
|
13474 |
+
{
|
13475 |
+
"epoch": 0.14,
|
13476 |
+
"learning_rate": 1.54539193071009e-05,
|
13477 |
+
"loss": 0.4386,
|
13478 |
+
"step": 1773000
|
13479 |
+
},
|
13480 |
+
{
|
13481 |
+
"epoch": 0.14,
|
13482 |
+
"learning_rate": 1.5406738753042658e-05,
|
13483 |
+
"loss": 0.4393,
|
13484 |
+
"step": 1774000
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 0.14,
|
13488 |
+
"learning_rate": 1.5359754961260252e-05,
|
13489 |
+
"loss": 0.4387,
|
13490 |
+
"step": 1775000
|
13491 |
+
},
|
13492 |
+
{
|
13493 |
+
"epoch": 0.14,
|
13494 |
+
"eval_loss": 0.4169865548610687,
|
13495 |
+
"eval_runtime": 77.7982,
|
13496 |
+
"eval_samples_per_second": 82.264,
|
13497 |
+
"eval_steps_per_second": 0.643,
|
13498 |
+
"step": 1775000
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 0.14,
|
13502 |
+
"learning_rate": 1.5312968074874446e-05,
|
13503 |
+
"loss": 0.4381,
|
13504 |
+
"step": 1776000
|
13505 |
+
},
|
13506 |
+
{
|
13507 |
+
"epoch": 0.14,
|
13508 |
+
"learning_rate": 1.5266424727771944e-05,
|
13509 |
+
"loss": 0.4396,
|
13510 |
+
"step": 1777000
|
13511 |
+
},
|
13512 |
+
{
|
13513 |
+
"epoch": 0.14,
|
13514 |
+
"learning_rate": 1.522003188188146e-05,
|
13515 |
+
"loss": 0.4395,
|
13516 |
+
"step": 1778000
|
13517 |
+
},
|
13518 |
+
{
|
13519 |
+
"epoch": 0.14,
|
13520 |
+
"learning_rate": 1.517383636700831e-05,
|
13521 |
+
"loss": 0.4382,
|
13522 |
+
"step": 1779000
|
13523 |
+
},
|
13524 |
+
{
|
13525 |
+
"epoch": 0.14,
|
13526 |
+
"learning_rate": 1.5127838323872036e-05,
|
13527 |
+
"loss": 0.4364,
|
13528 |
+
"step": 1780000
|
13529 |
+
},
|
13530 |
+
{
|
13531 |
+
"epoch": 0.14,
|
13532 |
+
"eval_loss": 0.41849958896636963,
|
13533 |
+
"eval_runtime": 76.5418,
|
13534 |
+
"eval_samples_per_second": 83.614,
|
13535 |
+
"eval_steps_per_second": 0.653,
|
13536 |
+
"step": 1780000
|
13537 |
+
},
|
13538 |
+
{
|
13539 |
+
"epoch": 0.14,
|
13540 |
+
"learning_rate": 1.5082037892590664e-05,
|
13541 |
+
"loss": 0.439,
|
13542 |
+
"step": 1781000
|
13543 |
+
},
|
13544 |
+
{
|
13545 |
+
"epoch": 0.14,
|
13546 |
+
"learning_rate": 1.5036480716537045e-05,
|
13547 |
+
"loss": 0.4393,
|
13548 |
+
"step": 1782000
|
13549 |
+
},
|
13550 |
+
{
|
13551 |
+
"epoch": 0.14,
|
13552 |
+
"learning_rate": 1.4991121035047137e-05,
|
13553 |
+
"loss": 0.4383,
|
13554 |
+
"step": 1783000
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 0.14,
|
13558 |
+
"learning_rate": 1.4945913877821996e-05,
|
13559 |
+
"loss": 0.4383,
|
13560 |
+
"step": 1784000
|
13561 |
+
},
|
13562 |
+
{
|
13563 |
+
"epoch": 0.14,
|
13564 |
+
"learning_rate": 1.4900904886625165e-05,
|
13565 |
+
"loss": 0.4377,
|
13566 |
+
"step": 1785000
|
13567 |
+
},
|
13568 |
+
{
|
13569 |
+
"epoch": 0.14,
|
13570 |
+
"eval_loss": 0.4204372465610504,
|
13571 |
+
"eval_runtime": 76.9476,
|
13572 |
+
"eval_samples_per_second": 83.173,
|
13573 |
+
"eval_steps_per_second": 0.65,
|
13574 |
+
"step": 1785000
|
13575 |
+
},
|
13576 |
+
{
|
13577 |
+
"epoch": 0.14,
|
13578 |
+
"learning_rate": 1.4856138910151988e-05,
|
13579 |
+
"loss": 0.4388,
|
13580 |
+
"step": 1786000
|
13581 |
+
},
|
13582 |
+
{
|
13583 |
+
"epoch": 0.14,
|
13584 |
+
"learning_rate": 1.4811526463215664e-05,
|
13585 |
+
"loss": 0.4371,
|
13586 |
+
"step": 1787000
|
13587 |
+
},
|
13588 |
+
{
|
13589 |
+
"epoch": 0.14,
|
13590 |
+
"learning_rate": 1.476715690631307e-05,
|
13591 |
+
"loss": 0.438,
|
13592 |
+
"step": 1788000
|
13593 |
+
},
|
13594 |
+
{
|
13595 |
+
"epoch": 0.14,
|
13596 |
+
"learning_rate": 1.4722941546682392e-05,
|
13597 |
+
"loss": 0.4381,
|
13598 |
+
"step": 1789000
|
13599 |
+
},
|
13600 |
+
{
|
13601 |
+
"epoch": 0.14,
|
13602 |
+
"learning_rate": 1.4678968949438921e-05,
|
13603 |
+
"loss": 0.4363,
|
13604 |
+
"step": 1790000
|
13605 |
+
},
|
13606 |
+
{
|
13607 |
+
"epoch": 0.14,
|
13608 |
+
"eval_loss": 0.4183988869190216,
|
13609 |
+
"eval_runtime": 76.9826,
|
13610 |
+
"eval_samples_per_second": 83.136,
|
13611 |
+
"eval_steps_per_second": 0.649,
|
13612 |
+
"step": 1790000
|
13613 |
+
},
|
13614 |
+
{
|
13615 |
+
"epoch": 0.15,
|
13616 |
+
"learning_rate": 1.4635151215325466e-05,
|
13617 |
+
"loss": 0.4366,
|
13618 |
+
"step": 1791000
|
13619 |
+
},
|
13620 |
+
{
|
13621 |
+
"epoch": 0.15,
|
13622 |
+
"learning_rate": 1.4591576112997706e-05,
|
13623 |
+
"loss": 0.4391,
|
13624 |
+
"step": 1792000
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 0.15,
|
13628 |
+
"learning_rate": 1.4548156537772989e-05,
|
13629 |
+
"loss": 0.4391,
|
13630 |
+
"step": 1793000
|
13631 |
+
},
|
13632 |
+
{
|
13633 |
+
"epoch": 0.15,
|
13634 |
+
"learning_rate": 1.4504936340214418e-05,
|
13635 |
+
"loss": 0.4385,
|
13636 |
+
"step": 1794000
|
13637 |
+
},
|
13638 |
+
{
|
13639 |
+
"epoch": 0.15,
|
13640 |
+
"learning_rate": 1.4461958572967858e-05,
|
13641 |
+
"loss": 0.4378,
|
13642 |
+
"step": 1795000
|
13643 |
+
},
|
13644 |
+
{
|
13645 |
+
"epoch": 0.15,
|
13646 |
+
"eval_loss": 0.4223540425300598,
|
13647 |
+
"eval_runtime": 77.2417,
|
13648 |
+
"eval_samples_per_second": 82.857,
|
13649 |
+
"eval_steps_per_second": 0.647,
|
13650 |
+
"step": 1795000
|
13651 |
+
},
|
13652 |
+
{
|
13653 |
+
"epoch": 0.15,
|
13654 |
+
"learning_rate": 1.4419137325396865e-05,
|
13655 |
+
"loss": 0.4389,
|
13656 |
+
"step": 1796000
|
13657 |
+
},
|
13658 |
+
{
|
13659 |
+
"epoch": 0.15,
|
13660 |
+
"learning_rate": 1.437651584850691e-05,
|
13661 |
+
"loss": 0.4386,
|
13662 |
+
"step": 1797000
|
13663 |
+
},
|
13664 |
+
{
|
13665 |
+
"epoch": 0.15,
|
13666 |
+
"learning_rate": 1.4334094272130413e-05,
|
13667 |
+
"loss": 0.4367,
|
13668 |
+
"step": 1798000
|
13669 |
+
},
|
13670 |
+
{
|
13671 |
+
"epoch": 0.15,
|
13672 |
+
"learning_rate": 1.4291872725490842e-05,
|
13673 |
+
"loss": 0.4384,
|
13674 |
+
"step": 1799000
|
13675 |
+
},
|
13676 |
+
{
|
13677 |
+
"epoch": 0.15,
|
13678 |
+
"learning_rate": 1.4249893258568889e-05,
|
13679 |
+
"loss": 0.4384,
|
13680 |
+
"step": 1800000
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 0.15,
|
13684 |
+
"eval_loss": 0.4171189069747925,
|
13685 |
+
"eval_runtime": 76.9772,
|
13686 |
+
"eval_samples_per_second": 83.142,
|
13687 |
+
"eval_steps_per_second": 0.65,
|
13688 |
+
"step": 1800000
|
13689 |
}
|
13690 |
],
|
13691 |
"max_steps": 2000000,
|
13692 |
"num_train_epochs": 9223372036854775807,
|
13693 |
+
"total_flos": 1.5772558930477056e+22,
|
13694 |
"trial_name": null,
|
13695 |
"trial_params": null
|
13696 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a47c42cd40edaf177247b0f81cc113941e45da543bcd8075122f86f8a439a53
|
3 |
size 449471589
|