Training in progress, epoch 149, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:02258098985a292d68d20bf94bf89b17ca13110b1ae56ee863f5f0d079f4238d
 size 166496880

last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:761e90a5b9c3c01cf812cc7dbf7f8345138f49ad9e203ed4d4779cfad169a777
 size 330495866

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:74ff39ce5bc1f6039b31922a6e443ab8d8a8f93d5528fc0d576340ae329fb493
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eb678f76da1c9347406d38fe82346b2ac3acd84e6118cb46f17ee79a3da28612
 size 1064

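The four files above are Git LFS pointer files: each records only the sha256 oid and byte size of the real artifact. As a sanity check, the sketch below (plain Python, not part of this repo; it assumes the checkpoint has been pulled to a local last-checkpoint/ directory, and the helper name is mine) recomputes both values for model.safetensors and compares them against the pointer in this commit.

# Minimal sketch: verify a downloaded checkpoint file against its LFS pointer.
# Path is an assumption about where the file was downloaded locally.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints need not fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

model_path = Path("last-checkpoint/model.safetensors")  # hypothetical local copy
expected_oid = "02258098985a292d68d20bf94bf89b17ca13110b1ae56ee863f5f0d079f4238d"
expected_size = 166496880

assert model_path.stat().st_size == expected_size, "size mismatch with LFS pointer"
assert sha256_of(model_path) == expected_oid, "sha256 mismatch with LFS pointer"
print("model.safetensors matches its LFS pointer")
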
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.24468238651752472,
   "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-wo-transform-metrics-test/checkpoint-95000",
-  "epoch":
+  "epoch": 149.0,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 149000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -38386,6 +38386,263 @@
       "eval_samples_per_second": 15.264,
       "eval_steps_per_second": 1.908,
       "step": 148000
+    },
+    {
+      "epoch": 148.02,
+      "grad_norm": 42.084232330322266,
+      "learning_rate": 4.298583608501328e-09,
+      "loss": 0.1631,
+      "step": 148020
+    },
+    {
+      "epoch": 148.05,
+      "grad_norm": 42.267086029052734,
+      "learning_rate": 4.169328287299545e-09,
+      "loss": 0.1619,
+      "step": 148050
+    },
+    {
+      "epoch": 148.08,
+      "grad_norm": 39.14877700805664,
+      "learning_rate": 4.042045240927883e-09,
+      "loss": 0.1887,
+      "step": 148080
+    },
+    {
+      "epoch": 148.11,
+      "grad_norm": 75.59671020507812,
+      "learning_rate": 3.9167345196361454e-09,
+      "loss": 0.1908,
+      "step": 148110
+    },
+    {
+      "epoch": 148.14,
+      "grad_norm": 87.17718505859375,
+      "learning_rate": 3.793396172895314e-09,
+      "loss": 0.1728,
+      "step": 148140
+    },
+    {
+      "epoch": 148.17,
+      "grad_norm": 37.11481475830078,
+      "learning_rate": 3.672030249396441e-09,
+      "loss": 0.1674,
+      "step": 148170
+    },
+    {
+      "epoch": 148.2,
+      "grad_norm": 34.75517272949219,
+      "learning_rate": 3.5526367970539765e-09,
+      "loss": 0.1768,
+      "step": 148200
+    },
+    {
+      "epoch": 148.23,
+      "grad_norm": 205.51870727539062,
+      "learning_rate": 3.4352158630018837e-09,
+      "loss": 0.1609,
+      "step": 148230
+    },
+    {
+      "epoch": 148.26,
+      "grad_norm": 35.48714065551758,
+      "learning_rate": 3.31976749359586e-09,
+      "loss": 0.1847,
+      "step": 148260
+    },
+    {
+      "epoch": 148.29,
+      "grad_norm": 143.81561279296875,
+      "learning_rate": 3.206291734413891e-09,
+      "loss": 0.1883,
+      "step": 148290
+    },
+    {
+      "epoch": 148.32,
+      "grad_norm": 98.67874908447266,
+      "learning_rate": 3.094788630254031e-09,
+      "loss": 0.1725,
+      "step": 148320
+    },
+    {
+      "epoch": 148.35,
+      "grad_norm": 51.73759078979492,
+      "learning_rate": 2.9852582251355124e-09,
+      "loss": 0.1788,
+      "step": 148350
+    },
+    {
+      "epoch": 148.38,
+      "grad_norm": 37.59303665161133,
+      "learning_rate": 2.8777005622998567e-09,
+      "loss": 0.1682,
+      "step": 148380
+    },
+    {
+      "epoch": 148.41,
+      "grad_norm": 80.69348907470703,
+      "learning_rate": 2.772115684209209e-09,
+      "loss": 0.1846,
+      "step": 148410
+    },
+    {
+      "epoch": 148.44,
+      "grad_norm": 35.9366455078125,
+      "learning_rate": 2.6685036325457826e-09,
+      "loss": 0.1715,
+      "step": 148440
+    },
+    {
+      "epoch": 148.47,
+      "grad_norm": 1834.0330810546875,
+      "learning_rate": 2.5668644482151892e-09,
+      "loss": 0.1731,
+      "step": 148470
+    },
+    {
+      "epoch": 148.5,
+      "grad_norm": 30.97291374206543,
+      "learning_rate": 2.4671981713420003e-09,
+      "loss": 0.1783,
+      "step": 148500
+    },
+    {
+      "epoch": 148.53,
+      "grad_norm": 33.913604736328125,
+      "learning_rate": 2.369504841273629e-09,
+      "loss": 0.1843,
+      "step": 148530
+    },
+    {
+      "epoch": 148.56,
+      "grad_norm": 22.96918487548828,
+      "learning_rate": 2.2737844965775578e-09,
+      "loss": 0.1918,
+      "step": 148560
+    },
+    {
+      "epoch": 148.59,
+      "grad_norm": 35.848934173583984,
+      "learning_rate": 2.1800371750430037e-09,
+      "loss": 0.1711,
+      "step": 148590
+    },
+    {
+      "epoch": 148.62,
+      "grad_norm": 130.026123046875,
+      "learning_rate": 2.088262913679251e-09,
+      "loss": 0.1766,
+      "step": 148620
+    },
+    {
+      "epoch": 148.65,
+      "grad_norm": 84.3224105834961,
+      "learning_rate": 1.9984617487173174e-09,
+      "loss": 0.1673,
+      "step": 148650
+    },
+    {
+      "epoch": 148.68,
+      "grad_norm": 116.34769439697266,
+      "learning_rate": 1.9106337156099553e-09,
+      "loss": 0.1798,
+      "step": 148680
+    },
+    {
+      "epoch": 148.71,
+      "grad_norm": 88.51609802246094,
+      "learning_rate": 1.8247788490299846e-09,
+      "loss": 0.1918,
+      "step": 148710
+    },
+    {
+      "epoch": 148.74,
+      "grad_norm": 43.09914779663086,
+      "learning_rate": 1.740897182871404e-09,
+      "loss": 0.187,
+      "step": 148740
+    },
+    {
+      "epoch": 148.77,
+      "grad_norm": 45.688201904296875,
+      "learning_rate": 1.6589887502493907e-09,
+      "loss": 0.1915,
+      "step": 148770
+    },
+    {
+      "epoch": 148.8,
+      "grad_norm": 26.354772567749023,
+      "learning_rate": 1.5790535835003006e-09,
+      "loss": 0.1609,
+      "step": 148800
+    },
+    {
+      "epoch": 148.83,
+      "grad_norm": 122.46343231201172,
+      "learning_rate": 1.5010917141811132e-09,
+      "loss": 0.1604,
+      "step": 148830
+    },
+    {
+      "epoch": 148.86,
+      "grad_norm": 406.5648193359375,
+      "learning_rate": 1.425103173069986e-09,
+      "loss": 0.2423,
+      "step": 148860
+    },
+    {
+      "epoch": 148.89,
+      "grad_norm": 59.21432876586914,
+      "learning_rate": 1.3510879901657003e-09,
+      "loss": 0.1776,
+      "step": 148890
+    },
+    {
+      "epoch": 148.92,
+      "grad_norm": 53.223148345947266,
+      "learning_rate": 1.2790461946887712e-09,
+      "loss": 0.1913,
+      "step": 148920
+    },
+    {
+      "epoch": 148.95,
+      "grad_norm": 63.24610137939453,
+      "learning_rate": 1.2089778150797816e-09,
+      "loss": 0.1795,
+      "step": 148950
+    },
+    {
+      "epoch": 148.98,
+      "grad_norm": 66.03044128417969,
+      "learning_rate": 1.1408828790010484e-09,
+      "loss": 0.1662,
+      "step": 148980
+    },
+    {
+      "epoch": 149.0,
+      "eval_loss": 0.25540730357170105,
+      "eval_map": 0.8454,
+      "eval_map_50": 0.9638,
+      "eval_map_75": 0.9414,
+      "eval_map_chicken": 0.8417,
+      "eval_map_duck": 0.7981,
+      "eval_map_large": 0.8191,
+      "eval_map_medium": 0.8525,
+      "eval_map_plant": 0.8964,
+      "eval_map_small": 0.3586,
+      "eval_mar_1": 0.3399,
+      "eval_mar_10": 0.8795,
+      "eval_mar_100": 0.8819,
+      "eval_mar_100_chicken": 0.8813,
+      "eval_mar_100_duck": 0.8407,
+      "eval_mar_100_plant": 0.9237,
+      "eval_mar_large": 0.8593,
+      "eval_mar_medium": 0.8889,
+      "eval_mar_small": 0.5029,
+      "eval_runtime": 13.5683,
+      "eval_samples_per_second": 14.74,
+      "eval_steps_per_second": 1.843,
+      "step": 149000
     }
   ],
   "logging_steps": 30,
@@ -38405,7 +38662,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.12594438234112e+19,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
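
For a quick read on where this run stands, the sketch below (plain Python, not part of this repo; it assumes a local copy of the checkpoint at last-checkpoint/, and log_history is the Trainer's standard key for the logged entries shown in the diff above) loads trainer_state.json and prints the values this commit updates.

# Minimal sketch: summarise the checkpoint's trainer state.
# Assumes the checkpoint directory has been downloaded as "last-checkpoint/".
import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print(f"epoch:        {state['epoch']}")        # 149.0 in this commit
print(f"global_step:  {state['global_step']}")  # 149000 in this commit
print(f"best_metric:  {state['best_metric']}")
print(f"best model:   {state['best_model_checkpoint']}")

# "log_history" holds the logged training/eval entries; the last eval entry
# here is the epoch-149 evaluation recorded in this commit.
latest_eval = [e for e in state["log_history"] if "eval_map" in e][-1]
print(f"eval_map @ step {latest_eval['step']}: {latest_eval['eval_map']}")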