Training in progress, step 1700000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6263ea7e43a6acbefa798ce6055706ef15240d94f08fb8faefbf26e23ac3a25
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abc7a8543a963e582a29e31e1e0c78fea4345a1b73b925ed6cc4d7ab61edbd1e
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21643
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7eefc1725778458a372a52de0baec705be0fcd52c035947880ee6c60789db03
|
3 |
size 21643
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f873a347744a9c52f42be277b16c7300feca4fe83dae00b3348477c6cab3f68
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f676f9b0130b013ba493986d64992bf63d68d6bad5cd11e3728c43b657e50e05
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -12546,11 +12546,391 @@
|
|
12546 |
"eval_samples_per_second": 80.739,
|
12547 |
"eval_steps_per_second": 0.631,
|
12548 |
"step": 1650000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12549 |
}
|
12550 |
],
|
12551 |
"max_steps": 2000000,
|
12552 |
"num_train_epochs": 9223372036854775807,
|
12553 |
-
"total_flos": 1.
|
12554 |
"trial_name": null,
|
12555 |
"trial_params": null
|
12556 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.1,
|
5 |
+
"global_step": 1700000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
12546 |
"eval_samples_per_second": 80.739,
|
12547 |
"eval_steps_per_second": 0.631,
|
12548 |
"step": 1650000
|
12549 |
+
},
|
12550 |
+
{
|
12551 |
+
"epoch": 0.08,
|
12552 |
+
"learning_rate": 2.263295082033955e-05,
|
12553 |
+
"loss": 0.4402,
|
12554 |
+
"step": 1651000
|
12555 |
+
},
|
12556 |
+
{
|
12557 |
+
"epoch": 0.08,
|
12558 |
+
"learning_rate": 2.256302851518958e-05,
|
12559 |
+
"loss": 0.4408,
|
12560 |
+
"step": 1652000
|
12561 |
+
},
|
12562 |
+
{
|
12563 |
+
"epoch": 0.08,
|
12564 |
+
"learning_rate": 2.2493281173015714e-05,
|
12565 |
+
"loss": 0.4392,
|
12566 |
+
"step": 1653000
|
12567 |
+
},
|
12568 |
+
{
|
12569 |
+
"epoch": 0.08,
|
12570 |
+
"learning_rate": 2.242370900628049e-05,
|
12571 |
+
"loss": 0.4401,
|
12572 |
+
"step": 1654000
|
12573 |
+
},
|
12574 |
+
{
|
12575 |
+
"epoch": 0.08,
|
12576 |
+
"learning_rate": 2.235438153601577e-05,
|
12577 |
+
"loss": 0.4399,
|
12578 |
+
"step": 1655000
|
12579 |
+
},
|
12580 |
+
{
|
12581 |
+
"epoch": 0.08,
|
12582 |
+
"eval_loss": 0.42048439383506775,
|
12583 |
+
"eval_runtime": 77.8384,
|
12584 |
+
"eval_samples_per_second": 82.222,
|
12585 |
+
"eval_steps_per_second": 0.642,
|
12586 |
+
"step": 1655000
|
12587 |
+
},
|
12588 |
+
{
|
12589 |
+
"epoch": 0.08,
|
12590 |
+
"learning_rate": 2.2285160179706007e-05,
|
12591 |
+
"loss": 0.44,
|
12592 |
+
"step": 1656000
|
12593 |
+
},
|
12594 |
+
{
|
12595 |
+
"epoch": 0.08,
|
12596 |
+
"learning_rate": 2.2216114632807524e-05,
|
12597 |
+
"loss": 0.4404,
|
12598 |
+
"step": 1657000
|
12599 |
+
},
|
12600 |
+
{
|
12601 |
+
"epoch": 0.08,
|
12602 |
+
"learning_rate": 2.214731388718044e-05,
|
12603 |
+
"loss": 0.4406,
|
12604 |
+
"step": 1658000
|
12605 |
+
},
|
12606 |
+
{
|
12607 |
+
"epoch": 0.08,
|
12608 |
+
"learning_rate": 2.2078620413208303e-05,
|
12609 |
+
"loss": 0.4402,
|
12610 |
+
"step": 1659000
|
12611 |
+
},
|
12612 |
+
{
|
12613 |
+
"epoch": 0.08,
|
12614 |
+
"learning_rate": 2.201010337780338e-05,
|
12615 |
+
"loss": 0.4405,
|
12616 |
+
"step": 1660000
|
12617 |
+
},
|
12618 |
+
{
|
12619 |
+
"epoch": 0.08,
|
12620 |
+
"eval_loss": 0.4228270649909973,
|
12621 |
+
"eval_runtime": 78.2009,
|
12622 |
+
"eval_samples_per_second": 81.841,
|
12623 |
+
"eval_steps_per_second": 0.639,
|
12624 |
+
"step": 1660000
|
12625 |
+
},
|
12626 |
+
{
|
12627 |
+
"epoch": 0.08,
|
12628 |
+
"learning_rate": 2.1941831241763897e-05,
|
12629 |
+
"loss": 0.4402,
|
12630 |
+
"step": 1661000
|
12631 |
+
},
|
12632 |
+
{
|
12633 |
+
"epoch": 0.08,
|
12634 |
+
"learning_rate": 2.1873667532140358e-05,
|
12635 |
+
"loss": 0.439,
|
12636 |
+
"step": 1662000
|
12637 |
+
},
|
12638 |
+
{
|
12639 |
+
"epoch": 0.08,
|
12640 |
+
"learning_rate": 2.1805748783540877e-05,
|
12641 |
+
"loss": 0.44,
|
12642 |
+
"step": 1663000
|
12643 |
+
},
|
12644 |
+
{
|
12645 |
+
"epoch": 0.08,
|
12646 |
+
"learning_rate": 2.1737939229421666e-05,
|
12647 |
+
"loss": 0.4407,
|
12648 |
+
"step": 1664000
|
12649 |
+
},
|
12650 |
+
{
|
12651 |
+
"epoch": 0.08,
|
12652 |
+
"learning_rate": 2.167037469500335e-05,
|
12653 |
+
"loss": 0.4404,
|
12654 |
+
"step": 1665000
|
12655 |
+
},
|
12656 |
+
{
|
12657 |
+
"epoch": 0.08,
|
12658 |
+
"eval_loss": 0.41908711194992065,
|
12659 |
+
"eval_runtime": 78.8241,
|
12660 |
+
"eval_samples_per_second": 81.193,
|
12661 |
+
"eval_steps_per_second": 0.634,
|
12662 |
+
"step": 1665000
|
12663 |
+
},
|
12664 |
+
{
|
12665 |
+
"epoch": 0.08,
|
12666 |
+
"learning_rate": 2.160292012180046e-05,
|
12667 |
+
"loss": 0.4405,
|
12668 |
+
"step": 1666000
|
12669 |
+
},
|
12670 |
+
{
|
12671 |
+
"epoch": 0.08,
|
12672 |
+
"learning_rate": 2.1535643436230335e-05,
|
12673 |
+
"loss": 0.4401,
|
12674 |
+
"step": 1667000
|
12675 |
+
},
|
12676 |
+
{
|
12677 |
+
"epoch": 0.08,
|
12678 |
+
"learning_rate": 2.146854484322948e-05,
|
12679 |
+
"loss": 0.4403,
|
12680 |
+
"step": 1668000
|
12681 |
+
},
|
12682 |
+
{
|
12683 |
+
"epoch": 0.08,
|
12684 |
+
"learning_rate": 2.140162454719184e-05,
|
12685 |
+
"loss": 0.4418,
|
12686 |
+
"step": 1669000
|
12687 |
+
},
|
12688 |
+
{
|
12689 |
+
"epoch": 0.09,
|
12690 |
+
"learning_rate": 2.1334882751968192e-05,
|
12691 |
+
"loss": 0.4397,
|
12692 |
+
"step": 1670000
|
12693 |
+
},
|
12694 |
+
{
|
12695 |
+
"epoch": 0.09,
|
12696 |
+
"eval_loss": 0.42122882604599,
|
12697 |
+
"eval_runtime": 76.417,
|
12698 |
+
"eval_samples_per_second": 83.751,
|
12699 |
+
"eval_steps_per_second": 0.654,
|
12700 |
+
"step": 1670000
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 0.09,
|
12704 |
+
"learning_rate": 2.126838613462656e-05,
|
12705 |
+
"loss": 0.4387,
|
12706 |
+
"step": 1671000
|
12707 |
+
},
|
12708 |
+
{
|
12709 |
+
"epoch": 0.09,
|
12710 |
+
"learning_rate": 2.1202001771399895e-05,
|
12711 |
+
"loss": 0.4387,
|
12712 |
+
"step": 1672000
|
12713 |
+
},
|
12714 |
+
{
|
12715 |
+
"epoch": 0.09,
|
12716 |
+
"learning_rate": 2.1135796517072863e-05,
|
12717 |
+
"loss": 0.4394,
|
12718 |
+
"step": 1673000
|
12719 |
+
},
|
12720 |
+
{
|
12721 |
+
"epoch": 0.09,
|
12722 |
+
"learning_rate": 2.106977057331812e-05,
|
12723 |
+
"loss": 0.4398,
|
12724 |
+
"step": 1674000
|
12725 |
+
},
|
12726 |
+
{
|
12727 |
+
"epoch": 0.09,
|
12728 |
+
"learning_rate": 2.1003989897961326e-05,
|
12729 |
+
"loss": 0.44,
|
12730 |
+
"step": 1675000
|
12731 |
+
},
|
12732 |
+
{
|
12733 |
+
"epoch": 0.09,
|
12734 |
+
"eval_loss": 0.41976797580718994,
|
12735 |
+
"eval_runtime": 77.5035,
|
12736 |
+
"eval_samples_per_second": 82.577,
|
12737 |
+
"eval_steps_per_second": 0.645,
|
12738 |
+
"step": 1675000
|
12739 |
+
},
|
12740 |
+
{
|
12741 |
+
"epoch": 0.09,
|
12742 |
+
"learning_rate": 2.0938388575438328e-05,
|
12743 |
+
"loss": 0.4403,
|
12744 |
+
"step": 1676000
|
12745 |
+
},
|
12746 |
+
{
|
12747 |
+
"epoch": 0.09,
|
12748 |
+
"learning_rate": 2.0872901407947595e-05,
|
12749 |
+
"loss": 0.4413,
|
12750 |
+
"step": 1677000
|
12751 |
+
},
|
12752 |
+
{
|
12753 |
+
"epoch": 0.09,
|
12754 |
+
"learning_rate": 2.080759435185324e-05,
|
12755 |
+
"loss": 0.4397,
|
12756 |
+
"step": 1678000
|
12757 |
+
},
|
12758 |
+
{
|
12759 |
+
"epoch": 0.09,
|
12760 |
+
"learning_rate": 2.0742467606091935e-05,
|
12761 |
+
"loss": 0.4395,
|
12762 |
+
"step": 1679000
|
12763 |
+
},
|
12764 |
+
{
|
12765 |
+
"epoch": 0.09,
|
12766 |
+
"learning_rate": 2.0677586225058045e-05,
|
12767 |
+
"loss": 0.4407,
|
12768 |
+
"step": 1680000
|
12769 |
+
},
|
12770 |
+
{
|
12771 |
+
"epoch": 0.09,
|
12772 |
+
"eval_loss": 0.42079994082450867,
|
12773 |
+
"eval_runtime": 79.6958,
|
12774 |
+
"eval_samples_per_second": 80.305,
|
12775 |
+
"eval_steps_per_second": 0.627,
|
12776 |
+
"step": 1680000
|
12777 |
+
},
|
12778 |
+
{
|
12779 |
+
"epoch": 0.09,
|
12780 |
+
"learning_rate": 2.0612885189152567e-05,
|
12781 |
+
"loss": 0.4399,
|
12782 |
+
"step": 1681000
|
12783 |
+
},
|
12784 |
+
{
|
12785 |
+
"epoch": 0.09,
|
12786 |
+
"learning_rate": 2.0548300200510223e-05,
|
12787 |
+
"loss": 0.4382,
|
12788 |
+
"step": 1682000
|
12789 |
+
},
|
12790 |
+
{
|
12791 |
+
"epoch": 0.09,
|
12792 |
+
"learning_rate": 2.048389631205587e-05,
|
12793 |
+
"loss": 0.4393,
|
12794 |
+
"step": 1683000
|
12795 |
+
},
|
12796 |
+
{
|
12797 |
+
"epoch": 0.09,
|
12798 |
+
"learning_rate": 2.041967371997491e-05,
|
12799 |
+
"loss": 0.4392,
|
12800 |
+
"step": 1684000
|
12801 |
+
},
|
12802 |
+
{
|
12803 |
+
"epoch": 0.09,
|
12804 |
+
"learning_rate": 2.0355760520841843e-05,
|
12805 |
+
"loss": 0.4403,
|
12806 |
+
"step": 1685000
|
12807 |
+
},
|
12808 |
+
{
|
12809 |
+
"epoch": 0.09,
|
12810 |
+
"eval_loss": 0.41910338401794434,
|
12811 |
+
"eval_runtime": 79.2458,
|
12812 |
+
"eval_samples_per_second": 80.761,
|
12813 |
+
"eval_steps_per_second": 0.631,
|
12814 |
+
"step": 1685000
|
12815 |
+
},
|
12816 |
+
{
|
12817 |
+
"epoch": 0.09,
|
12818 |
+
"learning_rate": 2.0291900744285765e-05,
|
12819 |
+
"loss": 0.4397,
|
12820 |
+
"step": 1686000
|
12821 |
+
},
|
12822 |
+
{
|
12823 |
+
"epoch": 0.09,
|
12824 |
+
"learning_rate": 2.022822284895487e-05,
|
12825 |
+
"loss": 0.4401,
|
12826 |
+
"step": 1687000
|
12827 |
+
},
|
12828 |
+
{
|
12829 |
+
"epoch": 0.09,
|
12830 |
+
"learning_rate": 2.016472702882308e-05,
|
12831 |
+
"loss": 0.4395,
|
12832 |
+
"step": 1688000
|
12833 |
+
},
|
12834 |
+
{
|
12835 |
+
"epoch": 0.09,
|
12836 |
+
"learning_rate": 2.0101476699753774e-05,
|
12837 |
+
"loss": 0.4394,
|
12838 |
+
"step": 1689000
|
12839 |
+
},
|
12840 |
+
{
|
12841 |
+
"epoch": 0.1,
|
12842 |
+
"learning_rate": 2.003840846723428e-05,
|
12843 |
+
"loss": 0.4408,
|
12844 |
+
"step": 1690000
|
12845 |
+
},
|
12846 |
+
{
|
12847 |
+
"epoch": 0.1,
|
12848 |
+
"eval_loss": 0.41959914565086365,
|
12849 |
+
"eval_runtime": 79.6028,
|
12850 |
+
"eval_samples_per_second": 80.399,
|
12851 |
+
"eval_steps_per_second": 0.628,
|
12852 |
+
"step": 1690000
|
12853 |
+
},
|
12854 |
+
{
|
12855 |
+
"epoch": 0.1,
|
12856 |
+
"learning_rate": 1.9975459665494844e-05,
|
12857 |
+
"loss": 0.4406,
|
12858 |
+
"step": 1691000
|
12859 |
+
},
|
12860 |
+
{
|
12861 |
+
"epoch": 0.1,
|
12862 |
+
"learning_rate": 1.9912693708915007e-05,
|
12863 |
+
"loss": 0.4403,
|
12864 |
+
"step": 1692000
|
12865 |
+
},
|
12866 |
+
{
|
12867 |
+
"epoch": 0.1,
|
12868 |
+
"learning_rate": 1.9850110788690757e-05,
|
12869 |
+
"loss": 0.4391,
|
12870 |
+
"step": 1693000
|
12871 |
+
},
|
12872 |
+
{
|
12873 |
+
"epoch": 0.1,
|
12874 |
+
"learning_rate": 1.978771109546051e-05,
|
12875 |
+
"loss": 0.4388,
|
12876 |
+
"step": 1694000
|
12877 |
+
},
|
12878 |
+
{
|
12879 |
+
"epoch": 0.1,
|
12880 |
+
"learning_rate": 1.9725681193643978e-05,
|
12881 |
+
"loss": 0.439,
|
12882 |
+
"step": 1695000
|
12883 |
+
},
|
12884 |
+
{
|
12885 |
+
"epoch": 0.1,
|
12886 |
+
"eval_loss": 0.4219348132610321,
|
12887 |
+
"eval_runtime": 78.5115,
|
12888 |
+
"eval_samples_per_second": 81.517,
|
12889 |
+
"eval_steps_per_second": 0.637,
|
12890 |
+
"step": 1695000
|
12891 |
+
},
|
12892 |
+
{
|
12893 |
+
"epoch": 0.1,
|
12894 |
+
"learning_rate": 1.9663647972981225e-05,
|
12895 |
+
"loss": 0.4389,
|
12896 |
+
"step": 1696000
|
12897 |
+
},
|
12898 |
+
{
|
12899 |
+
"epoch": 0.1,
|
12900 |
+
"learning_rate": 1.9601798547310563e-05,
|
12901 |
+
"loss": 0.4396,
|
12902 |
+
"step": 1697000
|
12903 |
+
},
|
12904 |
+
{
|
12905 |
+
"epoch": 0.1,
|
12906 |
+
"learning_rate": 1.954019467851605e-05,
|
12907 |
+
"loss": 0.4405,
|
12908 |
+
"step": 1698000
|
12909 |
+
},
|
12910 |
+
{
|
12911 |
+
"epoch": 0.1,
|
12912 |
+
"learning_rate": 1.9478713223216454e-05,
|
12913 |
+
"loss": 0.4403,
|
12914 |
+
"step": 1699000
|
12915 |
+
},
|
12916 |
+
{
|
12917 |
+
"epoch": 0.1,
|
12918 |
+
"learning_rate": 1.9417416126252245e-05,
|
12919 |
+
"loss": 0.4394,
|
12920 |
+
"step": 1700000
|
12921 |
+
},
|
12922 |
+
{
|
12923 |
+
"epoch": 0.1,
|
12924 |
+
"eval_loss": 0.42123520374298096,
|
12925 |
+
"eval_runtime": 78.2147,
|
12926 |
+
"eval_samples_per_second": 81.826,
|
12927 |
+
"eval_steps_per_second": 0.639,
|
12928 |
+
"step": 1700000
|
12929 |
}
|
12930 |
],
|
12931 |
"max_steps": 2000000,
|
12932 |
"num_train_epochs": 9223372036854775807,
|
12933 |
+
"total_flos": 1.4896305656561664e+22,
|
12934 |
"trial_name": null,
|
12935 |
"trial_params": null
|
12936 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abc7a8543a963e582a29e31e1e0c78fea4345a1b73b925ed6cc4d7ab61edbd1e
|
3 |
size 449471589
|