Training in progress, step 1920, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:91b1b0eb8821ed3e8ef53157ad0fe2a6d4a73b217edc43a13c527825df62dece
 size 912336848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:af1ce888960052569c2e8cf3fbac3a73808b7588b56f53738a8dad72d923e0df
 size 463916756
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:996d6df6cc333969754208420b894da3d6ad34807108aff10aacc8b6dc400f6c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2d9cacaadf8439d83df6534e66fde09e58b74c250800597efcb7ccff892221c0
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9712321758270264,
   "best_model_checkpoint": "miner_id_24/checkpoint-1800",
-  "epoch": 0.
+  "epoch": 0.39759784634499895,
   "eval_steps": 150,
-  "global_step":
+  "global_step": 1920,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12711,6 +12711,846 @@
       "eval_samples_per_second": 4.32,
       "eval_steps_per_second": 2.16,
       "step": 1800
+    },
+    {
+      "epoch": 0.37295506316007454,
+      "grad_norm": 7.738208770751953,
+      "learning_rate": 1.909457351846866e-06,
+      "loss": 3.3165,
+      "step": 1801
+    },
+    {
+      "epoch": 0.37316214537171255,
+      "grad_norm": 9.508034706115723,
+      "learning_rate": 1.8776008553690438e-06,
+      "loss": 2.879,
+      "step": 1802
+    },
+    {
+      "epoch": 0.3733692275833506,
+      "grad_norm": 8.288263320922852,
+      "learning_rate": 1.8460098201022903e-06,
+      "loss": 3.8177,
+      "step": 1803
+    },
+    {
+      "epoch": 0.3735763097949886,
+      "grad_norm": 6.952064037322998,
+      "learning_rate": 1.8146843315132833e-06,
+      "loss": 3.7097,
+      "step": 1804
+    },
+    {
+      "epoch": 0.37378339200662664,
+      "grad_norm": 8.130983352661133,
+      "learning_rate": 1.783624474350276e-06,
+      "loss": 3.8691,
+      "step": 1805
+    },
+    {
+      "epoch": 0.37399047421826465,
+      "grad_norm": 7.877700328826904,
+      "learning_rate": 1.7528303326428742e-06,
+      "loss": 4.2351,
+      "step": 1806
+    },
+    {
+      "epoch": 0.37419755642990266,
+      "grad_norm": 8.027643203735352,
+      "learning_rate": 1.7223019897018245e-06,
+      "loss": 3.5811,
+      "step": 1807
+    },
+    {
+      "epoch": 0.37440463864154067,
+      "grad_norm": 8.3945894241333,
+      "learning_rate": 1.6920395281187717e-06,
+      "loss": 3.5923,
+      "step": 1808
+    },
+    {
+      "epoch": 0.37461172085317873,
+      "grad_norm": 9.124868392944336,
+      "learning_rate": 1.6620430297660694e-06,
+      "loss": 3.3783,
+      "step": 1809
+    },
+    {
+      "epoch": 0.37481880306481674,
+      "grad_norm": 10.345230102539062,
+      "learning_rate": 1.6323125757964797e-06,
+      "loss": 2.8082,
+      "step": 1810
+    },
+    {
+      "epoch": 0.37502588527645475,
+      "grad_norm": 9.551642417907715,
+      "learning_rate": 1.6028482466430628e-06,
+      "loss": 3.8794,
+      "step": 1811
+    },
+    {
+      "epoch": 0.37523296748809276,
+      "grad_norm": 7.6390156745910645,
+      "learning_rate": 1.573650122018866e-06,
+      "loss": 4.59,
+      "step": 1812
+    },
+    {
+      "epoch": 0.37544004969973077,
+      "grad_norm": 6.970566272735596,
+      "learning_rate": 1.5447182809167571e-06,
+      "loss": 3.4794,
+      "step": 1813
+    },
+    {
+      "epoch": 0.37564713191136884,
+      "grad_norm": 7.257846832275391,
+      "learning_rate": 1.5160528016091914e-06,
+      "loss": 3.4807,
+      "step": 1814
+    },
+    {
+      "epoch": 0.37585421412300685,
+      "grad_norm": 6.892467021942139,
+      "learning_rate": 1.4876537616480334e-06,
+      "loss": 3.3787,
+      "step": 1815
+    },
+    {
+      "epoch": 0.37606129633464486,
+      "grad_norm": 7.427321434020996,
+      "learning_rate": 1.4595212378642697e-06,
+      "loss": 4.2577,
+      "step": 1816
+    },
+    {
+      "epoch": 0.37626837854628287,
+      "grad_norm": 6.533102035522461,
+      "learning_rate": 1.4316553063678851e-06,
+      "loss": 4.0092,
+      "step": 1817
+    },
+    {
+      "epoch": 0.3764754607579209,
+      "grad_norm": 9.07302474975586,
+      "learning_rate": 1.4040560425476411e-06,
+      "loss": 3.955,
+      "step": 1818
+    },
+    {
+      "epoch": 0.37668254296955894,
+      "grad_norm": 8.967131614685059,
+      "learning_rate": 1.3767235210708107e-06,
+      "loss": 3.5007,
+      "step": 1819
+    },
+    {
+      "epoch": 0.37688962518119695,
+      "grad_norm": 7.1962761878967285,
+      "learning_rate": 1.349657815883032e-06,
+      "loss": 3.6339,
+      "step": 1820
+    },
+    {
+      "epoch": 0.37709670739283496,
+      "grad_norm": 8.466217041015625,
+      "learning_rate": 1.322859000208132e-06,
+      "loss": 3.6523,
+      "step": 1821
+    },
+    {
+      "epoch": 0.37730378960447297,
+      "grad_norm": 11.36102294921875,
+      "learning_rate": 1.2963271465478488e-06,
+      "loss": 3.1206,
+      "step": 1822
+    },
+    {
+      "epoch": 0.377510871816111,
+      "grad_norm": 7.7683539390563965,
+      "learning_rate": 1.270062326681698e-06,
+      "loss": 4.508,
+      "step": 1823
+    },
+    {
+      "epoch": 0.377717954027749,
+      "grad_norm": 6.693579196929932,
+      "learning_rate": 1.244064611666751e-06,
+      "loss": 4.529,
+      "step": 1824
+    },
+    {
+      "epoch": 0.37792503623938706,
+      "grad_norm": 6.704649448394775,
+      "learning_rate": 1.2183340718374681e-06,
+      "loss": 4.2927,
+      "step": 1825
+    },
+    {
+      "epoch": 0.37813211845102507,
+      "grad_norm": 7.658962726593018,
+      "learning_rate": 1.1928707768054658e-06,
+      "loss": 4.7976,
+      "step": 1826
+    },
+    {
+      "epoch": 0.3783392006626631,
+      "grad_norm": 8.829057693481445,
+      "learning_rate": 1.1676747954593837e-06,
+      "loss": 4.4687,
+      "step": 1827
+    },
+    {
+      "epoch": 0.3785462828743011,
+      "grad_norm": 8.466927528381348,
+      "learning_rate": 1.14274619596465e-06,
+      "loss": 3.5714,
+      "step": 1828
+    },
+    {
+      "epoch": 0.3787533650859391,
+      "grad_norm": 9.557503700256348,
+      "learning_rate": 1.1180850457633175e-06,
+      "loss": 4.2684,
+      "step": 1829
+    },
+    {
+      "epoch": 0.37896044729757716,
+      "grad_norm": 7.801200866699219,
+      "learning_rate": 1.0936914115738717e-06,
+      "loss": 4.8398,
+      "step": 1830
+    },
+    {
+      "epoch": 0.3791675295092152,
+      "grad_norm": 7.245351314544678,
+      "learning_rate": 1.0695653593910782e-06,
+      "loss": 3.2947,
+      "step": 1831
+    },
+    {
+      "epoch": 0.3793746117208532,
+      "grad_norm": 8.5117826461792,
+      "learning_rate": 1.0457069544857813e-06,
+      "loss": 3.9429,
+      "step": 1832
+    },
+    {
+      "epoch": 0.3795816939324912,
+      "grad_norm": 26.09311294555664,
+      "learning_rate": 1.0221162614047042e-06,
+      "loss": 3.4436,
+      "step": 1833
+    },
+    {
+      "epoch": 0.3797887761441292,
+      "grad_norm": 8.763874053955078,
+      "learning_rate": 9.987933439703279e-07,
+      "loss": 3.2206,
+      "step": 1834
+    },
+    {
+      "epoch": 0.3799958583557672,
+      "grad_norm": 6.694812297821045,
+      "learning_rate": 9.75738265280679e-07,
+      "loss": 3.8137,
+      "step": 1835
+    },
+    {
+      "epoch": 0.3802029405674053,
+      "grad_norm": 8.24753475189209,
+      "learning_rate": 9.529510877091752e-07,
+      "loss": 2.9927,
+      "step": 1836
+    },
+    {
+      "epoch": 0.3804100227790433,
+      "grad_norm": 7.92348575592041,
+      "learning_rate": 9.304318729044359e-07,
+      "loss": 3.4538,
+      "step": 1837
+    },
+    {
+      "epoch": 0.3806171049906813,
+      "grad_norm": 8.349137306213379,
+      "learning_rate": 9.081806817901606e-07,
+      "loss": 3.8553,
+      "step": 1838
+    },
+    {
+      "epoch": 0.3808241872023193,
+      "grad_norm": 8.452718734741211,
+      "learning_rate": 8.861975745649176e-07,
+      "loss": 3.6945,
+      "step": 1839
+    },
+    {
+      "epoch": 0.3810312694139573,
+      "grad_norm": 7.854761600494385,
+      "learning_rate": 8.644826107019888e-07,
+      "loss": 5.3701,
+      "step": 1840
+    },
+    {
+      "epoch": 0.3812383516255954,
+      "grad_norm": 6.596001148223877,
+      "learning_rate": 8.430358489492474e-07,
+      "loss": 4.0305,
+      "step": 1841
+    },
+    {
+      "epoch": 0.3814454338372334,
+      "grad_norm": 8.507204055786133,
+      "learning_rate": 8.218573473289359e-07,
+      "loss": 4.3795,
+      "step": 1842
+    },
+    {
+      "epoch": 0.3816525160488714,
+      "grad_norm": 7.512143135070801,
+      "learning_rate": 8.009471631375775e-07,
+      "loss": 3.3766,
+      "step": 1843
+    },
+    {
+      "epoch": 0.3818595982605094,
+      "grad_norm": 7.501784801483154,
+      "learning_rate": 7.803053529457538e-07,
+      "loss": 3.542,
+      "step": 1844
+    },
+    {
+      "epoch": 0.3820666804721474,
+      "grad_norm": 7.17270040512085,
+      "learning_rate": 7.599319725980048e-07,
+      "loss": 4.8348,
+      "step": 1845
+    },
+    {
+      "epoch": 0.3822737626837855,
+      "grad_norm": 8.266033172607422,
+      "learning_rate": 7.398270772126736e-07,
+      "loss": 3.6901,
+      "step": 1846
+    },
+    {
+      "epoch": 0.3824808448954235,
+      "grad_norm": 7.612104415893555,
+      "learning_rate": 7.19990721181707e-07,
+      "loss": 3.8009,
+      "step": 1847
+    },
+    {
+      "epoch": 0.3826879271070615,
+      "grad_norm": 8.199748992919922,
+      "learning_rate": 7.004229581705546e-07,
+      "loss": 4.2672,
+      "step": 1848
+    },
+    {
+      "epoch": 0.3828950093186995,
+      "grad_norm": 9.074591636657715,
+      "learning_rate": 6.811238411180365e-07,
+      "loss": 5.1188,
+      "step": 1849
+    },
+    {
+      "epoch": 0.38310209153033753,
+      "grad_norm": 6.480962753295898,
+      "learning_rate": 6.620934222361319e-07,
+      "loss": 3.6043,
+      "step": 1850
+    },
+    {
+      "epoch": 0.38330917374197554,
+      "grad_norm": 7.479844093322754,
+      "learning_rate": 6.433317530099237e-07,
+      "loss": 4.7015,
+      "step": 1851
+    },
+    {
+      "epoch": 0.3835162559536136,
+      "grad_norm": 6.926044464111328,
+      "learning_rate": 6.248388841973762e-07,
+      "loss": 3.9299,
+      "step": 1852
+    },
+    {
+      "epoch": 0.3837233381652516,
+      "grad_norm": 8.056866645812988,
+      "learning_rate": 6.066148658292914e-07,
+      "loss": 4.4821,
+      "step": 1853
+    },
+    {
+      "epoch": 0.3839304203768896,
+      "grad_norm": 8.139278411865234,
+      "learning_rate": 5.886597472090638e-07,
+      "loss": 4.6499,
+      "step": 1854
+    },
+    {
+      "epoch": 0.38413750258852764,
+      "grad_norm": 12.200084686279297,
+      "learning_rate": 5.709735769126478e-07,
+      "loss": 4.5629,
+      "step": 1855
+    },
+    {
+      "epoch": 0.38434458480016565,
+      "grad_norm": 11.336113929748535,
+      "learning_rate": 5.535564027883577e-07,
+      "loss": 4.3053,
+      "step": 1856
+    },
+    {
+      "epoch": 0.3845516670118037,
+      "grad_norm": 7.85932731628418,
+      "learning_rate": 5.364082719567787e-07,
+      "loss": 4.3562,
+      "step": 1857
+    },
+    {
+      "epoch": 0.3847587492234417,
+      "grad_norm": 7.8194427490234375,
+      "learning_rate": 5.195292308106115e-07,
+      "loss": 4.519,
+      "step": 1858
+    },
+    {
+      "epoch": 0.38496583143507973,
+      "grad_norm": 9.013519287109375,
+      "learning_rate": 5.029193250145503e-07,
+      "loss": 3.9506,
+      "step": 1859
+    },
+    {
+      "epoch": 0.38517291364671774,
+      "grad_norm": 8.887256622314453,
+      "learning_rate": 4.865785995052052e-07,
+      "loss": 4.1928,
+      "step": 1860
+    },
+    {
+      "epoch": 0.38537999585835575,
+      "grad_norm": 9.979034423828125,
+      "learning_rate": 4.7050709849091325e-07,
+      "loss": 4.1995,
+      "step": 1861
+    },
+    {
+      "epoch": 0.38558707806999376,
+      "grad_norm": 8.198508262634277,
+      "learning_rate": 4.547048654516495e-07,
+      "loss": 4.5163,
+      "step": 1862
+    },
+    {
+      "epoch": 0.3857941602816318,
+      "grad_norm": 7.107065677642822,
+      "learning_rate": 4.391719431389163e-07,
+      "loss": 4.0739,
+      "step": 1863
+    },
+    {
+      "epoch": 0.38600124249326984,
+      "grad_norm": 7.956059455871582,
+      "learning_rate": 4.2390837357563217e-07,
+      "loss": 3.6853,
+      "step": 1864
+    },
+    {
+      "epoch": 0.38620832470490785,
+      "grad_norm": 7.028172969818115,
+      "learning_rate": 4.089141980559763e-07,
+      "loss": 4.2296,
+      "step": 1865
+    },
+    {
+      "epoch": 0.38641540691654586,
+      "grad_norm": 8.081233978271484,
+      "learning_rate": 3.941894571453442e-07,
+      "loss": 3.9931,
+      "step": 1866
+    },
+    {
+      "epoch": 0.38662248912818387,
+      "grad_norm": 7.969577789306641,
+      "learning_rate": 3.797341906801588e-07,
+      "loss": 3.5845,
+      "step": 1867
+    },
+    {
+      "epoch": 0.38682957133982193,
+      "grad_norm": 9.258206367492676,
+      "learning_rate": 3.655484377678375e-07,
+      "loss": 3.5585,
+      "step": 1868
+    },
+    {
+      "epoch": 0.38703665355145994,
+      "grad_norm": 8.422988891601562,
+      "learning_rate": 3.516322367866476e-07,
+      "loss": 3.89,
+      "step": 1869
+    },
+    {
+      "epoch": 0.38724373576309795,
+      "grad_norm": 7.004971981048584,
+      "learning_rate": 3.379856253855951e-07,
+      "loss": 3.4266,
+      "step": 1870
+    },
+    {
+      "epoch": 0.38745081797473596,
+      "grad_norm": 9.541719436645508,
+      "learning_rate": 3.2460864048434735e-07,
+      "loss": 3.2493,
+      "step": 1871
+    },
+    {
+      "epoch": 0.38765790018637397,
+      "grad_norm": 7.930725574493408,
+      "learning_rate": 3.115013182731219e-07,
+      "loss": 3.0596,
+      "step": 1872
+    },
+    {
+      "epoch": 0.38786498239801204,
+      "grad_norm": 10.04166316986084,
+      "learning_rate": 2.986636942125753e-07,
+      "loss": 3.9377,
+      "step": 1873
+    },
+    {
+      "epoch": 0.38807206460965005,
+      "grad_norm": 8.160487174987793,
+      "learning_rate": 2.860958030337368e-07,
+      "loss": 3.7294,
+      "step": 1874
+    },
+    {
+      "epoch": 0.38827914682128806,
+      "grad_norm": 8.443591117858887,
+      "learning_rate": 2.73797678737886e-07,
+      "loss": 4.0411,
+      "step": 1875
+    },
+    {
+      "epoch": 0.38848622903292607,
+      "grad_norm": 7.043831825256348,
+      "learning_rate": 2.61769354596475e-07,
+      "loss": 5.0244,
+      "step": 1876
+    },
+    {
+      "epoch": 0.3886933112445641,
+      "grad_norm": 7.078221797943115,
+      "learning_rate": 2.500108631510512e-07,
+      "loss": 3.8743,
+      "step": 1877
+    },
+    {
+      "epoch": 0.3889003934562021,
+      "grad_norm": 7.39813756942749,
+      "learning_rate": 2.385222362131345e-07,
+      "loss": 2.9238,
+      "step": 1878
+    },
+    {
+      "epoch": 0.38910747566784015,
+      "grad_norm": 8.55196475982666,
+      "learning_rate": 2.2730350486416253e-07,
+      "loss": 4.0762,
+      "step": 1879
+    },
+    {
+      "epoch": 0.38931455787947816,
+      "grad_norm": 7.475944519042969,
+      "learning_rate": 2.163546994553789e-07,
+      "loss": 4.8681,
+      "step": 1880
+    },
+    {
+      "epoch": 0.3895216400911162,
+      "grad_norm": 19.760482788085938,
+      "learning_rate": 2.056758496077893e-07,
+      "loss": 4.3132,
+      "step": 1881
+    },
+    {
+      "epoch": 0.3897287223027542,
+      "grad_norm": 7.659260272979736,
+      "learning_rate": 1.9526698421206136e-07,
+      "loss": 3.8448,
+      "step": 1882
+    },
+    {
+      "epoch": 0.3899358045143922,
+      "grad_norm": 11.384870529174805,
+      "learning_rate": 1.8512813142840257e-07,
+      "loss": 4.5382,
+      "step": 1883
+    },
+    {
+      "epoch": 0.39014288672603026,
+      "grad_norm": 7.478827953338623,
+      "learning_rate": 1.752593186865714e-07,
+      "loss": 3.6739,
+      "step": 1884
+    },
+    {
+      "epoch": 0.39034996893766827,
+      "grad_norm": 7.702869892120361,
+      "learning_rate": 1.6566057268574408e-07,
+      "loss": 4.1893,
+      "step": 1885
+    },
+    {
+      "epoch": 0.3905570511493063,
+      "grad_norm": 7.367177963256836,
+      "learning_rate": 1.5633191939444791e-07,
+      "loss": 3.7774,
+      "step": 1886
+    },
+    {
+      "epoch": 0.3907641333609443,
+      "grad_norm": 24.913116455078125,
+      "learning_rate": 1.4727338405051693e-07,
+      "loss": 3.9881,
+      "step": 1887
+    },
+    {
+      "epoch": 0.3909712155725823,
+      "grad_norm": 7.524211883544922,
+      "learning_rate": 1.384849911609809e-07,
+      "loss": 4.513,
+      "step": 1888
+    },
+    {
+      "epoch": 0.3911782977842203,
+      "grad_norm": 8.035744667053223,
+      "learning_rate": 1.2996676450203193e-07,
+      "loss": 3.6691,
+      "step": 1889
+    },
+    {
+      "epoch": 0.3913853799958584,
+      "grad_norm": 8.68524169921875,
+      "learning_rate": 1.2171872711895794e-07,
+      "loss": 4.1598,
+      "step": 1890
+    },
+    {
+      "epoch": 0.3915924622074964,
+      "grad_norm": 8.163741111755371,
+      "learning_rate": 1.1374090132608706e-07,
+      "loss": 3.3935,
+      "step": 1891
+    },
+    {
+      "epoch": 0.3917995444191344,
+      "grad_norm": 8.107989311218262,
+      "learning_rate": 1.0603330870668782e-07,
+      "loss": 3.6385,
+      "step": 1892
+    },
+    {
+      "epoch": 0.3920066266307724,
+      "grad_norm": 6.142742156982422,
+      "learning_rate": 9.859597011294686e-08,
+      "loss": 3.6153,
+      "step": 1893
+    },
+    {
+      "epoch": 0.3922137088424104,
+      "grad_norm": 9.46705150604248,
+      "learning_rate": 9.142890566591344e-08,
+      "loss": 4.1681,
+      "step": 1894
+    },
+    {
+      "epoch": 0.3924207910540485,
+      "grad_norm": 8.04094409942627,
+      "learning_rate": 8.453213475543287e-08,
+      "loss": 4.5136,
+      "step": 1895
+    },
+    {
+      "epoch": 0.3926278732656865,
+      "grad_norm": 9.353519439697266,
+      "learning_rate": 7.790567604007981e-08,
+      "loss": 5.6024,
+      "step": 1896
+    },
+    {
+      "epoch": 0.3928349554773245,
+      "grad_norm": 8.01060676574707,
+      "learning_rate": 7.154954744713616e-08,
+      "loss": 4.1784,
+      "step": 1897
+    },
+    {
+      "epoch": 0.3930420376889625,
+      "grad_norm": 7.64965295791626,
+      "learning_rate": 6.546376617252437e-08,
+      "loss": 4.1038,
+      "step": 1898
+    },
+    {
+      "epoch": 0.3932491199006005,
+      "grad_norm": 9.069695472717285,
+      "learning_rate": 5.964834868078529e-08,
+      "loss": 3.8891,
+      "step": 1899
+    },
+    {
+      "epoch": 0.3934562021122386,
+      "grad_norm": 7.1893110275268555,
+      "learning_rate": 5.4103310704989305e-08,
+      "loss": 4.2647,
+      "step": 1900
+    },
+    {
+      "epoch": 0.3936632843238766,
+      "grad_norm": 7.455760478973389,
+      "learning_rate": 4.8828667246736406e-08,
+      "loss": 4.6849,
+      "step": 1901
+    },
+    {
+      "epoch": 0.3938703665355146,
+      "grad_norm": 8.376263618469238,
+      "learning_rate": 4.382443257610058e-08,
+      "loss": 3.1997,
+      "step": 1902
+    },
+    {
+      "epoch": 0.3940774487471526,
+      "grad_norm": 10.802031517028809,
+      "learning_rate": 3.909062023156329e-08,
+      "loss": 3.5163,
+      "step": 1903
+    },
+    {
+      "epoch": 0.3942845309587906,
+      "grad_norm": 7.496048450469971,
+      "learning_rate": 3.4627243020035616e-08,
+      "loss": 3.9053,
+      "step": 1904
+    },
+    {
+      "epoch": 0.39449161317042863,
+      "grad_norm": 8.064364433288574,
+      "learning_rate": 3.0434313016780567e-08,
+      "loss": 3.7754,
+      "step": 1905
+    },
+    {
+      "epoch": 0.3946986953820667,
+      "grad_norm": 7.672548294067383,
+      "learning_rate": 2.651184156539088e-08,
+      "loss": 3.9151,
+      "step": 1906
+    },
+    {
+      "epoch": 0.3949057775937047,
+      "grad_norm": 6.153156280517578,
+      "learning_rate": 2.2859839277733497e-08,
+      "loss": 3.0665,
+      "step": 1907
+    },
+    {
+      "epoch": 0.3951128598053427,
+      "grad_norm": 7.630539417266846,
+      "learning_rate": 1.9478316033993972e-08,
+      "loss": 3.1406,
+      "step": 1908
+    },
+    {
+      "epoch": 0.39531994201698073,
+      "grad_norm": 8.899959564208984,
+      "learning_rate": 1.636728098256546e-08,
+      "loss": 3.58,
+      "step": 1909
+    },
+    {
+      "epoch": 0.39552702422861874,
+      "grad_norm": 7.002161026000977,
+      "learning_rate": 1.3526742540070913e-08,
+      "loss": 4.3189,
+      "step": 1910
+    },
+    {
+      "epoch": 0.3957341064402568,
+      "grad_norm": 8.042828559875488,
+      "learning_rate": 1.0956708391318681e-08,
+      "loss": 3.0454,
+      "step": 1911
+    },
+    {
+      "epoch": 0.3959411886518948,
+      "grad_norm": 10.092788696289062,
+      "learning_rate": 8.657185489313601e-09,
+      "loss": 4.2819,
+      "step": 1912
+    },
+    {
+      "epoch": 0.3961482708635328,
+      "grad_norm": 6.035614013671875,
+      "learning_rate": 6.628180055201494e-09,
+      "loss": 4.1054,
+      "step": 1913
+    },
+    {
+      "epoch": 0.39635535307517084,
+      "grad_norm": 7.719038009643555,
+      "learning_rate": 4.869697578269161e-09,
+      "loss": 3.5056,
+      "step": 1914
+    },
+    {
+      "epoch": 0.39656243528680885,
+      "grad_norm": 8.446083068847656,
+      "learning_rate": 3.3817428159443886e-09,
+      "loss": 3.7232,
+      "step": 1915
+    },
+    {
+      "epoch": 0.3967695174984469,
+      "grad_norm": 8.734418869018555,
+      "learning_rate": 2.16431979372933e-09,
+      "loss": 3.8401,
+      "step": 1916
+    },
+    {
+      "epoch": 0.3969765997100849,
+      "grad_norm": 8.412437438964844,
+      "learning_rate": 1.217431805267122e-09,
+      "loss": 3.4617,
+      "step": 1917
+    },
+    {
+      "epoch": 0.39718368192172293,
+      "grad_norm": 7.3742594718933105,
+      "learning_rate": 5.410814122752684e-10,
+      "loss": 3.4868,
+      "step": 1918
+    },
+    {
+      "epoch": 0.39739076413336094,
+      "grad_norm": 8.769962310791016,
+      "learning_rate": 1.3527044455674543e-10,
+      "loss": 4.4384,
+      "step": 1919
+    },
+    {
+      "epoch": 0.39759784634499895,
+      "grad_norm": 9.391153335571289,
+      "learning_rate": 0.0,
+      "loss": 3.1379,
+      "step": 1920
     }
   ],
   "logging_steps": 1,
@@ -12734,12 +13574,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.2576652660559053e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
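For reference, a minimal sketch of how a checkpoint like this is typically inspected and reloaded, assuming the transformers/peft stack (the commit itself does not prescribe this). The base-model name below is a placeholder, since this commit does not state which base model the adapter was trained from.

# Minimal sketch, not part of the commit; assumes `transformers` and `peft`
# are installed and the checkpoint directory has been downloaded locally.
import json

from peft import PeftModel
from transformers import AutoModelForCausalLM

# Read the training progress recorded above (global_step 1920, best_metric
# still from the step-1800 evaluation).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["epoch"], state["best_metric"])

# Re-attach the saved adapter weights (adapter_model.safetensors) to a base
# model. "BASE_MODEL_NAME" is a placeholder, not taken from this commit.
base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_NAME")
model = PeftModel.from_pretrained(base, "last-checkpoint")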