Training in progress, step 2200
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1213 -5
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4736616809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:844efd6c2009c5de04ac1262a4139a388ae75d1574b240173ede965955b6459a
|
3 |
size 4736616809
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2368281769
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce38e42231eaec2ee57adbd3a558f89d6a9d15cb466c85343dd8a5eee549c297
|
3 |
size 2368281769
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:501fe46ba428506dc04a6c17a5f3ac42a3aaaeb0ca785df8c20d998cfe0545c8
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d32a4c0417862ee076b39c4cf863a820cfe35de7e68441945ff56dcac7983a7b
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/checkpoint-
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -12086,11 +12086,1219 @@
|
|
12086 |
"eval_samples_per_second": 6.07,
|
12087 |
"eval_steps_per_second": 6.07,
|
12088 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12089 |
}
|
12090 |
],
|
12091 |
"max_steps": 4386,
|
12092 |
"num_train_epochs": 3,
|
12093 |
-
"total_flos": 2.
|
12094 |
"trial_name": null,
|
12095 |
"trial_params": null
|
12096 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.9827316999435425,
|
3 |
+
"best_model_checkpoint": "output/checkpoint-2200",
|
4 |
+
"epoch": 1.5041047745712288,
|
5 |
+
"global_step": 2200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
12086 |
"eval_samples_per_second": 6.07,
|
12087 |
"eval_steps_per_second": 6.07,
|
12088 |
"step": 2000
|
12089 |
+
},
|
12090 |
+
{
|
12091 |
+
"epoch": 1.37,
|
12092 |
+
"learning_rate": 0.0003418537983755375,
|
12093 |
+
"loss": 2.0172,
|
12094 |
+
"step": 2001
|
12095 |
+
},
|
12096 |
+
{
|
12097 |
+
"epoch": 1.37,
|
12098 |
+
"learning_rate": 0.00034171046344959387,
|
12099 |
+
"loss": 2.0405,
|
12100 |
+
"step": 2002
|
12101 |
+
},
|
12102 |
+
{
|
12103 |
+
"epoch": 1.37,
|
12104 |
+
"learning_rate": 0.00034156712852365025,
|
12105 |
+
"loss": 1.9842,
|
12106 |
+
"step": 2003
|
12107 |
+
},
|
12108 |
+
{
|
12109 |
+
"epoch": 1.37,
|
12110 |
+
"learning_rate": 0.00034142379359770663,
|
12111 |
+
"loss": 2.0294,
|
12112 |
+
"step": 2004
|
12113 |
+
},
|
12114 |
+
{
|
12115 |
+
"epoch": 1.37,
|
12116 |
+
"learning_rate": 0.000341280458671763,
|
12117 |
+
"loss": 1.9872,
|
12118 |
+
"step": 2005
|
12119 |
+
},
|
12120 |
+
{
|
12121 |
+
"epoch": 1.37,
|
12122 |
+
"learning_rate": 0.00034113712374581935,
|
12123 |
+
"loss": 1.9517,
|
12124 |
+
"step": 2006
|
12125 |
+
},
|
12126 |
+
{
|
12127 |
+
"epoch": 1.37,
|
12128 |
+
"learning_rate": 0.00034099378881987573,
|
12129 |
+
"loss": 2.0903,
|
12130 |
+
"step": 2007
|
12131 |
+
},
|
12132 |
+
{
|
12133 |
+
"epoch": 1.37,
|
12134 |
+
"learning_rate": 0.0003408504538939321,
|
12135 |
+
"loss": 2.0047,
|
12136 |
+
"step": 2008
|
12137 |
+
},
|
12138 |
+
{
|
12139 |
+
"epoch": 1.37,
|
12140 |
+
"learning_rate": 0.0003407071189679885,
|
12141 |
+
"loss": 1.9116,
|
12142 |
+
"step": 2009
|
12143 |
+
},
|
12144 |
+
{
|
12145 |
+
"epoch": 1.37,
|
12146 |
+
"learning_rate": 0.0003405637840420449,
|
12147 |
+
"loss": 1.9933,
|
12148 |
+
"step": 2010
|
12149 |
+
},
|
12150 |
+
{
|
12151 |
+
"epoch": 1.37,
|
12152 |
+
"learning_rate": 0.00034042044911610127,
|
12153 |
+
"loss": 2.0691,
|
12154 |
+
"step": 2011
|
12155 |
+
},
|
12156 |
+
{
|
12157 |
+
"epoch": 1.38,
|
12158 |
+
"learning_rate": 0.00034027711419015765,
|
12159 |
+
"loss": 2.0249,
|
12160 |
+
"step": 2012
|
12161 |
+
},
|
12162 |
+
{
|
12163 |
+
"epoch": 1.38,
|
12164 |
+
"learning_rate": 0.000340133779264214,
|
12165 |
+
"loss": 1.9914,
|
12166 |
+
"step": 2013
|
12167 |
+
},
|
12168 |
+
{
|
12169 |
+
"epoch": 1.38,
|
12170 |
+
"learning_rate": 0.0003399904443382704,
|
12171 |
+
"loss": 2.0592,
|
12172 |
+
"step": 2014
|
12173 |
+
},
|
12174 |
+
{
|
12175 |
+
"epoch": 1.38,
|
12176 |
+
"learning_rate": 0.00033984710941232675,
|
12177 |
+
"loss": 2.0658,
|
12178 |
+
"step": 2015
|
12179 |
+
},
|
12180 |
+
{
|
12181 |
+
"epoch": 1.38,
|
12182 |
+
"learning_rate": 0.0003397037744863832,
|
12183 |
+
"loss": 1.9771,
|
12184 |
+
"step": 2016
|
12185 |
+
},
|
12186 |
+
{
|
12187 |
+
"epoch": 1.38,
|
12188 |
+
"learning_rate": 0.0003395604395604395,
|
12189 |
+
"loss": 1.9868,
|
12190 |
+
"step": 2017
|
12191 |
+
},
|
12192 |
+
{
|
12193 |
+
"epoch": 1.38,
|
12194 |
+
"learning_rate": 0.00033941710463449595,
|
12195 |
+
"loss": 1.943,
|
12196 |
+
"step": 2018
|
12197 |
+
},
|
12198 |
+
{
|
12199 |
+
"epoch": 1.38,
|
12200 |
+
"learning_rate": 0.0003392737697085523,
|
12201 |
+
"loss": 2.0233,
|
12202 |
+
"step": 2019
|
12203 |
+
},
|
12204 |
+
{
|
12205 |
+
"epoch": 1.38,
|
12206 |
+
"learning_rate": 0.00033913043478260867,
|
12207 |
+
"loss": 1.9217,
|
12208 |
+
"step": 2020
|
12209 |
+
},
|
12210 |
+
{
|
12211 |
+
"epoch": 1.38,
|
12212 |
+
"learning_rate": 0.00033898709985666505,
|
12213 |
+
"loss": 2.05,
|
12214 |
+
"step": 2021
|
12215 |
+
},
|
12216 |
+
{
|
12217 |
+
"epoch": 1.38,
|
12218 |
+
"learning_rate": 0.00033884376493072143,
|
12219 |
+
"loss": 1.9538,
|
12220 |
+
"step": 2022
|
12221 |
+
},
|
12222 |
+
{
|
12223 |
+
"epoch": 1.38,
|
12224 |
+
"learning_rate": 0.0003387004300047778,
|
12225 |
+
"loss": 1.9666,
|
12226 |
+
"step": 2023
|
12227 |
+
},
|
12228 |
+
{
|
12229 |
+
"epoch": 1.38,
|
12230 |
+
"learning_rate": 0.00033855709507883415,
|
12231 |
+
"loss": 2.0349,
|
12232 |
+
"step": 2024
|
12233 |
+
},
|
12234 |
+
{
|
12235 |
+
"epoch": 1.38,
|
12236 |
+
"learning_rate": 0.0003384137601528906,
|
12237 |
+
"loss": 2.0789,
|
12238 |
+
"step": 2025
|
12239 |
+
},
|
12240 |
+
{
|
12241 |
+
"epoch": 1.39,
|
12242 |
+
"learning_rate": 0.0003382704252269469,
|
12243 |
+
"loss": 2.0193,
|
12244 |
+
"step": 2026
|
12245 |
+
},
|
12246 |
+
{
|
12247 |
+
"epoch": 1.39,
|
12248 |
+
"learning_rate": 0.0003381270903010033,
|
12249 |
+
"loss": 2.0933,
|
12250 |
+
"step": 2027
|
12251 |
+
},
|
12252 |
+
{
|
12253 |
+
"epoch": 1.39,
|
12254 |
+
"learning_rate": 0.0003379837553750597,
|
12255 |
+
"loss": 2.0623,
|
12256 |
+
"step": 2028
|
12257 |
+
},
|
12258 |
+
{
|
12259 |
+
"epoch": 1.39,
|
12260 |
+
"learning_rate": 0.00033784042044911607,
|
12261 |
+
"loss": 2.044,
|
12262 |
+
"step": 2029
|
12263 |
+
},
|
12264 |
+
{
|
12265 |
+
"epoch": 1.39,
|
12266 |
+
"learning_rate": 0.00033769708552317245,
|
12267 |
+
"loss": 2.0534,
|
12268 |
+
"step": 2030
|
12269 |
+
},
|
12270 |
+
{
|
12271 |
+
"epoch": 1.39,
|
12272 |
+
"learning_rate": 0.00033755375059722883,
|
12273 |
+
"loss": 1.9295,
|
12274 |
+
"step": 2031
|
12275 |
+
},
|
12276 |
+
{
|
12277 |
+
"epoch": 1.39,
|
12278 |
+
"learning_rate": 0.0003374104156712852,
|
12279 |
+
"loss": 2.141,
|
12280 |
+
"step": 2032
|
12281 |
+
},
|
12282 |
+
{
|
12283 |
+
"epoch": 1.39,
|
12284 |
+
"learning_rate": 0.0003372670807453416,
|
12285 |
+
"loss": 2.0085,
|
12286 |
+
"step": 2033
|
12287 |
+
},
|
12288 |
+
{
|
12289 |
+
"epoch": 1.39,
|
12290 |
+
"learning_rate": 0.00033712374581939793,
|
12291 |
+
"loss": 2.0188,
|
12292 |
+
"step": 2034
|
12293 |
+
},
|
12294 |
+
{
|
12295 |
+
"epoch": 1.39,
|
12296 |
+
"learning_rate": 0.00033698041089345437,
|
12297 |
+
"loss": 2.0537,
|
12298 |
+
"step": 2035
|
12299 |
+
},
|
12300 |
+
{
|
12301 |
+
"epoch": 1.39,
|
12302 |
+
"learning_rate": 0.0003368370759675107,
|
12303 |
+
"loss": 1.9598,
|
12304 |
+
"step": 2036
|
12305 |
+
},
|
12306 |
+
{
|
12307 |
+
"epoch": 1.39,
|
12308 |
+
"learning_rate": 0.00033669374104156714,
|
12309 |
+
"loss": 2.004,
|
12310 |
+
"step": 2037
|
12311 |
+
},
|
12312 |
+
{
|
12313 |
+
"epoch": 1.39,
|
12314 |
+
"learning_rate": 0.00033655040611562347,
|
12315 |
+
"loss": 2.0024,
|
12316 |
+
"step": 2038
|
12317 |
+
},
|
12318 |
+
{
|
12319 |
+
"epoch": 1.39,
|
12320 |
+
"learning_rate": 0.0003364070711896799,
|
12321 |
+
"loss": 2.0414,
|
12322 |
+
"step": 2039
|
12323 |
+
},
|
12324 |
+
{
|
12325 |
+
"epoch": 1.39,
|
12326 |
+
"learning_rate": 0.00033626373626373623,
|
12327 |
+
"loss": 2.0245,
|
12328 |
+
"step": 2040
|
12329 |
+
},
|
12330 |
+
{
|
12331 |
+
"epoch": 1.4,
|
12332 |
+
"learning_rate": 0.00033612040133779256,
|
12333 |
+
"loss": 1.9819,
|
12334 |
+
"step": 2041
|
12335 |
+
},
|
12336 |
+
{
|
12337 |
+
"epoch": 1.4,
|
12338 |
+
"learning_rate": 0.000335977066411849,
|
12339 |
+
"loss": 2.0282,
|
12340 |
+
"step": 2042
|
12341 |
+
},
|
12342 |
+
{
|
12343 |
+
"epoch": 1.4,
|
12344 |
+
"learning_rate": 0.00033583373148590533,
|
12345 |
+
"loss": 1.9962,
|
12346 |
+
"step": 2043
|
12347 |
+
},
|
12348 |
+
{
|
12349 |
+
"epoch": 1.4,
|
12350 |
+
"learning_rate": 0.00033569039655996177,
|
12351 |
+
"loss": 1.9422,
|
12352 |
+
"step": 2044
|
12353 |
+
},
|
12354 |
+
{
|
12355 |
+
"epoch": 1.4,
|
12356 |
+
"learning_rate": 0.0003355470616340181,
|
12357 |
+
"loss": 2.0411,
|
12358 |
+
"step": 2045
|
12359 |
+
},
|
12360 |
+
{
|
12361 |
+
"epoch": 1.4,
|
12362 |
+
"learning_rate": 0.0003354037267080745,
|
12363 |
+
"loss": 1.9571,
|
12364 |
+
"step": 2046
|
12365 |
+
},
|
12366 |
+
{
|
12367 |
+
"epoch": 1.4,
|
12368 |
+
"learning_rate": 0.00033526039178213087,
|
12369 |
+
"loss": 1.9993,
|
12370 |
+
"step": 2047
|
12371 |
+
},
|
12372 |
+
{
|
12373 |
+
"epoch": 1.4,
|
12374 |
+
"learning_rate": 0.00033511705685618725,
|
12375 |
+
"loss": 2.0761,
|
12376 |
+
"step": 2048
|
12377 |
+
},
|
12378 |
+
{
|
12379 |
+
"epoch": 1.4,
|
12380 |
+
"learning_rate": 0.00033497372193024363,
|
12381 |
+
"loss": 2.0052,
|
12382 |
+
"step": 2049
|
12383 |
+
},
|
12384 |
+
{
|
12385 |
+
"epoch": 1.4,
|
12386 |
+
"learning_rate": 0.0003348303870043,
|
12387 |
+
"loss": 2.0984,
|
12388 |
+
"step": 2050
|
12389 |
+
},
|
12390 |
+
{
|
12391 |
+
"epoch": 1.4,
|
12392 |
+
"learning_rate": 0.0003346870520783564,
|
12393 |
+
"loss": 2.0299,
|
12394 |
+
"step": 2051
|
12395 |
+
},
|
12396 |
+
{
|
12397 |
+
"epoch": 1.4,
|
12398 |
+
"learning_rate": 0.0003345437171524128,
|
12399 |
+
"loss": 1.9776,
|
12400 |
+
"step": 2052
|
12401 |
+
},
|
12402 |
+
{
|
12403 |
+
"epoch": 1.4,
|
12404 |
+
"learning_rate": 0.0003344003822264691,
|
12405 |
+
"loss": 2.0061,
|
12406 |
+
"step": 2053
|
12407 |
+
},
|
12408 |
+
{
|
12409 |
+
"epoch": 1.4,
|
12410 |
+
"learning_rate": 0.00033425704730052555,
|
12411 |
+
"loss": 2.0546,
|
12412 |
+
"step": 2054
|
12413 |
+
},
|
12414 |
+
{
|
12415 |
+
"epoch": 1.4,
|
12416 |
+
"learning_rate": 0.0003341137123745819,
|
12417 |
+
"loss": 2.0127,
|
12418 |
+
"step": 2055
|
12419 |
+
},
|
12420 |
+
{
|
12421 |
+
"epoch": 1.41,
|
12422 |
+
"learning_rate": 0.0003339703774486383,
|
12423 |
+
"loss": 2.0911,
|
12424 |
+
"step": 2056
|
12425 |
+
},
|
12426 |
+
{
|
12427 |
+
"epoch": 1.41,
|
12428 |
+
"learning_rate": 0.00033382704252269465,
|
12429 |
+
"loss": 1.9568,
|
12430 |
+
"step": 2057
|
12431 |
+
},
|
12432 |
+
{
|
12433 |
+
"epoch": 1.41,
|
12434 |
+
"learning_rate": 0.0003336837075967511,
|
12435 |
+
"loss": 2.0353,
|
12436 |
+
"step": 2058
|
12437 |
+
},
|
12438 |
+
{
|
12439 |
+
"epoch": 1.41,
|
12440 |
+
"learning_rate": 0.0003335403726708074,
|
12441 |
+
"loss": 1.9722,
|
12442 |
+
"step": 2059
|
12443 |
+
},
|
12444 |
+
{
|
12445 |
+
"epoch": 1.41,
|
12446 |
+
"learning_rate": 0.00033339703774486375,
|
12447 |
+
"loss": 2.0115,
|
12448 |
+
"step": 2060
|
12449 |
+
},
|
12450 |
+
{
|
12451 |
+
"epoch": 1.41,
|
12452 |
+
"learning_rate": 0.0003332537028189202,
|
12453 |
+
"loss": 1.9517,
|
12454 |
+
"step": 2061
|
12455 |
+
},
|
12456 |
+
{
|
12457 |
+
"epoch": 1.41,
|
12458 |
+
"learning_rate": 0.0003331103678929765,
|
12459 |
+
"loss": 1.9787,
|
12460 |
+
"step": 2062
|
12461 |
+
},
|
12462 |
+
{
|
12463 |
+
"epoch": 1.41,
|
12464 |
+
"learning_rate": 0.00033296703296703295,
|
12465 |
+
"loss": 2.0402,
|
12466 |
+
"step": 2063
|
12467 |
+
},
|
12468 |
+
{
|
12469 |
+
"epoch": 1.41,
|
12470 |
+
"learning_rate": 0.0003328236980410893,
|
12471 |
+
"loss": 2.0362,
|
12472 |
+
"step": 2064
|
12473 |
+
},
|
12474 |
+
{
|
12475 |
+
"epoch": 1.41,
|
12476 |
+
"learning_rate": 0.0003326803631151457,
|
12477 |
+
"loss": 2.0316,
|
12478 |
+
"step": 2065
|
12479 |
+
},
|
12480 |
+
{
|
12481 |
+
"epoch": 1.41,
|
12482 |
+
"learning_rate": 0.00033253702818920205,
|
12483 |
+
"loss": 1.9957,
|
12484 |
+
"step": 2066
|
12485 |
+
},
|
12486 |
+
{
|
12487 |
+
"epoch": 1.41,
|
12488 |
+
"learning_rate": 0.00033239369326325843,
|
12489 |
+
"loss": 2.076,
|
12490 |
+
"step": 2067
|
12491 |
+
},
|
12492 |
+
{
|
12493 |
+
"epoch": 1.41,
|
12494 |
+
"learning_rate": 0.0003322503583373148,
|
12495 |
+
"loss": 1.9697,
|
12496 |
+
"step": 2068
|
12497 |
+
},
|
12498 |
+
{
|
12499 |
+
"epoch": 1.41,
|
12500 |
+
"learning_rate": 0.0003321070234113712,
|
12501 |
+
"loss": 2.063,
|
12502 |
+
"step": 2069
|
12503 |
+
},
|
12504 |
+
{
|
12505 |
+
"epoch": 1.42,
|
12506 |
+
"learning_rate": 0.0003319636884854276,
|
12507 |
+
"loss": 2.0591,
|
12508 |
+
"step": 2070
|
12509 |
+
},
|
12510 |
+
{
|
12511 |
+
"epoch": 1.42,
|
12512 |
+
"learning_rate": 0.00033182035355948397,
|
12513 |
+
"loss": 2.031,
|
12514 |
+
"step": 2071
|
12515 |
+
},
|
12516 |
+
{
|
12517 |
+
"epoch": 1.42,
|
12518 |
+
"learning_rate": 0.00033167701863354035,
|
12519 |
+
"loss": 2.0116,
|
12520 |
+
"step": 2072
|
12521 |
+
},
|
12522 |
+
{
|
12523 |
+
"epoch": 1.42,
|
12524 |
+
"learning_rate": 0.00033153368370759674,
|
12525 |
+
"loss": 2.0301,
|
12526 |
+
"step": 2073
|
12527 |
+
},
|
12528 |
+
{
|
12529 |
+
"epoch": 1.42,
|
12530 |
+
"learning_rate": 0.00033139034878165306,
|
12531 |
+
"loss": 2.0387,
|
12532 |
+
"step": 2074
|
12533 |
+
},
|
12534 |
+
{
|
12535 |
+
"epoch": 1.42,
|
12536 |
+
"learning_rate": 0.0003312470138557095,
|
12537 |
+
"loss": 2.0549,
|
12538 |
+
"step": 2075
|
12539 |
+
},
|
12540 |
+
{
|
12541 |
+
"epoch": 1.42,
|
12542 |
+
"learning_rate": 0.00033110367892976583,
|
12543 |
+
"loss": 1.9861,
|
12544 |
+
"step": 2076
|
12545 |
+
},
|
12546 |
+
{
|
12547 |
+
"epoch": 1.42,
|
12548 |
+
"learning_rate": 0.00033096034400382227,
|
12549 |
+
"loss": 1.918,
|
12550 |
+
"step": 2077
|
12551 |
+
},
|
12552 |
+
{
|
12553 |
+
"epoch": 1.42,
|
12554 |
+
"learning_rate": 0.0003308170090778786,
|
12555 |
+
"loss": 1.9836,
|
12556 |
+
"step": 2078
|
12557 |
+
},
|
12558 |
+
{
|
12559 |
+
"epoch": 1.42,
|
12560 |
+
"learning_rate": 0.00033067367415193504,
|
12561 |
+
"loss": 2.0345,
|
12562 |
+
"step": 2079
|
12563 |
+
},
|
12564 |
+
{
|
12565 |
+
"epoch": 1.42,
|
12566 |
+
"learning_rate": 0.00033053033922599137,
|
12567 |
+
"loss": 1.9827,
|
12568 |
+
"step": 2080
|
12569 |
+
},
|
12570 |
+
{
|
12571 |
+
"epoch": 1.42,
|
12572 |
+
"learning_rate": 0.0003303870043000477,
|
12573 |
+
"loss": 1.9904,
|
12574 |
+
"step": 2081
|
12575 |
+
},
|
12576 |
+
{
|
12577 |
+
"epoch": 1.42,
|
12578 |
+
"learning_rate": 0.00033024366937410413,
|
12579 |
+
"loss": 1.9661,
|
12580 |
+
"step": 2082
|
12581 |
+
},
|
12582 |
+
{
|
12583 |
+
"epoch": 1.42,
|
12584 |
+
"learning_rate": 0.00033010033444816046,
|
12585 |
+
"loss": 2.0373,
|
12586 |
+
"step": 2083
|
12587 |
+
},
|
12588 |
+
{
|
12589 |
+
"epoch": 1.42,
|
12590 |
+
"learning_rate": 0.0003299569995222169,
|
12591 |
+
"loss": 1.872,
|
12592 |
+
"step": 2084
|
12593 |
+
},
|
12594 |
+
{
|
12595 |
+
"epoch": 1.43,
|
12596 |
+
"learning_rate": 0.00032981366459627323,
|
12597 |
+
"loss": 1.9871,
|
12598 |
+
"step": 2085
|
12599 |
+
},
|
12600 |
+
{
|
12601 |
+
"epoch": 1.43,
|
12602 |
+
"learning_rate": 0.00032967032967032967,
|
12603 |
+
"loss": 1.9051,
|
12604 |
+
"step": 2086
|
12605 |
+
},
|
12606 |
+
{
|
12607 |
+
"epoch": 1.43,
|
12608 |
+
"learning_rate": 0.000329526994744386,
|
12609 |
+
"loss": 2.1045,
|
12610 |
+
"step": 2087
|
12611 |
+
},
|
12612 |
+
{
|
12613 |
+
"epoch": 1.43,
|
12614 |
+
"learning_rate": 0.0003293836598184424,
|
12615 |
+
"loss": 1.9143,
|
12616 |
+
"step": 2088
|
12617 |
+
},
|
12618 |
+
{
|
12619 |
+
"epoch": 1.43,
|
12620 |
+
"learning_rate": 0.00032924032489249877,
|
12621 |
+
"loss": 1.8973,
|
12622 |
+
"step": 2089
|
12623 |
+
},
|
12624 |
+
{
|
12625 |
+
"epoch": 1.43,
|
12626 |
+
"learning_rate": 0.00032909698996655515,
|
12627 |
+
"loss": 1.9506,
|
12628 |
+
"step": 2090
|
12629 |
+
},
|
12630 |
+
{
|
12631 |
+
"epoch": 1.43,
|
12632 |
+
"learning_rate": 0.00032895365504061153,
|
12633 |
+
"loss": 1.9271,
|
12634 |
+
"step": 2091
|
12635 |
+
},
|
12636 |
+
{
|
12637 |
+
"epoch": 1.43,
|
12638 |
+
"learning_rate": 0.0003288103201146679,
|
12639 |
+
"loss": 1.9837,
|
12640 |
+
"step": 2092
|
12641 |
+
},
|
12642 |
+
{
|
12643 |
+
"epoch": 1.43,
|
12644 |
+
"learning_rate": 0.0003286669851887243,
|
12645 |
+
"loss": 2.1084,
|
12646 |
+
"step": 2093
|
12647 |
+
},
|
12648 |
+
{
|
12649 |
+
"epoch": 1.43,
|
12650 |
+
"learning_rate": 0.0003285236502627807,
|
12651 |
+
"loss": 1.9546,
|
12652 |
+
"step": 2094
|
12653 |
+
},
|
12654 |
+
{
|
12655 |
+
"epoch": 1.43,
|
12656 |
+
"learning_rate": 0.000328380315336837,
|
12657 |
+
"loss": 2.0735,
|
12658 |
+
"step": 2095
|
12659 |
+
},
|
12660 |
+
{
|
12661 |
+
"epoch": 1.43,
|
12662 |
+
"learning_rate": 0.00032823698041089345,
|
12663 |
+
"loss": 2.0231,
|
12664 |
+
"step": 2096
|
12665 |
+
},
|
12666 |
+
{
|
12667 |
+
"epoch": 1.43,
|
12668 |
+
"learning_rate": 0.0003280936454849498,
|
12669 |
+
"loss": 2.0705,
|
12670 |
+
"step": 2097
|
12671 |
+
},
|
12672 |
+
{
|
12673 |
+
"epoch": 1.43,
|
12674 |
+
"learning_rate": 0.0003279503105590062,
|
12675 |
+
"loss": 1.9939,
|
12676 |
+
"step": 2098
|
12677 |
+
},
|
12678 |
+
{
|
12679 |
+
"epoch": 1.44,
|
12680 |
+
"learning_rate": 0.00032780697563306255,
|
12681 |
+
"loss": 1.9815,
|
12682 |
+
"step": 2099
|
12683 |
+
},
|
12684 |
+
{
|
12685 |
+
"epoch": 1.44,
|
12686 |
+
"learning_rate": 0.000327663640707119,
|
12687 |
+
"loss": 1.9705,
|
12688 |
+
"step": 2100
|
12689 |
+
},
|
12690 |
+
{
|
12691 |
+
"epoch": 1.44,
|
12692 |
+
"learning_rate": 0.0003275203057811753,
|
12693 |
+
"loss": 2.0562,
|
12694 |
+
"step": 2101
|
12695 |
+
},
|
12696 |
+
{
|
12697 |
+
"epoch": 1.44,
|
12698 |
+
"learning_rate": 0.00032737697085523165,
|
12699 |
+
"loss": 2.014,
|
12700 |
+
"step": 2102
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 1.44,
|
12704 |
+
"learning_rate": 0.0003272336359292881,
|
12705 |
+
"loss": 2.0447,
|
12706 |
+
"step": 2103
|
12707 |
+
},
|
12708 |
+
{
|
12709 |
+
"epoch": 1.44,
|
12710 |
+
"learning_rate": 0.0003270903010033444,
|
12711 |
+
"loss": 1.9627,
|
12712 |
+
"step": 2104
|
12713 |
+
},
|
12714 |
+
{
|
12715 |
+
"epoch": 1.44,
|
12716 |
+
"learning_rate": 0.00032694696607740085,
|
12717 |
+
"loss": 1.9587,
|
12718 |
+
"step": 2105
|
12719 |
+
},
|
12720 |
+
{
|
12721 |
+
"epoch": 1.44,
|
12722 |
+
"learning_rate": 0.0003268036311514572,
|
12723 |
+
"loss": 2.0637,
|
12724 |
+
"step": 2106
|
12725 |
+
},
|
12726 |
+
{
|
12727 |
+
"epoch": 1.44,
|
12728 |
+
"learning_rate": 0.0003266602962255136,
|
12729 |
+
"loss": 1.9581,
|
12730 |
+
"step": 2107
|
12731 |
+
},
|
12732 |
+
{
|
12733 |
+
"epoch": 1.44,
|
12734 |
+
"learning_rate": 0.00032651696129956995,
|
12735 |
+
"loss": 1.9609,
|
12736 |
+
"step": 2108
|
12737 |
+
},
|
12738 |
+
{
|
12739 |
+
"epoch": 1.44,
|
12740 |
+
"learning_rate": 0.00032637362637362633,
|
12741 |
+
"loss": 1.9956,
|
12742 |
+
"step": 2109
|
12743 |
+
},
|
12744 |
+
{
|
12745 |
+
"epoch": 1.44,
|
12746 |
+
"learning_rate": 0.0003262302914476827,
|
12747 |
+
"loss": 1.9885,
|
12748 |
+
"step": 2110
|
12749 |
+
},
|
12750 |
+
{
|
12751 |
+
"epoch": 1.44,
|
12752 |
+
"learning_rate": 0.0003260869565217391,
|
12753 |
+
"loss": 1.9506,
|
12754 |
+
"step": 2111
|
12755 |
+
},
|
12756 |
+
{
|
12757 |
+
"epoch": 1.44,
|
12758 |
+
"learning_rate": 0.0003259436215957955,
|
12759 |
+
"loss": 2.0175,
|
12760 |
+
"step": 2112
|
12761 |
+
},
|
12762 |
+
{
|
12763 |
+
"epoch": 1.44,
|
12764 |
+
"learning_rate": 0.00032580028666985187,
|
12765 |
+
"loss": 1.9666,
|
12766 |
+
"step": 2113
|
12767 |
+
},
|
12768 |
+
{
|
12769 |
+
"epoch": 1.45,
|
12770 |
+
"learning_rate": 0.00032565695174390825,
|
12771 |
+
"loss": 2.0387,
|
12772 |
+
"step": 2114
|
12773 |
+
},
|
12774 |
+
{
|
12775 |
+
"epoch": 1.45,
|
12776 |
+
"learning_rate": 0.00032551361681796464,
|
12777 |
+
"loss": 2.0288,
|
12778 |
+
"step": 2115
|
12779 |
+
},
|
12780 |
+
{
|
12781 |
+
"epoch": 1.45,
|
12782 |
+
"learning_rate": 0.00032537028189202097,
|
12783 |
+
"loss": 2.0741,
|
12784 |
+
"step": 2116
|
12785 |
+
},
|
12786 |
+
{
|
12787 |
+
"epoch": 1.45,
|
12788 |
+
"learning_rate": 0.0003252269469660774,
|
12789 |
+
"loss": 2.0846,
|
12790 |
+
"step": 2117
|
12791 |
+
},
|
12792 |
+
{
|
12793 |
+
"epoch": 1.45,
|
12794 |
+
"learning_rate": 0.00032508361204013373,
|
12795 |
+
"loss": 1.974,
|
12796 |
+
"step": 2118
|
12797 |
+
},
|
12798 |
+
{
|
12799 |
+
"epoch": 1.45,
|
12800 |
+
"learning_rate": 0.00032494027711419017,
|
12801 |
+
"loss": 1.9899,
|
12802 |
+
"step": 2119
|
12803 |
+
},
|
12804 |
+
{
|
12805 |
+
"epoch": 1.45,
|
12806 |
+
"learning_rate": 0.0003247969421882465,
|
12807 |
+
"loss": 2.1513,
|
12808 |
+
"step": 2120
|
12809 |
+
},
|
12810 |
+
{
|
12811 |
+
"epoch": 1.45,
|
12812 |
+
"learning_rate": 0.00032465360726230294,
|
12813 |
+
"loss": 1.9642,
|
12814 |
+
"step": 2121
|
12815 |
+
},
|
12816 |
+
{
|
12817 |
+
"epoch": 1.45,
|
12818 |
+
"learning_rate": 0.00032451027233635927,
|
12819 |
+
"loss": 1.9595,
|
12820 |
+
"step": 2122
|
12821 |
+
},
|
12822 |
+
{
|
12823 |
+
"epoch": 1.45,
|
12824 |
+
"learning_rate": 0.0003243669374104156,
|
12825 |
+
"loss": 2.0706,
|
12826 |
+
"step": 2123
|
12827 |
+
},
|
12828 |
+
{
|
12829 |
+
"epoch": 1.45,
|
12830 |
+
"learning_rate": 0.00032422360248447204,
|
12831 |
+
"loss": 2.0984,
|
12832 |
+
"step": 2124
|
12833 |
+
},
|
12834 |
+
{
|
12835 |
+
"epoch": 1.45,
|
12836 |
+
"learning_rate": 0.00032408026755852837,
|
12837 |
+
"loss": 2.1254,
|
12838 |
+
"step": 2125
|
12839 |
+
},
|
12840 |
+
{
|
12841 |
+
"epoch": 1.45,
|
12842 |
+
"learning_rate": 0.0003239369326325848,
|
12843 |
+
"loss": 1.9552,
|
12844 |
+
"step": 2126
|
12845 |
+
},
|
12846 |
+
{
|
12847 |
+
"epoch": 1.45,
|
12848 |
+
"learning_rate": 0.00032379359770664113,
|
12849 |
+
"loss": 1.9694,
|
12850 |
+
"step": 2127
|
12851 |
+
},
|
12852 |
+
{
|
12853 |
+
"epoch": 1.45,
|
12854 |
+
"learning_rate": 0.00032365026278069757,
|
12855 |
+
"loss": 2.0373,
|
12856 |
+
"step": 2128
|
12857 |
+
},
|
12858 |
+
{
|
12859 |
+
"epoch": 1.46,
|
12860 |
+
"learning_rate": 0.0003235069278547539,
|
12861 |
+
"loss": 2.0331,
|
12862 |
+
"step": 2129
|
12863 |
+
},
|
12864 |
+
{
|
12865 |
+
"epoch": 1.46,
|
12866 |
+
"learning_rate": 0.0003233635929288103,
|
12867 |
+
"loss": 2.0722,
|
12868 |
+
"step": 2130
|
12869 |
+
},
|
12870 |
+
{
|
12871 |
+
"epoch": 1.46,
|
12872 |
+
"learning_rate": 0.00032322025800286667,
|
12873 |
+
"loss": 2.0406,
|
12874 |
+
"step": 2131
|
12875 |
+
},
|
12876 |
+
{
|
12877 |
+
"epoch": 1.46,
|
12878 |
+
"learning_rate": 0.00032307692307692305,
|
12879 |
+
"loss": 1.8967,
|
12880 |
+
"step": 2132
|
12881 |
+
},
|
12882 |
+
{
|
12883 |
+
"epoch": 1.46,
|
12884 |
+
"learning_rate": 0.00032293358815097944,
|
12885 |
+
"loss": 1.9515,
|
12886 |
+
"step": 2133
|
12887 |
+
},
|
12888 |
+
{
|
12889 |
+
"epoch": 1.46,
|
12890 |
+
"learning_rate": 0.0003227902532250358,
|
12891 |
+
"loss": 2.0313,
|
12892 |
+
"step": 2134
|
12893 |
+
},
|
12894 |
+
{
|
12895 |
+
"epoch": 1.46,
|
12896 |
+
"learning_rate": 0.0003226469182990922,
|
12897 |
+
"loss": 2.0825,
|
12898 |
+
"step": 2135
|
12899 |
+
},
|
12900 |
+
{
|
12901 |
+
"epoch": 1.46,
|
12902 |
+
"learning_rate": 0.0003225035833731486,
|
12903 |
+
"loss": 2.0743,
|
12904 |
+
"step": 2136
|
12905 |
+
},
|
12906 |
+
{
|
12907 |
+
"epoch": 1.46,
|
12908 |
+
"learning_rate": 0.0003223602484472049,
|
12909 |
+
"loss": 2.0133,
|
12910 |
+
"step": 2137
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 1.46,
|
12914 |
+
"learning_rate": 0.00032221691352126136,
|
12915 |
+
"loss": 1.9019,
|
12916 |
+
"step": 2138
|
12917 |
+
},
|
12918 |
+
{
|
12919 |
+
"epoch": 1.46,
|
12920 |
+
"learning_rate": 0.0003220735785953177,
|
12921 |
+
"loss": 1.9543,
|
12922 |
+
"step": 2139
|
12923 |
+
},
|
12924 |
+
{
|
12925 |
+
"epoch": 1.46,
|
12926 |
+
"learning_rate": 0.0003219302436693741,
|
12927 |
+
"loss": 1.999,
|
12928 |
+
"step": 2140
|
12929 |
+
},
|
12930 |
+
{
|
12931 |
+
"epoch": 1.46,
|
12932 |
+
"learning_rate": 0.00032178690874343045,
|
12933 |
+
"loss": 1.927,
|
12934 |
+
"step": 2141
|
12935 |
+
},
|
12936 |
+
{
|
12937 |
+
"epoch": 1.46,
|
12938 |
+
"learning_rate": 0.0003216435738174869,
|
12939 |
+
"loss": 2.069,
|
12940 |
+
"step": 2142
|
12941 |
+
},
|
12942 |
+
{
|
12943 |
+
"epoch": 1.47,
|
12944 |
+
"learning_rate": 0.0003215002388915432,
|
12945 |
+
"loss": 1.9393,
|
12946 |
+
"step": 2143
|
12947 |
+
},
|
12948 |
+
{
|
12949 |
+
"epoch": 1.47,
|
12950 |
+
"learning_rate": 0.00032135690396559955,
|
12951 |
+
"loss": 1.9443,
|
12952 |
+
"step": 2144
|
12953 |
+
},
|
12954 |
+
{
|
12955 |
+
"epoch": 1.47,
|
12956 |
+
"learning_rate": 0.000321213569039656,
|
12957 |
+
"loss": 1.9659,
|
12958 |
+
"step": 2145
|
12959 |
+
},
|
12960 |
+
{
|
12961 |
+
"epoch": 1.47,
|
12962 |
+
"learning_rate": 0.0003210702341137123,
|
12963 |
+
"loss": 2.0523,
|
12964 |
+
"step": 2146
|
12965 |
+
},
|
12966 |
+
{
|
12967 |
+
"epoch": 1.47,
|
12968 |
+
"learning_rate": 0.00032092689918776876,
|
12969 |
+
"loss": 2.0378,
|
12970 |
+
"step": 2147
|
12971 |
+
},
|
12972 |
+
{
|
12973 |
+
"epoch": 1.47,
|
12974 |
+
"learning_rate": 0.0003207835642618251,
|
12975 |
+
"loss": 2.0093,
|
12976 |
+
"step": 2148
|
12977 |
+
},
|
12978 |
+
{
|
12979 |
+
"epoch": 1.47,
|
12980 |
+
"learning_rate": 0.00032064022933588147,
|
12981 |
+
"loss": 2.0546,
|
12982 |
+
"step": 2149
|
12983 |
+
},
|
12984 |
+
{
|
12985 |
+
"epoch": 1.47,
|
12986 |
+
"learning_rate": 0.00032049689440993785,
|
12987 |
+
"loss": 2.0351,
|
12988 |
+
"step": 2150
|
12989 |
+
},
|
12990 |
+
{
|
12991 |
+
"epoch": 1.47,
|
12992 |
+
"learning_rate": 0.00032035355948399424,
|
12993 |
+
"loss": 1.9175,
|
12994 |
+
"step": 2151
|
12995 |
+
},
|
12996 |
+
{
|
12997 |
+
"epoch": 1.47,
|
12998 |
+
"learning_rate": 0.0003202102245580506,
|
12999 |
+
"loss": 1.9986,
|
13000 |
+
"step": 2152
|
13001 |
+
},
|
13002 |
+
{
|
13003 |
+
"epoch": 1.47,
|
13004 |
+
"learning_rate": 0.000320066889632107,
|
13005 |
+
"loss": 1.9202,
|
13006 |
+
"step": 2153
|
13007 |
+
},
|
13008 |
+
{
|
13009 |
+
"epoch": 1.47,
|
13010 |
+
"learning_rate": 0.0003199235547061634,
|
13011 |
+
"loss": 1.9855,
|
13012 |
+
"step": 2154
|
13013 |
+
},
|
13014 |
+
{
|
13015 |
+
"epoch": 1.47,
|
13016 |
+
"learning_rate": 0.00031978021978021977,
|
13017 |
+
"loss": 1.9847,
|
13018 |
+
"step": 2155
|
13019 |
+
},
|
13020 |
+
{
|
13021 |
+
"epoch": 1.47,
|
13022 |
+
"learning_rate": 0.0003196368848542761,
|
13023 |
+
"loss": 2.096,
|
13024 |
+
"step": 2156
|
13025 |
+
},
|
13026 |
+
{
|
13027 |
+
"epoch": 1.47,
|
13028 |
+
"learning_rate": 0.00031949354992833254,
|
13029 |
+
"loss": 2.0109,
|
13030 |
+
"step": 2157
|
13031 |
+
},
|
13032 |
+
{
|
13033 |
+
"epoch": 1.48,
|
13034 |
+
"learning_rate": 0.00031935021500238887,
|
13035 |
+
"loss": 1.8784,
|
13036 |
+
"step": 2158
|
13037 |
+
},
|
13038 |
+
{
|
13039 |
+
"epoch": 1.48,
|
13040 |
+
"learning_rate": 0.0003192068800764453,
|
13041 |
+
"loss": 1.902,
|
13042 |
+
"step": 2159
|
13043 |
+
},
|
13044 |
+
{
|
13045 |
+
"epoch": 1.48,
|
13046 |
+
"learning_rate": 0.00031906354515050164,
|
13047 |
+
"loss": 2.0265,
|
13048 |
+
"step": 2160
|
13049 |
+
},
|
13050 |
+
{
|
13051 |
+
"epoch": 1.48,
|
13052 |
+
"learning_rate": 0.0003189202102245581,
|
13053 |
+
"loss": 1.9539,
|
13054 |
+
"step": 2161
|
13055 |
+
},
|
13056 |
+
{
|
13057 |
+
"epoch": 1.48,
|
13058 |
+
"learning_rate": 0.0003187768752986144,
|
13059 |
+
"loss": 1.9743,
|
13060 |
+
"step": 2162
|
13061 |
+
},
|
13062 |
+
{
|
13063 |
+
"epoch": 1.48,
|
13064 |
+
"learning_rate": 0.00031863354037267073,
|
13065 |
+
"loss": 2.0785,
|
13066 |
+
"step": 2163
|
13067 |
+
},
|
13068 |
+
{
|
13069 |
+
"epoch": 1.48,
|
13070 |
+
"learning_rate": 0.00031849020544672717,
|
13071 |
+
"loss": 2.0334,
|
13072 |
+
"step": 2164
|
13073 |
+
},
|
13074 |
+
{
|
13075 |
+
"epoch": 1.48,
|
13076 |
+
"learning_rate": 0.0003183468705207835,
|
13077 |
+
"loss": 1.8959,
|
13078 |
+
"step": 2165
|
13079 |
+
},
|
13080 |
+
{
|
13081 |
+
"epoch": 1.48,
|
13082 |
+
"learning_rate": 0.00031820353559483994,
|
13083 |
+
"loss": 1.9354,
|
13084 |
+
"step": 2166
|
13085 |
+
},
|
13086 |
+
{
|
13087 |
+
"epoch": 1.48,
|
13088 |
+
"learning_rate": 0.00031806020066889627,
|
13089 |
+
"loss": 2.1023,
|
13090 |
+
"step": 2167
|
13091 |
+
},
|
13092 |
+
{
|
13093 |
+
"epoch": 1.48,
|
13094 |
+
"learning_rate": 0.0003179168657429527,
|
13095 |
+
"loss": 2.034,
|
13096 |
+
"step": 2168
|
13097 |
+
},
|
13098 |
+
{
|
13099 |
+
"epoch": 1.48,
|
13100 |
+
"learning_rate": 0.00031777353081700904,
|
13101 |
+
"loss": 1.9602,
|
13102 |
+
"step": 2169
|
13103 |
+
},
|
13104 |
+
{
|
13105 |
+
"epoch": 1.48,
|
13106 |
+
"learning_rate": 0.0003176301958910654,
|
13107 |
+
"loss": 1.9826,
|
13108 |
+
"step": 2170
|
13109 |
+
},
|
13110 |
+
{
|
13111 |
+
"epoch": 1.48,
|
13112 |
+
"learning_rate": 0.0003174868609651218,
|
13113 |
+
"loss": 1.9921,
|
13114 |
+
"step": 2171
|
13115 |
+
},
|
13116 |
+
{
|
13117 |
+
"epoch": 1.48,
|
13118 |
+
"learning_rate": 0.0003173435260391782,
|
13119 |
+
"loss": 1.9476,
|
13120 |
+
"step": 2172
|
13121 |
+
},
|
13122 |
+
{
|
13123 |
+
"epoch": 1.49,
|
13124 |
+
"learning_rate": 0.00031720019111323457,
|
13125 |
+
"loss": 2.0478,
|
13126 |
+
"step": 2173
|
13127 |
+
},
|
13128 |
+
{
|
13129 |
+
"epoch": 1.49,
|
13130 |
+
"learning_rate": 0.00031705685618729095,
|
13131 |
+
"loss": 2.0748,
|
13132 |
+
"step": 2174
|
13133 |
+
},
|
13134 |
+
{
|
13135 |
+
"epoch": 1.49,
|
13136 |
+
"learning_rate": 0.00031691352126134734,
|
13137 |
+
"loss": 1.9633,
|
13138 |
+
"step": 2175
|
13139 |
+
},
|
13140 |
+
{
|
13141 |
+
"epoch": 1.49,
|
13142 |
+
"learning_rate": 0.0003167701863354037,
|
13143 |
+
"loss": 2.001,
|
13144 |
+
"step": 2176
|
13145 |
+
},
|
13146 |
+
{
|
13147 |
+
"epoch": 1.49,
|
13148 |
+
"learning_rate": 0.00031662685140946005,
|
13149 |
+
"loss": 2.0642,
|
13150 |
+
"step": 2177
|
13151 |
+
},
|
13152 |
+
{
|
13153 |
+
"epoch": 1.49,
|
13154 |
+
"learning_rate": 0.0003164835164835165,
|
13155 |
+
"loss": 1.9987,
|
13156 |
+
"step": 2178
|
13157 |
+
},
|
13158 |
+
{
|
13159 |
+
"epoch": 1.49,
|
13160 |
+
"learning_rate": 0.0003163401815575728,
|
13161 |
+
"loss": 1.9435,
|
13162 |
+
"step": 2179
|
13163 |
+
},
|
13164 |
+
{
|
13165 |
+
"epoch": 1.49,
|
13166 |
+
"learning_rate": 0.00031619684663162926,
|
13167 |
+
"loss": 1.9399,
|
13168 |
+
"step": 2180
|
13169 |
+
},
|
13170 |
+
{
|
13171 |
+
"epoch": 1.49,
|
13172 |
+
"learning_rate": 0.0003160535117056856,
|
13173 |
+
"loss": 1.9618,
|
13174 |
+
"step": 2181
|
13175 |
+
},
|
13176 |
+
{
|
13177 |
+
"epoch": 1.49,
|
13178 |
+
"learning_rate": 0.00031591017677974197,
|
13179 |
+
"loss": 1.9743,
|
13180 |
+
"step": 2182
|
13181 |
+
},
|
13182 |
+
{
|
13183 |
+
"epoch": 1.49,
|
13184 |
+
"learning_rate": 0.00031576684185379835,
|
13185 |
+
"loss": 1.9511,
|
13186 |
+
"step": 2183
|
13187 |
+
},
|
13188 |
+
{
|
13189 |
+
"epoch": 1.49,
|
13190 |
+
"learning_rate": 0.0003156235069278547,
|
13191 |
+
"loss": 2.0058,
|
13192 |
+
"step": 2184
|
13193 |
+
},
|
13194 |
+
{
|
13195 |
+
"epoch": 1.49,
|
13196 |
+
"learning_rate": 0.0003154801720019111,
|
13197 |
+
"loss": 2.0174,
|
13198 |
+
"step": 2185
|
13199 |
+
},
|
13200 |
+
{
|
13201 |
+
"epoch": 1.49,
|
13202 |
+
"learning_rate": 0.00031533683707596745,
|
13203 |
+
"loss": 2.0545,
|
13204 |
+
"step": 2186
|
13205 |
+
},
|
13206 |
+
{
|
13207 |
+
"epoch": 1.5,
|
13208 |
+
"learning_rate": 0.0003151935021500239,
|
13209 |
+
"loss": 1.9771,
|
13210 |
+
"step": 2187
|
13211 |
+
},
|
13212 |
+
{
|
13213 |
+
"epoch": 1.5,
|
13214 |
+
"learning_rate": 0.0003150501672240802,
|
13215 |
+
"loss": 1.9741,
|
13216 |
+
"step": 2188
|
13217 |
+
},
|
13218 |
+
{
|
13219 |
+
"epoch": 1.5,
|
13220 |
+
"learning_rate": 0.00031490683229813666,
|
13221 |
+
"loss": 1.9993,
|
13222 |
+
"step": 2189
|
13223 |
+
},
|
13224 |
+
{
|
13225 |
+
"epoch": 1.5,
|
13226 |
+
"learning_rate": 0.000314763497372193,
|
13227 |
+
"loss": 1.9776,
|
13228 |
+
"step": 2190
|
13229 |
+
},
|
13230 |
+
{
|
13231 |
+
"epoch": 1.5,
|
13232 |
+
"learning_rate": 0.00031462016244624937,
|
13233 |
+
"loss": 2.0027,
|
13234 |
+
"step": 2191
|
13235 |
+
},
|
13236 |
+
{
|
13237 |
+
"epoch": 1.5,
|
13238 |
+
"learning_rate": 0.00031447682752030575,
|
13239 |
+
"loss": 2.0119,
|
13240 |
+
"step": 2192
|
13241 |
+
},
|
13242 |
+
{
|
13243 |
+
"epoch": 1.5,
|
13244 |
+
"learning_rate": 0.00031433349259436214,
|
13245 |
+
"loss": 1.9371,
|
13246 |
+
"step": 2193
|
13247 |
+
},
|
13248 |
+
{
|
13249 |
+
"epoch": 1.5,
|
13250 |
+
"learning_rate": 0.0003141901576684185,
|
13251 |
+
"loss": 2.0221,
|
13252 |
+
"step": 2194
|
13253 |
+
},
|
13254 |
+
{
|
13255 |
+
"epoch": 1.5,
|
13256 |
+
"learning_rate": 0.0003140468227424749,
|
13257 |
+
"loss": 2.0126,
|
13258 |
+
"step": 2195
|
13259 |
+
},
|
13260 |
+
{
|
13261 |
+
"epoch": 1.5,
|
13262 |
+
"learning_rate": 0.0003139034878165313,
|
13263 |
+
"loss": 2.066,
|
13264 |
+
"step": 2196
|
13265 |
+
},
|
13266 |
+
{
|
13267 |
+
"epoch": 1.5,
|
13268 |
+
"learning_rate": 0.0003137601528905877,
|
13269 |
+
"loss": 1.9748,
|
13270 |
+
"step": 2197
|
13271 |
+
},
|
13272 |
+
{
|
13273 |
+
"epoch": 1.5,
|
13274 |
+
"learning_rate": 0.000313616817964644,
|
13275 |
+
"loss": 1.9549,
|
13276 |
+
"step": 2198
|
13277 |
+
},
|
13278 |
+
{
|
13279 |
+
"epoch": 1.5,
|
13280 |
+
"learning_rate": 0.0003134734830387004,
|
13281 |
+
"loss": 2.0066,
|
13282 |
+
"step": 2199
|
13283 |
+
},
|
13284 |
+
{
|
13285 |
+
"epoch": 1.5,
|
13286 |
+
"learning_rate": 0.00031333014811275677,
|
13287 |
+
"loss": 2.0541,
|
13288 |
+
"step": 2200
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 1.5,
|
13292 |
+
"eval_loss": 1.9827316999435425,
|
13293 |
+
"eval_runtime": 1651.9566,
|
13294 |
+
"eval_samples_per_second": 6.053,
|
13295 |
+
"eval_steps_per_second": 6.053,
|
13296 |
+
"step": 2200
|
13297 |
}
|
13298 |
],
|
13299 |
"max_steps": 4386,
|
13300 |
"num_train_epochs": 3,
|
13301 |
+
"total_flos": 2.69384075772468e+18,
|
13302 |
"trial_name": null,
|
13303 |
"trial_params": null
|
13304 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2368281769
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce38e42231eaec2ee57adbd3a558f89d6a9d15cb466c85343dd8a5eee549c297
|
3 |
size 2368281769
|