Training in progress, step 1750000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c223f21c9f3d69fb40b6ad537a2d1e1726b01ec615931fd84b4f155a73edb6cb
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4b2f64ee4b8a3f1cf3d86fb133d82c77bc0f7052c00d93cb35fb4180acc8509
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21643
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e052c7897af7d62d87b26b3f0036377845bb2408ce5c5d3e7b4078dbe5f611ef
|
3 |
size 21643
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af6d04926cbb05a843491ada6b24ca053dbb81e1dc7c6706a5415b4d4cca0e78
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd9e8ca586c336641c0b85f2a85288a9eeaaab808e84d3e0180b33f991192ef6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -12926,11 +12926,391 @@
|
|
12926 |
"eval_samples_per_second": 81.826,
|
12927 |
"eval_steps_per_second": 0.639,
|
12928 |
"step": 1700000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12929 |
}
|
12930 |
],
|
12931 |
"max_steps": 2000000,
|
12932 |
"num_train_epochs": 9223372036854775807,
|
12933 |
-
"total_flos": 1.
|
12934 |
"trial_name": null,
|
12935 |
"trial_params": null
|
12936 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.125,
|
5 |
+
"global_step": 1750000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
12926 |
"eval_samples_per_second": 81.826,
|
12927 |
"eval_steps_per_second": 0.639,
|
12928 |
"step": 1700000
|
12929 |
+
},
|
12930 |
+
{
|
12931 |
+
"epoch": 0.1,
|
12932 |
+
"learning_rate": 1.9356303574345033e-05,
|
12933 |
+
"loss": 0.44,
|
12934 |
+
"step": 1701000
|
12935 |
+
},
|
12936 |
+
{
|
12937 |
+
"epoch": 0.1,
|
12938 |
+
"learning_rate": 1.9295375753654256e-05,
|
12939 |
+
"loss": 0.4406,
|
12940 |
+
"step": 1702000
|
12941 |
+
},
|
12942 |
+
{
|
12943 |
+
"epoch": 0.1,
|
12944 |
+
"learning_rate": 1.9234693500252896e-05,
|
12945 |
+
"loss": 0.4392,
|
12946 |
+
"step": 1703000
|
12947 |
+
},
|
12948 |
+
{
|
12949 |
+
"epoch": 0.1,
|
12950 |
+
"learning_rate": 1.9174195978495195e-05,
|
12951 |
+
"loss": 0.4389,
|
12952 |
+
"step": 1704000
|
12953 |
+
},
|
12954 |
+
{
|
12955 |
+
"epoch": 0.1,
|
12956 |
+
"learning_rate": 1.9113823092023844e-05,
|
12957 |
+
"loss": 0.4395,
|
12958 |
+
"step": 1705000
|
12959 |
+
},
|
12960 |
+
{
|
12961 |
+
"epoch": 0.1,
|
12962 |
+
"eval_loss": 0.42103949189186096,
|
12963 |
+
"eval_runtime": 79.7626,
|
12964 |
+
"eval_samples_per_second": 80.238,
|
12965 |
+
"eval_steps_per_second": 0.627,
|
12966 |
+
"step": 1705000
|
12967 |
+
},
|
12968 |
+
{
|
12969 |
+
"epoch": 0.1,
|
12970 |
+
"learning_rate": 1.9053635675406527e-05,
|
12971 |
+
"loss": 0.4387,
|
12972 |
+
"step": 1706000
|
12973 |
+
},
|
12974 |
+
{
|
12975 |
+
"epoch": 0.1,
|
12976 |
+
"learning_rate": 1.899363391198454e-05,
|
12977 |
+
"loss": 0.4405,
|
12978 |
+
"step": 1707000
|
12979 |
+
},
|
12980 |
+
{
|
12981 |
+
"epoch": 0.1,
|
12982 |
+
"learning_rate": 1.893381798453365e-05,
|
12983 |
+
"loss": 0.4395,
|
12984 |
+
"step": 1708000
|
12985 |
+
},
|
12986 |
+
{
|
12987 |
+
"epoch": 0.1,
|
12988 |
+
"learning_rate": 1.887418807526355e-05,
|
12989 |
+
"loss": 0.439,
|
12990 |
+
"step": 1709000
|
12991 |
+
},
|
12992 |
+
{
|
12993 |
+
"epoch": 0.1,
|
12994 |
+
"learning_rate": 1.8814803716459616e-05,
|
12995 |
+
"loss": 0.439,
|
12996 |
+
"step": 1710000
|
12997 |
+
},
|
12998 |
+
{
|
12999 |
+
"epoch": 0.1,
|
13000 |
+
"eval_loss": 0.42225512862205505,
|
13001 |
+
"eval_runtime": 77.1239,
|
13002 |
+
"eval_samples_per_second": 82.983,
|
13003 |
+
"eval_steps_per_second": 0.648,
|
13004 |
+
"step": 1710000
|
13005 |
+
},
|
13006 |
+
{
|
13007 |
+
"epoch": 0.11,
|
13008 |
+
"learning_rate": 1.875560536579964e-05,
|
13009 |
+
"loss": 0.439,
|
13010 |
+
"step": 1711000
|
13011 |
+
},
|
13012 |
+
{
|
13013 |
+
"epoch": 0.11,
|
13014 |
+
"learning_rate": 1.8696534225358292e-05,
|
13015 |
+
"loss": 0.4392,
|
13016 |
+
"step": 1712000
|
13017 |
+
},
|
13018 |
+
{
|
13019 |
+
"epoch": 0.11,
|
13020 |
+
"learning_rate": 1.8637708616967782e-05,
|
13021 |
+
"loss": 0.4397,
|
13022 |
+
"step": 1713000
|
13023 |
+
},
|
13024 |
+
{
|
13025 |
+
"epoch": 0.11,
|
13026 |
+
"learning_rate": 1.8579010950865316e-05,
|
13027 |
+
"loss": 0.4385,
|
13028 |
+
"step": 1714000
|
13029 |
+
},
|
13030 |
+
{
|
13031 |
+
"epoch": 0.11,
|
13032 |
+
"learning_rate": 1.852050038374741e-05,
|
13033 |
+
"loss": 0.4391,
|
13034 |
+
"step": 1715000
|
13035 |
+
},
|
13036 |
+
{
|
13037 |
+
"epoch": 0.11,
|
13038 |
+
"eval_loss": 0.420716792345047,
|
13039 |
+
"eval_runtime": 78.5003,
|
13040 |
+
"eval_samples_per_second": 81.528,
|
13041 |
+
"eval_steps_per_second": 0.637,
|
13042 |
+
"step": 1715000
|
13043 |
+
},
|
13044 |
+
{
|
13045 |
+
"epoch": 0.11,
|
13046 |
+
"learning_rate": 1.8462235323533128e-05,
|
13047 |
+
"loss": 0.4395,
|
13048 |
+
"step": 1716000
|
13049 |
+
},
|
13050 |
+
{
|
13051 |
+
"epoch": 0.11,
|
13052 |
+
"learning_rate": 1.8404099300970416e-05,
|
13053 |
+
"loss": 0.4378,
|
13054 |
+
"step": 1717000
|
13055 |
+
},
|
13056 |
+
{
|
13057 |
+
"epoch": 0.11,
|
13058 |
+
"learning_rate": 1.8346208764813356e-05,
|
13059 |
+
"loss": 0.4408,
|
13060 |
+
"step": 1718000
|
13061 |
+
},
|
13062 |
+
{
|
13063 |
+
"epoch": 0.11,
|
13064 |
+
"learning_rate": 1.8288447994466744e-05,
|
13065 |
+
"loss": 0.4388,
|
13066 |
+
"step": 1719000
|
13067 |
+
},
|
13068 |
+
{
|
13069 |
+
"epoch": 0.11,
|
13070 |
+
"learning_rate": 1.8230932687039617e-05,
|
13071 |
+
"loss": 0.439,
|
13072 |
+
"step": 1720000
|
13073 |
+
},
|
13074 |
+
{
|
13075 |
+
"epoch": 0.11,
|
13076 |
+
"eval_loss": 0.41757285594940186,
|
13077 |
+
"eval_runtime": 79.8473,
|
13078 |
+
"eval_samples_per_second": 80.153,
|
13079 |
+
"eval_steps_per_second": 0.626,
|
13080 |
+
"step": 1720000
|
13081 |
+
},
|
13082 |
+
{
|
13083 |
+
"epoch": 0.11,
|
13084 |
+
"learning_rate": 1.8173547872002242e-05,
|
13085 |
+
"loss": 0.4384,
|
13086 |
+
"step": 1721000
|
13087 |
+
},
|
13088 |
+
{
|
13089 |
+
"epoch": 0.11,
|
13090 |
+
"learning_rate": 1.811640849341029e-05,
|
13091 |
+
"loss": 0.4401,
|
13092 |
+
"step": 1722000
|
13093 |
+
},
|
13094 |
+
{
|
13095 |
+
"epoch": 0.11,
|
13096 |
+
"learning_rate": 1.8059400332198968e-05,
|
13097 |
+
"loss": 0.438,
|
13098 |
+
"step": 1723000
|
13099 |
+
},
|
13100 |
+
{
|
13101 |
+
"epoch": 0.11,
|
13102 |
+
"learning_rate": 1.8002580852796262e-05,
|
13103 |
+
"loss": 0.4401,
|
13104 |
+
"step": 1724000
|
13105 |
+
},
|
13106 |
+
{
|
13107 |
+
"epoch": 0.11,
|
13108 |
+
"learning_rate": 1.7945950228284155e-05,
|
13109 |
+
"loss": 0.4401,
|
13110 |
+
"step": 1725000
|
13111 |
+
},
|
13112 |
+
{
|
13113 |
+
"epoch": 0.11,
|
13114 |
+
"eval_loss": 0.41903457045555115,
|
13115 |
+
"eval_runtime": 77.4134,
|
13116 |
+
"eval_samples_per_second": 82.673,
|
13117 |
+
"eval_steps_per_second": 0.646,
|
13118 |
+
"step": 1725000
|
13119 |
+
},
|
13120 |
+
{
|
13121 |
+
"epoch": 0.11,
|
13122 |
+
"learning_rate": 1.788950863116934e-05,
|
13123 |
+
"loss": 0.4383,
|
13124 |
+
"step": 1726000
|
13125 |
+
},
|
13126 |
+
{
|
13127 |
+
"epoch": 0.11,
|
13128 |
+
"learning_rate": 1.783331239121836e-05,
|
13129 |
+
"loss": 0.4383,
|
13130 |
+
"step": 1727000
|
13131 |
+
},
|
13132 |
+
{
|
13133 |
+
"epoch": 0.11,
|
13134 |
+
"learning_rate": 1.7777305143227536e-05,
|
13135 |
+
"loss": 0.4401,
|
13136 |
+
"step": 1728000
|
13137 |
+
},
|
13138 |
+
{
|
13139 |
+
"epoch": 0.11,
|
13140 |
+
"learning_rate": 1.772143127833117e-05,
|
13141 |
+
"loss": 0.4391,
|
13142 |
+
"step": 1729000
|
13143 |
+
},
|
13144 |
+
{
|
13145 |
+
"epoch": 0.12,
|
13146 |
+
"learning_rate": 1.766574712475575e-05,
|
13147 |
+
"loss": 0.439,
|
13148 |
+
"step": 1730000
|
13149 |
+
},
|
13150 |
+
{
|
13151 |
+
"epoch": 0.12,
|
13152 |
+
"eval_loss": 0.4182310104370117,
|
13153 |
+
"eval_runtime": 75.5121,
|
13154 |
+
"eval_samples_per_second": 84.755,
|
13155 |
+
"eval_steps_per_second": 0.662,
|
13156 |
+
"step": 1730000
|
13157 |
+
},
|
13158 |
+
{
|
13159 |
+
"epoch": 0.12,
|
13160 |
+
"learning_rate": 1.7610252852124898e-05,
|
13161 |
+
"loss": 0.4387,
|
13162 |
+
"step": 1731000
|
13163 |
+
},
|
13164 |
+
{
|
13165 |
+
"epoch": 0.12,
|
13166 |
+
"learning_rate": 1.755494862948377e-05,
|
13167 |
+
"loss": 0.4382,
|
13168 |
+
"step": 1732000
|
13169 |
+
},
|
13170 |
+
{
|
13171 |
+
"epoch": 0.12,
|
13172 |
+
"learning_rate": 1.7499889644232756e-05,
|
13173 |
+
"loss": 0.4385,
|
13174 |
+
"step": 1733000
|
13175 |
+
},
|
13176 |
+
{
|
13177 |
+
"epoch": 0.12,
|
13178 |
+
"learning_rate": 1.744496583592041e-05,
|
13179 |
+
"loss": 0.4408,
|
13180 |
+
"step": 1734000
|
13181 |
+
},
|
13182 |
+
{
|
13183 |
+
"epoch": 0.12,
|
13184 |
+
"learning_rate": 1.7390287219108524e-05,
|
13185 |
+
"loss": 0.4401,
|
13186 |
+
"step": 1735000
|
13187 |
+
},
|
13188 |
+
{
|
13189 |
+
"epoch": 0.12,
|
13190 |
+
"eval_loss": 0.4186602830886841,
|
13191 |
+
"eval_runtime": 80.3977,
|
13192 |
+
"eval_samples_per_second": 79.604,
|
13193 |
+
"eval_steps_per_second": 0.622,
|
13194 |
+
"step": 1735000
|
13195 |
+
},
|
13196 |
+
{
|
13197 |
+
"epoch": 0.12,
|
13198 |
+
"learning_rate": 1.733574449368513e-05,
|
13199 |
+
"loss": 0.4391,
|
13200 |
+
"step": 1736000
|
13201 |
+
},
|
13202 |
+
{
|
13203 |
+
"epoch": 0.12,
|
13204 |
+
"learning_rate": 1.7281392654451555e-05,
|
13205 |
+
"loss": 0.4401,
|
13206 |
+
"step": 1737000
|
13207 |
+
},
|
13208 |
+
{
|
13209 |
+
"epoch": 0.12,
|
13210 |
+
"learning_rate": 1.7227339997768454e-05,
|
13211 |
+
"loss": 0.4405,
|
13212 |
+
"step": 1738000
|
13213 |
+
},
|
13214 |
+
{
|
13215 |
+
"epoch": 0.12,
|
13216 |
+
"learning_rate": 1.7173370044430122e-05,
|
13217 |
+
"loss": 0.439,
|
13218 |
+
"step": 1739000
|
13219 |
+
},
|
13220 |
+
{
|
13221 |
+
"epoch": 0.12,
|
13222 |
+
"learning_rate": 1.7119591471902336e-05,
|
13223 |
+
"loss": 0.4397,
|
13224 |
+
"step": 1740000
|
13225 |
+
},
|
13226 |
+
{
|
13227 |
+
"epoch": 0.12,
|
13228 |
+
"eval_loss": 0.41898131370544434,
|
13229 |
+
"eval_runtime": 77.8556,
|
13230 |
+
"eval_samples_per_second": 82.203,
|
13231 |
+
"eval_steps_per_second": 0.642,
|
13232 |
+
"step": 1740000
|
13233 |
+
},
|
13234 |
+
{
|
13235 |
+
"epoch": 0.12,
|
13236 |
+
"learning_rate": 1.7066004444003927e-05,
|
13237 |
+
"loss": 0.4388,
|
13238 |
+
"step": 1741000
|
13239 |
+
},
|
13240 |
+
{
|
13241 |
+
"epoch": 0.12,
|
13242 |
+
"learning_rate": 1.7012609123970294e-05,
|
13243 |
+
"loss": 0.4388,
|
13244 |
+
"step": 1742000
|
13245 |
+
},
|
13246 |
+
{
|
13247 |
+
"epoch": 0.12,
|
13248 |
+
"learning_rate": 1.6959405674452816e-05,
|
13249 |
+
"loss": 0.4393,
|
13250 |
+
"step": 1743000
|
13251 |
+
},
|
13252 |
+
{
|
13253 |
+
"epoch": 0.12,
|
13254 |
+
"learning_rate": 1.6906447172961322e-05,
|
13255 |
+
"loss": 0.4386,
|
13256 |
+
"step": 1744000
|
13257 |
+
},
|
13258 |
+
{
|
13259 |
+
"epoch": 0.12,
|
13260 |
+
"learning_rate": 1.6853627757817506e-05,
|
13261 |
+
"loss": 0.4379,
|
13262 |
+
"step": 1745000
|
13263 |
+
},
|
13264 |
+
{
|
13265 |
+
"epoch": 0.12,
|
13266 |
+
"eval_loss": 0.41874217987060547,
|
13267 |
+
"eval_runtime": 79.3439,
|
13268 |
+
"eval_samples_per_second": 80.662,
|
13269 |
+
"eval_steps_per_second": 0.63,
|
13270 |
+
"step": 1745000
|
13271 |
+
},
|
13272 |
+
{
|
13273 |
+
"epoch": 0.12,
|
13274 |
+
"learning_rate": 1.6801053228400387e-05,
|
13275 |
+
"loss": 0.4407,
|
13276 |
+
"step": 1746000
|
13277 |
+
},
|
13278 |
+
{
|
13279 |
+
"epoch": 0.12,
|
13280 |
+
"learning_rate": 1.6748618490574697e-05,
|
13281 |
+
"loss": 0.4398,
|
13282 |
+
"step": 1747000
|
13283 |
+
},
|
13284 |
+
{
|
13285 |
+
"epoch": 0.12,
|
13286 |
+
"learning_rate": 1.669637642742642e-05,
|
13287 |
+
"loss": 0.4385,
|
13288 |
+
"step": 1748000
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 0.12,
|
13292 |
+
"learning_rate": 1.6644327198093962e-05,
|
13293 |
+
"loss": 0.4376,
|
13294 |
+
"step": 1749000
|
13295 |
+
},
|
13296 |
+
{
|
13297 |
+
"epoch": 0.12,
|
13298 |
+
"learning_rate": 1.6592522720912954e-05,
|
13299 |
+
"loss": 0.4381,
|
13300 |
+
"step": 1750000
|
13301 |
+
},
|
13302 |
+
{
|
13303 |
+
"epoch": 0.12,
|
13304 |
+
"eval_loss": 0.42223626375198364,
|
13305 |
+
"eval_runtime": 79.3599,
|
13306 |
+
"eval_samples_per_second": 80.645,
|
13307 |
+
"eval_steps_per_second": 0.63,
|
13308 |
+
"step": 1750000
|
13309 |
}
|
13310 |
],
|
13311 |
"max_steps": 2000000,
|
13312 |
"num_train_epochs": 9223372036854775807,
|
13313 |
+
"total_flos": 1.533443229351936e+22,
|
13314 |
"trial_name": null,
|
13315 |
"trial_params": null
|
13316 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4b2f64ee4b8a3f1cf3d86fb133d82c77bc0f7052c00d93cb35fb4180acc8509
|
3 |
size 449471589
|