pearsonkyle committed
Commit • 3ddee7e
1 Parent(s): 9eeba41
new model

Browse files:
- README.md +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- scheduler.pt +1 -1
- trainer_state.json +423 -3
README.md
CHANGED
@@ -1,7 +1,7 @@
 # Exo-Machina
 A deep language model, GPT-2, is trained on scientific manuscripts from NASA's Astrophysical Data System pertaining to extrasolar planets and the references therein. This pilot study uses the abstracts of each article as training data in order to explore correlations in scientific literature from a language perspective. A language model is a mathematical representation of an algorithm that generates sequences of text the way a human forms sentences. Each word or letter in a sentence is encoded to a numerical value (e.g. using word2vec) and appended to a list, forming sequences that represent up to a paragraph's worth of text. The sequences are fed into the [GPT-2](https://openai.com/blog/better-language-models/) 117M model and trained for 500,000 steps with fine tuning. After training, the language model is used to generate new text from scratch and from user input.

-### [Browse
+### [Browse samples](https://pearsonkyle.github.io/Exo-Machina/)

 ### [Train a model on Google Colab](https://colab.research.google.com/drive/1Pur0rFi5YVdn7axYRacXWFMic4NxRexV?usp=sharing)
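The README above describes generating text from the fine-tuned checkpoint. A minimal sketch of that generation step with the Hugging Face transformers API is below; the local checkpoint path and the sampling settings are illustrative placeholders, not values confirmed by this commit.

```python
# Minimal sketch: load the fine-tuned GPT-2 checkpoint from this repository
# and sample abstract-like text from a prompt. "./Exo-Machina" is a
# placeholder path for a local clone of this model repo.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("./Exo-Machina")
model = GPT2LMHeadModel.from_pretrained("./Exo-Machina")

prompt = "Directly imaged exoplanets are"
inputs = tokenizer(prompt, return_tensors="pt")

# Sampling hyperparameters here are illustrative defaults, not from the repo.
outputs = model.generate(
    **inputs,
    max_length=100,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```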
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3c30d157af2f57cabc3a5a2c810287a09033fbf373612568cef620f66f82e0f8
 size 995610991
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c305cbf33d44d65fd5ea0fb1a4d8c4aaeae0fd2dc88eb7ce4ffb2c5e954ecb93
 size 510407951
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c6149ede9fc96f56bbd7ff6a474936250fd17b0a96c98ce43da847600b309d96
 size 623
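optimizer.pt, pytorch_model.bin, and scheduler.pt above are tracked with Git LFS, so the repository stores only pointer files: a spec version, a sha256 object id, and the byte size. A minimal sketch of checking locally downloaded copies against the new pointer values (assuming the real binaries have been pulled into the working directory):

```python
# Minimal sketch: verify Git LFS objects against the pointer fields in this
# commit (oid sha256 and size). Assumes the binaries sit in the current
# directory after an LFS pull.
import hashlib
import os

EXPECTED = {
    # file name: (sha256 oid from the new pointer, size in bytes)
    "optimizer.pt": ("3c30d157af2f57cabc3a5a2c810287a09033fbf373612568cef620f66f82e0f8", 995610991),
    "pytorch_model.bin": ("c305cbf33d44d65fd5ea0fb1a4d8c4aaeae0fd2dc88eb7ce4ffb2c5e954ecb93", 510407951),
    "scheduler.pt": ("c6149ede9fc96f56bbd7ff6a474936250fd17b0a96c98ce43da847600b309d96", 623),
}

for name, (oid, size) in EXPECTED.items():
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    digest = hashlib.sha256()
    with open(name, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading ~1 GB files into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    assert digest.hexdigest() == oid, f"{name}: sha256 mismatch"
    print(f"{name}: OK")
```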
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
-  "global_step":
+  "epoch": 76.60298846226594,
+  "global_step": 405000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2016,11 +2016,431 @@
       "learning_rate": 1.8318517117457917e-05,
       "loss": 1.382125,
       "step": 335000
+    },
+    {
+      "epoch": 63.55210894647248,
+      "learning_rate": 1.8223945526763762e-05,
+      "loss": 1.3925,
+      "step": 336000
+    },
+    {
+      "epoch": 63.74125212786079,
+      "learning_rate": 1.8129373936069604e-05,
+      "loss": 1.3989375,
+      "step": 337000
+    },
+    {
+      "epoch": 63.9303953092491,
+      "learning_rate": 1.8034802345375452e-05,
+      "loss": 1.4040625,
+      "step": 338000
+    },
+    {
+      "epoch": 64.11953849063741,
+      "learning_rate": 1.7940230754681294e-05,
+      "loss": 1.3838125,
+      "step": 339000
+    },
+    {
+      "epoch": 64.30868167202573,
+      "learning_rate": 1.784565916398714e-05,
+      "loss": 1.3745625,
+      "step": 340000
+    },
+    {
+      "epoch": 64.49782485341403,
+      "learning_rate": 1.7751087573292984e-05,
+      "loss": 1.381875,
+      "step": 341000
+    },
+    {
+      "epoch": 64.68696803480235,
+      "learning_rate": 1.765651598259883e-05,
+      "loss": 1.3859375,
+      "step": 342000
+    },
+    {
+      "epoch": 64.87611121619065,
+      "learning_rate": 1.756194439190467e-05,
+      "loss": 1.4001875,
+      "step": 343000
+    },
+    {
+      "epoch": 65.06525439757897,
+      "learning_rate": 1.7467372801210516e-05,
+      "loss": 1.387,
+      "step": 344000
+    },
+    {
+      "epoch": 65.25439757896727,
+      "learning_rate": 1.737280121051636e-05,
+      "loss": 1.3645625,
+      "step": 345000
+    },
+    {
+      "epoch": 65.44354076035559,
+      "learning_rate": 1.7278229619822206e-05,
+      "loss": 1.373375,
+      "step": 346000
+    },
+    {
+      "epoch": 65.6326839417439,
+      "learning_rate": 1.7183658029128048e-05,
+      "loss": 1.384,
+      "step": 347000
+    },
+    {
+      "epoch": 65.82182712313221,
+      "learning_rate": 1.7089086438433897e-05,
+      "loss": 1.386875,
+      "step": 348000
+    },
+    {
+      "epoch": 66.01097030452053,
+      "learning_rate": 1.6994514847739738e-05,
+      "loss": 1.3924375,
+      "step": 349000
+    },
+    {
+      "epoch": 66.20011348590883,
+      "learning_rate": 1.6899943257045583e-05,
+      "loss": 1.357375,
+      "step": 350000
+    },
+    {
+      "epoch": 66.38925666729715,
+      "learning_rate": 1.680537166635143e-05,
+      "loss": 1.3655625,
+      "step": 351000
+    },
+    {
+      "epoch": 66.57839984868545,
+      "learning_rate": 1.6710800075657274e-05,
+      "loss": 1.3670625,
+      "step": 352000
+    },
+    {
+      "epoch": 66.76754303007377,
+      "learning_rate": 1.661622848496312e-05,
+      "loss": 1.3799375,
+      "step": 353000
+    },
+    {
+      "epoch": 66.95668621146207,
+      "learning_rate": 1.6521656894268964e-05,
+      "loss": 1.3880625,
+      "step": 354000
+    },
+    {
+      "epoch": 67.14582939285039,
+      "learning_rate": 1.642708530357481e-05,
+      "loss": 1.3585,
+      "step": 355000
+    },
+    {
+      "epoch": 67.3349725742387,
+      "learning_rate": 1.633251371288065e-05,
+      "loss": 1.3579375,
+      "step": 356000
+    },
+    {
+      "epoch": 67.52411575562701,
+      "learning_rate": 1.6237942122186496e-05,
+      "loss": 1.3641875,
+      "step": 357000
+    },
+    {
+      "epoch": 67.71325893701533,
+      "learning_rate": 1.614337053149234e-05,
+      "loss": 1.3701875,
+      "step": 358000
+    },
+    {
+      "epoch": 67.90240211840363,
+      "learning_rate": 1.6048798940798186e-05,
+      "loss": 1.375875,
+      "step": 359000
+    },
+    {
+      "epoch": 68.09154529979195,
+      "learning_rate": 1.5954227350104027e-05,
+      "loss": 1.359,
+      "step": 360000
+    },
+    {
+      "epoch": 68.28068848118025,
+      "learning_rate": 1.5859655759409876e-05,
+      "loss": 1.3483125,
+      "step": 361000
+    },
+    {
+      "epoch": 68.46983166256857,
+      "learning_rate": 1.5765084168715718e-05,
+      "loss": 1.3565,
+      "step": 362000
+    },
+    {
+      "epoch": 68.65897484395687,
+      "learning_rate": 1.5670512578021563e-05,
+      "loss": 1.362125,
+      "step": 363000
+    },
+    {
+      "epoch": 68.84811802534519,
+      "learning_rate": 1.5575940987327408e-05,
+      "loss": 1.36725,
+      "step": 364000
+    },
+    {
+      "epoch": 69.0372612067335,
+      "learning_rate": 1.5481369396633253e-05,
+      "loss": 1.3636875,
+      "step": 365000
+    },
+    {
+      "epoch": 69.22640438812181,
+      "learning_rate": 1.5386797805939095e-05,
+      "loss": 1.33725,
+      "step": 366000
+    },
+    {
+      "epoch": 69.41554756951012,
+      "learning_rate": 1.5292226215244943e-05,
+      "loss": 1.3505,
+      "step": 367000
+    },
+    {
+      "epoch": 69.60469075089843,
+      "learning_rate": 1.5197654624550786e-05,
+      "loss": 1.3563125,
+      "step": 368000
+    },
+    {
+      "epoch": 69.79383393228674,
+      "learning_rate": 1.510308303385663e-05,
+      "loss": 1.3588125,
+      "step": 369000
+    },
+    {
+      "epoch": 69.98297711367505,
+      "learning_rate": 1.5008511443162473e-05,
+      "loss": 1.369125,
+      "step": 370000
+    },
+    {
+      "epoch": 70.17212029506337,
+      "learning_rate": 1.491393985246832e-05,
+      "loss": 1.3346875,
+      "step": 371000
+    },
+    {
+      "epoch": 70.36126347645167,
+      "learning_rate": 1.4819368261774163e-05,
+      "loss": 1.338,
+      "step": 372000
+    },
+    {
+      "epoch": 70.55040665783999,
+      "learning_rate": 1.4724796671080007e-05,
+      "loss": 1.3443125,
+      "step": 373000
+    },
+    {
+      "epoch": 70.73954983922829,
+      "learning_rate": 1.4630225080385854e-05,
+      "loss": 1.3544375,
+      "step": 374000
+    },
+    {
+      "epoch": 70.9286930206166,
+      "learning_rate": 1.4535653489691697e-05,
+      "loss": 1.3595625,
+      "step": 375000
+    },
+    {
+      "epoch": 71.11783620200492,
+      "learning_rate": 1.444108189899754e-05,
+      "loss": 1.343875,
+      "step": 376000
+    },
+    {
+      "epoch": 71.30697938339323,
+      "learning_rate": 1.4346510308303387e-05,
+      "loss": 1.332625,
+      "step": 377000
+    },
+    {
+      "epoch": 71.49612256478154,
+      "learning_rate": 1.425193871760923e-05,
+      "loss": 1.3395625,
+      "step": 378000
+    },
+    {
+      "epoch": 71.68526574616985,
+      "learning_rate": 1.4157367126915074e-05,
+      "loss": 1.344125,
+      "step": 379000
+    },
+    {
+      "epoch": 71.87440892755816,
+      "learning_rate": 1.406279553622092e-05,
+      "loss": 1.3505625,
+      "step": 380000
+    },
+    {
+      "epoch": 72.06355210894647,
+      "learning_rate": 1.3968223945526764e-05,
+      "loss": 1.3428125,
+      "step": 381000
+    },
+    {
+      "epoch": 72.25269529033478,
+      "learning_rate": 1.387365235483261e-05,
+      "loss": 1.324875,
+      "step": 382000
+    },
+    {
+      "epoch": 72.44183847172309,
+      "learning_rate": 1.3779080764138453e-05,
+      "loss": 1.332125,
+      "step": 383000
+    },
+    {
+      "epoch": 72.6309816531114,
+      "learning_rate": 1.36845091734443e-05,
+      "loss": 1.339,
+      "step": 384000
+    },
+    {
+      "epoch": 72.82012483449972,
+      "learning_rate": 1.3589937582750143e-05,
+      "loss": 1.3420625,
+      "step": 385000
+    },
+    {
+      "epoch": 73.00926801588803,
+      "learning_rate": 1.3495365992055986e-05,
+      "loss": 1.342875,
+      "step": 386000
+    },
+    {
+      "epoch": 73.19841119727634,
+      "learning_rate": 1.3400794401361833e-05,
+      "loss": 1.3198125,
+      "step": 387000
+    },
+    {
+      "epoch": 73.38755437866465,
+      "learning_rate": 1.3306222810667676e-05,
+      "loss": 1.3253125,
+      "step": 388000
+    },
+    {
+      "epoch": 73.57669756005296,
+      "learning_rate": 1.321165121997352e-05,
+      "loss": 1.328875,
+      "step": 389000
+    },
+    {
+      "epoch": 73.76584074144127,
+      "learning_rate": 1.3117079629279367e-05,
+      "loss": 1.335875,
+      "step": 390000
+    },
+    {
+      "epoch": 73.95498392282958,
+      "learning_rate": 1.302250803858521e-05,
+      "loss": 1.3428125,
+      "step": 391000
+    },
+    {
+      "epoch": 74.14412710421789,
+      "learning_rate": 1.2927936447891053e-05,
+      "loss": 1.3216875,
+      "step": 392000
+    },
+    {
+      "epoch": 74.3332702856062,
+      "learning_rate": 1.2833364857196897e-05,
+      "loss": 1.3190625,
+      "step": 393000
+    },
+    {
+      "epoch": 74.52241346699452,
+      "learning_rate": 1.2738793266502744e-05,
+      "loss": 1.3250625,
+      "step": 394000
+    },
+    {
+      "epoch": 74.71155664838282,
+      "learning_rate": 1.2644221675808587e-05,
+      "loss": 1.3285625,
+      "step": 395000
+    },
+    {
+      "epoch": 74.90069982977114,
+      "learning_rate": 1.254965008511443e-05,
+      "loss": 1.3365625,
+      "step": 396000
+    },
+    {
+      "epoch": 75.08984301115945,
+      "learning_rate": 1.2455078494420275e-05,
+      "loss": 1.3183125,
+      "step": 397000
+    },
+    {
+      "epoch": 75.27898619254776,
+      "learning_rate": 1.236050690372612e-05,
+      "loss": 1.309375,
+      "step": 398000
+    },
+    {
+      "epoch": 75.46812937393607,
+      "learning_rate": 1.2265935313031966e-05,
+      "loss": 1.31425,
+      "step": 399000
+    },
+    {
+      "epoch": 75.65727255532438,
+      "learning_rate": 1.217136372233781e-05,
+      "loss": 1.32575,
+      "step": 400000
+    },
+    {
+      "epoch": 75.84641573671269,
+      "learning_rate": 1.2076792131643656e-05,
+      "loss": 1.330375,
+      "step": 401000
+    },
+    {
+      "epoch": 76.035558918101,
+      "learning_rate": 1.1982220540949501e-05,
+      "loss": 1.3319375,
+      "step": 402000
+    },
+    {
+      "epoch": 76.22470209948932,
+      "learning_rate": 1.1887648950255344e-05,
+      "loss": 1.2995625,
+      "step": 403000
+    },
+    {
+      "epoch": 76.41384528087762,
+      "learning_rate": 1.179307735956119e-05,
+      "loss": 1.309,
+      "step": 404000
+    },
+    {
+      "epoch": 76.60298846226594,
+      "learning_rate": 1.1698505768867033e-05,
+      "loss": 1.3225,
+      "step": 405000
     }
   ],
   "max_steps": 528700,
   "num_train_epochs": 100,
-  "total_flos":
+  "total_flos": 619270336176979968,
   "trial_name": null,
   "trial_params": null
 }
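The updated state extends the training log from step 335,000 to 405,000 (epoch ~76.6, consistent with 528,700 max steps over 100 epochs, i.e. 5,287 optimizer steps per epoch). The logged learning rates match a linear decay to zero at max_steps; the implied initial rate of 5e-5 is an inference from the logged values, not a number stated in this commit. A minimal sketch that reproduces the logged schedule:

```python
# Minimal sketch: the learning rates in trainer_state.json follow a linear
# decay to zero at max_steps. INIT_LR = 5e-5 is inferred from the logs
# (lr * max_steps / (max_steps - step)), not stated in this commit.
MAX_STEPS = 528_700
INIT_LR = 5e-5

def lr(step: int) -> float:
    """Linearly decayed learning rate at a given optimizer step."""
    return INIT_LR * (MAX_STEPS - step) / MAX_STEPS

print(lr(335_000))  # ~1.8318517e-05, as logged at step 335000
print(lr(405_000))  # ~1.1698506e-05, as logged at step 405000
```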