Bingsu commited on
Commit
1910f3a
1 Parent(s): f1f4522

Training in progress, step 80000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eb55ec1aacbfeebe119e515b71d01fac6a80c1dc916333ed52358ff9464626e
3
  size 100172997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61757e92d02b06dda1da003da57fa0b18bc1cc2b413fc514841b017d0d63c3c8
3
  size 100172997
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e319b3e22d458ba27ff2a2eb8537fae27cd2f8bcba6cd5bc802fb4266dab1c01
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a90871ae24751a566fb99821bee5e29d062c303c164fcd6aeac08948cab240
3
  size 146774203
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b63f7cf635c5cd7e0a6a99be90b9c9040bc4b142713e70d6ed808fdd72cc930
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7978600af4170dc4592efcab1d33d1582d45b26dc998a10a280a81e23e422deb
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cce12b461956f7f82f9c60078f067ba7f5af96b281245752bc9e8d8eb78bb3a
3
  size 246899880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7331ea7e49edb5d8c1485934eca953ca913987924fdd220c26d2fc895357dc9
3
  size 246899880
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.30081650193382037,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2106,11 +2106,311 @@
2106
  "learning_rate": 0.00046983100340983056,
2107
  "loss": 3.2461,
2108
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2109
  }
2110
  ],
2111
  "max_steps": 500000,
2112
  "num_train_epochs": 3,
2113
- "total_flos": 1.1156809875456e+17,
2114
  "trial_name": null,
2115
  "trial_params": null
2116
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3437902879243661,
5
+ "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2106
  "learning_rate": 0.00046983100340983056,
2107
  "loss": 3.2461,
2108
  "step": 70000
2109
+ },
2110
+ {
2111
+ "epoch": 0.3,
2112
+ "learning_rate": 0.00047183105799939297,
2113
+ "loss": 3.2495,
2114
+ "step": 70200
2115
+ },
2116
+ {
2117
+ "epoch": 0.3,
2118
+ "learning_rate": 0.0004738319577694809,
2119
+ "loss": 3.2325,
2120
+ "step": 70400
2121
+ },
2122
+ {
2123
+ "epoch": 0.3,
2124
+ "learning_rate": 0.0004758336676119636,
2125
+ "loss": 3.2335,
2126
+ "step": 70600
2127
+ },
2128
+ {
2129
+ "epoch": 0.3,
2130
+ "learning_rate": 0.0004778361524044967,
2131
+ "loss": 3.2499,
2132
+ "step": 70800
2133
+ },
2134
+ {
2135
+ "epoch": 0.31,
2136
+ "learning_rate": 0.00047983937701113794,
2137
+ "loss": 3.2323,
2138
+ "step": 71000
2139
+ },
2140
+ {
2141
+ "epoch": 0.31,
2142
+ "learning_rate": 0.00048184330628296484,
2143
+ "loss": 3.221,
2144
+ "step": 71200
2145
+ },
2146
+ {
2147
+ "epoch": 0.31,
2148
+ "learning_rate": 0.0004838479050586898,
2149
+ "loss": 3.2344,
2150
+ "step": 71400
2151
+ },
2152
+ {
2153
+ "epoch": 0.31,
2154
+ "learning_rate": 0.0004858531381652792,
2155
+ "loss": 3.241,
2156
+ "step": 71600
2157
+ },
2158
+ {
2159
+ "epoch": 0.31,
2160
+ "learning_rate": 0.0004878589704185682,
2161
+ "loss": 3.2434,
2162
+ "step": 71800
2163
+ },
2164
+ {
2165
+ "epoch": 0.31,
2166
+ "learning_rate": 0.00048986536662388,
2167
+ "loss": 3.2375,
2168
+ "step": 72000
2169
+ },
2170
+ {
2171
+ "epoch": 0.31,
2172
+ "learning_rate": 0.0004918722915766426,
2173
+ "loss": 3.2237,
2174
+ "step": 72200
2175
+ },
2176
+ {
2177
+ "epoch": 0.31,
2178
+ "learning_rate": 0.0004938797100630057,
2179
+ "loss": 3.2665,
2180
+ "step": 72400
2181
+ },
2182
+ {
2183
+ "epoch": 0.31,
2184
+ "learning_rate": 0.0004958875868604602,
2185
+ "loss": 3.218,
2186
+ "step": 72600
2187
+ },
2188
+ {
2189
+ "epoch": 0.31,
2190
+ "learning_rate": 0.0004978958867384551,
2191
+ "loss": 3.2474,
2192
+ "step": 72800
2193
+ },
2194
+ {
2195
+ "epoch": 0.31,
2196
+ "learning_rate": 0.000499904574459016,
2197
+ "loss": 3.2423,
2198
+ "step": 73000
2199
+ },
2200
+ {
2201
+ "epoch": 0.31,
2202
+ "learning_rate": 0.000501913614777363,
2203
+ "loss": 3.2288,
2204
+ "step": 73200
2205
+ },
2206
+ {
2207
+ "epoch": 0.32,
2208
+ "learning_rate": 0.0005039229724425303,
2209
+ "loss": 3.2148,
2210
+ "step": 73400
2211
+ },
2212
+ {
2213
+ "epoch": 0.32,
2214
+ "learning_rate": 0.0005059326121979831,
2215
+ "loss": 3.2383,
2216
+ "step": 73600
2217
+ },
2218
+ {
2219
+ "epoch": 0.32,
2220
+ "learning_rate": 0.0005079424987822374,
2221
+ "loss": 3.236,
2222
+ "step": 73800
2223
+ },
2224
+ {
2225
+ "epoch": 0.32,
2226
+ "learning_rate": 0.0005099525969294778,
2227
+ "loss": 3.2231,
2228
+ "step": 74000
2229
+ },
2230
+ {
2231
+ "epoch": 0.32,
2232
+ "learning_rate": 0.0005119628713701773,
2233
+ "loss": 3.2215,
2234
+ "step": 74200
2235
+ },
2236
+ {
2237
+ "epoch": 0.32,
2238
+ "learning_rate": 0.0005139732868317155,
2239
+ "loss": 3.227,
2240
+ "step": 74400
2241
+ },
2242
+ {
2243
+ "epoch": 0.32,
2244
+ "learning_rate": 0.0005159838080389977,
2245
+ "loss": 3.2006,
2246
+ "step": 74600
2247
+ },
2248
+ {
2249
+ "epoch": 0.32,
2250
+ "learning_rate": 0.0005179943997150736,
2251
+ "loss": 3.2426,
2252
+ "step": 74800
2253
+ },
2254
+ {
2255
+ "epoch": 0.32,
2256
+ "learning_rate": 0.0005200050265817561,
2257
+ "loss": 3.2194,
2258
+ "step": 75000
2259
+ },
2260
+ {
2261
+ "epoch": 0.32,
2262
+ "learning_rate": 0.0005220156533602416,
2263
+ "loss": 3.223,
2264
+ "step": 75200
2265
+ },
2266
+ {
2267
+ "epoch": 0.32,
2268
+ "learning_rate": 0.0005240262447717271,
2269
+ "loss": 3.224,
2270
+ "step": 75400
2271
+ },
2272
+ {
2273
+ "epoch": 0.32,
2274
+ "learning_rate": 0.0005260367655380307,
2275
+ "loss": 3.2138,
2276
+ "step": 75600
2277
+ },
2278
+ {
2279
+ "epoch": 0.33,
2280
+ "learning_rate": 0.0005280471803822096,
2281
+ "loss": 3.2123,
2282
+ "step": 75800
2283
+ },
2284
+ {
2285
+ "epoch": 0.33,
2286
+ "learning_rate": 0.0005300574540291803,
2287
+ "loss": 3.2208,
2288
+ "step": 76000
2289
+ },
2290
+ {
2291
+ "epoch": 0.33,
2292
+ "learning_rate": 0.000532067551206336,
2293
+ "loss": 3.2199,
2294
+ "step": 76200
2295
+ },
2296
+ {
2297
+ "epoch": 0.33,
2298
+ "learning_rate": 0.0005340774366441665,
2299
+ "loss": 3.2367,
2300
+ "step": 76400
2301
+ },
2302
+ {
2303
+ "epoch": 0.33,
2304
+ "learning_rate": 0.0005360870750768769,
2305
+ "loss": 3.21,
2306
+ "step": 76600
2307
+ },
2308
+ {
2309
+ "epoch": 0.33,
2310
+ "learning_rate": 0.0005380964312430063,
2311
+ "loss": 3.2221,
2312
+ "step": 76800
2313
+ },
2314
+ {
2315
+ "epoch": 0.33,
2316
+ "learning_rate": 0.0005401054698860466,
2317
+ "loss": 3.2021,
2318
+ "step": 77000
2319
+ },
2320
+ {
2321
+ "epoch": 0.33,
2322
+ "learning_rate": 0.0005421141557550603,
2323
+ "loss": 3.2352,
2324
+ "step": 77200
2325
+ },
2326
+ {
2327
+ "epoch": 0.33,
2328
+ "learning_rate": 0.0005441224536053012,
2329
+ "loss": 3.1962,
2330
+ "step": 77400
2331
+ },
2332
+ {
2333
+ "epoch": 0.33,
2334
+ "learning_rate": 0.0005461303281988298,
2335
+ "loss": 3.2127,
2336
+ "step": 77600
2337
+ },
2338
+ {
2339
+ "epoch": 0.33,
2340
+ "learning_rate": 0.000548137744305134,
2341
+ "loss": 3.2307,
2342
+ "step": 77800
2343
+ },
2344
+ {
2345
+ "epoch": 0.34,
2346
+ "learning_rate": 0.0005501446667017461,
2347
+ "loss": 3.2053,
2348
+ "step": 78000
2349
+ },
2350
+ {
2351
+ "epoch": 0.34,
2352
+ "learning_rate": 0.0005521510601748613,
2353
+ "loss": 3.1936,
2354
+ "step": 78200
2355
+ },
2356
+ {
2357
+ "epoch": 0.34,
2358
+ "learning_rate": 0.0005541568895199552,
2359
+ "loss": 3.2019,
2360
+ "step": 78400
2361
+ },
2362
+ {
2363
+ "epoch": 0.34,
2364
+ "learning_rate": 0.0005561621195424016,
2365
+ "loss": 3.203,
2366
+ "step": 78600
2367
+ },
2368
+ {
2369
+ "epoch": 0.34,
2370
+ "learning_rate": 0.0005581667150580907,
2371
+ "loss": 3.2125,
2372
+ "step": 78800
2373
+ },
2374
+ {
2375
+ "epoch": 0.34,
2376
+ "learning_rate": 0.0005601706408940451,
2377
+ "loss": 3.2205,
2378
+ "step": 79000
2379
+ },
2380
+ {
2381
+ "epoch": 0.34,
2382
+ "learning_rate": 0.0005621738618890382,
2383
+ "loss": 3.2215,
2384
+ "step": 79200
2385
+ },
2386
+ {
2387
+ "epoch": 0.34,
2388
+ "learning_rate": 0.0005641763428942106,
2389
+ "loss": 3.2052,
2390
+ "step": 79400
2391
+ },
2392
+ {
2393
+ "epoch": 0.34,
2394
+ "learning_rate": 0.0005661780487736866,
2395
+ "loss": 3.2249,
2396
+ "step": 79600
2397
+ },
2398
+ {
2399
+ "epoch": 0.34,
2400
+ "learning_rate": 0.0005681789444051913,
2401
+ "loss": 3.1952,
2402
+ "step": 79800
2403
+ },
2404
+ {
2405
+ "epoch": 0.34,
2406
+ "learning_rate": 0.0005701789946806666,
2407
+ "loss": 3.1995,
2408
+ "step": 80000
2409
  }
2410
  ],
2411
  "max_steps": 500000,
2412
  "num_train_epochs": 3,
2413
+ "total_flos": 1.2750639857664e+17,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e319b3e22d458ba27ff2a2eb8537fae27cd2f8bcba6cd5bc802fb4266dab1c01
3
  size 146774203
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a90871ae24751a566fb99821bee5e29d062c303c164fcd6aeac08948cab240
3
  size 146774203