kthumar commited on
Commit
24eebbf
·
1 Parent(s): 9e7e52b

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a36b8300d1efed86c6cc8fb68ca4b8652d3bf07567b89b6c33af4b5f60f5304
3
  size 768843213
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7b974441388e888eea61e7fc83f2d3e64adf3ed37f00d4fb6d4a12ace19f6a
3
  size 768843213
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:667fc042b1654ba63be98142339f82e2d9ced5a682e237a00651e87d9529cdde
3
  size 384848389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd8a34ddf7e38c5080236a7687e8e63c25b944dd9f3f12d8b4325f57006b1cd
3
  size 384848389
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04fd57acfc1796a4a73b883b65812353a1d9ef39b1a9eca84abd70bc2800b465
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1a48af60df70a0fc5925076ff66d31b72fa2e72742c28160990b6bda9ee1664
3
+ size 14567
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7edce77d6edc3d0cefec8d66d191dbe596742f200664a33c2d45656329643a47
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0242c88e90c1ded6be60df7d6726df86d88437c186b98d9835e9f7ca644d21
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52f07aaae1ee1a8d1345b09c237d52be9808364382f9ab7825d7d6baa44aac78
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f185139787c577b5818236290c3f10307b8aa66ac5cf8699f48ca0065a085bfb
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8932559178204555,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -136,11 +136,141 @@
136
  "eval_samples_per_second": 13.043,
137
  "eval_steps_per_second": 0.816,
138
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "max_steps": 55975,
142
  "num_train_epochs": 5,
143
- "total_flos": 1.084747677696e+16,
144
  "trial_name": null,
145
  "trial_params": null
146
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.786511835640911,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
136
  "eval_samples_per_second": 13.043,
137
  "eval_steps_per_second": 0.816,
138
  "step": 10000
139
+ },
140
+ {
141
+ "epoch": 0.94,
142
+ "learning_rate": 8.126328955597249e-05,
143
+ "loss": 0.9619,
144
+ "step": 10500
145
+ },
146
+ {
147
+ "epoch": 0.98,
148
+ "learning_rate": 8.036987402841062e-05,
149
+ "loss": 0.9549,
150
+ "step": 11000
151
+ },
152
+ {
153
+ "epoch": 1.03,
154
+ "learning_rate": 7.947645850084875e-05,
155
+ "loss": 0.9069,
156
+ "step": 11500
157
+ },
158
+ {
159
+ "epoch": 1.07,
160
+ "learning_rate": 7.858304297328688e-05,
161
+ "loss": 0.8864,
162
+ "step": 12000
163
+ },
164
+ {
165
+ "epoch": 1.12,
166
+ "learning_rate": 7.768962744572501e-05,
167
+ "loss": 0.8692,
168
+ "step": 12500
169
+ },
170
+ {
171
+ "epoch": 1.16,
172
+ "learning_rate": 7.679799874921827e-05,
173
+ "loss": 0.8603,
174
+ "step": 13000
175
+ },
176
+ {
177
+ "epoch": 1.21,
178
+ "learning_rate": 7.59045832216564e-05,
179
+ "loss": 0.8496,
180
+ "step": 13500
181
+ },
182
+ {
183
+ "epoch": 1.25,
184
+ "learning_rate": 7.501116769409454e-05,
185
+ "loss": 0.8412,
186
+ "step": 14000
187
+ },
188
+ {
189
+ "epoch": 1.3,
190
+ "learning_rate": 7.411775216653267e-05,
191
+ "loss": 0.8436,
192
+ "step": 14500
193
+ },
194
+ {
195
+ "epoch": 1.34,
196
+ "learning_rate": 7.32243366389708e-05,
197
+ "loss": 0.8297,
198
+ "step": 15000
199
+ },
200
+ {
201
+ "epoch": 1.38,
202
+ "learning_rate": 7.233092111140893e-05,
203
+ "loss": 0.811,
204
+ "step": 15500
205
+ },
206
+ {
207
+ "epoch": 1.43,
208
+ "learning_rate": 7.143750558384706e-05,
209
+ "loss": 0.8057,
210
+ "step": 16000
211
+ },
212
+ {
213
+ "epoch": 1.47,
214
+ "learning_rate": 7.05458768873403e-05,
215
+ "loss": 0.8113,
216
+ "step": 16500
217
+ },
218
+ {
219
+ "epoch": 1.52,
220
+ "learning_rate": 6.965246135977843e-05,
221
+ "loss": 0.8007,
222
+ "step": 17000
223
+ },
224
+ {
225
+ "epoch": 1.56,
226
+ "learning_rate": 6.875904583221656e-05,
227
+ "loss": 0.7831,
228
+ "step": 17500
229
+ },
230
+ {
231
+ "epoch": 1.61,
232
+ "learning_rate": 6.786563030465469e-05,
233
+ "loss": 0.7916,
234
+ "step": 18000
235
+ },
236
+ {
237
+ "epoch": 1.65,
238
+ "learning_rate": 6.697221477709283e-05,
239
+ "loss": 0.7687,
240
+ "step": 18500
241
+ },
242
+ {
243
+ "epoch": 1.7,
244
+ "learning_rate": 6.607879924953097e-05,
245
+ "loss": 0.7655,
246
+ "step": 19000
247
+ },
248
+ {
249
+ "epoch": 1.74,
250
+ "learning_rate": 6.51853837219691e-05,
251
+ "loss": 0.7576,
252
+ "step": 19500
253
+ },
254
+ {
255
+ "epoch": 1.79,
256
+ "learning_rate": 6.429196819440723e-05,
257
+ "loss": 0.7576,
258
+ "step": 20000
259
+ },
260
+ {
261
+ "epoch": 1.79,
262
+ "eval_bleu": 22.8022,
263
+ "eval_gen_len": 43.8515,
264
+ "eval_loss": 0.833698570728302,
265
+ "eval_runtime": 578.6578,
266
+ "eval_samples_per_second": 15.432,
267
+ "eval_steps_per_second": 0.966,
268
+ "step": 20000
269
  }
270
  ],
271
  "max_steps": 55975,
272
  "num_train_epochs": 5,
273
+ "total_flos": 2.1694343383351296e+16,
274
  "trial_name": null,
275
  "trial_params": null
276
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:667fc042b1654ba63be98142339f82e2d9ced5a682e237a00651e87d9529cdde
3
  size 384848389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd8a34ddf7e38c5080236a7687e8e63c25b944dd9f3f12d8b4325f57006b1cd
3
  size 384848389
runs/Apr02_22-09-40_2ea1649bbc44/events.out.tfevents.1680473399.2ea1649bbc44.981.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2443cae59216eaf5f4a0e87e066b8f9a5fccbfa06c2217efd71f4a53e8022ca
3
- size 7685
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8aee400640f7abb63237ec03c78a54989a6e18f43aa762814fde4984193e3b0
3
+ size 11226