kthumar commited on
Commit
0075f5d
·
1 Parent(s): 24eebbf

Training in progress, step 30000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec7b974441388e888eea61e7fc83f2d3e64adf3ed37f00d4fb6d4a12ace19f6a
3
  size 768843213
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4afd9e96e30c55033c81d663e123112d0e4b940b7f531817870755548ffc840
3
  size 768843213
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd8a34ddf7e38c5080236a7687e8e63c25b944dd9f3f12d8b4325f57006b1cd
3
  size 384848389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4a64d6d78c9e8abfcc625bc8b4d39d5db180f7cd080685979599555a6e56ae
3
  size 384848389
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1a48af60df70a0fc5925076ff66d31b72fa2e72742c28160990b6bda9ee1664
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5623ef7e3da407c88d16106413c2f65b14dd4d4f8687ed1b55a66b25ce1d7ead
3
+ size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b0242c88e90c1ded6be60df7d6726df86d88437c186b98d9835e9f7ca644d21
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76cc3bd636dff666d4435d9a1314227eb29c1052699611f59040a24a461e1e87
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f185139787c577b5818236290c3f10307b8aa66ac5cf8699f48ca0065a085bfb
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a35f16b3d3457db09b7df4283c2d03aee6146b79859cc29b3b18a6fd7d571b9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.786511835640911,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -266,11 +266,141 @@
266
  "eval_samples_per_second": 15.432,
267
  "eval_steps_per_second": 0.966,
268
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  }
270
  ],
271
  "max_steps": 55975,
272
  "num_train_epochs": 5,
273
- "total_flos": 2.1694343383351296e+16,
274
  "trial_name": null,
275
  "trial_params": null
276
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6797677534613666,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
266
  "eval_samples_per_second": 15.432,
267
  "eval_steps_per_second": 0.966,
268
  "step": 20000
269
+ },
270
+ {
271
+ "epoch": 1.83,
272
+ "learning_rate": 6.339855266684536e-05,
273
+ "loss": 0.7469,
274
+ "step": 20500
275
+ },
276
+ {
277
+ "epoch": 1.88,
278
+ "learning_rate": 6.250692397033861e-05,
279
+ "loss": 0.7497,
280
+ "step": 21000
281
+ },
282
+ {
283
+ "epoch": 1.92,
284
+ "learning_rate": 6.161350844277674e-05,
285
+ "loss": 0.7361,
286
+ "step": 21500
287
+ },
288
+ {
289
+ "epoch": 1.97,
290
+ "learning_rate": 6.0720092915214864e-05,
291
+ "loss": 0.7278,
292
+ "step": 22000
293
+ },
294
+ {
295
+ "epoch": 2.01,
296
+ "learning_rate": 5.9826677387652995e-05,
297
+ "loss": 0.7232,
298
+ "step": 22500
299
+ },
300
+ {
301
+ "epoch": 2.05,
302
+ "learning_rate": 5.8933261860091126e-05,
303
+ "loss": 0.6626,
304
+ "step": 23000
305
+ },
306
+ {
307
+ "epoch": 2.1,
308
+ "learning_rate": 5.8041633163584383e-05,
309
+ "loss": 0.672,
310
+ "step": 23500
311
+ },
312
+ {
313
+ "epoch": 2.14,
314
+ "learning_rate": 5.7148217636022514e-05,
315
+ "loss": 0.6616,
316
+ "step": 24000
317
+ },
318
+ {
319
+ "epoch": 2.19,
320
+ "learning_rate": 5.6254802108460645e-05,
321
+ "loss": 0.6631,
322
+ "step": 24500
323
+ },
324
+ {
325
+ "epoch": 2.23,
326
+ "learning_rate": 5.5361386580898776e-05,
327
+ "loss": 0.6686,
328
+ "step": 25000
329
+ },
330
+ {
331
+ "epoch": 2.28,
332
+ "learning_rate": 5.446797105333691e-05,
333
+ "loss": 0.6583,
334
+ "step": 25500
335
+ },
336
+ {
337
+ "epoch": 2.32,
338
+ "learning_rate": 5.3576342356830165e-05,
339
+ "loss": 0.6431,
340
+ "step": 26000
341
+ },
342
+ {
343
+ "epoch": 2.37,
344
+ "learning_rate": 5.2682926829268296e-05,
345
+ "loss": 0.6527,
346
+ "step": 26500
347
+ },
348
+ {
349
+ "epoch": 2.41,
350
+ "learning_rate": 5.178951130170643e-05,
351
+ "loss": 0.6501,
352
+ "step": 27000
353
+ },
354
+ {
355
+ "epoch": 2.46,
356
+ "learning_rate": 5.089609577414456e-05,
357
+ "loss": 0.6493,
358
+ "step": 27500
359
+ },
360
+ {
361
+ "epoch": 2.5,
362
+ "learning_rate": 5.000446707763781e-05,
363
+ "loss": 0.6406,
364
+ "step": 28000
365
+ },
366
+ {
367
+ "epoch": 2.55,
368
+ "learning_rate": 4.911105155007594e-05,
369
+ "loss": 0.6358,
370
+ "step": 28500
371
+ },
372
+ {
373
+ "epoch": 2.59,
374
+ "learning_rate": 4.821763602251407e-05,
375
+ "loss": 0.649,
376
+ "step": 29000
377
+ },
378
+ {
379
+ "epoch": 2.64,
380
+ "learning_rate": 4.73242204949522e-05,
381
+ "loss": 0.6325,
382
+ "step": 29500
383
+ },
384
+ {
385
+ "epoch": 2.68,
386
+ "learning_rate": 4.643080496739033e-05,
387
+ "loss": 0.6385,
388
+ "step": 30000
389
+ },
390
+ {
391
+ "epoch": 2.68,
392
+ "eval_bleu": 25.9878,
393
+ "eval_gen_len": 43.5751,
394
+ "eval_loss": 0.7514618635177612,
395
+ "eval_runtime": 558.5823,
396
+ "eval_samples_per_second": 15.987,
397
+ "eval_steps_per_second": 1.001,
398
+ "step": 30000
399
  }
400
  ],
401
  "max_steps": 55975,
402
  "num_train_epochs": 5,
403
+ "total_flos": 3.254120998974259e+16,
404
  "trial_name": null,
405
  "trial_params": null
406
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd8a34ddf7e38c5080236a7687e8e63c25b944dd9f3f12d8b4325f57006b1cd
3
  size 384848389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4a64d6d78c9e8abfcc625bc8b4d39d5db180f7cd080685979599555a6e56ae
3
  size 384848389
runs/Apr02_22-09-40_2ea1649bbc44/events.out.tfevents.1680473399.2ea1649bbc44.981.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8aee400640f7abb63237ec03c78a54989a6e18f43aa762814fde4984193e3b0
3
- size 11226
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e3d31ad4549eee77407d5cc929647b4822156841d65e92f5b1d81339bf4a79
3
+ size 14803