GlycerinLOL committed on
Commit
e570d6b
1 Parent(s): b7e6ae0

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +116 -22
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 15.99,
3
- "train_loss": 0.8767006197992594,
4
- "train_runtime": 37037.5595,
5
- "train_samples_per_second": 43.199,
6
- "train_steps_per_second": 0.3
7
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 0.2986434628709249,
4
+ "train_runtime": 16684.611,
5
+ "train_samples_per_second": 119.871,
6
+ "train_steps_per_second": 0.832
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 15.99,
3
- "train_loss": 0.8767006197992594,
4
- "train_runtime": 37037.5595,
5
- "train_samples_per_second": 43.199,
6
- "train_steps_per_second": 0.3
7
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 0.2986434628709249,
4
+ "train_runtime": 16684.611,
5
+ "train_samples_per_second": 119.871,
6
+ "train_steps_per_second": 0.832
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.992800575953924,
5
  "eval_steps": 500,
6
- "global_step": 11104,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -365,37 +365,131 @@
365
  "step": 11000
366
  },
367
  {
368
- "epoch": 15.99,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  "eval_f1": 0.9134,
370
- "eval_gen_len": 25.96290909090909,
371
- "eval_loss": 1.4468724727630615,
372
  "eval_precision": 0.9133,
373
  "eval_recall": 0.9138,
374
- "eval_rouge1": 0.4939,
375
- "eval_rouge2": 0.2453,
376
- "eval_rougeL": 0.4133,
377
- "eval_rougeLsum": 0.4134,
378
- "eval_runtime": 864.4194,
379
- "eval_samples_per_second": 6.363,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  "eval_steps_per_second": 0.398,
381
- "step": 11104
382
  },
383
  {
384
- "epoch": 15.99,
385
- "step": 11104,
386
- "total_flos": 2.2405705733792072e+18,
387
- "train_loss": 0.8767006197992594,
388
- "train_runtime": 37037.5595,
389
- "train_samples_per_second": 43.199,
390
- "train_steps_per_second": 0.3
391
  }
392
  ],
393
  "logging_steps": 500,
394
- "max_steps": 11104,
395
  "num_input_tokens_seen": 0,
396
- "num_train_epochs": 16,
397
  "save_steps": 500,
398
- "total_flos": 2.2405705733792072e+18,
399
  "train_batch_size": 24,
400
  "trial_name": null,
401
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.99640028797696,
5
  "eval_steps": 500,
6
+ "global_step": 13880,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
365
  "step": 11000
366
  },
367
  {
368
+ "epoch": 16.0,
369
+ "eval_f1": 0.9133,
370
+ "eval_gen_len": 26.034545454545455,
371
+ "eval_loss": 1.4474281072616577,
372
+ "eval_precision": 0.9131,
373
+ "eval_recall": 0.9139,
374
+ "eval_rouge1": 0.4942,
375
+ "eval_rouge2": 0.2456,
376
+ "eval_rougeL": 0.4133,
377
+ "eval_rougeLsum": 0.4134,
378
+ "eval_runtime": 875.1275,
379
+ "eval_samples_per_second": 6.285,
380
+ "eval_steps_per_second": 0.393,
381
+ "step": 11104
382
+ },
383
+ {
384
+ "epoch": 16.57,
385
+ "learning_rate": 3.4293948126801158e-06,
386
+ "loss": 1.4441,
387
+ "step": 11500
388
+ },
389
+ {
390
+ "epoch": 17.0,
391
  "eval_f1": 0.9134,
392
+ "eval_gen_len": 25.939090909090908,
393
+ "eval_loss": 1.4446682929992676,
394
  "eval_precision": 0.9133,
395
  "eval_recall": 0.9138,
396
+ "eval_rouge1": 0.4945,
397
+ "eval_rouge2": 0.2457,
398
+ "eval_rougeL": 0.4139,
399
+ "eval_rougeLsum": 0.414,
400
+ "eval_runtime": 853.4658,
401
+ "eval_samples_per_second": 6.444,
402
+ "eval_steps_per_second": 0.403,
403
+ "step": 11799
404
+ },
405
+ {
406
+ "epoch": 17.29,
407
+ "learning_rate": 2.708933717579251e-06,
408
+ "loss": 1.444,
409
+ "step": 12000
410
+ },
411
+ {
412
+ "epoch": 18.0,
413
+ "eval_f1": 0.9135,
414
+ "eval_gen_len": 26.010727272727273,
415
+ "eval_loss": 1.4445807933807373,
416
+ "eval_precision": 0.9133,
417
+ "eval_recall": 0.9141,
418
+ "eval_rouge1": 0.4957,
419
+ "eval_rouge2": 0.2473,
420
+ "eval_rougeL": 0.415,
421
+ "eval_rougeLsum": 0.4151,
422
+ "eval_runtime": 869.7396,
423
+ "eval_samples_per_second": 6.324,
424
+ "eval_steps_per_second": 0.396,
425
+ "step": 12493
426
+ },
427
+ {
428
+ "epoch": 18.01,
429
+ "learning_rate": 1.988472622478386e-06,
430
+ "loss": 1.4378,
431
+ "step": 12500
432
+ },
433
+ {
434
+ "epoch": 18.73,
435
+ "learning_rate": 1.2680115273775217e-06,
436
+ "loss": 1.4375,
437
+ "step": 13000
438
+ },
439
+ {
440
+ "epoch": 19.0,
441
+ "eval_f1": 0.9136,
442
+ "eval_gen_len": 25.88690909090909,
443
+ "eval_loss": 1.4433233737945557,
444
+ "eval_precision": 0.9136,
445
+ "eval_recall": 0.914,
446
+ "eval_rouge1": 0.4961,
447
+ "eval_rouge2": 0.2473,
448
+ "eval_rougeL": 0.4153,
449
+ "eval_rougeLsum": 0.4153,
450
+ "eval_runtime": 854.4011,
451
+ "eval_samples_per_second": 6.437,
452
+ "eval_steps_per_second": 0.403,
453
+ "step": 13188
454
+ },
455
+ {
456
+ "epoch": 19.45,
457
+ "learning_rate": 5.475504322766571e-07,
458
+ "loss": 1.4361,
459
+ "step": 13500
460
+ },
461
+ {
462
+ "epoch": 20.0,
463
+ "eval_f1": 0.9137,
464
+ "eval_gen_len": 25.862909090909092,
465
+ "eval_loss": 1.4432713985443115,
466
+ "eval_precision": 0.9136,
467
+ "eval_recall": 0.914,
468
+ "eval_rouge1": 0.4961,
469
+ "eval_rouge2": 0.2476,
470
+ "eval_rougeL": 0.4155,
471
+ "eval_rougeLsum": 0.4154,
472
+ "eval_runtime": 863.7254,
473
+ "eval_samples_per_second": 6.368,
474
  "eval_steps_per_second": 0.398,
475
+ "step": 13880
476
  },
477
  {
478
+ "epoch": 20.0,
479
+ "step": 13880,
480
+ "total_flos": 2.818047373345161e+18,
481
+ "train_loss": 0.2986434628709249,
482
+ "train_runtime": 16684.611,
483
+ "train_samples_per_second": 119.871,
484
+ "train_steps_per_second": 0.832
485
  }
486
  ],
487
  "logging_steps": 500,
488
+ "max_steps": 13880,
489
  "num_input_tokens_seen": 0,
490
+ "num_train_epochs": 20,
491
  "save_steps": 500,
492
+ "total_flos": 2.818047373345161e+18,
493
  "train_batch_size": 24,
494
  "trial_name": null,
495
  "trial_params": null