polejowska committed on
Commit
5e3a331
1 Parent(s): 81d27ca

End of training

Browse files
runs/Feb26_21-20-11_952abec28ae3/events.out.tfevents.1677446416.952abec28ae3.204.16 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5773efbe8444e97cdb238cd3c6f28fb766aaedacd0dc8d5f54f15d165e5c625
3
- size 6618
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ff3d76639c247548a4c32b73959bb298a318234473af15b449cf8b7ede5ace
3
+ size 6972
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 35.0,
5
- "global_step": 3290,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -498,18 +498,158 @@
498
  "step": 3290
499
  },
500
  {
501
- "epoch": 35.0,
502
- "step": 3290,
503
- "total_flos": 1.25446763232e+19,
504
- "train_loss": 0.31171516024233,
505
- "train_runtime": 1243.3871,
506
- "train_samples_per_second": 21.112,
507
- "train_steps_per_second": 2.646
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  }
509
  ],
510
- "max_steps": 3290,
511
- "num_train_epochs": 35,
512
- "total_flos": 1.25446763232e+19,
513
  "trial_name": null,
514
  "trial_params": null
515
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 45.0,
5
+ "global_step": 4230,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
498
  "step": 3290
499
  },
500
  {
501
+ "epoch": 36.0,
502
+ "learning_rate": 1e-05,
503
+ "loss": 2.1715,
504
+ "step": 3384
505
+ },
506
+ {
507
+ "epoch": 36.0,
508
+ "eval_loss": 1.7286556959152222,
509
+ "eval_runtime": 37.366,
510
+ "eval_samples_per_second": 5.352,
511
+ "eval_steps_per_second": 0.669,
512
+ "step": 3384
513
+ },
514
+ {
515
+ "epoch": 37.0,
516
+ "learning_rate": 7.5e-06,
517
+ "loss": 2.2125,
518
+ "step": 3478
519
+ },
520
+ {
521
+ "epoch": 37.0,
522
+ "eval_loss": 1.6994493007659912,
523
+ "eval_runtime": 36.8702,
524
+ "eval_samples_per_second": 5.424,
525
+ "eval_steps_per_second": 0.678,
526
+ "step": 3478
527
+ },
528
+ {
529
+ "epoch": 38.0,
530
+ "learning_rate": 5e-06,
531
+ "loss": 2.2032,
532
+ "step": 3572
533
+ },
534
+ {
535
+ "epoch": 38.0,
536
+ "eval_loss": 1.6896188259124756,
537
+ "eval_runtime": 37.5355,
538
+ "eval_samples_per_second": 5.328,
539
+ "eval_steps_per_second": 0.666,
540
+ "step": 3572
541
+ },
542
+ {
543
+ "epoch": 39.0,
544
+ "learning_rate": 2.5e-06,
545
+ "loss": 2.21,
546
+ "step": 3666
547
+ },
548
+ {
549
+ "epoch": 39.0,
550
+ "eval_loss": 1.6792980432510376,
551
+ "eval_runtime": 37.2056,
552
+ "eval_samples_per_second": 5.376,
553
+ "eval_steps_per_second": 0.672,
554
+ "step": 3666
555
+ },
556
+ {
557
+ "epoch": 40.0,
558
+ "learning_rate": 0.0,
559
+ "loss": 2.1837,
560
+ "step": 3760
561
+ },
562
+ {
563
+ "epoch": 40.0,
564
+ "eval_loss": 1.6746587753295898,
565
+ "eval_runtime": 37.0982,
566
+ "eval_samples_per_second": 5.391,
567
+ "eval_steps_per_second": 0.674,
568
+ "step": 3760
569
+ },
570
+ {
571
+ "epoch": 41.0,
572
+ "learning_rate": 8.88888888888889e-06,
573
+ "loss": 2.2136,
574
+ "step": 3854
575
+ },
576
+ {
577
+ "epoch": 41.0,
578
+ "eval_loss": 1.6727588176727295,
579
+ "eval_runtime": 36.6175,
580
+ "eval_samples_per_second": 5.462,
581
+ "eval_steps_per_second": 0.683,
582
+ "step": 3854
583
+ },
584
+ {
585
+ "epoch": 42.0,
586
+ "learning_rate": 6.666666666666667e-06,
587
+ "loss": 2.1825,
588
+ "step": 3948
589
+ },
590
+ {
591
+ "epoch": 42.0,
592
+ "eval_loss": 1.6641244888305664,
593
+ "eval_runtime": 36.4688,
594
+ "eval_samples_per_second": 5.484,
595
+ "eval_steps_per_second": 0.686,
596
+ "step": 3948
597
+ },
598
+ {
599
+ "epoch": 43.0,
600
+ "learning_rate": 4.444444444444445e-06,
601
+ "loss": 2.1419,
602
+ "step": 4042
603
+ },
604
+ {
605
+ "epoch": 43.0,
606
+ "eval_loss": 1.6828693151474,
607
+ "eval_runtime": 36.8137,
608
+ "eval_samples_per_second": 5.433,
609
+ "eval_steps_per_second": 0.679,
610
+ "step": 4042
611
+ },
612
+ {
613
+ "epoch": 44.0,
614
+ "learning_rate": 2.2222222222222225e-06,
615
+ "loss": 2.1695,
616
+ "step": 4136
617
+ },
618
+ {
619
+ "epoch": 44.0,
620
+ "eval_loss": 1.6625133752822876,
621
+ "eval_runtime": 37.3227,
622
+ "eval_samples_per_second": 5.359,
623
+ "eval_steps_per_second": 0.67,
624
+ "step": 4136
625
+ },
626
+ {
627
+ "epoch": 45.0,
628
+ "learning_rate": 0.0,
629
+ "loss": 2.1478,
630
+ "step": 4230
631
+ },
632
+ {
633
+ "epoch": 45.0,
634
+ "eval_loss": 1.667972445487976,
635
+ "eval_runtime": 37.5388,
636
+ "eval_samples_per_second": 5.328,
637
+ "eval_steps_per_second": 0.666,
638
+ "step": 4230
639
+ },
640
+ {
641
+ "epoch": 45.0,
642
+ "step": 4230,
643
+ "total_flos": 1.61288695584e+19,
644
+ "train_loss": 0.2412280865031213,
645
+ "train_runtime": 1234.9371,
646
+ "train_samples_per_second": 27.329,
647
+ "train_steps_per_second": 3.425
648
  }
649
  ],
650
+ "max_steps": 4230,
651
+ "num_train_epochs": 45,
652
+ "total_flos": 1.61288695584e+19,
653
  "trial_name": null,
654
  "trial_params": null
655
  }