polejowska committed on
Commit
d7d58c1
1 Parent(s): 43a197a

End of training

Browse files
Files changed (3) hide show
  1. README.md +104 -54
  2. model.safetensors +1 -1
  3. trainer_state.json +761 -61
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [microsoft/conditional-detr-resnet-50](https://huggingface.co/microsoft/conditional-detr-resnet-50) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 2.8303
19
 
20
  ## Model description
21
 
@@ -40,63 +40,113 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 50
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
47
 
48
- | Training Loss | Epoch | Step | Validation Loss |
49
- |:-------------:|:-----:|:----:|:---------------:|
50
- | 5.4149 | 1.0 | 115 | 4.3974 |
51
- | 3.9453 | 2.0 | 230 | 3.6520 |
52
- | 3.7269 | 3.0 | 345 | 3.7602 |
53
- | 3.5898 | 4.0 | 460 | 3.5671 |
54
- | 3.486 | 5.0 | 575 | 3.4912 |
55
- | 3.4073 | 6.0 | 690 | 3.4095 |
56
- | 3.4181 | 7.0 | 805 | 3.3183 |
57
- | 3.3603 | 8.0 | 920 | 3.1111 |
58
- | 3.2777 | 9.0 | 1035 | 3.1992 |
59
- | 3.2851 | 10.0 | 1150 | 3.3997 |
60
- | 3.266 | 11.0 | 1265 | 3.2861 |
61
- | 3.2803 | 12.0 | 1380 | 3.1813 |
62
- | 3.1733 | 13.0 | 1495 | 2.9838 |
63
- | 3.2094 | 14.0 | 1610 | 3.1175 |
64
- | 3.1718 | 15.0 | 1725 | 3.0064 |
65
- | 3.1303 | 16.0 | 1840 | 3.0869 |
66
- | 3.0897 | 17.0 | 1955 | 3.0306 |
67
- | 3.0233 | 18.0 | 2070 | 2.9479 |
68
- | 3.0156 | 19.0 | 2185 | 2.9145 |
69
- | 3.0277 | 20.0 | 2300 | 2.8919 |
70
- | 3.0847 | 21.0 | 2415 | 2.9321 |
71
- | 3.0333 | 22.0 | 2530 | 2.9128 |
72
- | 3.0126 | 23.0 | 2645 | 2.8627 |
73
- | 2.9968 | 24.0 | 2760 | 3.0186 |
74
- | 3.0295 | 25.0 | 2875 | 3.0148 |
75
- | 3.0294 | 26.0 | 2990 | 3.0341 |
76
- | 3.0395 | 27.0 | 3105 | 2.9997 |
77
- | 3.0445 | 28.0 | 3220 | 3.0575 |
78
- | 2.9761 | 29.0 | 3335 | 2.9707 |
79
- | 3.0075 | 30.0 | 3450 | 2.9392 |
80
- | 3.0198 | 31.0 | 3565 | 2.9122 |
81
- | 2.9782 | 32.0 | 3680 | 2.9471 |
82
- | 2.9773 | 33.0 | 3795 | 3.0306 |
83
- | 2.9528 | 34.0 | 3910 | 2.8513 |
84
- | 2.9228 | 35.0 | 4025 | 2.8997 |
85
- | 2.9221 | 36.0 | 4140 | 2.8646 |
86
- | 2.8933 | 37.0 | 4255 | 2.8871 |
87
- | 2.8925 | 38.0 | 4370 | 2.9407 |
88
- | 2.9069 | 39.0 | 4485 | 2.9625 |
89
- | 2.9246 | 40.0 | 4600 | 2.9946 |
90
- | 2.9089 | 41.0 | 4715 | 2.8936 |
91
- | 2.8573 | 42.0 | 4830 | 2.8272 |
92
- | 2.8378 | 43.0 | 4945 | 2.8543 |
93
- | 2.8957 | 44.0 | 5060 | 2.8590 |
94
- | 2.8454 | 45.0 | 5175 | 2.8525 |
95
- | 2.7964 | 46.0 | 5290 | 2.8658 |
96
- | 2.8172 | 47.0 | 5405 | 2.8872 |
97
- | 2.8472 | 48.0 | 5520 | 2.8654 |
98
- | 2.811 | 49.0 | 5635 | 2.8572 |
99
- | 2.801 | 50.0 | 5750 | 2.8303 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [microsoft/conditional-detr-resnet-50](https://huggingface.co/microsoft/conditional-detr-resnet-50) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 2.7389
19
 
20
  ## Model description
21
 
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 100
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
47
 
48
+ | Training Loss | Epoch | Step | Validation Loss |
49
+ |:-------------:|:-----:|:-----:|:---------------:|
50
+ | 5.4149 | 1.0 | 115 | 4.3974 |
51
+ | 3.9453 | 2.0 | 230 | 3.6520 |
52
+ | 3.7269 | 3.0 | 345 | 3.7602 |
53
+ | 3.5898 | 4.0 | 460 | 3.5671 |
54
+ | 3.486 | 5.0 | 575 | 3.4912 |
55
+ | 3.4073 | 6.0 | 690 | 3.4095 |
56
+ | 3.4181 | 7.0 | 805 | 3.3183 |
57
+ | 3.3603 | 8.0 | 920 | 3.1111 |
58
+ | 3.2777 | 9.0 | 1035 | 3.1992 |
59
+ | 3.2851 | 10.0 | 1150 | 3.3997 |
60
+ | 3.266 | 11.0 | 1265 | 3.2861 |
61
+ | 3.2803 | 12.0 | 1380 | 3.1813 |
62
+ | 3.1733 | 13.0 | 1495 | 2.9838 |
63
+ | 3.2094 | 14.0 | 1610 | 3.1175 |
64
+ | 3.1718 | 15.0 | 1725 | 3.0064 |
65
+ | 3.1303 | 16.0 | 1840 | 3.0869 |
66
+ | 3.0897 | 17.0 | 1955 | 3.0306 |
67
+ | 3.0233 | 18.0 | 2070 | 2.9479 |
68
+ | 3.0156 | 19.0 | 2185 | 2.9145 |
69
+ | 3.0277 | 20.0 | 2300 | 2.8919 |
70
+ | 3.0847 | 21.0 | 2415 | 2.9321 |
71
+ | 3.0333 | 22.0 | 2530 | 2.9128 |
72
+ | 3.0126 | 23.0 | 2645 | 2.8627 |
73
+ | 2.9968 | 24.0 | 2760 | 3.0186 |
74
+ | 3.0295 | 25.0 | 2875 | 3.0148 |
75
+ | 3.0294 | 26.0 | 2990 | 3.0341 |
76
+ | 3.0395 | 27.0 | 3105 | 2.9997 |
77
+ | 3.0445 | 28.0 | 3220 | 3.0575 |
78
+ | 2.9761 | 29.0 | 3335 | 2.9707 |
79
+ | 3.0075 | 30.0 | 3450 | 2.9392 |
80
+ | 3.0198 | 31.0 | 3565 | 2.9122 |
81
+ | 2.9782 | 32.0 | 3680 | 2.9471 |
82
+ | 2.9773 | 33.0 | 3795 | 3.0306 |
83
+ | 2.9528 | 34.0 | 3910 | 2.8513 |
84
+ | 2.9228 | 35.0 | 4025 | 2.8997 |
85
+ | 2.9221 | 36.0 | 4140 | 2.8646 |
86
+ | 2.8933 | 37.0 | 4255 | 2.8871 |
87
+ | 2.8925 | 38.0 | 4370 | 2.9407 |
88
+ | 2.9069 | 39.0 | 4485 | 2.9625 |
89
+ | 2.9246 | 40.0 | 4600 | 2.9946 |
90
+ | 2.9089 | 41.0 | 4715 | 2.8936 |
91
+ | 2.8573 | 42.0 | 4830 | 2.8272 |
92
+ | 2.8768 | 43.0 | 4945 | 2.9868 |
93
+ | 2.9666 | 44.0 | 5060 | 2.9200 |
94
+ | 2.958 | 45.0 | 5175 | 2.8755 |
95
+ | 2.8923 | 46.0 | 5290 | 2.8518 |
96
+ | 2.9204 | 47.0 | 5405 | 2.9000 |
97
+ | 2.9644 | 48.0 | 5520 | 2.8969 |
98
+ | 2.9011 | 49.0 | 5635 | 2.7918 |
99
+ | 2.9329 | 50.0 | 5750 | 2.9139 |
100
+ | 2.9031 | 51.0 | 5865 | 2.7796 |
101
+ | 2.9029 | 52.0 | 5980 | 2.8025 |
102
+ | 2.9555 | 53.0 | 6095 | 2.9121 |
103
+ | 2.9366 | 54.0 | 6210 | 2.9035 |
104
+ | 2.8871 | 55.0 | 6325 | 2.8759 |
105
+ | 2.863 | 56.0 | 6440 | 2.8540 |
106
+ | 2.8897 | 57.0 | 6555 | 2.8401 |
107
+ | 2.828 | 58.0 | 6670 | 2.8590 |
108
+ | 2.8221 | 59.0 | 6785 | 2.9255 |
109
+ | 2.835 | 60.0 | 6900 | 2.9809 |
110
+ | 2.886 | 61.0 | 7015 | 2.9907 |
111
+ | 2.8227 | 62.0 | 7130 | 2.8283 |
112
+ | 2.7864 | 63.0 | 7245 | 2.8258 |
113
+ | 2.8179 | 64.0 | 7360 | 2.9504 |
114
+ | 2.7944 | 65.0 | 7475 | 2.8042 |
115
+ | 2.7986 | 66.0 | 7590 | 2.8307 |
116
+ | 2.7567 | 67.0 | 7705 | 2.8060 |
117
+ | 2.7552 | 68.0 | 7820 | 2.7994 |
118
+ | 2.7933 | 69.0 | 7935 | 2.8493 |
119
+ | 2.7393 | 70.0 | 8050 | 2.8409 |
120
+ | 2.7357 | 71.0 | 8165 | 2.8086 |
121
+ | 2.7264 | 72.0 | 8280 | 2.7773 |
122
+ | 2.7614 | 73.0 | 8395 | 2.8937 |
123
+ | 2.7279 | 74.0 | 8510 | 2.8887 |
124
+ | 2.745 | 75.0 | 8625 | 2.8274 |
125
+ | 2.7225 | 76.0 | 8740 | 2.7971 |
126
+ | 2.7094 | 77.0 | 8855 | 2.8685 |
127
+ | 2.7306 | 78.0 | 8970 | 2.8482 |
128
+ | 2.6844 | 79.0 | 9085 | 2.7372 |
129
+ | 2.6949 | 80.0 | 9200 | 2.8149 |
130
+ | 2.7342 | 81.0 | 9315 | 2.7647 |
131
+ | 2.6813 | 82.0 | 9430 | 2.7666 |
132
+ | 2.7161 | 83.0 | 9545 | 2.8437 |
133
+ | 2.6953 | 84.0 | 9660 | 2.7895 |
134
+ | 2.6714 | 85.0 | 9775 | 2.7683 |
135
+ | 2.6611 | 86.0 | 9890 | 2.7004 |
136
+ | 2.6714 | 87.0 | 10005 | 2.7183 |
137
+ | 2.6655 | 88.0 | 10120 | 2.7043 |
138
+ | 2.6509 | 89.0 | 10235 | 2.7705 |
139
+ | 2.6266 | 90.0 | 10350 | 2.7152 |
140
+ | 2.6677 | 91.0 | 10465 | 2.7295 |
141
+ | 2.6438 | 92.0 | 10580 | 2.7018 |
142
+ | 2.6267 | 93.0 | 10695 | 2.7063 |
143
+ | 2.6286 | 94.0 | 10810 | 2.7798 |
144
+ | 2.6043 | 95.0 | 10925 | 2.7712 |
145
+ | 2.6188 | 96.0 | 11040 | 2.7614 |
146
+ | 2.6028 | 97.0 | 11155 | 2.7405 |
147
+ | 2.621 | 98.0 | 11270 | 2.7415 |
148
+ | 2.61 | 99.0 | 11385 | 2.7415 |
149
+ | 2.6164 | 100.0 | 11500 | 2.7389 |
150
 
151
 
152
  ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6ca86c0931744ccb8c90e4638a9ca83e238f2ea6102a4e014e7d419ebfe75b8
3
  size 173870884
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c1866f4d599d39ba0fe76d52a1a7dcf979852ecd271dca7cd00adff8579d689
3
  size 173870884
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.827199935913086,
3
- "best_model_checkpoint": "cdetr-mist1-brain-gt-tumors-8ah-6l/checkpoint-4830",
4
- "epoch": 50.0,
5
  "eval_steps": 500,
6
- "global_step": 5750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -598,131 +598,831 @@
598
  },
599
  {
600
  "epoch": 43.0,
601
- "learning_rate": 1.4382608695652176e-06,
602
- "loss": 2.8378,
603
  "step": 4945
604
  },
605
  {
606
  "epoch": 43.0,
607
- "eval_loss": 2.8543009757995605,
608
- "eval_runtime": 5.6643,
609
- "eval_samples_per_second": 7.062,
610
- "eval_steps_per_second": 0.883,
611
  "step": 4945
612
  },
613
  {
614
  "epoch": 44.0,
615
- "learning_rate": 1.2382608695652176e-06,
616
- "loss": 2.8957,
617
  "step": 5060
618
  },
619
  {
620
  "epoch": 44.0,
621
- "eval_loss": 2.859046459197998,
622
- "eval_runtime": 5.687,
623
- "eval_samples_per_second": 7.034,
624
- "eval_steps_per_second": 0.879,
625
  "step": 5060
626
  },
627
  {
628
  "epoch": 45.0,
629
- "learning_rate": 1.0382608695652174e-06,
630
- "loss": 2.8454,
631
  "step": 5175
632
  },
633
  {
634
  "epoch": 45.0,
635
- "eval_loss": 2.8524787425994873,
636
- "eval_runtime": 5.6623,
637
- "eval_samples_per_second": 7.064,
638
  "eval_steps_per_second": 0.883,
639
  "step": 5175
640
  },
641
  {
642
  "epoch": 46.0,
643
- "learning_rate": 8.382608695652175e-07,
644
- "loss": 2.7964,
645
  "step": 5290
646
  },
647
  {
648
  "epoch": 46.0,
649
- "eval_loss": 2.8657748699188232,
650
- "eval_runtime": 5.6625,
651
- "eval_samples_per_second": 7.064,
652
- "eval_steps_per_second": 0.883,
653
  "step": 5290
654
  },
655
  {
656
  "epoch": 47.0,
657
- "learning_rate": 6.382608695652175e-07,
658
- "loss": 2.8172,
659
  "step": 5405
660
  },
661
  {
662
  "epoch": 47.0,
663
- "eval_loss": 2.8872039318084717,
664
- "eval_runtime": 5.6798,
665
- "eval_samples_per_second": 7.043,
666
- "eval_steps_per_second": 0.88,
667
  "step": 5405
668
  },
669
  {
670
  "epoch": 48.0,
671
- "learning_rate": 4.382608695652174e-07,
672
- "loss": 2.8472,
673
  "step": 5520
674
  },
675
  {
676
  "epoch": 48.0,
677
- "eval_loss": 2.8653695583343506,
678
- "eval_runtime": 5.6595,
679
- "eval_samples_per_second": 7.068,
680
- "eval_steps_per_second": 0.883,
681
  "step": 5520
682
  },
683
  {
684
  "epoch": 49.0,
685
- "learning_rate": 2.3826086956521743e-07,
686
- "loss": 2.811,
687
  "step": 5635
688
  },
689
  {
690
  "epoch": 49.0,
691
- "eval_loss": 2.8571643829345703,
692
- "eval_runtime": 5.7272,
693
- "eval_samples_per_second": 6.984,
694
- "eval_steps_per_second": 0.873,
695
  "step": 5635
696
  },
697
  {
698
  "epoch": 50.0,
699
- "learning_rate": 3.82608695652174e-08,
700
- "loss": 2.801,
701
  "step": 5750
702
  },
703
  {
704
  "epoch": 50.0,
705
- "eval_loss": 2.830277919769287,
706
- "eval_runtime": 5.6412,
707
- "eval_samples_per_second": 7.091,
708
- "eval_steps_per_second": 0.886,
709
  "step": 5750
710
  },
711
  {
712
- "epoch": 50.0,
713
- "step": 5750,
714
- "total_flos": 1.147766207616e+19,
715
- "train_loss": 1.5786318518597147,
716
- "train_runtime": 2642.836,
717
- "train_samples_per_second": 8.703,
718
- "train_steps_per_second": 2.176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  }
720
  ],
721
  "logging_steps": 500,
722
- "max_steps": 5750,
723
- "num_train_epochs": 50,
724
  "save_steps": 500,
725
- "total_flos": 1.147766207616e+19,
726
  "trial_name": null,
727
  "trial_params": null
728
  }
 
1
  {
2
+ "best_metric": 2.700421094894409,
3
+ "best_model_checkpoint": "cdetr-mist1-brain-gt-tumors-8ah-6l/checkpoint-9890",
4
+ "epoch": 100.0,
5
  "eval_steps": 500,
6
+ "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
598
  },
599
  {
600
  "epoch": 43.0,
601
+ "learning_rate": 5.726956521739131e-06,
602
+ "loss": 2.8768,
603
  "step": 4945
604
  },
605
  {
606
  "epoch": 43.0,
607
+ "eval_loss": 2.9868218898773193,
608
+ "eval_runtime": 5.7349,
609
+ "eval_samples_per_second": 6.975,
610
+ "eval_steps_per_second": 0.872,
611
  "step": 4945
612
  },
613
  {
614
  "epoch": 44.0,
615
+ "learning_rate": 5.627826086956523e-06,
616
+ "loss": 2.9666,
617
  "step": 5060
618
  },
619
  {
620
  "epoch": 44.0,
621
+ "eval_loss": 2.9200377464294434,
622
+ "eval_runtime": 5.6461,
623
+ "eval_samples_per_second": 7.084,
624
+ "eval_steps_per_second": 0.886,
625
  "step": 5060
626
  },
627
  {
628
  "epoch": 45.0,
629
+ "learning_rate": 5.527826086956523e-06,
630
+ "loss": 2.958,
631
  "step": 5175
632
  },
633
  {
634
  "epoch": 45.0,
635
+ "eval_loss": 2.875474214553833,
636
+ "eval_runtime": 5.664,
637
+ "eval_samples_per_second": 7.062,
638
  "eval_steps_per_second": 0.883,
639
  "step": 5175
640
  },
641
  {
642
  "epoch": 46.0,
643
+ "learning_rate": 5.427826086956523e-06,
644
+ "loss": 2.8923,
645
  "step": 5290
646
  },
647
  {
648
  "epoch": 46.0,
649
+ "eval_loss": 2.851766586303711,
650
+ "eval_runtime": 5.6855,
651
+ "eval_samples_per_second": 7.035,
652
+ "eval_steps_per_second": 0.879,
653
  "step": 5290
654
  },
655
  {
656
  "epoch": 47.0,
657
+ "learning_rate": 5.327826086956522e-06,
658
+ "loss": 2.9204,
659
  "step": 5405
660
  },
661
  {
662
  "epoch": 47.0,
663
+ "eval_loss": 2.9000306129455566,
664
+ "eval_runtime": 5.6182,
665
+ "eval_samples_per_second": 7.12,
666
+ "eval_steps_per_second": 0.89,
667
  "step": 5405
668
  },
669
  {
670
  "epoch": 48.0,
671
+ "learning_rate": 5.227826086956522e-06,
672
+ "loss": 2.9644,
673
  "step": 5520
674
  },
675
  {
676
  "epoch": 48.0,
677
+ "eval_loss": 2.8968658447265625,
678
+ "eval_runtime": 5.677,
679
+ "eval_samples_per_second": 7.046,
680
+ "eval_steps_per_second": 0.881,
681
  "step": 5520
682
  },
683
  {
684
  "epoch": 49.0,
685
+ "learning_rate": 5.127826086956522e-06,
686
+ "loss": 2.9011,
687
  "step": 5635
688
  },
689
  {
690
  "epoch": 49.0,
691
+ "eval_loss": 2.7918035984039307,
692
+ "eval_runtime": 5.6945,
693
+ "eval_samples_per_second": 7.024,
694
+ "eval_steps_per_second": 0.878,
695
  "step": 5635
696
  },
697
  {
698
  "epoch": 50.0,
699
+ "learning_rate": 5.028695652173914e-06,
700
+ "loss": 2.9329,
701
  "step": 5750
702
  },
703
  {
704
  "epoch": 50.0,
705
+ "eval_loss": 2.9139397144317627,
706
+ "eval_runtime": 5.6814,
707
+ "eval_samples_per_second": 7.041,
708
+ "eval_steps_per_second": 0.88,
709
  "step": 5750
710
  },
711
  {
712
+ "epoch": 51.0,
713
+ "learning_rate": 4.9286956521739135e-06,
714
+ "loss": 2.9031,
715
+ "step": 5865
716
+ },
717
+ {
718
+ "epoch": 51.0,
719
+ "eval_loss": 2.779574155807495,
720
+ "eval_runtime": 5.6982,
721
+ "eval_samples_per_second": 7.02,
722
+ "eval_steps_per_second": 0.877,
723
+ "step": 5865
724
+ },
725
+ {
726
+ "epoch": 52.0,
727
+ "learning_rate": 4.828695652173914e-06,
728
+ "loss": 2.9029,
729
+ "step": 5980
730
+ },
731
+ {
732
+ "epoch": 52.0,
733
+ "eval_loss": 2.802475690841675,
734
+ "eval_runtime": 5.6076,
735
+ "eval_samples_per_second": 7.133,
736
+ "eval_steps_per_second": 0.892,
737
+ "step": 5980
738
+ },
739
+ {
740
+ "epoch": 53.0,
741
+ "learning_rate": 4.728695652173914e-06,
742
+ "loss": 2.9555,
743
+ "step": 6095
744
+ },
745
+ {
746
+ "epoch": 53.0,
747
+ "eval_loss": 2.9120869636535645,
748
+ "eval_runtime": 5.7682,
749
+ "eval_samples_per_second": 6.935,
750
+ "eval_steps_per_second": 0.867,
751
+ "step": 6095
752
+ },
753
+ {
754
+ "epoch": 54.0,
755
+ "learning_rate": 4.628695652173914e-06,
756
+ "loss": 2.9366,
757
+ "step": 6210
758
+ },
759
+ {
760
+ "epoch": 54.0,
761
+ "eval_loss": 2.9034695625305176,
762
+ "eval_runtime": 5.7147,
763
+ "eval_samples_per_second": 7.0,
764
+ "eval_steps_per_second": 0.875,
765
+ "step": 6210
766
+ },
767
+ {
768
+ "epoch": 55.0,
769
+ "learning_rate": 4.528695652173913e-06,
770
+ "loss": 2.8871,
771
+ "step": 6325
772
+ },
773
+ {
774
+ "epoch": 55.0,
775
+ "eval_loss": 2.87589168548584,
776
+ "eval_runtime": 5.6683,
777
+ "eval_samples_per_second": 7.057,
778
+ "eval_steps_per_second": 0.882,
779
+ "step": 6325
780
+ },
781
+ {
782
+ "epoch": 56.0,
783
+ "learning_rate": 4.428695652173913e-06,
784
+ "loss": 2.863,
785
+ "step": 6440
786
+ },
787
+ {
788
+ "epoch": 56.0,
789
+ "eval_loss": 2.8540170192718506,
790
+ "eval_runtime": 5.7794,
791
+ "eval_samples_per_second": 6.921,
792
+ "eval_steps_per_second": 0.865,
793
+ "step": 6440
794
+ },
795
+ {
796
+ "epoch": 57.0,
797
+ "learning_rate": 4.328695652173913e-06,
798
+ "loss": 2.8897,
799
+ "step": 6555
800
+ },
801
+ {
802
+ "epoch": 57.0,
803
+ "eval_loss": 2.8401310443878174,
804
+ "eval_runtime": 5.6374,
805
+ "eval_samples_per_second": 7.095,
806
+ "eval_steps_per_second": 0.887,
807
+ "step": 6555
808
+ },
809
+ {
810
+ "epoch": 58.0,
811
+ "learning_rate": 4.228695652173913e-06,
812
+ "loss": 2.828,
813
+ "step": 6670
814
+ },
815
+ {
816
+ "epoch": 58.0,
817
+ "eval_loss": 2.8589885234832764,
818
+ "eval_runtime": 5.697,
819
+ "eval_samples_per_second": 7.021,
820
+ "eval_steps_per_second": 0.878,
821
+ "step": 6670
822
+ },
823
+ {
824
+ "epoch": 59.0,
825
+ "learning_rate": 4.1286956521739135e-06,
826
+ "loss": 2.8221,
827
+ "step": 6785
828
+ },
829
+ {
830
+ "epoch": 59.0,
831
+ "eval_loss": 2.92549467086792,
832
+ "eval_runtime": 5.7309,
833
+ "eval_samples_per_second": 6.98,
834
+ "eval_steps_per_second": 0.872,
835
+ "step": 6785
836
+ },
837
+ {
838
+ "epoch": 60.0,
839
+ "learning_rate": 4.028695652173914e-06,
840
+ "loss": 2.835,
841
+ "step": 6900
842
+ },
843
+ {
844
+ "epoch": 60.0,
845
+ "eval_loss": 2.9808831214904785,
846
+ "eval_runtime": 5.6748,
847
+ "eval_samples_per_second": 7.049,
848
+ "eval_steps_per_second": 0.881,
849
+ "step": 6900
850
+ },
851
+ {
852
+ "epoch": 61.0,
853
+ "learning_rate": 3.928695652173914e-06,
854
+ "loss": 2.886,
855
+ "step": 7015
856
+ },
857
+ {
858
+ "epoch": 61.0,
859
+ "eval_loss": 2.9906742572784424,
860
+ "eval_runtime": 5.7118,
861
+ "eval_samples_per_second": 7.003,
862
+ "eval_steps_per_second": 0.875,
863
+ "step": 7015
864
+ },
865
+ {
866
+ "epoch": 62.0,
867
+ "learning_rate": 3.828695652173913e-06,
868
+ "loss": 2.8227,
869
+ "step": 7130
870
+ },
871
+ {
872
+ "epoch": 62.0,
873
+ "eval_loss": 2.8283145427703857,
874
+ "eval_runtime": 5.7014,
875
+ "eval_samples_per_second": 7.016,
876
+ "eval_steps_per_second": 0.877,
877
+ "step": 7130
878
+ },
879
+ {
880
+ "epoch": 63.0,
881
+ "learning_rate": 3.728695652173913e-06,
882
+ "loss": 2.7864,
883
+ "step": 7245
884
+ },
885
+ {
886
+ "epoch": 63.0,
887
+ "eval_loss": 2.8258347511291504,
888
+ "eval_runtime": 5.6903,
889
+ "eval_samples_per_second": 7.029,
890
+ "eval_steps_per_second": 0.879,
891
+ "step": 7245
892
+ },
893
+ {
894
+ "epoch": 64.0,
895
+ "learning_rate": 3.6286956521739132e-06,
896
+ "loss": 2.8179,
897
+ "step": 7360
898
+ },
899
+ {
900
+ "epoch": 64.0,
901
+ "eval_loss": 2.9504449367523193,
902
+ "eval_runtime": 5.7303,
903
+ "eval_samples_per_second": 6.98,
904
+ "eval_steps_per_second": 0.873,
905
+ "step": 7360
906
+ },
907
+ {
908
+ "epoch": 65.0,
909
+ "learning_rate": 3.5286956521739133e-06,
910
+ "loss": 2.7944,
911
+ "step": 7475
912
+ },
913
+ {
914
+ "epoch": 65.0,
915
+ "eval_loss": 2.8042430877685547,
916
+ "eval_runtime": 5.7552,
917
+ "eval_samples_per_second": 6.95,
918
+ "eval_steps_per_second": 0.869,
919
+ "step": 7475
920
+ },
921
+ {
922
+ "epoch": 66.0,
923
+ "learning_rate": 3.4286956521739134e-06,
924
+ "loss": 2.7986,
925
+ "step": 7590
926
+ },
927
+ {
928
+ "epoch": 66.0,
929
+ "eval_loss": 2.8307268619537354,
930
+ "eval_runtime": 5.741,
931
+ "eval_samples_per_second": 6.967,
932
+ "eval_steps_per_second": 0.871,
933
+ "step": 7590
934
+ },
935
+ {
936
+ "epoch": 67.0,
937
+ "learning_rate": 3.328695652173913e-06,
938
+ "loss": 2.7567,
939
+ "step": 7705
940
+ },
941
+ {
942
+ "epoch": 67.0,
943
+ "eval_loss": 2.805975914001465,
944
+ "eval_runtime": 5.7979,
945
+ "eval_samples_per_second": 6.899,
946
+ "eval_steps_per_second": 0.862,
947
+ "step": 7705
948
+ },
949
+ {
950
+ "epoch": 68.0,
951
+ "learning_rate": 3.2286956521739132e-06,
952
+ "loss": 2.7552,
953
+ "step": 7820
954
+ },
955
+ {
956
+ "epoch": 68.0,
957
+ "eval_loss": 2.7994372844696045,
958
+ "eval_runtime": 5.7117,
959
+ "eval_samples_per_second": 7.003,
960
+ "eval_steps_per_second": 0.875,
961
+ "step": 7820
962
+ },
963
+ {
964
+ "epoch": 69.0,
965
+ "learning_rate": 3.1286956521739133e-06,
966
+ "loss": 2.7933,
967
+ "step": 7935
968
+ },
969
+ {
970
+ "epoch": 69.0,
971
+ "eval_loss": 2.849256992340088,
972
+ "eval_runtime": 5.7067,
973
+ "eval_samples_per_second": 7.009,
974
+ "eval_steps_per_second": 0.876,
975
+ "step": 7935
976
+ },
977
+ {
978
+ "epoch": 70.0,
979
+ "learning_rate": 3.028695652173913e-06,
980
+ "loss": 2.7393,
981
+ "step": 8050
982
+ },
983
+ {
984
+ "epoch": 70.0,
985
+ "eval_loss": 2.8409152030944824,
986
+ "eval_runtime": 5.6797,
987
+ "eval_samples_per_second": 7.043,
988
+ "eval_steps_per_second": 0.88,
989
+ "step": 8050
990
+ },
991
+ {
992
+ "epoch": 71.0,
993
+ "learning_rate": 2.9286956521739136e-06,
994
+ "loss": 2.7357,
995
+ "step": 8165
996
+ },
997
+ {
998
+ "epoch": 71.0,
999
+ "eval_loss": 2.8086206912994385,
1000
+ "eval_runtime": 5.6798,
1001
+ "eval_samples_per_second": 7.043,
1002
+ "eval_steps_per_second": 0.88,
1003
+ "step": 8165
1004
+ },
1005
+ {
1006
+ "epoch": 72.0,
1007
+ "learning_rate": 2.8286956521739132e-06,
1008
+ "loss": 2.7264,
1009
+ "step": 8280
1010
+ },
1011
+ {
1012
+ "epoch": 72.0,
1013
+ "eval_loss": 2.7772560119628906,
1014
+ "eval_runtime": 5.7909,
1015
+ "eval_samples_per_second": 6.907,
1016
+ "eval_steps_per_second": 0.863,
1017
+ "step": 8280
1018
+ },
1019
+ {
1020
+ "epoch": 73.0,
1021
+ "learning_rate": 2.7286956521739134e-06,
1022
+ "loss": 2.7614,
1023
+ "step": 8395
1024
+ },
1025
+ {
1026
+ "epoch": 73.0,
1027
+ "eval_loss": 2.8937366008758545,
1028
+ "eval_runtime": 5.7573,
1029
+ "eval_samples_per_second": 6.948,
1030
+ "eval_steps_per_second": 0.868,
1031
+ "step": 8395
1032
+ },
1033
+ {
1034
+ "epoch": 74.0,
1035
+ "learning_rate": 2.6295652173913044e-06,
1036
+ "loss": 2.7279,
1037
+ "step": 8510
1038
+ },
1039
+ {
1040
+ "epoch": 74.0,
1041
+ "eval_loss": 2.8887228965759277,
1042
+ "eval_runtime": 5.7093,
1043
+ "eval_samples_per_second": 7.006,
1044
+ "eval_steps_per_second": 0.876,
1045
+ "step": 8510
1046
+ },
1047
+ {
1048
+ "epoch": 75.0,
1049
+ "learning_rate": 2.5295652173913045e-06,
1050
+ "loss": 2.745,
1051
+ "step": 8625
1052
+ },
1053
+ {
1054
+ "epoch": 75.0,
1055
+ "eval_loss": 2.827376127243042,
1056
+ "eval_runtime": 5.6568,
1057
+ "eval_samples_per_second": 7.071,
1058
+ "eval_steps_per_second": 0.884,
1059
+ "step": 8625
1060
+ },
1061
+ {
1062
+ "epoch": 76.0,
1063
+ "learning_rate": 2.4295652173913046e-06,
1064
+ "loss": 2.7225,
1065
+ "step": 8740
1066
+ },
1067
+ {
1068
+ "epoch": 76.0,
1069
+ "eval_loss": 2.7970547676086426,
1070
+ "eval_runtime": 5.7165,
1071
+ "eval_samples_per_second": 6.997,
1072
+ "eval_steps_per_second": 0.875,
1073
+ "step": 8740
1074
+ },
1075
+ {
1076
+ "epoch": 77.0,
1077
+ "learning_rate": 2.3295652173913043e-06,
1078
+ "loss": 2.7094,
1079
+ "step": 8855
1080
+ },
1081
+ {
1082
+ "epoch": 77.0,
1083
+ "eval_loss": 2.868479013442993,
1084
+ "eval_runtime": 5.6813,
1085
+ "eval_samples_per_second": 7.041,
1086
+ "eval_steps_per_second": 0.88,
1087
+ "step": 8855
1088
+ },
1089
+ {
1090
+ "epoch": 78.0,
1091
+ "learning_rate": 2.229565217391305e-06,
1092
+ "loss": 2.7306,
1093
+ "step": 8970
1094
+ },
1095
+ {
1096
+ "epoch": 78.0,
1097
+ "eval_loss": 2.8482155799865723,
1098
+ "eval_runtime": 5.7403,
1099
+ "eval_samples_per_second": 6.968,
1100
+ "eval_steps_per_second": 0.871,
1101
+ "step": 8970
1102
+ },
1103
+ {
1104
+ "epoch": 79.0,
1105
+ "learning_rate": 2.1295652173913045e-06,
1106
+ "loss": 2.6844,
1107
+ "step": 9085
1108
+ },
1109
+ {
1110
+ "epoch": 79.0,
1111
+ "eval_loss": 2.7371761798858643,
1112
+ "eval_runtime": 5.7039,
1113
+ "eval_samples_per_second": 7.013,
1114
+ "eval_steps_per_second": 0.877,
1115
+ "step": 9085
1116
+ },
1117
+ {
1118
+ "epoch": 80.0,
1119
+ "learning_rate": 2.0295652173913046e-06,
1120
+ "loss": 2.6949,
1121
+ "step": 9200
1122
+ },
1123
+ {
1124
+ "epoch": 80.0,
1125
+ "eval_loss": 2.81486177444458,
1126
+ "eval_runtime": 5.7433,
1127
+ "eval_samples_per_second": 6.965,
1128
+ "eval_steps_per_second": 0.871,
1129
+ "step": 9200
1130
+ },
1131
+ {
1132
+ "epoch": 81.0,
1133
+ "learning_rate": 1.9295652173913047e-06,
1134
+ "loss": 2.7342,
1135
+ "step": 9315
1136
+ },
1137
+ {
1138
+ "epoch": 81.0,
1139
+ "eval_loss": 2.76469087600708,
1140
+ "eval_runtime": 5.6684,
1141
+ "eval_samples_per_second": 7.057,
1142
+ "eval_steps_per_second": 0.882,
1143
+ "step": 9315
1144
+ },
1145
+ {
1146
+ "epoch": 82.0,
1147
+ "learning_rate": 1.8295652173913044e-06,
1148
+ "loss": 2.6813,
1149
+ "step": 9430
1150
+ },
1151
+ {
1152
+ "epoch": 82.0,
1153
+ "eval_loss": 2.7665936946868896,
1154
+ "eval_runtime": 5.6874,
1155
+ "eval_samples_per_second": 7.033,
1156
+ "eval_steps_per_second": 0.879,
1157
+ "step": 9430
1158
+ },
1159
+ {
1160
+ "epoch": 83.0,
1161
+ "learning_rate": 1.7295652173913043e-06,
1162
+ "loss": 2.7161,
1163
+ "step": 9545
1164
+ },
1165
+ {
1166
+ "epoch": 83.0,
1167
+ "eval_loss": 2.843693971633911,
1168
+ "eval_runtime": 5.7308,
1169
+ "eval_samples_per_second": 6.98,
1170
+ "eval_steps_per_second": 0.872,
1171
+ "step": 9545
1172
+ },
1173
+ {
1174
+ "epoch": 84.0,
1175
+ "learning_rate": 1.6295652173913046e-06,
1176
+ "loss": 2.6953,
1177
+ "step": 9660
1178
+ },
1179
+ {
1180
+ "epoch": 84.0,
1181
+ "eval_loss": 2.7895007133483887,
1182
+ "eval_runtime": 5.5926,
1183
+ "eval_samples_per_second": 7.152,
1184
+ "eval_steps_per_second": 0.894,
1185
+ "step": 9660
1186
+ },
1187
+ {
1188
+ "epoch": 85.0,
1189
+ "learning_rate": 1.5295652173913045e-06,
1190
+ "loss": 2.6714,
1191
+ "step": 9775
1192
+ },
1193
+ {
1194
+ "epoch": 85.0,
1195
+ "eval_loss": 2.768319606781006,
1196
+ "eval_runtime": 5.5781,
1197
+ "eval_samples_per_second": 7.171,
1198
+ "eval_steps_per_second": 0.896,
1199
+ "step": 9775
1200
+ },
1201
+ {
1202
+ "epoch": 86.0,
1203
+ "learning_rate": 1.4295652173913044e-06,
1204
+ "loss": 2.6611,
1205
+ "step": 9890
1206
+ },
1207
+ {
1208
+ "epoch": 86.0,
1209
+ "eval_loss": 2.700421094894409,
1210
+ "eval_runtime": 5.7156,
1211
+ "eval_samples_per_second": 6.998,
1212
+ "eval_steps_per_second": 0.875,
1213
+ "step": 9890
1214
+ },
1215
+ {
1216
+ "epoch": 87.0,
1217
+ "learning_rate": 1.3295652173913045e-06,
1218
+ "loss": 2.6714,
1219
+ "step": 10005
1220
+ },
1221
+ {
1222
+ "epoch": 87.0,
1223
+ "eval_loss": 2.7182838916778564,
1224
+ "eval_runtime": 5.6318,
1225
+ "eval_samples_per_second": 7.103,
1226
+ "eval_steps_per_second": 0.888,
1227
+ "step": 10005
1228
+ },
1229
+ {
1230
+ "epoch": 88.0,
1231
+ "learning_rate": 1.2295652173913044e-06,
1232
+ "loss": 2.6655,
1233
+ "step": 10120
1234
+ },
1235
+ {
1236
+ "epoch": 88.0,
1237
+ "eval_loss": 2.7042617797851562,
1238
+ "eval_runtime": 5.8135,
1239
+ "eval_samples_per_second": 6.881,
1240
+ "eval_steps_per_second": 0.86,
1241
+ "step": 10120
1242
+ },
1243
+ {
1244
+ "epoch": 89.0,
1245
+ "learning_rate": 1.1295652173913045e-06,
1246
+ "loss": 2.6509,
1247
+ "step": 10235
1248
+ },
1249
+ {
1250
+ "epoch": 89.0,
1251
+ "eval_loss": 2.770498514175415,
1252
+ "eval_runtime": 5.7555,
1253
+ "eval_samples_per_second": 6.95,
1254
+ "eval_steps_per_second": 0.869,
1255
+ "step": 10235
1256
+ },
1257
+ {
1258
+ "epoch": 90.0,
1259
+ "learning_rate": 1.0295652173913044e-06,
1260
+ "loss": 2.6266,
1261
+ "step": 10350
1262
+ },
1263
+ {
1264
+ "epoch": 90.0,
1265
+ "eval_loss": 2.7151668071746826,
1266
+ "eval_runtime": 5.6476,
1267
+ "eval_samples_per_second": 7.083,
1268
+ "eval_steps_per_second": 0.885,
1269
+ "step": 10350
1270
+ },
1271
+ {
1272
+ "epoch": 91.0,
1273
+ "learning_rate": 9.295652173913044e-07,
1274
+ "loss": 2.6677,
1275
+ "step": 10465
1276
+ },
1277
+ {
1278
+ "epoch": 91.0,
1279
+ "eval_loss": 2.729531764984131,
1280
+ "eval_runtime": 5.6362,
1281
+ "eval_samples_per_second": 7.097,
1282
+ "eval_steps_per_second": 0.887,
1283
+ "step": 10465
1284
+ },
1285
+ {
1286
+ "epoch": 92.0,
1287
+ "learning_rate": 8.295652173913043e-07,
1288
+ "loss": 2.6438,
1289
+ "step": 10580
1290
+ },
1291
+ {
1292
+ "epoch": 92.0,
1293
+ "eval_loss": 2.70180344581604,
1294
+ "eval_runtime": 5.6488,
1295
+ "eval_samples_per_second": 7.081,
1296
+ "eval_steps_per_second": 0.885,
1297
+ "step": 10580
1298
+ },
1299
+ {
1300
+ "epoch": 93.0,
1301
+ "learning_rate": 7.295652173913044e-07,
1302
+ "loss": 2.6267,
1303
+ "step": 10695
1304
+ },
1305
+ {
1306
+ "epoch": 93.0,
1307
+ "eval_loss": 2.7062716484069824,
1308
+ "eval_runtime": 5.5879,
1309
+ "eval_samples_per_second": 7.158,
1310
+ "eval_steps_per_second": 0.895,
1311
+ "step": 10695
1312
+ },
1313
+ {
1314
+ "epoch": 94.0,
1315
+ "learning_rate": 6.295652173913045e-07,
1316
+ "loss": 2.6286,
1317
+ "step": 10810
1318
+ },
1319
+ {
1320
+ "epoch": 94.0,
1321
+ "eval_loss": 2.779799222946167,
1322
+ "eval_runtime": 5.6552,
1323
+ "eval_samples_per_second": 7.073,
1324
+ "eval_steps_per_second": 0.884,
1325
+ "step": 10810
1326
+ },
1327
+ {
1328
+ "epoch": 95.0,
1329
+ "learning_rate": 5.295652173913045e-07,
1330
+ "loss": 2.6043,
1331
+ "step": 10925
1332
+ },
1333
+ {
1334
+ "epoch": 95.0,
1335
+ "eval_loss": 2.7712299823760986,
1336
+ "eval_runtime": 5.6131,
1337
+ "eval_samples_per_second": 7.126,
1338
+ "eval_steps_per_second": 0.891,
1339
+ "step": 10925
1340
+ },
1341
+ {
1342
+ "epoch": 96.0,
1343
+ "learning_rate": 4.295652173913044e-07,
1344
+ "loss": 2.6188,
1345
+ "step": 11040
1346
+ },
1347
+ {
1348
+ "epoch": 96.0,
1349
+ "eval_loss": 2.7614357471466064,
1350
+ "eval_runtime": 5.567,
1351
+ "eval_samples_per_second": 7.185,
1352
+ "eval_steps_per_second": 0.898,
1353
+ "step": 11040
1354
+ },
1355
+ {
1356
+ "epoch": 97.0,
1357
+ "learning_rate": 3.2956521739130436e-07,
1358
+ "loss": 2.6028,
1359
+ "step": 11155
1360
+ },
1361
+ {
1362
+ "epoch": 97.0,
1363
+ "eval_loss": 2.740495204925537,
1364
+ "eval_runtime": 5.621,
1365
+ "eval_samples_per_second": 7.116,
1366
+ "eval_steps_per_second": 0.89,
1367
+ "step": 11155
1368
+ },
1369
+ {
1370
+ "epoch": 98.0,
1371
+ "learning_rate": 2.2956521739130436e-07,
1372
+ "loss": 2.621,
1373
+ "step": 11270
1374
+ },
1375
+ {
1376
+ "epoch": 98.0,
1377
+ "eval_loss": 2.741487503051758,
1378
+ "eval_runtime": 5.6116,
1379
+ "eval_samples_per_second": 7.128,
1380
+ "eval_steps_per_second": 0.891,
1381
+ "step": 11270
1382
+ },
1383
+ {
1384
+ "epoch": 99.0,
1385
+ "learning_rate": 1.2956521739130434e-07,
1386
+ "loss": 2.61,
1387
+ "step": 11385
1388
+ },
1389
+ {
1390
+ "epoch": 99.0,
1391
+ "eval_loss": 2.741542100906372,
1392
+ "eval_runtime": 5.7228,
1393
+ "eval_samples_per_second": 6.99,
1394
+ "eval_steps_per_second": 0.874,
1395
+ "step": 11385
1396
+ },
1397
+ {
1398
+ "epoch": 100.0,
1399
+ "learning_rate": 2.9565217391304353e-08,
1400
+ "loss": 2.6164,
1401
+ "step": 11500
1402
+ },
1403
+ {
1404
+ "epoch": 100.0,
1405
+ "eval_loss": 2.7389094829559326,
1406
+ "eval_runtime": 5.5344,
1407
+ "eval_samples_per_second": 7.228,
1408
+ "eval_steps_per_second": 0.903,
1409
+ "step": 11500
1410
+ },
1411
+ {
1412
+ "epoch": 100.0,
1413
+ "step": 11500,
1414
+ "total_flos": 2.295532415232e+19,
1415
+ "train_loss": 1.6043826771611753,
1416
+ "train_runtime": 5669.8878,
1417
+ "train_samples_per_second": 8.113,
1418
+ "train_steps_per_second": 2.028
1419
  }
1420
  ],
1421
  "logging_steps": 500,
1422
+ "max_steps": 11500,
1423
+ "num_train_epochs": 100,
1424
  "save_steps": 500,
1425
+ "total_flos": 2.295532415232e+19,
1426
  "trial_name": null,
1427
  "trial_params": null
1428
  }