dq158 committed on
Commit
4463c49
1 Parent(s): 4f4143b

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbb86fa29711cf73451d69e586219dc470cb08aab773a082701f4b0fcbdaf4c7
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9f7349e10ab07fbbab35c81b7c16c073894cf48a4823d5a647d9beb10ab1d9
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aac2642f5ba22de9083d70a10604febed07cca571c19c26458004ebbdac32966
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee592dbae667820974bbd15f0c1b0ae8e0fefa72082fd091f319110fafb11fea
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0918241e306ad03e3674267c638e43ba42adbfe63c16a1af28aeac15987292e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cea44ad93fec8c4df820561aaed097d102f17e9be600b9a116f68fe9875b8e4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09b06431adab47657a4d073f438d7ff4af6825fbc0fd1c3f5af310d4b27c3079
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3748f341e085f7325569224502631ef6799be280580e4e62c27664eaf0caf83b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,232 +1,44 @@
1
  {
2
- "best_metric": 1.9787917137145996,
3
  "best_model_checkpoint": "dq158/morbius/checkpoint-790",
4
- "epoch": 8.999051533354411,
5
  "eval_steps": 500,
6
- "global_step": 3558,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "eval_bleu": 1.0,
14
- "eval_brevity_penalty": 1.0,
15
- "eval_length_ratio": 1.0,
16
- "eval_loss": 1.9788066148757935,
17
- "eval_precisions": [
18
- 1.0,
19
- 1.0,
20
- 1.0,
21
- 1.0
22
- ],
23
- "eval_reference_length": 52082,
24
- "eval_runtime": 562.3039,
25
- "eval_samples_per_second": 5.001,
26
- "eval_steps_per_second": 0.626,
27
- "eval_translation_length": 52082,
28
- "step": 395
29
- },
30
- {
31
- "epoch": 1.26,
32
  "learning_rate": 7e-06,
33
- "loss": 2.1674,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 2.0,
38
  "eval_bleu": 1.0,
39
  "eval_brevity_penalty": 1.0,
40
  "eval_length_ratio": 1.0,
41
- "eval_loss": 1.9787917137145996,
42
  "eval_precisions": [
43
  1.0,
44
  1.0,
45
  1.0,
46
  1.0
47
  ],
48
- "eval_reference_length": 52066,
49
- "eval_runtime": 558.9789,
50
- "eval_samples_per_second": 5.031,
51
- "eval_steps_per_second": 0.63,
52
- "eval_translation_length": 52066,
53
  "step": 790
54
- },
55
- {
56
- "epoch": 2.53,
57
- "learning_rate": 6.921443336316902e-06,
58
- "loss": 2.1635,
59
- "step": 1000
60
- },
61
- {
62
- "epoch": 3.0,
63
- "eval_bleu": 1.0,
64
- "eval_brevity_penalty": 1.0,
65
- "eval_length_ratio": 1.0,
66
- "eval_loss": 1.9789754152297974,
67
- "eval_precisions": [
68
- 1.0,
69
- 1.0,
70
- 1.0,
71
- 1.0
72
- ],
73
- "eval_reference_length": 51990,
74
- "eval_runtime": 559.2395,
75
- "eval_samples_per_second": 5.028,
76
- "eval_steps_per_second": 0.629,
77
- "eval_translation_length": 51990,
78
- "step": 1186
79
- },
80
- {
81
- "epoch": 3.79,
82
- "learning_rate": 6.689299716358475e-06,
83
- "loss": 2.1668,
84
- "step": 1500
85
- },
86
- {
87
- "epoch": 4.0,
88
- "eval_bleu": 1.0,
89
- "eval_brevity_penalty": 1.0,
90
- "eval_length_ratio": 1.0,
91
- "eval_loss": 1.9790551662445068,
92
- "eval_precisions": [
93
- 1.0,
94
- 1.0,
95
- 1.0,
96
- 1.0
97
- ],
98
- "eval_reference_length": 52127,
99
- "eval_runtime": 559.2844,
100
- "eval_samples_per_second": 5.028,
101
- "eval_steps_per_second": 0.629,
102
- "eval_translation_length": 52127,
103
- "step": 1581
104
- },
105
- {
106
- "epoch": 5.0,
107
- "eval_bleu": 1.0,
108
- "eval_brevity_penalty": 1.0,
109
- "eval_length_ratio": 1.0,
110
- "eval_loss": 1.9790822267532349,
111
- "eval_precisions": [
112
- 1.0,
113
- 1.0,
114
- 1.0,
115
- 1.0
116
- ],
117
- "eval_reference_length": 52050,
118
- "eval_runtime": 559.5182,
119
- "eval_samples_per_second": 5.026,
120
- "eval_steps_per_second": 0.629,
121
- "eval_translation_length": 52050,
122
- "step": 1976
123
- },
124
- {
125
- "epoch": 5.06,
126
- "learning_rate": 6.313989956284292e-06,
127
- "loss": 2.1554,
128
- "step": 2000
129
- },
130
- {
131
- "epoch": 6.0,
132
- "eval_bleu": 1.0,
133
- "eval_brevity_penalty": 1.0,
134
- "eval_length_ratio": 1.0,
135
- "eval_loss": 1.9794400930404663,
136
- "eval_precisions": [
137
- 1.0,
138
- 1.0,
139
- 1.0,
140
- 1.0
141
- ],
142
- "eval_reference_length": 51951,
143
- "eval_runtime": 559.0901,
144
- "eval_samples_per_second": 5.03,
145
- "eval_steps_per_second": 0.63,
146
- "eval_translation_length": 51951,
147
- "step": 2372
148
- },
149
- {
150
- "epoch": 6.32,
151
- "learning_rate": 5.812361531865285e-06,
152
- "loss": 2.1567,
153
- "step": 2500
154
- },
155
- {
156
- "epoch": 7.0,
157
- "eval_bleu": 1.0,
158
- "eval_brevity_penalty": 1.0,
159
- "eval_length_ratio": 1.0,
160
- "eval_loss": 1.9794704914093018,
161
- "eval_precisions": [
162
- 1.0,
163
- 1.0,
164
- 1.0,
165
- 1.0
166
- ],
167
- "eval_reference_length": 51995,
168
- "eval_runtime": 560.6932,
169
- "eval_samples_per_second": 5.015,
170
- "eval_steps_per_second": 0.628,
171
- "eval_translation_length": 51995,
172
- "step": 2767
173
- },
174
- {
175
- "epoch": 7.59,
176
- "learning_rate": 5.2069323033477215e-06,
177
- "loss": 2.1498,
178
- "step": 3000
179
- },
180
- {
181
- "epoch": 8.0,
182
- "eval_bleu": 1.0,
183
- "eval_brevity_penalty": 1.0,
184
- "eval_length_ratio": 1.0,
185
- "eval_loss": 1.9796102046966553,
186
- "eval_precisions": [
187
- 1.0,
188
- 1.0,
189
- 1.0,
190
- 1.0
191
- ],
192
- "eval_reference_length": 51999,
193
- "eval_runtime": 559.6356,
194
- "eval_samples_per_second": 5.025,
195
- "eval_steps_per_second": 0.629,
196
- "eval_translation_length": 51999,
197
- "step": 3163
198
- },
199
- {
200
- "epoch": 8.85,
201
- "learning_rate": 4.524879699467926e-06,
202
- "loss": 2.156,
203
- "step": 3500
204
- },
205
- {
206
- "epoch": 9.0,
207
- "eval_bleu": 1.0,
208
- "eval_brevity_penalty": 1.0,
209
- "eval_length_ratio": 1.0,
210
- "eval_loss": 1.979423999786377,
211
- "eval_precisions": [
212
- 1.0,
213
- 1.0,
214
- 1.0,
215
- 1.0
216
- ],
217
- "eval_reference_length": 52017,
218
- "eval_runtime": 559.6401,
219
- "eval_samples_per_second": 5.025,
220
- "eval_steps_per_second": 0.629,
221
- "eval_translation_length": 52017,
222
- "step": 3558
223
  }
224
  ],
225
  "logging_steps": 500,
226
- "max_steps": 7900,
227
  "num_train_epochs": 20,
228
  "save_steps": 500,
229
- "total_flos": 1.559377376009257e+17,
230
  "trial_name": null,
231
  "trial_params": null
232
  }
 
1
  {
2
+ "best_metric": 2.020920515060425,
3
  "best_model_checkpoint": "dq158/morbius/checkpoint-790",
4
+ "epoch": 0.9993674889310563,
5
  "eval_steps": 500,
6
+ "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.63,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "learning_rate": 7e-06,
14
+ "loss": 2.15,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 1.0,
19
  "eval_bleu": 1.0,
20
  "eval_brevity_penalty": 1.0,
21
  "eval_length_ratio": 1.0,
22
+ "eval_loss": 2.020920515060425,
23
  "eval_precisions": [
24
  1.0,
25
  1.0,
26
  1.0,
27
  1.0
28
  ],
29
+ "eval_reference_length": 51925,
30
+ "eval_runtime": 586.8865,
31
+ "eval_samples_per_second": 4.79,
32
+ "eval_steps_per_second": 0.6,
33
+ "eval_translation_length": 51925,
34
  "step": 790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 15800,
39
  "num_train_epochs": 20,
40
  "save_steps": 500,
41
+ "total_flos": 1.7318198200762368e+16,
42
  "trial_name": null,
43
  "trial_params": null
44
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c408916aa246237e209139265c17d14ec7bf9a464d1448431deb99f86d00dad9
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7af9a47e418d2a9c2fe4c88063e63e0b297c00b773de28984542d2af72871b77
3
  size 4664