duyvu8373 commited on
Commit
8ad1e95
1 Parent(s): 9bf5cf7

Upload 12 files

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +279 -51
  6. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45c7b495dc66a53e343991c9516e0d110bb2c061da4cec98096722e0f80f440
3
  size 903834408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e660274f329f10f14793a1b4455c64873e0b7ef644fa06173d9444de2d943e17
3
  size 903834408
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5861ce6f7a1779157da89556e25eacdf319ceee413c09009fd5a928f39524cc5
3
  size 1807824186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:142d87c4909ea8962442c297d0977d8f5e5199d02f401ee1bfa959f7df68df99
3
  size 1807824186
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16b713008e4eea7860540db04296a11a09d44804872b072ac510cef9f0391ff3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49afdb19bcdeee33ec1305e5c298cf6932adcae5742879c34495ed0577c979e
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d61a714e2d67c35f405d9bbd18b18087fe1e54b1e2e331d14c8409a81e182f79
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835079408709a06b26ee2442588615220d6aabc85594e0cb0cae602a8ab2db5c
3
  size 1064
trainer_state.json CHANGED
@@ -1,81 +1,309 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.4945054945054945,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_bleu": 52.2825,
14
- "eval_gen_len": 17.3764,
15
- "eval_loss": 0.2236185073852539,
16
- "eval_meteor": 0.7138,
17
- "eval_runtime": 20.1266,
18
- "eval_samples_per_second": 26.135,
19
- "eval_steps_per_second": 0.845,
20
- "step": 91
21
  },
22
  {
23
  "epoch": 2.0,
24
- "eval_bleu": 58.1966,
25
- "eval_gen_len": 17.3175,
26
- "eval_loss": 0.1642024964094162,
27
- "eval_meteor": 0.7742,
28
- "eval_runtime": 15.2591,
29
- "eval_samples_per_second": 34.471,
30
- "eval_steps_per_second": 1.114,
31
- "step": 182
32
  },
33
  {
34
  "epoch": 3.0,
35
- "eval_bleu": 62.8589,
36
- "eval_gen_len": 17.6293,
37
- "eval_loss": 0.15181176364421844,
38
- "eval_meteor": 0.7831,
39
- "eval_runtime": 15.3539,
40
- "eval_samples_per_second": 34.258,
41
- "eval_steps_per_second": 1.107,
42
- "step": 273
 
 
 
 
 
 
43
  },
44
  {
45
  "epoch": 4.0,
46
- "eval_bleu": 64.9987,
47
- "eval_gen_len": 17.5798,
48
- "eval_loss": 0.14291420578956604,
49
- "eval_meteor": 0.8085,
50
- "eval_runtime": 15.3569,
51
- "eval_samples_per_second": 34.252,
52
- "eval_steps_per_second": 1.107,
53
- "step": 364
54
  },
55
  {
56
  "epoch": 5.0,
57
- "eval_bleu": 65.7474,
58
- "eval_gen_len": 17.5171,
59
- "eval_loss": 0.13588279485702515,
60
- "eval_meteor": 0.821,
61
- "eval_runtime": 15.5893,
62
- "eval_samples_per_second": 33.741,
63
- "eval_steps_per_second": 1.09,
64
- "step": 455
65
- },
66
- {
67
- "epoch": 5.49,
68
- "learning_rate": 9.010989010989011e-06,
69
- "loss": 0.2938,
70
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
  ],
73
  "logging_steps": 500,
74
- "max_steps": 910,
75
  "num_input_tokens_seen": 0,
76
- "num_train_epochs": 10,
77
  "save_steps": 500,
78
- "total_flos": 1951729069178880.0,
79
  "train_batch_size": 32,
80
  "trial_name": null,
81
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.25581395348837,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_bleu": 59.1033,
14
+ "eval_gen_len": 17.5513,
15
+ "eval_loss": 0.1559952348470688,
16
+ "eval_meteor": 0.7539,
17
+ "eval_runtime": 19.7643,
18
+ "eval_samples_per_second": 26.614,
19
+ "eval_steps_per_second": 0.86,
20
+ "step": 129
21
  },
22
  {
23
  "epoch": 2.0,
24
+ "eval_bleu": 65.6424,
25
+ "eval_gen_len": 17.6027,
26
+ "eval_loss": 0.0991397500038147,
27
+ "eval_meteor": 0.8044,
28
+ "eval_runtime": 14.846,
29
+ "eval_samples_per_second": 35.43,
30
+ "eval_steps_per_second": 1.145,
31
+ "step": 258
32
  },
33
  {
34
  "epoch": 3.0,
35
+ "eval_bleu": 70.6577,
36
+ "eval_gen_len": 17.5779,
37
+ "eval_loss": 0.06296151131391525,
38
+ "eval_meteor": 0.8488,
39
+ "eval_runtime": 14.7963,
40
+ "eval_samples_per_second": 35.549,
41
+ "eval_steps_per_second": 1.149,
42
+ "step": 387
43
+ },
44
+ {
45
+ "epoch": 3.88,
46
+ "learning_rate": 1.689922480620155e-05,
47
+ "loss": 0.3038,
48
+ "step": 500
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_bleu": 71.6744,
53
+ "eval_gen_len": 17.5989,
54
+ "eval_loss": 0.04667546600103378,
55
+ "eval_meteor": 0.8522,
56
+ "eval_runtime": 14.7696,
57
+ "eval_samples_per_second": 35.614,
58
+ "eval_steps_per_second": 1.151,
59
+ "step": 516
60
  },
61
  {
62
  "epoch": 5.0,
63
+ "eval_bleu": 72.4991,
64
+ "eval_gen_len": 17.6749,
65
+ "eval_loss": 0.038296110928058624,
66
+ "eval_meteor": 0.8509,
67
+ "eval_runtime": 14.8269,
68
+ "eval_samples_per_second": 35.476,
69
+ "eval_steps_per_second": 1.147,
70
+ "step": 645
71
+ },
72
+ {
73
+ "epoch": 6.0,
74
+ "eval_bleu": 72.5858,
75
+ "eval_gen_len": 17.6464,
76
+ "eval_loss": 0.03319519758224487,
77
+ "eval_meteor": 0.8548,
78
+ "eval_runtime": 14.6593,
79
+ "eval_samples_per_second": 35.882,
80
+ "eval_steps_per_second": 1.16,
81
+ "step": 774
82
+ },
83
+ {
84
+ "epoch": 7.0,
85
+ "eval_bleu": 74.3526,
86
+ "eval_gen_len": 17.6217,
87
+ "eval_loss": 0.023467697203159332,
88
+ "eval_meteor": 0.8734,
89
+ "eval_runtime": 14.705,
90
+ "eval_samples_per_second": 35.77,
91
+ "eval_steps_per_second": 1.156,
92
+ "step": 903
93
+ },
94
+ {
95
+ "epoch": 7.75,
96
+ "learning_rate": 1.3798449612403102e-05,
97
+ "loss": 0.0643,
98
+ "step": 1000
99
+ },
100
+ {
101
+ "epoch": 8.0,
102
+ "eval_bleu": 74.9962,
103
+ "eval_gen_len": 17.6141,
104
+ "eval_loss": 0.01849055290222168,
105
+ "eval_meteor": 0.8793,
106
+ "eval_runtime": 15.0305,
107
+ "eval_samples_per_second": 34.995,
108
+ "eval_steps_per_second": 1.131,
109
+ "step": 1032
110
+ },
111
+ {
112
+ "epoch": 9.0,
113
+ "eval_bleu": 75.5462,
114
+ "eval_gen_len": 17.6027,
115
+ "eval_loss": 0.014913694001734257,
116
+ "eval_meteor": 0.8862,
117
+ "eval_runtime": 14.5903,
118
+ "eval_samples_per_second": 36.051,
119
+ "eval_steps_per_second": 1.165,
120
+ "step": 1161
121
+ },
122
+ {
123
+ "epoch": 10.0,
124
+ "eval_bleu": 76.3236,
125
+ "eval_gen_len": 17.5798,
126
+ "eval_loss": 0.014180008322000504,
127
+ "eval_meteor": 0.8954,
128
+ "eval_runtime": 14.6398,
129
+ "eval_samples_per_second": 35.929,
130
+ "eval_steps_per_second": 1.161,
131
+ "step": 1290
132
+ },
133
+ {
134
+ "epoch": 11.0,
135
+ "eval_bleu": 75.8326,
136
+ "eval_gen_len": 17.5951,
137
+ "eval_loss": 0.010033702477812767,
138
+ "eval_meteor": 0.8888,
139
+ "eval_runtime": 14.6128,
140
+ "eval_samples_per_second": 35.996,
141
+ "eval_steps_per_second": 1.163,
142
+ "step": 1419
143
+ },
144
+ {
145
+ "epoch": 11.63,
146
+ "learning_rate": 1.0697674418604651e-05,
147
+ "loss": 0.0341,
148
+ "step": 1500
149
+ },
150
+ {
151
+ "epoch": 12.0,
152
+ "eval_bleu": 75.9138,
153
+ "eval_gen_len": 17.5951,
154
+ "eval_loss": 0.009980925358831882,
155
+ "eval_meteor": 0.8891,
156
+ "eval_runtime": 14.7328,
157
+ "eval_samples_per_second": 35.703,
158
+ "eval_steps_per_second": 1.154,
159
+ "step": 1548
160
+ },
161
+ {
162
+ "epoch": 13.0,
163
+ "eval_bleu": 76.0534,
164
+ "eval_gen_len": 17.5913,
165
+ "eval_loss": 0.0070331464521586895,
166
+ "eval_meteor": 0.8901,
167
+ "eval_runtime": 14.643,
168
+ "eval_samples_per_second": 35.922,
169
+ "eval_steps_per_second": 1.161,
170
+ "step": 1677
171
+ },
172
+ {
173
+ "epoch": 14.0,
174
+ "eval_bleu": 76.3943,
175
+ "eval_gen_len": 17.5798,
176
+ "eval_loss": 0.006607058458030224,
177
+ "eval_meteor": 0.8952,
178
+ "eval_runtime": 14.5594,
179
+ "eval_samples_per_second": 36.128,
180
+ "eval_steps_per_second": 1.168,
181
+ "step": 1806
182
+ },
183
+ {
184
+ "epoch": 15.0,
185
+ "eval_bleu": 76.9833,
186
+ "eval_gen_len": 17.5608,
187
+ "eval_loss": 0.003804780077189207,
188
+ "eval_meteor": 0.9027,
189
+ "eval_runtime": 14.9867,
190
+ "eval_samples_per_second": 35.098,
191
+ "eval_steps_per_second": 1.134,
192
+ "step": 1935
193
+ },
194
+ {
195
+ "epoch": 15.5,
196
+ "learning_rate": 7.596899224806202e-06,
197
+ "loss": 0.0191,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 16.0,
202
+ "eval_bleu": 76.9399,
203
+ "eval_gen_len": 17.5608,
204
+ "eval_loss": 0.0028171560261398554,
205
+ "eval_meteor": 0.9025,
206
+ "eval_runtime": 14.5931,
207
+ "eval_samples_per_second": 36.044,
208
+ "eval_steps_per_second": 1.165,
209
+ "step": 2064
210
+ },
211
+ {
212
+ "epoch": 17.0,
213
+ "eval_bleu": 76.5796,
214
+ "eval_gen_len": 17.5722,
215
+ "eval_loss": 0.005369492340832949,
216
+ "eval_meteor": 0.8979,
217
+ "eval_runtime": 14.6939,
218
+ "eval_samples_per_second": 35.797,
219
+ "eval_steps_per_second": 1.157,
220
+ "step": 2193
221
+ },
222
+ {
223
+ "epoch": 18.0,
224
+ "eval_bleu": 77.0507,
225
+ "eval_gen_len": 17.557,
226
+ "eval_loss": 0.002158859744668007,
227
+ "eval_meteor": 0.904,
228
+ "eval_runtime": 14.6859,
229
+ "eval_samples_per_second": 35.817,
230
+ "eval_steps_per_second": 1.158,
231
+ "step": 2322
232
+ },
233
+ {
234
+ "epoch": 19.0,
235
+ "eval_bleu": 76.3097,
236
+ "eval_gen_len": 17.5837,
237
+ "eval_loss": 0.0028479481115937233,
238
+ "eval_meteor": 0.8933,
239
+ "eval_runtime": 14.699,
240
+ "eval_samples_per_second": 35.785,
241
+ "eval_steps_per_second": 1.157,
242
+ "step": 2451
243
+ },
244
+ {
245
+ "epoch": 19.38,
246
+ "learning_rate": 4.4961240310077525e-06,
247
+ "loss": 0.0121,
248
+ "step": 2500
249
+ },
250
+ {
251
+ "epoch": 20.0,
252
+ "eval_bleu": 77.0507,
253
+ "eval_gen_len": 17.557,
254
+ "eval_loss": 0.0012633432634174824,
255
+ "eval_meteor": 0.904,
256
+ "eval_runtime": 14.9177,
257
+ "eval_samples_per_second": 35.26,
258
+ "eval_steps_per_second": 1.14,
259
+ "step": 2580
260
+ },
261
+ {
262
+ "epoch": 21.0,
263
+ "eval_bleu": 76.5168,
264
+ "eval_gen_len": 17.576,
265
+ "eval_loss": 0.001905079698190093,
266
+ "eval_meteor": 0.8965,
267
+ "eval_runtime": 14.6207,
268
+ "eval_samples_per_second": 35.976,
269
+ "eval_steps_per_second": 1.163,
270
+ "step": 2709
271
+ },
272
+ {
273
+ "epoch": 22.0,
274
+ "eval_bleu": 77.2739,
275
+ "eval_gen_len": 17.5494,
276
+ "eval_loss": 0.0008121016435325146,
277
+ "eval_meteor": 0.9072,
278
+ "eval_runtime": 14.6135,
279
+ "eval_samples_per_second": 35.994,
280
+ "eval_steps_per_second": 1.163,
281
+ "step": 2838
282
+ },
283
+ {
284
+ "epoch": 23.0,
285
+ "eval_bleu": 77.1609,
286
+ "eval_gen_len": 17.5532,
287
+ "eval_loss": 0.0007495949394069612,
288
+ "eval_meteor": 0.9056,
289
+ "eval_runtime": 14.5508,
290
+ "eval_samples_per_second": 36.149,
291
+ "eval_steps_per_second": 1.168,
292
+ "step": 2967
293
+ },
294
+ {
295
+ "epoch": 23.26,
296
+ "learning_rate": 1.3953488372093025e-06,
297
+ "loss": 0.0083,
298
+ "step": 3000
299
  }
300
  ],
301
  "logging_steps": 500,
302
+ "max_steps": 3225,
303
  "num_input_tokens_seen": 0,
304
+ "num_train_epochs": 25,
305
  "save_steps": 500,
306
+ "total_flos": 1.172703512236032e+16,
307
  "train_batch_size": 32,
308
  "trial_name": null,
309
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a20b8cc5abc81c0daf886eac613913c24f43a5e3b97901c38838a12454b794
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8cbeb50927f789bd585d35e44d98f7fd2319ad0be400b4d24eb8eb3c59ace0
3
  size 4856