Xmm committed on
Commit
538c5ac
1 Parent(s): 75c29fa

Upload 8 files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/nllb-200-distilled-600M",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
@@ -19,8 +19,10 @@
19
  "encoder_layerdrop": 0,
20
  "encoder_layers": 12,
21
  "eos_token_id": 2,
 
22
  "init_std": 0.02,
23
  "is_encoder_decoder": true,
 
24
  "max_length": 200,
25
  "max_position_embeddings": 1024,
26
  "model_type": "m2m_100",
@@ -29,7 +31,7 @@
29
  "scale_embedding": true,
30
  "tokenizer_class": "NllbTokenizer",
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.33.1",
33
  "use_cache": true,
34
  "vocab_size": 256206
35
  }
 
1
  {
2
+ "_name_or_path": "./models/facebook/nllb-200-distilled-600M",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
 
19
  "encoder_layerdrop": 0,
20
  "encoder_layers": 12,
21
  "eos_token_id": 2,
22
+ "id2label": {},
23
  "init_std": 0.02,
24
  "is_encoder_decoder": true,
25
+ "label2id": {},
26
  "max_length": 200,
27
  "max_position_embeddings": 1024,
28
  "model_type": "m2m_100",
 
31
  "scale_embedding": true,
32
  "tokenizer_class": "NllbTokenizer",
33
  "torch_dtype": "float32",
34
+ "transformers_version": "4.35.2",
35
  "use_cache": true,
36
  "vocab_size": 256206
37
  }
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "eos_token_id": 2,
5
  "max_length": 200,
6
  "pad_token_id": 1,
7
- "transformers_version": "4.33.1"
8
  }
 
4
  "eos_token_id": 2,
5
  "max_length": 200,
6
  "pad_token_id": 1,
7
+ "transformers_version": "4.35.2"
8
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc36bbddb21ad3db45831721e9bff170a067515204655ccbb1916ee83ab6b3e
3
+ size 2460354912
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc0dc7832b72a061bfd12eace3caeaf02fb97f5941e3747397a50d5f590f8180
3
- size 4921022932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de324c10686035063e89b70b9b577400051ab0c8c333daeb459107912afd8f1c
3
+ size 4921022996
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18024cb89a10c3fe74f4e14c4463c5bc84cd730736c1f4db43e2a571f0f8ba6e
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb202a8db0c6740a779ef7a3a34190b17e26d421eaadfe5f2519e6480d625df
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d8bd97df0539aae0394fc2e4314f21cef5a17ea814a0e642ed64ec769f28c57
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01737a678c054e9c843164cdc8b032e6f5dadf1833008bde13bc3bde1d48d31
3
  size 627
trainer_state.json CHANGED
@@ -1,243 +1,361 @@
1
  {
2
- "best_metric": 0.28169530630111694,
3
- "best_model_checkpoint": "./checkpoint-lo/checkpoint-1500",
4
- "epoch": 12.698301245753115,
5
  "eval_steps": 500,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.91,
13
- "learning_rate": 1.8790078644888082e-05,
14
- "loss": 4.5919,
 
 
 
 
 
 
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.91,
19
- "eval_bleu": 32.6249,
20
- "eval_gen_len": 32.6077,
21
- "eval_loss": 1.1729092597961426,
22
- "eval_runtime": 207.0111,
23
- "eval_samples_per_second": 4.889,
24
- "eval_steps_per_second": 1.222,
25
  "step": 500
26
  },
27
  {
28
- "epoch": 1.81,
29
- "learning_rate": 1.7580157289776165e-05,
30
- "loss": 0.3216,
31
  "step": 1000
32
  },
33
  {
34
- "epoch": 1.81,
35
- "eval_bleu": 33.2994,
36
- "eval_gen_len": 32.8577,
37
- "eval_loss": 0.2831147611141205,
38
- "eval_runtime": 231.4494,
39
- "eval_samples_per_second": 4.372,
40
- "eval_steps_per_second": 1.093,
41
  "step": 1000
42
  },
43
  {
44
- "epoch": 2.72,
45
- "learning_rate": 1.637023593466425e-05,
46
- "loss": 0.1325,
47
  "step": 1500
48
  },
49
  {
50
- "epoch": 2.72,
51
- "eval_bleu": 33.7596,
52
- "eval_gen_len": 32.5978,
53
- "eval_loss": 0.28169530630111694,
54
- "eval_runtime": 195.7742,
55
- "eval_samples_per_second": 5.169,
56
- "eval_steps_per_second": 1.292,
57
  "step": 1500
58
  },
59
  {
60
- "epoch": 3.63,
61
- "learning_rate": 1.516031457955233e-05,
62
- "loss": 0.2509,
63
  "step": 2000
64
  },
65
  {
66
- "epoch": 3.63,
67
- "eval_bleu": 8.1225,
68
- "eval_gen_len": 42.5958,
69
- "eval_loss": 0.576555609703064,
70
- "eval_runtime": 240.5328,
71
- "eval_samples_per_second": 4.207,
72
- "eval_steps_per_second": 1.052,
73
  "step": 2000
74
  },
75
  {
76
- "epoch": 4.53,
77
- "learning_rate": 1.3950393224440413e-05,
78
- "loss": 0.2255,
79
  "step": 2500
80
  },
81
  {
82
- "epoch": 4.53,
83
- "eval_bleu": 7.8932,
84
- "eval_gen_len": 43.3468,
85
- "eval_loss": 0.5630556344985962,
86
- "eval_runtime": 238.3063,
87
- "eval_samples_per_second": 4.247,
88
- "eval_steps_per_second": 1.062,
89
  "step": 2500
90
  },
91
  {
92
- "epoch": 5.44,
93
- "learning_rate": 1.2740471869328494e-05,
94
- "loss": 0.2123,
95
  "step": 3000
96
  },
97
  {
98
- "epoch": 5.44,
99
- "eval_bleu": 7.8523,
100
- "eval_gen_len": 43.2866,
101
- "eval_loss": 0.5581173896789551,
102
- "eval_runtime": 238.527,
103
- "eval_samples_per_second": 4.243,
104
- "eval_steps_per_second": 1.061,
105
  "step": 3000
106
  },
107
  {
108
- "epoch": 6.35,
109
- "learning_rate": 1.1530550514216576e-05,
110
- "loss": 0.2061,
111
  "step": 3500
112
  },
113
  {
114
- "epoch": 6.35,
115
- "eval_bleu": 7.8532,
116
- "eval_gen_len": 42.9358,
117
- "eval_loss": 0.555178701877594,
118
- "eval_runtime": 243.8895,
119
- "eval_samples_per_second": 4.149,
120
- "eval_steps_per_second": 1.037,
121
  "step": 3500
122
  },
123
  {
124
- "epoch": 7.25,
125
- "learning_rate": 1.0320629159104658e-05,
126
- "loss": 0.2001,
127
  "step": 4000
128
  },
129
  {
130
- "epoch": 7.25,
131
- "eval_bleu": 7.8643,
132
- "eval_gen_len": 43.7075,
133
- "eval_loss": 0.5538426637649536,
134
- "eval_runtime": 243.9747,
135
- "eval_samples_per_second": 4.148,
136
- "eval_steps_per_second": 1.037,
137
  "step": 4000
138
  },
139
  {
140
- "epoch": 8.16,
141
- "learning_rate": 9.110707803992742e-06,
142
- "loss": 0.1935,
143
  "step": 4500
144
  },
145
  {
146
- "epoch": 8.16,
147
- "eval_bleu": 7.884,
148
- "eval_gen_len": 43.6126,
149
- "eval_loss": 0.5509431958198547,
150
- "eval_runtime": 253.7386,
151
- "eval_samples_per_second": 3.988,
152
- "eval_steps_per_second": 0.997,
153
  "step": 4500
154
  },
155
  {
156
- "epoch": 9.07,
157
- "learning_rate": 7.900786448880823e-06,
158
- "loss": 0.1902,
159
  "step": 5000
160
  },
161
  {
162
- "epoch": 9.07,
163
- "eval_bleu": 7.8327,
164
- "eval_gen_len": 43.2352,
165
- "eval_loss": 0.5488432049751282,
166
- "eval_runtime": 241.3846,
167
- "eval_samples_per_second": 4.192,
168
- "eval_steps_per_second": 1.048,
169
  "step": 5000
170
  },
171
  {
172
- "epoch": 9.97,
173
- "learning_rate": 6.690865093768906e-06,
174
- "loss": 0.1867,
175
  "step": 5500
176
  },
177
  {
178
- "epoch": 9.97,
179
- "eval_bleu": 7.8753,
180
- "eval_gen_len": 43.75,
181
- "eval_loss": 0.5456582903862,
182
- "eval_runtime": 247.5702,
183
- "eval_samples_per_second": 4.088,
184
- "eval_steps_per_second": 1.022,
185
  "step": 5500
186
  },
187
  {
188
- "epoch": 10.89,
189
- "learning_rate": 5.480943738656987e-06,
190
- "loss": 0.1295,
191
  "step": 6000
192
  },
193
  {
194
- "epoch": 10.89,
195
- "eval_bleu": 33.7401,
196
- "eval_gen_len": 32.753,
197
- "eval_loss": 0.2826240658760071,
198
- "eval_runtime": 200.2264,
199
- "eval_samples_per_second": 5.054,
200
- "eval_steps_per_second": 1.264,
201
  "step": 6000
202
  },
203
  {
204
- "epoch": 11.79,
205
- "learning_rate": 4.27102238354507e-06,
206
- "loss": 0.1255,
207
  "step": 6500
208
  },
209
  {
210
- "epoch": 11.79,
211
- "eval_bleu": 33.767,
212
- "eval_gen_len": 32.7213,
213
- "eval_loss": 0.2822073698043823,
214
- "eval_runtime": 206.8407,
215
- "eval_samples_per_second": 4.893,
216
- "eval_steps_per_second": 1.223,
217
  "step": 6500
218
  },
219
  {
220
- "epoch": 12.7,
221
- "learning_rate": 3.061101028433152e-06,
222
- "loss": 0.1246,
223
  "step": 7000
224
  },
225
  {
226
- "epoch": 12.7,
227
- "eval_bleu": 33.7958,
228
- "eval_gen_len": 32.7233,
229
- "eval_loss": 0.2822967767715454,
230
- "eval_runtime": 200.4031,
231
- "eval_samples_per_second": 5.05,
232
- "eval_steps_per_second": 1.262,
233
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 500,
237
- "max_steps": 8265,
238
- "num_train_epochs": 15,
239
  "save_steps": 500,
240
- "total_flos": 4.85372919048831e+17,
241
  "trial_name": null,
242
  "trial_params": null
243
  }
 
1
  {
2
+ "best_metric": 0.26939964294433594,
3
+ "best_model_checkpoint": "./checkpoint/checkpoint-4000",
4
+ "epoch": 4.790692369111441,
5
  "eval_steps": 500,
6
+ "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0,
13
+ "learning_rate": 4e-08,
14
+ "loss": 12.0623,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.23,
19
+ "learning_rate": 1.9960000000000002e-05,
20
+ "loss": 7.4437,
21
  "step": 500
22
  },
23
  {
24
+ "epoch": 0.23,
25
+ "eval_bleu": 33.6574,
26
+ "eval_gen_len": 31.2317,
27
+ "eval_loss": 3.4493963718414307,
28
+ "eval_runtime": 576.3536,
29
+ "eval_samples_per_second": 1.73,
30
+ "eval_steps_per_second": 1.73,
31
  "step": 500
32
  },
33
  {
34
+ "epoch": 0.46,
35
+ "learning_rate": 1.904543280726925e-05,
36
+ "loss": 1.3107,
37
  "step": 1000
38
  },
39
  {
40
+ "epoch": 0.46,
41
+ "eval_bleu": 33.3513,
42
+ "eval_gen_len": 31.7091,
43
+ "eval_loss": 0.2903362810611725,
44
+ "eval_runtime": 476.1637,
45
+ "eval_samples_per_second": 2.094,
46
+ "eval_steps_per_second": 2.094,
47
  "step": 1000
48
  },
49
  {
50
+ "epoch": 0.68,
51
+ "learning_rate": 1.8088952654232427e-05,
52
+ "loss": 0.1582,
53
  "step": 1500
54
  },
55
  {
56
+ "epoch": 0.68,
57
+ "eval_bleu": 33.7009,
58
+ "eval_gen_len": 31.7202,
59
+ "eval_loss": 0.2747056484222412,
60
+ "eval_runtime": 477.4288,
61
+ "eval_samples_per_second": 2.088,
62
+ "eval_steps_per_second": 2.088,
63
  "step": 1500
64
  },
65
  {
66
+ "epoch": 0.91,
67
+ "learning_rate": 1.71324725011956e-05,
68
+ "loss": 0.1477,
69
  "step": 2000
70
  },
71
  {
72
+ "epoch": 0.91,
73
+ "eval_bleu": 34.2274,
74
+ "eval_gen_len": 31.5557,
75
+ "eval_loss": 0.2713315188884735,
76
+ "eval_runtime": 473.8787,
77
+ "eval_samples_per_second": 2.104,
78
+ "eval_steps_per_second": 2.104,
79
  "step": 2000
80
  },
81
  {
82
+ "epoch": 1.14,
83
+ "learning_rate": 1.617599234815878e-05,
84
+ "loss": 0.1413,
85
  "step": 2500
86
  },
87
  {
88
+ "epoch": 1.14,
89
+ "eval_bleu": 34.0664,
90
+ "eval_gen_len": 31.674,
91
+ "eval_loss": 0.2716849446296692,
92
+ "eval_runtime": 477.2223,
93
+ "eval_samples_per_second": 2.089,
94
+ "eval_steps_per_second": 2.089,
95
  "step": 2500
96
  },
97
  {
98
+ "epoch": 1.37,
99
+ "learning_rate": 1.5219512195121952e-05,
100
+ "loss": 0.1355,
101
  "step": 3000
102
  },
103
  {
104
+ "epoch": 1.37,
105
+ "eval_bleu": 34.1168,
106
+ "eval_gen_len": 31.8506,
107
+ "eval_loss": 0.2718922197818756,
108
+ "eval_runtime": 479.68,
109
+ "eval_samples_per_second": 2.078,
110
+ "eval_steps_per_second": 2.078,
111
  "step": 3000
112
  },
113
  {
114
+ "epoch": 1.6,
115
+ "learning_rate": 1.4263032042085128e-05,
116
+ "loss": 0.136,
117
  "step": 3500
118
  },
119
  {
120
+ "epoch": 1.6,
121
+ "eval_bleu": 34.2638,
122
+ "eval_gen_len": 31.7523,
123
+ "eval_loss": 0.270623117685318,
124
+ "eval_runtime": 479.0203,
125
+ "eval_samples_per_second": 2.081,
126
+ "eval_steps_per_second": 2.081,
127
  "step": 3500
128
  },
129
  {
130
+ "epoch": 1.83,
131
+ "learning_rate": 1.3306551889048302e-05,
132
+ "loss": 0.1316,
133
  "step": 4000
134
  },
135
  {
136
+ "epoch": 1.83,
137
+ "eval_bleu": 34.1582,
138
+ "eval_gen_len": 31.6931,
139
+ "eval_loss": 0.26939964294433594,
140
+ "eval_runtime": 477.4424,
141
+ "eval_samples_per_second": 2.088,
142
+ "eval_steps_per_second": 2.088,
143
  "step": 4000
144
  },
145
  {
146
+ "epoch": 2.05,
147
+ "learning_rate": 1.235007173601148e-05,
148
+ "loss": 0.1312,
149
  "step": 4500
150
  },
151
  {
152
+ "epoch": 2.05,
153
+ "eval_bleu": 34.4277,
154
+ "eval_gen_len": 31.662,
155
+ "eval_loss": 0.2704804539680481,
156
+ "eval_runtime": 479.1942,
157
+ "eval_samples_per_second": 2.081,
158
+ "eval_steps_per_second": 2.081,
159
  "step": 4500
160
  },
161
  {
162
+ "epoch": 2.28,
163
+ "learning_rate": 1.1393591582974655e-05,
164
+ "loss": 0.1258,
165
  "step": 5000
166
  },
167
  {
168
+ "epoch": 2.28,
169
+ "eval_bleu": 34.3594,
170
+ "eval_gen_len": 31.651,
171
+ "eval_loss": 0.27046987414360046,
172
+ "eval_runtime": 478.936,
173
+ "eval_samples_per_second": 2.082,
174
+ "eval_steps_per_second": 2.082,
175
  "step": 5000
176
  },
177
  {
178
+ "epoch": 2.51,
179
+ "learning_rate": 1.043711142993783e-05,
180
+ "loss": 0.1271,
181
  "step": 5500
182
  },
183
  {
184
+ "epoch": 2.51,
185
+ "eval_bleu": 34.3412,
186
+ "eval_gen_len": 31.8094,
187
+ "eval_loss": 0.27054643630981445,
188
+ "eval_runtime": 481.7151,
189
+ "eval_samples_per_second": 2.07,
190
+ "eval_steps_per_second": 2.07,
191
  "step": 5500
192
  },
193
  {
194
+ "epoch": 2.74,
195
+ "learning_rate": 9.480631276901005e-06,
196
+ "loss": 0.1249,
197
  "step": 6000
198
  },
199
  {
200
+ "epoch": 2.74,
201
+ "eval_bleu": 34.2387,
202
+ "eval_gen_len": 31.7212,
203
+ "eval_loss": 0.2704330086708069,
204
+ "eval_runtime": 479.9945,
205
+ "eval_samples_per_second": 2.077,
206
+ "eval_steps_per_second": 2.077,
207
  "step": 6000
208
  },
209
  {
210
+ "epoch": 2.97,
211
+ "learning_rate": 8.52415112386418e-06,
212
+ "loss": 0.1245,
213
  "step": 6500
214
  },
215
  {
216
+ "epoch": 2.97,
217
+ "eval_bleu": 34.3033,
218
+ "eval_gen_len": 31.8616,
219
+ "eval_loss": 0.27082785964012146,
220
+ "eval_runtime": 482.2741,
221
+ "eval_samples_per_second": 2.067,
222
+ "eval_steps_per_second": 2.067,
223
  "step": 6500
224
  },
225
  {
226
+ "epoch": 3.19,
227
+ "learning_rate": 7.5676709708273554e-06,
228
+ "loss": 0.1195,
229
  "step": 7000
230
  },
231
  {
232
+ "epoch": 3.19,
233
+ "eval_bleu": 34.2748,
234
+ "eval_gen_len": 31.9017,
235
+ "eval_loss": 0.27176010608673096,
236
+ "eval_runtime": 484.0841,
237
+ "eval_samples_per_second": 2.06,
238
+ "eval_steps_per_second": 2.06,
239
  "step": 7000
240
+ },
241
+ {
242
+ "epoch": 3.42,
243
+ "learning_rate": 6.611190817790531e-06,
244
+ "loss": 0.1198,
245
+ "step": 7500
246
+ },
247
+ {
248
+ "epoch": 3.42,
249
+ "eval_bleu": 34.2897,
250
+ "eval_gen_len": 31.7312,
251
+ "eval_loss": 0.27175214886665344,
252
+ "eval_runtime": 479.5665,
253
+ "eval_samples_per_second": 2.079,
254
+ "eval_steps_per_second": 2.079,
255
+ "step": 7500
256
+ },
257
+ {
258
+ "epoch": 3.65,
259
+ "learning_rate": 5.654710664753707e-06,
260
+ "loss": 0.1209,
261
+ "step": 8000
262
+ },
263
+ {
264
+ "epoch": 3.65,
265
+ "eval_bleu": 34.4446,
266
+ "eval_gen_len": 31.7272,
267
+ "eval_loss": 0.2709992527961731,
268
+ "eval_runtime": 478.4748,
269
+ "eval_samples_per_second": 2.084,
270
+ "eval_steps_per_second": 2.084,
271
+ "step": 8000
272
+ },
273
+ {
274
+ "epoch": 3.88,
275
+ "learning_rate": 4.6982305117168825e-06,
276
+ "loss": 0.1201,
277
+ "step": 8500
278
+ },
279
+ {
280
+ "epoch": 3.88,
281
+ "eval_bleu": 34.3571,
282
+ "eval_gen_len": 31.7432,
283
+ "eval_loss": 0.2712614834308624,
284
+ "eval_runtime": 478.6295,
285
+ "eval_samples_per_second": 2.083,
286
+ "eval_steps_per_second": 2.083,
287
+ "step": 8500
288
+ },
289
+ {
290
+ "epoch": 4.11,
291
+ "learning_rate": 3.7417503586800574e-06,
292
+ "loss": 0.1201,
293
+ "step": 9000
294
+ },
295
+ {
296
+ "epoch": 4.11,
297
+ "eval_bleu": 34.4398,
298
+ "eval_gen_len": 31.7613,
299
+ "eval_loss": 0.27223262190818787,
300
+ "eval_runtime": 478.6225,
301
+ "eval_samples_per_second": 2.083,
302
+ "eval_steps_per_second": 2.083,
303
+ "step": 9000
304
+ },
305
+ {
306
+ "epoch": 4.33,
307
+ "learning_rate": 2.785270205643233e-06,
308
+ "loss": 0.1178,
309
+ "step": 9500
310
+ },
311
+ {
312
+ "epoch": 4.33,
313
+ "eval_bleu": 34.4074,
314
+ "eval_gen_len": 31.7753,
315
+ "eval_loss": 0.27177131175994873,
316
+ "eval_runtime": 479.0762,
317
+ "eval_samples_per_second": 2.081,
318
+ "eval_steps_per_second": 2.081,
319
+ "step": 9500
320
+ },
321
+ {
322
+ "epoch": 4.56,
323
+ "learning_rate": 1.8287900526064088e-06,
324
+ "loss": 0.1181,
325
+ "step": 10000
326
+ },
327
+ {
328
+ "epoch": 4.56,
329
+ "eval_bleu": 34.4628,
330
+ "eval_gen_len": 31.8034,
331
+ "eval_loss": 0.2723881006240845,
332
+ "eval_runtime": 479.5721,
333
+ "eval_samples_per_second": 2.079,
334
+ "eval_steps_per_second": 2.079,
335
+ "step": 10000
336
+ },
337
+ {
338
+ "epoch": 4.79,
339
+ "learning_rate": 8.72309899569584e-07,
340
+ "loss": 0.1169,
341
+ "step": 10500
342
+ },
343
+ {
344
+ "epoch": 4.79,
345
+ "eval_bleu": 34.563,
346
+ "eval_gen_len": 31.7442,
347
+ "eval_loss": 0.2720402777194977,
348
+ "eval_runtime": 478.0313,
349
+ "eval_samples_per_second": 2.086,
350
+ "eval_steps_per_second": 2.086,
351
+ "step": 10500
352
  }
353
  ],
354
  "logging_steps": 500,
355
+ "max_steps": 10955,
356
+ "num_train_epochs": 5,
357
  "save_steps": 500,
358
+ "total_flos": 1.820292017113006e+17,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8dfbd515e9eda14c4f8041c5aebfc0a3a080a820bfc19ea73f21fe2ae0cfb96
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47575ec633d51ca41dc7576e53b8a49ad285943a3852130adba912f886871277
3
+ size 4219