mekbus committed on
Commit
fd4833d
·
verified ·
1 Parent(s): e5d567c

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "position_embedding_type": "absolute",
23
  "problem_type": "single_label_classification",
24
  "torch_dtype": "float32",
25
- "transformers_version": "4.35.2",
26
  "type_vocab_size": 1,
27
  "use_cache": true,
28
  "vocab_size": 50265
 
22
  "position_embedding_type": "absolute",
23
  "problem_type": "single_label_classification",
24
  "torch_dtype": "float32",
25
+ "transformers_version": "4.30.2",
26
  "type_vocab_size": 1,
27
  "use_cache": true,
28
  "vocab_size": 50265
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2347bb36c75e36892183c95dce50a6b5615a6f578a2f69dc9fa44d40e4f2d99
3
- size 118173498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c3c8d0e76911edabda4ab250741c870e4e952b86246bb618567241576ff3478
3
+ size 118164566
model.safetensors → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76be76a15eb7729f4b00b4f1d419d99293a80090099dc1e5fabd15af50a9baac
3
- size 498612824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b978135ac2209a82b6de6ae0226ab15107b18f0128625287f32c1f7bf6d1ac7
3
+ size 498662578
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b92896732603661d06d166837c431b32c69a6df4f83079e8f3cf7091b113ad6a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90335df6b4672643597b54508bc69e775017b50d0888af17282b7b0c642b3b53
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dcb5d1309ade11adef56b51bf410df7ad6a10fac32cf9fd0b45c0dab8f75032
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac73aa17dd42bf751cbba9d5abff2558729f6cf81e85083ea16fd3c5c49619d2
3
  size 1064
trainer_state.json CHANGED
@@ -1,445 +1,196 @@
1
  {
2
- "best_metric": 0.9870967741935484,
3
- "best_model_checkpoint": "./php_cwe79_codebert_output\\checkpoint-1648",
4
- "epoch": 16.99203640500569,
5
- "eval_steps": 500,
6
- "global_step": 1867,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.46,
13
- "learning_rate": 4.587155963302753e-06,
14
- "loss": 0.7019,
15
  "step": 50
16
  },
17
  {
18
- "epoch": 0.91,
19
- "learning_rate": 9.174311926605506e-06,
20
- "loss": 0.6915,
21
  "step": 100
22
  },
23
  {
24
- "epoch": 0.99,
25
- "eval_accuracy": 0.5532258064516129,
26
- "eval_f1": 0.4822429906542056,
27
- "eval_loss": 0.6805028915405273,
28
- "eval_precision": 0.5733333333333334,
29
- "eval_recall": 0.4161290322580645,
30
- "eval_runtime": 19.9477,
31
- "eval_samples_per_second": 62.162,
32
- "eval_steps_per_second": 3.91,
33
- "step": 109
34
- },
35
- {
36
- "epoch": 1.37,
37
- "learning_rate": 1.3761467889908258e-05,
38
- "loss": 0.6774,
39
  "step": 150
40
  },
41
  {
42
- "epoch": 1.82,
43
- "learning_rate": 1.834862385321101e-05,
44
- "loss": 0.5408,
45
- "step": 200
 
 
 
 
 
 
46
  },
47
  {
48
- "epoch": 1.99,
49
- "eval_accuracy": 0.8362903225806452,
50
- "eval_f1": 0.8407843137254902,
51
- "eval_loss": 0.35471096634864807,
52
- "eval_precision": 0.8183206106870229,
53
- "eval_recall": 0.864516129032258,
54
- "eval_runtime": 20.4696,
55
- "eval_samples_per_second": 60.578,
56
- "eval_steps_per_second": 3.811,
57
- "step": 219
58
  },
59
  {
60
- "epoch": 2.28,
61
- "learning_rate": 1.9673802242609582e-05,
62
- "loss": 0.3461,
63
  "step": 250
64
  },
65
  {
66
- "epoch": 2.73,
67
- "learning_rate": 1.9164118246687054e-05,
68
- "loss": 0.2515,
69
  "step": 300
70
  },
71
  {
72
- "epoch": 2.99,
73
- "eval_accuracy": 0.9185483870967742,
74
- "eval_f1": 0.9144792548687553,
75
- "eval_loss": 0.20694807171821594,
76
- "eval_precision": 0.9625668449197861,
77
- "eval_recall": 0.8709677419354839,
78
- "eval_runtime": 31.9108,
79
- "eval_samples_per_second": 38.858,
80
- "eval_steps_per_second": 2.444,
81
- "step": 329
82
  },
83
  {
84
- "epoch": 3.19,
85
- "learning_rate": 1.865443425076453e-05,
86
- "loss": 0.211,
87
  "step": 350
88
  },
89
  {
90
- "epoch": 3.64,
91
- "learning_rate": 1.8144750254841998e-05,
92
- "loss": 0.1931,
93
  "step": 400
94
  },
95
  {
96
- "epoch": 4.0,
97
- "eval_accuracy": 0.9314516129032258,
98
- "eval_f1": 0.9280270956816258,
99
- "eval_loss": 0.16265422105789185,
100
- "eval_precision": 0.9768270944741533,
101
- "eval_recall": 0.8838709677419355,
102
- "eval_runtime": 36.5458,
103
- "eval_samples_per_second": 33.93,
104
- "eval_steps_per_second": 2.134,
105
- "step": 439
106
- },
107
- {
108
- "epoch": 4.1,
109
- "learning_rate": 1.763506625891947e-05,
110
- "loss": 0.1447,
111
  "step": 450
112
  },
113
  {
114
- "epoch": 4.55,
115
- "learning_rate": 1.7125382262996945e-05,
116
- "loss": 0.1335,
117
- "step": 500
 
 
 
 
 
 
118
  },
119
  {
120
- "epoch": 5.0,
121
- "eval_accuracy": 0.9612903225806452,
122
- "eval_f1": 0.9608482871125612,
123
- "eval_loss": 0.09464450925588608,
124
- "eval_precision": 0.971947194719472,
125
- "eval_recall": 0.95,
126
- "eval_runtime": 35.2367,
127
- "eval_samples_per_second": 35.191,
128
- "eval_steps_per_second": 2.214,
129
- "step": 549
130
  },
131
  {
132
- "epoch": 5.01,
133
- "learning_rate": 1.6625891946992865e-05,
134
- "loss": 0.1333,
135
  "step": 550
136
  },
137
  {
138
- "epoch": 5.46,
139
- "learning_rate": 1.6116207951070337e-05,
140
- "loss": 0.1043,
141
  "step": 600
142
  },
143
  {
144
- "epoch": 5.92,
145
- "learning_rate": 1.560652395514781e-05,
146
- "loss": 0.1057,
 
 
 
 
 
 
 
 
 
 
 
 
147
  "step": 650
148
  },
149
  {
150
- "epoch": 6.0,
151
- "eval_accuracy": 0.9548387096774194,
152
- "eval_f1": 0.9537953795379538,
153
- "eval_loss": 0.09458089619874954,
154
- "eval_precision": 0.9763513513513513,
155
- "eval_recall": 0.932258064516129,
156
- "eval_runtime": 36.0845,
157
- "eval_samples_per_second": 34.364,
158
- "eval_steps_per_second": 2.162,
159
- "step": 659
160
- },
161
- {
162
- "epoch": 6.37,
163
- "learning_rate": 1.5096839959225283e-05,
164
- "loss": 0.1046,
165
  "step": 700
166
  },
167
  {
168
- "epoch": 6.83,
169
- "learning_rate": 1.4587155963302753e-05,
170
- "loss": 0.0887,
171
  "step": 750
172
  },
173
  {
174
- "epoch": 7.0,
175
- "eval_accuracy": 0.9701612903225807,
176
- "eval_f1": 0.9699918896999189,
177
- "eval_loss": 0.07421040534973145,
178
- "eval_precision": 0.9755301794453507,
179
- "eval_recall": 0.964516129032258,
180
- "eval_runtime": 37.4354,
181
- "eval_samples_per_second": 33.124,
182
- "eval_steps_per_second": 2.084,
183
- "step": 769
184
- },
185
- {
186
- "epoch": 7.28,
187
- "learning_rate": 1.4077471967380225e-05,
188
- "loss": 0.0763,
189
  "step": 800
190
  },
191
  {
192
- "epoch": 7.74,
193
- "learning_rate": 1.3567787971457698e-05,
194
- "loss": 0.0916,
195
  "step": 850
196
  },
197
  {
198
- "epoch": 8.0,
199
- "eval_accuracy": 0.9733870967741935,
200
- "eval_f1": 0.9734085414987913,
201
- "eval_loss": 0.05636864900588989,
202
- "eval_precision": 0.9726247987117552,
203
- "eval_recall": 0.9741935483870968,
204
- "eval_runtime": 37.5471,
205
- "eval_samples_per_second": 33.025,
206
- "eval_steps_per_second": 2.077,
207
- "step": 879
208
- },
209
- {
210
- "epoch": 8.19,
211
- "learning_rate": 1.305810397553517e-05,
212
- "loss": 0.0858,
213
  "step": 900
214
  },
215
  {
216
- "epoch": 8.65,
217
- "learning_rate": 1.254841997961264e-05,
218
- "loss": 0.0716,
219
- "step": 950
220
- },
221
- {
222
- "epoch": 8.99,
223
- "eval_accuracy": 0.9790322580645161,
224
- "eval_f1": 0.9790996784565916,
225
- "eval_loss": 0.049785859882831573,
226
- "eval_precision": 0.9759615384615384,
227
- "eval_recall": 0.9822580645161291,
228
- "eval_runtime": 39.3971,
229
- "eval_samples_per_second": 31.474,
230
- "eval_steps_per_second": 1.98,
231
- "step": 988
232
- },
233
- {
234
- "epoch": 9.1,
235
- "learning_rate": 1.2038735983690114e-05,
236
- "loss": 0.0686,
237
- "step": 1000
238
- },
239
- {
240
- "epoch": 9.56,
241
- "learning_rate": 1.1529051987767585e-05,
242
- "loss": 0.0673,
243
- "step": 1050
244
- },
245
- {
246
- "epoch": 9.99,
247
- "eval_accuracy": 0.9798387096774194,
248
- "eval_f1": 0.9799518845228549,
249
- "eval_loss": 0.046214405447244644,
250
- "eval_precision": 0.9744816586921851,
251
- "eval_recall": 0.9854838709677419,
252
- "eval_runtime": 29.0256,
253
- "eval_samples_per_second": 42.721,
254
- "eval_steps_per_second": 2.687,
255
- "step": 1098
256
- },
257
- {
258
- "epoch": 10.01,
259
- "learning_rate": 1.1019367991845057e-05,
260
- "loss": 0.0577,
261
- "step": 1100
262
- },
263
- {
264
- "epoch": 10.47,
265
- "learning_rate": 1.0509683995922529e-05,
266
- "loss": 0.0533,
267
- "step": 1150
268
- },
269
- {
270
- "epoch": 10.92,
271
- "learning_rate": 1e-05,
272
- "loss": 0.0569,
273
- "step": 1200
274
- },
275
- {
276
- "epoch": 10.99,
277
- "eval_accuracy": 0.9846774193548387,
278
- "eval_f1": 0.9847144006436042,
279
- "eval_loss": 0.039788804948329926,
280
- "eval_precision": 0.9823434991974318,
281
- "eval_recall": 0.9870967741935484,
282
- "eval_runtime": 29.3619,
283
- "eval_samples_per_second": 42.232,
284
- "eval_steps_per_second": 2.657,
285
- "step": 1208
286
- },
287
- {
288
- "epoch": 11.38,
289
- "learning_rate": 9.490316004077473e-06,
290
- "loss": 0.0497,
291
- "step": 1250
292
- },
293
- {
294
- "epoch": 11.83,
295
- "learning_rate": 8.980632008154944e-06,
296
- "loss": 0.0532,
297
- "step": 1300
298
- },
299
- {
300
- "epoch": 12.0,
301
- "eval_accuracy": 0.9854838709677419,
302
- "eval_f1": 0.9854604200323102,
303
- "eval_loss": 0.034001607447862625,
304
- "eval_precision": 0.9870550161812298,
305
- "eval_recall": 0.9838709677419355,
306
- "eval_runtime": 35.9784,
307
- "eval_samples_per_second": 34.465,
308
- "eval_steps_per_second": 2.168,
309
- "step": 1318
310
- },
311
- {
312
- "epoch": 12.29,
313
- "learning_rate": 8.470948012232416e-06,
314
- "loss": 0.0517,
315
- "step": 1350
316
- },
317
- {
318
- "epoch": 12.74,
319
- "learning_rate": 7.961264016309888e-06,
320
- "loss": 0.0382,
321
- "step": 1400
322
- },
323
- {
324
- "epoch": 13.0,
325
- "eval_accuracy": 0.9854838709677419,
326
- "eval_f1": 0.9855072463768116,
327
- "eval_loss": 0.02894522435963154,
328
- "eval_precision": 0.9839228295819936,
329
- "eval_recall": 0.9870967741935484,
330
- "eval_runtime": 37.3704,
331
- "eval_samples_per_second": 33.181,
332
- "eval_steps_per_second": 2.087,
333
- "step": 1428
334
- },
335
- {
336
- "epoch": 13.2,
337
- "learning_rate": 7.45158002038736e-06,
338
- "loss": 0.0333,
339
- "step": 1450
340
- },
341
- {
342
- "epoch": 13.65,
343
- "learning_rate": 6.941896024464833e-06,
344
- "loss": 0.0338,
345
- "step": 1500
346
- },
347
- {
348
- "epoch": 14.0,
349
- "eval_accuracy": 0.9854838709677419,
350
- "eval_f1": 0.9854838709677419,
351
- "eval_loss": 0.028582246974110603,
352
- "eval_precision": 0.9854838709677419,
353
- "eval_recall": 0.9854838709677419,
354
- "eval_runtime": 34.3893,
355
- "eval_samples_per_second": 36.058,
356
- "eval_steps_per_second": 2.268,
357
- "step": 1538
358
- },
359
- {
360
- "epoch": 14.11,
361
- "learning_rate": 6.432212028542304e-06,
362
- "loss": 0.0365,
363
- "step": 1550
364
- },
365
- {
366
- "epoch": 14.56,
367
- "learning_rate": 5.922528032619776e-06,
368
- "loss": 0.0332,
369
- "step": 1600
370
- },
371
- {
372
- "epoch": 15.0,
373
- "eval_accuracy": 0.9870967741935484,
374
- "eval_f1": 0.9870967741935484,
375
- "eval_loss": 0.02915828675031662,
376
- "eval_precision": 0.9870967741935484,
377
- "eval_recall": 0.9870967741935484,
378
- "eval_runtime": 24.73,
379
- "eval_samples_per_second": 50.141,
380
- "eval_steps_per_second": 3.154,
381
- "step": 1648
382
- },
383
- {
384
- "epoch": 15.02,
385
- "learning_rate": 5.412844036697248e-06,
386
- "loss": 0.0329,
387
- "step": 1650
388
- },
389
- {
390
- "epoch": 15.47,
391
- "learning_rate": 4.90316004077472e-06,
392
- "loss": 0.029,
393
- "step": 1700
394
- },
395
- {
396
- "epoch": 15.93,
397
- "learning_rate": 4.393476044852192e-06,
398
- "loss": 0.0271,
399
- "step": 1750
400
- },
401
- {
402
- "epoch": 16.0,
403
- "eval_accuracy": 0.9854838709677419,
404
- "eval_f1": 0.9855072463768116,
405
- "eval_loss": 0.02873540110886097,
406
- "eval_precision": 0.9839228295819936,
407
- "eval_recall": 0.9870967741935484,
408
- "eval_runtime": 24.9886,
409
- "eval_samples_per_second": 49.623,
410
- "eval_steps_per_second": 3.121,
411
- "step": 1758
412
- },
413
- {
414
- "epoch": 16.38,
415
- "learning_rate": 3.8837920489296635e-06,
416
- "loss": 0.0233,
417
- "step": 1800
418
- },
419
- {
420
- "epoch": 16.84,
421
- "learning_rate": 3.3741080530071357e-06,
422
- "loss": 0.0264,
423
- "step": 1850
424
- },
425
- {
426
- "epoch": 16.99,
427
- "eval_accuracy": 0.9854838709677419,
428
- "eval_f1": 0.9854838709677419,
429
- "eval_loss": 0.026798125356435776,
430
- "eval_precision": 0.9854838709677419,
431
- "eval_recall": 0.9854838709677419,
432
- "eval_runtime": 35.295,
433
- "eval_samples_per_second": 35.132,
434
- "eval_steps_per_second": 2.21,
435
- "step": 1867
436
  }
437
  ],
438
- "logging_steps": 50,
439
- "max_steps": 2180,
440
  "num_train_epochs": 20,
441
- "save_steps": 500,
442
- "total_flos": 3.1422037786368e+16,
443
  "trial_name": null,
444
  "trial_params": null
445
  }
 
1
  {
2
+ "best_metric": 0.9907529722589168,
3
+ "best_model_checkpoint": "./php_cwe79_codebert_output\\checkpoint-627",
4
+ "epoch": 5.996810207336523,
5
+ "global_step": 940,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.32,
12
+ "learning_rate": 3.205128205128206e-06,
13
+ "loss": 0.6994,
14
  "step": 50
15
  },
16
  {
17
+ "epoch": 0.64,
18
+ "learning_rate": 6.410256410256412e-06,
19
+ "loss": 0.6957,
20
  "step": 100
21
  },
22
  {
23
+ "epoch": 0.96,
24
+ "learning_rate": 9.615384615384616e-06,
25
+ "loss": 0.6873,
 
 
 
 
 
 
 
 
 
 
 
 
26
  "step": 150
27
  },
28
  {
29
+ "epoch": 1.0,
30
+ "eval_accuracy": 0.6689265536723163,
31
+ "eval_f1": 0.6689265536723163,
32
+ "eval_loss": 0.6734532713890076,
33
+ "eval_precision": 0.581532416502947,
34
+ "eval_recall": 0.7872340425531915,
35
+ "eval_runtime": 11.4008,
36
+ "eval_samples_per_second": 77.626,
37
+ "eval_steps_per_second": 4.912,
38
+ "step": 156
39
  },
40
  {
41
+ "epoch": 1.28,
42
+ "learning_rate": 1.2820512820512823e-05,
43
+ "loss": 0.6729,
44
+ "step": 200
 
 
 
 
 
 
45
  },
46
  {
47
+ "epoch": 1.59,
48
+ "learning_rate": 1.602564102564103e-05,
49
+ "loss": 0.5098,
50
  "step": 250
51
  },
52
  {
53
+ "epoch": 1.91,
54
+ "learning_rate": 1.923076923076923e-05,
55
+ "loss": 0.3065,
56
  "step": 300
57
  },
58
  {
59
+ "epoch": 2.0,
60
+ "eval_accuracy": 0.9299435028248587,
61
+ "eval_f1": 0.9146005509641874,
62
+ "eval_loss": 0.16874603927135468,
63
+ "eval_precision": 0.9485714285714286,
64
+ "eval_recall": 0.8829787234042553,
65
+ "eval_runtime": 11.7337,
66
+ "eval_samples_per_second": 75.424,
67
+ "eval_steps_per_second": 4.773,
68
+ "step": 313
69
  },
70
  {
71
+ "epoch": 2.23,
72
+ "learning_rate": 1.972934472934473e-05,
73
+ "loss": 0.2084,
74
  "step": 350
75
  },
76
  {
77
+ "epoch": 2.55,
78
+ "learning_rate": 1.9373219373219374e-05,
79
+ "loss": 0.1386,
80
  "step": 400
81
  },
82
  {
83
+ "epoch": 2.87,
84
+ "learning_rate": 1.9017094017094017e-05,
85
+ "loss": 0.0805,
 
 
 
 
 
 
 
 
 
 
 
 
86
  "step": 450
87
  },
88
  {
89
+ "epoch": 3.0,
90
+ "eval_accuracy": 0.9796610169491525,
91
+ "eval_f1": 0.9757412398921833,
92
+ "eval_loss": 0.046507738530635834,
93
+ "eval_precision": 0.9890710382513661,
94
+ "eval_recall": 0.9627659574468085,
95
+ "eval_runtime": 11.7373,
96
+ "eval_samples_per_second": 75.401,
97
+ "eval_steps_per_second": 4.771,
98
+ "step": 470
99
  },
100
  {
101
+ "epoch": 3.19,
102
+ "learning_rate": 1.8660968660968664e-05,
103
+ "loss": 0.0677,
104
+ "step": 500
 
 
 
 
 
 
105
  },
106
  {
107
+ "epoch": 3.51,
108
+ "learning_rate": 1.8304843304843308e-05,
109
+ "loss": 0.0628,
110
  "step": 550
111
  },
112
  {
113
+ "epoch": 3.83,
114
+ "learning_rate": 1.794871794871795e-05,
115
+ "loss": 0.0359,
116
  "step": 600
117
  },
118
  {
119
+ "epoch": 4.0,
120
+ "eval_accuracy": 0.992090395480226,
121
+ "eval_f1": 0.9907529722589168,
122
+ "eval_loss": 0.03131083399057388,
123
+ "eval_precision": 0.984251968503937,
124
+ "eval_recall": 0.9973404255319149,
125
+ "eval_runtime": 11.9166,
126
+ "eval_samples_per_second": 74.266,
127
+ "eval_steps_per_second": 4.699,
128
+ "step": 627
129
+ },
130
+ {
131
+ "epoch": 4.15,
132
+ "learning_rate": 1.7599715099715102e-05,
133
+ "loss": 0.0412,
134
  "step": 650
135
  },
136
  {
137
+ "epoch": 4.47,
138
+ "learning_rate": 1.7243589743589745e-05,
139
+ "loss": 0.0457,
 
 
 
 
 
 
 
 
 
 
 
 
140
  "step": 700
141
  },
142
  {
143
+ "epoch": 4.78,
144
+ "learning_rate": 1.688746438746439e-05,
145
+ "loss": 0.0316,
146
  "step": 750
147
  },
148
  {
149
+ "epoch": 5.0,
150
+ "eval_accuracy": 0.9909604519774011,
151
+ "eval_f1": 0.9894736842105263,
152
+ "eval_loss": 0.02798781916499138,
153
+ "eval_precision": 0.9791666666666666,
154
+ "eval_recall": 1.0,
155
+ "eval_runtime": 11.9117,
156
+ "eval_samples_per_second": 74.297,
157
+ "eval_steps_per_second": 4.701,
158
+ "step": 783
159
+ },
160
+ {
161
+ "epoch": 5.1,
162
+ "learning_rate": 1.6531339031339032e-05,
163
+ "loss": 0.0355,
164
  "step": 800
165
  },
166
  {
167
+ "epoch": 5.42,
168
+ "learning_rate": 1.6175213675213675e-05,
169
+ "loss": 0.0285,
170
  "step": 850
171
  },
172
  {
173
+ "epoch": 5.74,
174
+ "learning_rate": 1.581908831908832e-05,
175
+ "loss": 0.0285,
 
 
 
 
 
 
 
 
 
 
 
 
176
  "step": 900
177
  },
178
  {
179
+ "epoch": 6.0,
180
+ "eval_accuracy": 0.9909604519774011,
181
+ "eval_f1": 0.9894179894179894,
182
+ "eval_loss": 0.027588626369833946,
183
+ "eval_precision": 0.9842105263157894,
184
+ "eval_recall": 0.9946808510638298,
185
+ "eval_runtime": 12.0204,
186
+ "eval_samples_per_second": 73.625,
187
+ "eval_steps_per_second": 4.659,
188
+ "step": 940
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ],
191
+ "max_steps": 3120,
 
192
  "num_train_epochs": 20,
193
+ "total_flos": 7915432989450240.0,
 
194
  "trial_name": null,
195
  "trial_params": null
196
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a57b4d693922248abedb2c0d9ab60786de92600c7d03e067f28731e3896c892
3
- size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7530f4a49a3b0a74f4886f6ce692c31c3798b5bcd57d7107236dc628db13266
3
+ size 4408