Abhilashvj commited on
Commit
7583e89
1 Parent(s): 88794bb

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-18",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 2,
8
+ 2,
9
+ 2,
10
+ 2
11
+ ],
12
+ "downsample_in_first_stage": false,
13
+ "embedding_size": 64,
14
+ "hidden_act": "relu",
15
+ "hidden_sizes": [
16
+ 64,
17
+ 128,
18
+ 256,
19
+ 512
20
+ ],
21
+ "id2label": {
22
+ "0": "forum",
23
+ "1": "general",
24
+ "2": "marketplace"
25
+ },
26
+ "label2id": {
27
+ "forum": "0",
28
+ "general": "1",
29
+ "marketplace": "2"
30
+ },
31
+ "layer_type": "basic",
32
+ "model_type": "resnet",
33
+ "num_channels": 3,
34
+ "out_features": [
35
+ "stage4"
36
+ ],
37
+ "out_indices": [
38
+ 4
39
+ ],
40
+ "problem_type": "single_label_classification",
41
+ "stage_names": [
42
+ "stem",
43
+ "stage1",
44
+ "stage2",
45
+ "stage3",
46
+ "stage4"
47
+ ],
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.29.2"
50
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2a231ff7c6f6c2c9ec3904e67b384be47e20f1733256b888dc3fb39983ece4
3
+ size 89460869
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32c9525a35996799fe135773d47a535c9260963336836cddaef648f9a00ea46d
3
+ size 44793653
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7771edaf10b42494826c221f91314e438076cff6958d5a6e4f4627e3b6ae6d37
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf093da31c7733407244481c89a6115a0bdc1e0fc43240dbdf23373234ae61e
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7777777777777778,
3
+ "best_model_checkpoint": "CIRCL_website_classifier\\checkpoint-658",
4
+ "epoch": 15.0,
5
+ "global_step": 705,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.21,
12
+ "learning_rate": 7.042253521126762e-06,
13
+ "loss": 0.6238,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.43,
18
+ "learning_rate": 1.4084507042253523e-05,
19
+ "loss": 0.5542,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.64,
24
+ "learning_rate": 2.112676056338028e-05,
25
+ "loss": 0.4952,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.85,
30
+ "learning_rate": 2.8169014084507046e-05,
31
+ "loss": 0.5278,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.0,
36
+ "eval_accuracy": 0.7160493827160493,
37
+ "eval_loss": 0.7332170009613037,
38
+ "eval_runtime": 3.93,
39
+ "eval_samples_per_second": 20.611,
40
+ "eval_steps_per_second": 1.527,
41
+ "step": 47
42
+ },
43
+ {
44
+ "epoch": 1.06,
45
+ "learning_rate": 3.5211267605633805e-05,
46
+ "loss": 0.4947,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 1.28,
51
+ "learning_rate": 4.225352112676056e-05,
52
+ "loss": 0.4824,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 1.49,
57
+ "learning_rate": 4.929577464788733e-05,
58
+ "loss": 0.4869,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 1.7,
63
+ "learning_rate": 4.929022082018927e-05,
64
+ "loss": 0.5236,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 1.91,
69
+ "learning_rate": 4.850157728706625e-05,
70
+ "loss": 0.473,
71
+ "step": 90
72
+ },
73
+ {
74
+ "epoch": 2.0,
75
+ "eval_accuracy": 0.691358024691358,
76
+ "eval_loss": 0.7856547832489014,
77
+ "eval_runtime": 4.0912,
78
+ "eval_samples_per_second": 19.799,
79
+ "eval_steps_per_second": 1.467,
80
+ "step": 94
81
+ },
82
+ {
83
+ "epoch": 2.13,
84
+ "learning_rate": 4.771293375394322e-05,
85
+ "loss": 0.4583,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 2.34,
90
+ "learning_rate": 4.6924290220820195e-05,
91
+ "loss": 0.5197,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 2.55,
96
+ "learning_rate": 4.6135646687697165e-05,
97
+ "loss": 0.4843,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 2.77,
102
+ "learning_rate": 4.5347003154574134e-05,
103
+ "loss": 0.519,
104
+ "step": 130
105
+ },
106
+ {
107
+ "epoch": 2.98,
108
+ "learning_rate": 4.4558359621451104e-05,
109
+ "loss": 0.5199,
110
+ "step": 140
111
+ },
112
+ {
113
+ "epoch": 3.0,
114
+ "eval_accuracy": 0.7037037037037037,
115
+ "eval_loss": 0.6900804042816162,
116
+ "eval_runtime": 4.0734,
117
+ "eval_samples_per_second": 19.885,
118
+ "eval_steps_per_second": 1.473,
119
+ "step": 141
120
+ },
121
+ {
122
+ "epoch": 3.19,
123
+ "learning_rate": 4.376971608832808e-05,
124
+ "loss": 0.489,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 3.4,
129
+ "learning_rate": 4.298107255520505e-05,
130
+ "loss": 0.481,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 3.62,
135
+ "learning_rate": 4.219242902208202e-05,
136
+ "loss": 0.5719,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 3.83,
141
+ "learning_rate": 4.140378548895899e-05,
142
+ "loss": 0.578,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 4.0,
147
+ "eval_accuracy": 0.7160493827160493,
148
+ "eval_loss": 0.6246393918991089,
149
+ "eval_runtime": 4.3226,
150
+ "eval_samples_per_second": 18.739,
151
+ "eval_steps_per_second": 1.388,
152
+ "step": 188
153
+ },
154
+ {
155
+ "epoch": 4.04,
156
+ "learning_rate": 4.0615141955835965e-05,
157
+ "loss": 0.6731,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 4.26,
162
+ "learning_rate": 3.982649842271294e-05,
163
+ "loss": 0.6082,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 4.47,
168
+ "learning_rate": 3.903785488958991e-05,
169
+ "loss": 0.628,
170
+ "step": 210
171
+ },
172
+ {
173
+ "epoch": 4.68,
174
+ "learning_rate": 3.824921135646688e-05,
175
+ "loss": 0.6112,
176
+ "step": 220
177
+ },
178
+ {
179
+ "epoch": 4.89,
180
+ "learning_rate": 3.746056782334385e-05,
181
+ "loss": 0.5931,
182
+ "step": 230
183
+ },
184
+ {
185
+ "epoch": 5.0,
186
+ "eval_accuracy": 0.6172839506172839,
187
+ "eval_loss": 0.7297191619873047,
188
+ "eval_runtime": 4.3049,
189
+ "eval_samples_per_second": 18.816,
190
+ "eval_steps_per_second": 1.394,
191
+ "step": 235
192
+ },
193
+ {
194
+ "epoch": 5.11,
195
+ "learning_rate": 3.667192429022082e-05,
196
+ "loss": 0.5872,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 5.32,
201
+ "learning_rate": 3.5883280757097795e-05,
202
+ "loss": 0.5647,
203
+ "step": 250
204
+ },
205
+ {
206
+ "epoch": 5.53,
207
+ "learning_rate": 3.5094637223974765e-05,
208
+ "loss": 0.6125,
209
+ "step": 260
210
+ },
211
+ {
212
+ "epoch": 5.74,
213
+ "learning_rate": 3.4305993690851734e-05,
214
+ "loss": 0.6125,
215
+ "step": 270
216
+ },
217
+ {
218
+ "epoch": 5.96,
219
+ "learning_rate": 3.351735015772871e-05,
220
+ "loss": 0.567,
221
+ "step": 280
222
+ },
223
+ {
224
+ "epoch": 6.0,
225
+ "eval_accuracy": 0.7037037037037037,
226
+ "eval_loss": 0.7199741005897522,
227
+ "eval_runtime": 4.5208,
228
+ "eval_samples_per_second": 17.917,
229
+ "eval_steps_per_second": 1.327,
230
+ "step": 282
231
+ },
232
+ {
233
+ "epoch": 6.17,
234
+ "learning_rate": 3.272870662460568e-05,
235
+ "loss": 0.5294,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 6.38,
240
+ "learning_rate": 3.194006309148265e-05,
241
+ "loss": 0.6032,
242
+ "step": 300
243
+ },
244
+ {
245
+ "epoch": 6.6,
246
+ "learning_rate": 3.1151419558359626e-05,
247
+ "loss": 0.5943,
248
+ "step": 310
249
+ },
250
+ {
251
+ "epoch": 6.81,
252
+ "learning_rate": 3.0362776025236596e-05,
253
+ "loss": 0.5512,
254
+ "step": 320
255
+ },
256
+ {
257
+ "epoch": 7.0,
258
+ "eval_accuracy": 0.7283950617283951,
259
+ "eval_loss": 0.6853248476982117,
260
+ "eval_runtime": 4.5083,
261
+ "eval_samples_per_second": 17.967,
262
+ "eval_steps_per_second": 1.331,
263
+ "step": 329
264
+ },
265
+ {
266
+ "epoch": 7.02,
267
+ "learning_rate": 2.9574132492113565e-05,
268
+ "loss": 0.5808,
269
+ "step": 330
270
+ },
271
+ {
272
+ "epoch": 7.23,
273
+ "learning_rate": 2.8785488958990535e-05,
274
+ "loss": 0.5226,
275
+ "step": 340
276
+ },
277
+ {
278
+ "epoch": 7.45,
279
+ "learning_rate": 2.7996845425867508e-05,
280
+ "loss": 0.5454,
281
+ "step": 350
282
+ },
283
+ {
284
+ "epoch": 7.66,
285
+ "learning_rate": 2.7208201892744477e-05,
286
+ "loss": 0.5752,
287
+ "step": 360
288
+ },
289
+ {
290
+ "epoch": 7.87,
291
+ "learning_rate": 2.6419558359621453e-05,
292
+ "loss": 0.529,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 8.0,
297
+ "eval_accuracy": 0.7037037037037037,
298
+ "eval_loss": 0.7038930058479309,
299
+ "eval_runtime": 4.4325,
300
+ "eval_samples_per_second": 18.274,
301
+ "eval_steps_per_second": 1.354,
302
+ "step": 376
303
+ },
304
+ {
305
+ "epoch": 8.09,
306
+ "learning_rate": 2.5630914826498426e-05,
307
+ "loss": 0.4872,
308
+ "step": 380
309
+ },
310
+ {
311
+ "epoch": 8.3,
312
+ "learning_rate": 2.4842271293375396e-05,
313
+ "loss": 0.548,
314
+ "step": 390
315
+ },
316
+ {
317
+ "epoch": 8.51,
318
+ "learning_rate": 2.405362776025237e-05,
319
+ "loss": 0.5507,
320
+ "step": 400
321
+ },
322
+ {
323
+ "epoch": 8.72,
324
+ "learning_rate": 2.3264984227129338e-05,
325
+ "loss": 0.5179,
326
+ "step": 410
327
+ },
328
+ {
329
+ "epoch": 8.94,
330
+ "learning_rate": 2.2476340694006308e-05,
331
+ "loss": 0.5467,
332
+ "step": 420
333
+ },
334
+ {
335
+ "epoch": 9.0,
336
+ "eval_accuracy": 0.6790123456790124,
337
+ "eval_loss": 0.8216166496276855,
338
+ "eval_runtime": 3.9158,
339
+ "eval_samples_per_second": 20.685,
340
+ "eval_steps_per_second": 1.532,
341
+ "step": 423
342
+ },
343
+ {
344
+ "epoch": 9.15,
345
+ "learning_rate": 2.1687697160883284e-05,
346
+ "loss": 0.5055,
347
+ "step": 430
348
+ },
349
+ {
350
+ "epoch": 9.36,
351
+ "learning_rate": 2.0899053627760254e-05,
352
+ "loss": 0.5338,
353
+ "step": 440
354
+ },
355
+ {
356
+ "epoch": 9.57,
357
+ "learning_rate": 2.0110410094637226e-05,
358
+ "loss": 0.4838,
359
+ "step": 450
360
+ },
361
+ {
362
+ "epoch": 9.79,
363
+ "learning_rate": 1.9321766561514196e-05,
364
+ "loss": 0.5182,
365
+ "step": 460
366
+ },
367
+ {
368
+ "epoch": 10.0,
369
+ "learning_rate": 1.853312302839117e-05,
370
+ "loss": 0.5517,
371
+ "step": 470
372
+ },
373
+ {
374
+ "epoch": 10.0,
375
+ "eval_accuracy": 0.7407407407407407,
376
+ "eval_loss": 0.7132633328437805,
377
+ "eval_runtime": 4.247,
378
+ "eval_samples_per_second": 19.072,
379
+ "eval_steps_per_second": 1.413,
380
+ "step": 470
381
+ },
382
+ {
383
+ "epoch": 10.21,
384
+ "learning_rate": 1.7744479495268142e-05,
385
+ "loss": 0.5663,
386
+ "step": 480
387
+ },
388
+ {
389
+ "epoch": 10.43,
390
+ "learning_rate": 1.695583596214511e-05,
391
+ "loss": 0.527,
392
+ "step": 490
393
+ },
394
+ {
395
+ "epoch": 10.64,
396
+ "learning_rate": 1.616719242902208e-05,
397
+ "loss": 0.4708,
398
+ "step": 500
399
+ },
400
+ {
401
+ "epoch": 10.85,
402
+ "learning_rate": 1.5378548895899054e-05,
403
+ "loss": 0.4917,
404
+ "step": 510
405
+ },
406
+ {
407
+ "epoch": 11.0,
408
+ "eval_accuracy": 0.7283950617283951,
409
+ "eval_loss": 0.7398880124092102,
410
+ "eval_runtime": 5.0919,
411
+ "eval_samples_per_second": 15.908,
412
+ "eval_steps_per_second": 1.178,
413
+ "step": 517
414
+ },
415
+ {
416
+ "epoch": 11.06,
417
+ "learning_rate": 1.4589905362776027e-05,
418
+ "loss": 0.5304,
419
+ "step": 520
420
+ },
421
+ {
422
+ "epoch": 11.28,
423
+ "learning_rate": 1.3801261829652998e-05,
424
+ "loss": 0.537,
425
+ "step": 530
426
+ },
427
+ {
428
+ "epoch": 11.49,
429
+ "learning_rate": 1.3012618296529969e-05,
430
+ "loss": 0.4953,
431
+ "step": 540
432
+ },
433
+ {
434
+ "epoch": 11.7,
435
+ "learning_rate": 1.222397476340694e-05,
436
+ "loss": 0.4726,
437
+ "step": 550
438
+ },
439
+ {
440
+ "epoch": 11.91,
441
+ "learning_rate": 1.1435331230283911e-05,
442
+ "loss": 0.4638,
443
+ "step": 560
444
+ },
445
+ {
446
+ "epoch": 12.0,
447
+ "eval_accuracy": 0.7283950617283951,
448
+ "eval_loss": 0.7797142863273621,
449
+ "eval_runtime": 4.4579,
450
+ "eval_samples_per_second": 18.17,
451
+ "eval_steps_per_second": 1.346,
452
+ "step": 564
453
+ },
454
+ {
455
+ "epoch": 12.13,
456
+ "learning_rate": 1.0646687697160884e-05,
457
+ "loss": 0.4413,
458
+ "step": 570
459
+ },
460
+ {
461
+ "epoch": 12.34,
462
+ "learning_rate": 9.858044164037856e-06,
463
+ "loss": 0.5107,
464
+ "step": 580
465
+ },
466
+ {
467
+ "epoch": 12.55,
468
+ "learning_rate": 9.069400630914827e-06,
469
+ "loss": 0.4521,
470
+ "step": 590
471
+ },
472
+ {
473
+ "epoch": 12.77,
474
+ "learning_rate": 8.280757097791798e-06,
475
+ "loss": 0.5601,
476
+ "step": 600
477
+ },
478
+ {
479
+ "epoch": 12.98,
480
+ "learning_rate": 7.492113564668771e-06,
481
+ "loss": 0.5082,
482
+ "step": 610
483
+ },
484
+ {
485
+ "epoch": 13.0,
486
+ "eval_accuracy": 0.7160493827160493,
487
+ "eval_loss": 0.6203879714012146,
488
+ "eval_runtime": 4.284,
489
+ "eval_samples_per_second": 18.908,
490
+ "eval_steps_per_second": 1.401,
491
+ "step": 611
492
+ },
493
+ {
494
+ "epoch": 13.19,
495
+ "learning_rate": 6.703470031545741e-06,
496
+ "loss": 0.5038,
497
+ "step": 620
498
+ },
499
+ {
500
+ "epoch": 13.4,
501
+ "learning_rate": 5.914826498422713e-06,
502
+ "loss": 0.4934,
503
+ "step": 630
504
+ },
505
+ {
506
+ "epoch": 13.62,
507
+ "learning_rate": 5.1261829652996846e-06,
508
+ "loss": 0.4834,
509
+ "step": 640
510
+ },
511
+ {
512
+ "epoch": 13.83,
513
+ "learning_rate": 4.337539432176657e-06,
514
+ "loss": 0.5358,
515
+ "step": 650
516
+ },
517
+ {
518
+ "epoch": 14.0,
519
+ "eval_accuracy": 0.7777777777777778,
520
+ "eval_loss": 0.626596212387085,
521
+ "eval_runtime": 4.1062,
522
+ "eval_samples_per_second": 19.726,
523
+ "eval_steps_per_second": 1.461,
524
+ "step": 658
525
+ },
526
+ {
527
+ "epoch": 14.04,
528
+ "learning_rate": 3.5488958990536283e-06,
529
+ "loss": 0.5051,
530
+ "step": 660
531
+ },
532
+ {
533
+ "epoch": 14.26,
534
+ "learning_rate": 2.7602523659305995e-06,
535
+ "loss": 0.4712,
536
+ "step": 670
537
+ },
538
+ {
539
+ "epoch": 14.47,
540
+ "learning_rate": 1.971608832807571e-06,
541
+ "loss": 0.4649,
542
+ "step": 680
543
+ },
544
+ {
545
+ "epoch": 14.68,
546
+ "learning_rate": 1.1829652996845425e-06,
547
+ "loss": 0.4604,
548
+ "step": 690
549
+ },
550
+ {
551
+ "epoch": 14.89,
552
+ "learning_rate": 3.943217665615142e-07,
553
+ "loss": 0.5267,
554
+ "step": 700
555
+ },
556
+ {
557
+ "epoch": 15.0,
558
+ "eval_accuracy": 0.6790123456790124,
559
+ "eval_loss": 0.7902321815490723,
560
+ "eval_runtime": 4.7108,
561
+ "eval_samples_per_second": 17.194,
562
+ "eval_steps_per_second": 1.274,
563
+ "step": 705
564
+ }
565
+ ],
566
+ "max_steps": 705,
567
+ "num_train_epochs": 15,
568
+ "total_flos": 4.550617827979776e+17,
569
+ "trial_name": null,
570
+ "trial_params": null
571
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ac4a09e13a84f09f0bd63e0fc613fec1a6eadc7232ccb73bb79a1ca64438e1
3
+ size 3963