Upload folder using huggingface_hub

#1
by satwikapaul - opened
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.1843641391623373e+18,
4
+ "train_loss": 0.6315841894596815,
5
+ "train_runtime": 473.5366,
6
+ "train_samples_per_second": 32.268,
7
+ "train_steps_per_second": 1.352
8
+ }
checkpoint-600/config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "a",
13
+ "1": "b",
14
+ "10": "j",
15
+ "11": "k",
16
+ "12": "l",
17
+ "13": "m",
18
+ "14": "n",
19
+ "15": "o",
20
+ "16": "p",
21
+ "17": "period",
22
+ "18": "q",
23
+ "19": "question%20mark",
24
+ "2": "c",
25
+ "20": "r",
26
+ "21": "s",
27
+ "22": "t",
28
+ "23": "u",
29
+ "24": "v",
30
+ "25": "w",
31
+ "26": "x",
32
+ "27": "y",
33
+ "28": "z",
34
+ "3": "capital",
35
+ "4": "d",
36
+ "5": "e",
37
+ "6": "f",
38
+ "7": "g",
39
+ "8": "h",
40
+ "9": "i"
41
+ },
42
+ "image_size": 224,
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 3072,
45
+ "label2id": {
46
+ "a": "0",
47
+ "b": "1",
48
+ "c": "2",
49
+ "capital": "3",
50
+ "d": "4",
51
+ "e": "5",
52
+ "f": "6",
53
+ "g": "7",
54
+ "h": "8",
55
+ "i": "9",
56
+ "j": "10",
57
+ "k": "11",
58
+ "l": "12",
59
+ "m": "13",
60
+ "n": "14",
61
+ "o": "15",
62
+ "p": "16",
63
+ "period": "17",
64
+ "q": "18",
65
+ "question%20mark": "19",
66
+ "r": "20",
67
+ "s": "21",
68
+ "t": "22",
69
+ "u": "23",
70
+ "v": "24",
71
+ "w": "25",
72
+ "x": "26",
73
+ "y": "27",
74
+ "z": "28"
75
+ },
76
+ "layer_norm_eps": 1e-12,
77
+ "model_type": "vit",
78
+ "num_attention_heads": 12,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 12,
81
+ "patch_size": 16,
82
+ "problem_type": "single_label_classification",
83
+ "qkv_bias": true,
84
+ "torch_dtype": "float32",
85
+ "transformers_version": "4.30.2"
86
+ }
checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c27f65a8b8e3f973f44c8f7a901b0645535146d1ea086bc93096bbede8ca2b
3
+ size 686684933
checkpoint-600/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-600/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c93165ce5edf95f1c1cfa9c27411c5283f28dcf3e706a4405e9e676deba2d58
3
+ size 343351725
checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4762ca4e860d7e0ff02cdfb0b71f4f951fb1b0017c35580e28b494788bd64933
3
+ size 14575
checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3a408b28df808ac711fe39d83558d7ce6c645059f2732c291ddeeab1a5c7ee
3
+ size 627
checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.16766877472400665,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-560",
4
+ "epoch": 18.75,
5
+ "global_step": 600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.31,
12
+ "learning_rate": 9.84375e-05,
13
+ "loss": 3.3252,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.62,
18
+ "learning_rate": 9.687500000000001e-05,
19
+ "loss": 3.217,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.94,
24
+ "learning_rate": 9.53125e-05,
25
+ "loss": 3.0799,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 1.25,
30
+ "learning_rate": 9.375e-05,
31
+ "loss": 2.8394,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.25,
36
+ "eval_accuracy": 0.4815668202764977,
37
+ "eval_loss": 2.7347912788391113,
38
+ "eval_runtime": 5.6305,
39
+ "eval_samples_per_second": 77.081,
40
+ "eval_steps_per_second": 9.768,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 1.56,
45
+ "learning_rate": 9.21875e-05,
46
+ "loss": 2.598,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 1.88,
51
+ "learning_rate": 9.062500000000001e-05,
52
+ "loss": 2.417,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 2.19,
57
+ "learning_rate": 8.90625e-05,
58
+ "loss": 2.111,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 2.5,
63
+ "learning_rate": 8.75e-05,
64
+ "loss": 1.9174,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 2.5,
69
+ "eval_accuracy": 0.8963133640552995,
70
+ "eval_loss": 1.8127654790878296,
71
+ "eval_runtime": 5.3595,
72
+ "eval_samples_per_second": 80.978,
73
+ "eval_steps_per_second": 10.262,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.81,
78
+ "learning_rate": 8.593750000000001e-05,
79
+ "loss": 1.666,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 3.12,
84
+ "learning_rate": 8.4375e-05,
85
+ "loss": 1.5159,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 3.44,
90
+ "learning_rate": 8.28125e-05,
91
+ "loss": 1.3105,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 3.75,
96
+ "learning_rate": 8.125000000000001e-05,
97
+ "loss": 1.1859,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 3.75,
102
+ "eval_accuracy": 0.9470046082949308,
103
+ "eval_loss": 1.1414676904678345,
104
+ "eval_runtime": 5.3582,
105
+ "eval_samples_per_second": 80.997,
106
+ "eval_steps_per_second": 10.265,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 4.06,
111
+ "learning_rate": 7.96875e-05,
112
+ "loss": 1.0525,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 4.38,
117
+ "learning_rate": 7.8125e-05,
118
+ "loss": 0.9085,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 4.69,
123
+ "learning_rate": 7.65625e-05,
124
+ "loss": 0.8207,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 5.0,
129
+ "learning_rate": 7.500000000000001e-05,
130
+ "loss": 0.7413,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 5.0,
135
+ "eval_accuracy": 0.9654377880184332,
136
+ "eval_loss": 0.7720305919647217,
137
+ "eval_runtime": 5.6625,
138
+ "eval_samples_per_second": 76.645,
139
+ "eval_steps_per_second": 9.713,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 5.31,
144
+ "learning_rate": 7.34375e-05,
145
+ "loss": 0.6334,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 5.62,
150
+ "learning_rate": 7.1875e-05,
151
+ "loss": 0.6058,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 5.94,
156
+ "learning_rate": 7.031250000000001e-05,
157
+ "loss": 0.5319,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 6.25,
162
+ "learning_rate": 6.875e-05,
163
+ "loss": 0.4761,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 6.25,
168
+ "eval_accuracy": 0.9838709677419355,
169
+ "eval_loss": 0.5084273815155029,
170
+ "eval_runtime": 5.4846,
171
+ "eval_samples_per_second": 79.131,
172
+ "eval_steps_per_second": 10.028,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 6.56,
177
+ "learning_rate": 6.71875e-05,
178
+ "loss": 0.4109,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 6.88,
183
+ "learning_rate": 6.562500000000001e-05,
184
+ "loss": 0.3707,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 7.19,
189
+ "learning_rate": 6.40625e-05,
190
+ "loss": 0.3536,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 7.5,
195
+ "learning_rate": 6.25e-05,
196
+ "loss": 0.3108,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 7.5,
201
+ "eval_accuracy": 0.9746543778801844,
202
+ "eval_loss": 0.36055463552474976,
203
+ "eval_runtime": 5.6696,
204
+ "eval_samples_per_second": 76.549,
205
+ "eval_steps_per_second": 9.701,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 7.81,
210
+ "learning_rate": 6.0937500000000004e-05,
211
+ "loss": 0.2789,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 8.12,
216
+ "learning_rate": 5.9375e-05,
217
+ "loss": 0.2905,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 8.44,
222
+ "learning_rate": 5.78125e-05,
223
+ "loss": 0.2462,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 8.75,
228
+ "learning_rate": 5.6250000000000005e-05,
229
+ "loss": 0.251,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 8.75,
234
+ "eval_accuracy": 0.9769585253456221,
235
+ "eval_loss": 0.2958492040634155,
236
+ "eval_runtime": 7.6402,
237
+ "eval_samples_per_second": 56.805,
238
+ "eval_steps_per_second": 7.199,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 9.06,
243
+ "learning_rate": 5.46875e-05,
244
+ "loss": 0.2171,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 9.38,
249
+ "learning_rate": 5.3125000000000004e-05,
250
+ "loss": 0.2064,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 9.69,
255
+ "learning_rate": 5.15625e-05,
256
+ "loss": 0.2116,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 10.0,
261
+ "learning_rate": 5e-05,
262
+ "loss": 0.1896,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 10.0,
267
+ "eval_accuracy": 0.9769585253456221,
268
+ "eval_loss": 0.24788345396518707,
269
+ "eval_runtime": 5.2479,
270
+ "eval_samples_per_second": 82.699,
271
+ "eval_steps_per_second": 10.48,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 10.31,
276
+ "learning_rate": 4.8437500000000005e-05,
277
+ "loss": 0.1783,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 10.62,
282
+ "learning_rate": 4.6875e-05,
283
+ "loss": 0.1859,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 10.94,
288
+ "learning_rate": 4.5312500000000004e-05,
289
+ "loss": 0.1705,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 11.25,
294
+ "learning_rate": 4.375e-05,
295
+ "loss": 0.1659,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 11.25,
300
+ "eval_accuracy": 0.9838709677419355,
301
+ "eval_loss": 0.23752211034297943,
302
+ "eval_runtime": 5.7417,
303
+ "eval_samples_per_second": 75.588,
304
+ "eval_steps_per_second": 9.579,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 11.56,
309
+ "learning_rate": 4.21875e-05,
310
+ "loss": 0.1753,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 11.88,
315
+ "learning_rate": 4.0625000000000005e-05,
316
+ "loss": 0.1509,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 12.19,
321
+ "learning_rate": 3.90625e-05,
322
+ "loss": 0.1447,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 12.5,
327
+ "learning_rate": 3.7500000000000003e-05,
328
+ "loss": 0.1401,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 12.5,
333
+ "eval_accuracy": 0.9792626728110599,
334
+ "eval_loss": 0.20333679020404816,
335
+ "eval_runtime": 5.2814,
336
+ "eval_samples_per_second": 82.175,
337
+ "eval_steps_per_second": 10.414,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 12.81,
342
+ "learning_rate": 3.59375e-05,
343
+ "loss": 0.1508,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 13.12,
348
+ "learning_rate": 3.4375e-05,
349
+ "loss": 0.1364,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 13.44,
354
+ "learning_rate": 3.2812500000000005e-05,
355
+ "loss": 0.1415,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 13.75,
360
+ "learning_rate": 3.125e-05,
361
+ "loss": 0.131,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 13.75,
366
+ "eval_accuracy": 0.9792626728110599,
367
+ "eval_loss": 0.19693893194198608,
368
+ "eval_runtime": 5.0279,
369
+ "eval_samples_per_second": 86.318,
370
+ "eval_steps_per_second": 10.939,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 14.06,
375
+ "learning_rate": 2.96875e-05,
376
+ "loss": 0.1264,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 14.38,
381
+ "learning_rate": 2.8125000000000003e-05,
382
+ "loss": 0.1376,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 14.69,
387
+ "learning_rate": 2.6562500000000002e-05,
388
+ "loss": 0.1211,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 15.0,
393
+ "learning_rate": 2.5e-05,
394
+ "loss": 0.1162,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 15.0,
399
+ "eval_accuracy": 0.9792626728110599,
400
+ "eval_loss": 0.1791529506444931,
401
+ "eval_runtime": 4.2132,
402
+ "eval_samples_per_second": 103.009,
403
+ "eval_steps_per_second": 13.054,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 15.31,
408
+ "learning_rate": 2.34375e-05,
409
+ "loss": 0.1285,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 15.62,
414
+ "learning_rate": 2.1875e-05,
415
+ "loss": 0.1136,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 15.94,
420
+ "learning_rate": 2.0312500000000002e-05,
421
+ "loss": 0.1117,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 16.25,
426
+ "learning_rate": 1.8750000000000002e-05,
427
+ "loss": 0.11,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 16.25,
432
+ "eval_accuracy": 0.9792626728110599,
433
+ "eval_loss": 0.17193575203418732,
434
+ "eval_runtime": 4.2988,
435
+ "eval_samples_per_second": 100.957,
436
+ "eval_steps_per_second": 12.794,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 16.56,
441
+ "learning_rate": 1.71875e-05,
442
+ "loss": 0.1087,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 16.88,
447
+ "learning_rate": 1.5625e-05,
448
+ "loss": 0.1065,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 17.19,
453
+ "learning_rate": 1.4062500000000001e-05,
454
+ "loss": 0.1207,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 17.5,
459
+ "learning_rate": 1.25e-05,
460
+ "loss": 0.1056,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 17.5,
465
+ "eval_accuracy": 0.9792626728110599,
466
+ "eval_loss": 0.16766877472400665,
467
+ "eval_runtime": 5.7239,
468
+ "eval_samples_per_second": 75.823,
469
+ "eval_steps_per_second": 9.609,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 17.81,
474
+ "learning_rate": 1.09375e-05,
475
+ "loss": 0.1051,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 18.12,
480
+ "learning_rate": 9.375000000000001e-06,
481
+ "loss": 0.1148,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 18.44,
486
+ "learning_rate": 7.8125e-06,
487
+ "loss": 0.1043,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 18.75,
492
+ "learning_rate": 6.25e-06,
493
+ "loss": 0.1128,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 18.75,
498
+ "eval_accuracy": 0.9723502304147466,
499
+ "eval_loss": 0.17274537682533264,
500
+ "eval_runtime": 5.5073,
501
+ "eval_samples_per_second": 78.805,
502
+ "eval_steps_per_second": 9.987,
503
+ "step": 600
504
+ }
505
+ ],
506
+ "max_steps": 640,
507
+ "num_train_epochs": 20,
508
+ "total_flos": 1.1105739126909665e+18,
509
+ "trial_name": null,
510
+ "trial_params": null
511
+ }
checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00db2a0e0945cff474f615b599cebc0ca0436459ecf509b6097643ca635938b
3
+ size 3899
checkpoint-640/config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "a",
13
+ "1": "b",
14
+ "10": "j",
15
+ "11": "k",
16
+ "12": "l",
17
+ "13": "m",
18
+ "14": "n",
19
+ "15": "o",
20
+ "16": "p",
21
+ "17": "period",
22
+ "18": "q",
23
+ "19": "question%20mark",
24
+ "2": "c",
25
+ "20": "r",
26
+ "21": "s",
27
+ "22": "t",
28
+ "23": "u",
29
+ "24": "v",
30
+ "25": "w",
31
+ "26": "x",
32
+ "27": "y",
33
+ "28": "z",
34
+ "3": "capital",
35
+ "4": "d",
36
+ "5": "e",
37
+ "6": "f",
38
+ "7": "g",
39
+ "8": "h",
40
+ "9": "i"
41
+ },
42
+ "image_size": 224,
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 3072,
45
+ "label2id": {
46
+ "a": "0",
47
+ "b": "1",
48
+ "c": "2",
49
+ "capital": "3",
50
+ "d": "4",
51
+ "e": "5",
52
+ "f": "6",
53
+ "g": "7",
54
+ "h": "8",
55
+ "i": "9",
56
+ "j": "10",
57
+ "k": "11",
58
+ "l": "12",
59
+ "m": "13",
60
+ "n": "14",
61
+ "o": "15",
62
+ "p": "16",
63
+ "period": "17",
64
+ "q": "18",
65
+ "question%20mark": "19",
66
+ "r": "20",
67
+ "s": "21",
68
+ "t": "22",
69
+ "u": "23",
70
+ "v": "24",
71
+ "w": "25",
72
+ "x": "26",
73
+ "y": "27",
74
+ "z": "28"
75
+ },
76
+ "layer_norm_eps": 1e-12,
77
+ "model_type": "vit",
78
+ "num_attention_heads": 12,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 12,
81
+ "patch_size": 16,
82
+ "problem_type": "single_label_classification",
83
+ "qkv_bias": true,
84
+ "torch_dtype": "float32",
85
+ "transformers_version": "4.30.2"
86
+ }
checkpoint-640/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26cdc0df0dea9a25116e0dfff3e12c741831cea206bdc214c326b5d25bee368
3
+ size 686684933
checkpoint-640/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-640/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73148133251e579307cedbc6460e628f9c60344cf1f6bcc16883d36df666f0d1
3
+ size 343351725
checkpoint-640/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb3563d7561e012ae0c950df619e9ba01000208cd5ba07786142cc2783ee2a7
3
+ size 14575
checkpoint-640/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc0a2c689052c6bc67cea049e5102d4499d32a5eaf611948ee55d50a010034aa
3
+ size 627
checkpoint-640/trainer_state.json ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.16443897783756256,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-640",
4
+ "epoch": 20.0,
5
+ "global_step": 640,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.31,
12
+ "learning_rate": 9.84375e-05,
13
+ "loss": 3.3252,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.62,
18
+ "learning_rate": 9.687500000000001e-05,
19
+ "loss": 3.217,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.94,
24
+ "learning_rate": 9.53125e-05,
25
+ "loss": 3.0799,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 1.25,
30
+ "learning_rate": 9.375e-05,
31
+ "loss": 2.8394,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.25,
36
+ "eval_accuracy": 0.4815668202764977,
37
+ "eval_loss": 2.7347912788391113,
38
+ "eval_runtime": 5.6305,
39
+ "eval_samples_per_second": 77.081,
40
+ "eval_steps_per_second": 9.768,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 1.56,
45
+ "learning_rate": 9.21875e-05,
46
+ "loss": 2.598,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 1.88,
51
+ "learning_rate": 9.062500000000001e-05,
52
+ "loss": 2.417,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 2.19,
57
+ "learning_rate": 8.90625e-05,
58
+ "loss": 2.111,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 2.5,
63
+ "learning_rate": 8.75e-05,
64
+ "loss": 1.9174,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 2.5,
69
+ "eval_accuracy": 0.8963133640552995,
70
+ "eval_loss": 1.8127654790878296,
71
+ "eval_runtime": 5.3595,
72
+ "eval_samples_per_second": 80.978,
73
+ "eval_steps_per_second": 10.262,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.81,
78
+ "learning_rate": 8.593750000000001e-05,
79
+ "loss": 1.666,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 3.12,
84
+ "learning_rate": 8.4375e-05,
85
+ "loss": 1.5159,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 3.44,
90
+ "learning_rate": 8.28125e-05,
91
+ "loss": 1.3105,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 3.75,
96
+ "learning_rate": 8.125000000000001e-05,
97
+ "loss": 1.1859,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 3.75,
102
+ "eval_accuracy": 0.9470046082949308,
103
+ "eval_loss": 1.1414676904678345,
104
+ "eval_runtime": 5.3582,
105
+ "eval_samples_per_second": 80.997,
106
+ "eval_steps_per_second": 10.265,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 4.06,
111
+ "learning_rate": 7.96875e-05,
112
+ "loss": 1.0525,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 4.38,
117
+ "learning_rate": 7.8125e-05,
118
+ "loss": 0.9085,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 4.69,
123
+ "learning_rate": 7.65625e-05,
124
+ "loss": 0.8207,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 5.0,
129
+ "learning_rate": 7.500000000000001e-05,
130
+ "loss": 0.7413,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 5.0,
135
+ "eval_accuracy": 0.9654377880184332,
136
+ "eval_loss": 0.7720305919647217,
137
+ "eval_runtime": 5.6625,
138
+ "eval_samples_per_second": 76.645,
139
+ "eval_steps_per_second": 9.713,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 5.31,
144
+ "learning_rate": 7.34375e-05,
145
+ "loss": 0.6334,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 5.62,
150
+ "learning_rate": 7.1875e-05,
151
+ "loss": 0.6058,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 5.94,
156
+ "learning_rate": 7.031250000000001e-05,
157
+ "loss": 0.5319,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 6.25,
162
+ "learning_rate": 6.875e-05,
163
+ "loss": 0.4761,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 6.25,
168
+ "eval_accuracy": 0.9838709677419355,
169
+ "eval_loss": 0.5084273815155029,
170
+ "eval_runtime": 5.4846,
171
+ "eval_samples_per_second": 79.131,
172
+ "eval_steps_per_second": 10.028,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 6.56,
177
+ "learning_rate": 6.71875e-05,
178
+ "loss": 0.4109,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 6.88,
183
+ "learning_rate": 6.562500000000001e-05,
184
+ "loss": 0.3707,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 7.19,
189
+ "learning_rate": 6.40625e-05,
190
+ "loss": 0.3536,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 7.5,
195
+ "learning_rate": 6.25e-05,
196
+ "loss": 0.3108,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 7.5,
201
+ "eval_accuracy": 0.9746543778801844,
202
+ "eval_loss": 0.36055463552474976,
203
+ "eval_runtime": 5.6696,
204
+ "eval_samples_per_second": 76.549,
205
+ "eval_steps_per_second": 9.701,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 7.81,
210
+ "learning_rate": 6.0937500000000004e-05,
211
+ "loss": 0.2789,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 8.12,
216
+ "learning_rate": 5.9375e-05,
217
+ "loss": 0.2905,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 8.44,
222
+ "learning_rate": 5.78125e-05,
223
+ "loss": 0.2462,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 8.75,
228
+ "learning_rate": 5.6250000000000005e-05,
229
+ "loss": 0.251,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 8.75,
234
+ "eval_accuracy": 0.9769585253456221,
235
+ "eval_loss": 0.2958492040634155,
236
+ "eval_runtime": 7.6402,
237
+ "eval_samples_per_second": 56.805,
238
+ "eval_steps_per_second": 7.199,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 9.06,
243
+ "learning_rate": 5.46875e-05,
244
+ "loss": 0.2171,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 9.38,
249
+ "learning_rate": 5.3125000000000004e-05,
250
+ "loss": 0.2064,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 9.69,
255
+ "learning_rate": 5.15625e-05,
256
+ "loss": 0.2116,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 10.0,
261
+ "learning_rate": 5e-05,
262
+ "loss": 0.1896,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 10.0,
267
+ "eval_accuracy": 0.9769585253456221,
268
+ "eval_loss": 0.24788345396518707,
269
+ "eval_runtime": 5.2479,
270
+ "eval_samples_per_second": 82.699,
271
+ "eval_steps_per_second": 10.48,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 10.31,
276
+ "learning_rate": 4.8437500000000005e-05,
277
+ "loss": 0.1783,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 10.62,
282
+ "learning_rate": 4.6875e-05,
283
+ "loss": 0.1859,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 10.94,
288
+ "learning_rate": 4.5312500000000004e-05,
289
+ "loss": 0.1705,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 11.25,
294
+ "learning_rate": 4.375e-05,
295
+ "loss": 0.1659,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 11.25,
300
+ "eval_accuracy": 0.9838709677419355,
301
+ "eval_loss": 0.23752211034297943,
302
+ "eval_runtime": 5.7417,
303
+ "eval_samples_per_second": 75.588,
304
+ "eval_steps_per_second": 9.579,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 11.56,
309
+ "learning_rate": 4.21875e-05,
310
+ "loss": 0.1753,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 11.88,
315
+ "learning_rate": 4.0625000000000005e-05,
316
+ "loss": 0.1509,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 12.19,
321
+ "learning_rate": 3.90625e-05,
322
+ "loss": 0.1447,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 12.5,
327
+ "learning_rate": 3.7500000000000003e-05,
328
+ "loss": 0.1401,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 12.5,
333
+ "eval_accuracy": 0.9792626728110599,
334
+ "eval_loss": 0.20333679020404816,
335
+ "eval_runtime": 5.2814,
336
+ "eval_samples_per_second": 82.175,
337
+ "eval_steps_per_second": 10.414,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 12.81,
342
+ "learning_rate": 3.59375e-05,
343
+ "loss": 0.1508,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 13.12,
348
+ "learning_rate": 3.4375e-05,
349
+ "loss": 0.1364,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 13.44,
354
+ "learning_rate": 3.2812500000000005e-05,
355
+ "loss": 0.1415,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 13.75,
360
+ "learning_rate": 3.125e-05,
361
+ "loss": 0.131,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 13.75,
366
+ "eval_accuracy": 0.9792626728110599,
367
+ "eval_loss": 0.19693893194198608,
368
+ "eval_runtime": 5.0279,
369
+ "eval_samples_per_second": 86.318,
370
+ "eval_steps_per_second": 10.939,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 14.06,
375
+ "learning_rate": 2.96875e-05,
376
+ "loss": 0.1264,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 14.38,
381
+ "learning_rate": 2.8125000000000003e-05,
382
+ "loss": 0.1376,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 14.69,
387
+ "learning_rate": 2.6562500000000002e-05,
388
+ "loss": 0.1211,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 15.0,
393
+ "learning_rate": 2.5e-05,
394
+ "loss": 0.1162,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 15.0,
399
+ "eval_accuracy": 0.9792626728110599,
400
+ "eval_loss": 0.1791529506444931,
401
+ "eval_runtime": 4.2132,
402
+ "eval_samples_per_second": 103.009,
403
+ "eval_steps_per_second": 13.054,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 15.31,
408
+ "learning_rate": 2.34375e-05,
409
+ "loss": 0.1285,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 15.62,
414
+ "learning_rate": 2.1875e-05,
415
+ "loss": 0.1136,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 15.94,
420
+ "learning_rate": 2.0312500000000002e-05,
421
+ "loss": 0.1117,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 16.25,
426
+ "learning_rate": 1.8750000000000002e-05,
427
+ "loss": 0.11,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 16.25,
432
+ "eval_accuracy": 0.9792626728110599,
433
+ "eval_loss": 0.17193575203418732,
434
+ "eval_runtime": 4.2988,
435
+ "eval_samples_per_second": 100.957,
436
+ "eval_steps_per_second": 12.794,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 16.56,
441
+ "learning_rate": 1.71875e-05,
442
+ "loss": 0.1087,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 16.88,
447
+ "learning_rate": 1.5625e-05,
448
+ "loss": 0.1065,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 17.19,
453
+ "learning_rate": 1.4062500000000001e-05,
454
+ "loss": 0.1207,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 17.5,
459
+ "learning_rate": 1.25e-05,
460
+ "loss": 0.1056,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 17.5,
465
+ "eval_accuracy": 0.9792626728110599,
466
+ "eval_loss": 0.16766877472400665,
467
+ "eval_runtime": 5.7239,
468
+ "eval_samples_per_second": 75.823,
469
+ "eval_steps_per_second": 9.609,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 17.81,
474
+ "learning_rate": 1.09375e-05,
475
+ "loss": 0.1051,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 18.12,
480
+ "learning_rate": 9.375000000000001e-06,
481
+ "loss": 0.1148,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 18.44,
486
+ "learning_rate": 7.8125e-06,
487
+ "loss": 0.1043,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 18.75,
492
+ "learning_rate": 6.25e-06,
493
+ "loss": 0.1128,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 18.75,
498
+ "eval_accuracy": 0.9723502304147466,
499
+ "eval_loss": 0.17274537682533264,
500
+ "eval_runtime": 5.5073,
501
+ "eval_samples_per_second": 78.805,
502
+ "eval_steps_per_second": 9.987,
503
+ "step": 600
504
+ },
505
+ {
506
+ "epoch": 19.06,
507
+ "learning_rate": 4.6875000000000004e-06,
508
+ "loss": 0.102,
509
+ "step": 610
510
+ },
511
+ {
512
+ "epoch": 19.38,
513
+ "learning_rate": 3.125e-06,
514
+ "loss": 0.1028,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 19.69,
519
+ "learning_rate": 1.5625e-06,
520
+ "loss": 0.1096,
521
+ "step": 630
522
+ },
523
+ {
524
+ "epoch": 20.0,
525
+ "learning_rate": 0.0,
526
+ "loss": 0.1018,
527
+ "step": 640
528
+ },
529
+ {
530
+ "epoch": 20.0,
531
+ "eval_accuracy": 0.9792626728110599,
532
+ "eval_loss": 0.16443897783756256,
533
+ "eval_runtime": 4.9159,
534
+ "eval_samples_per_second": 88.285,
535
+ "eval_steps_per_second": 11.188,
536
+ "step": 640
537
+ }
538
+ ],
539
+ "max_steps": 640,
540
+ "num_train_epochs": 20,
541
+ "total_flos": 1.1843641391623373e+18,
542
+ "trial_name": null,
543
+ "trial_params": null
544
+ }
checkpoint-640/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00db2a0e0945cff474f615b599cebc0ca0436459ecf509b6097643ca635938b
3
+ size 3899
config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "a",
13
+ "1": "b",
14
+ "10": "j",
15
+ "11": "k",
16
+ "12": "l",
17
+ "13": "m",
18
+ "14": "n",
19
+ "15": "o",
20
+ "16": "p",
21
+ "17": "period",
22
+ "18": "q",
23
+ "19": "question%20mark",
24
+ "2": "c",
25
+ "20": "r",
26
+ "21": "s",
27
+ "22": "t",
28
+ "23": "u",
29
+ "24": "v",
30
+ "25": "w",
31
+ "26": "x",
32
+ "27": "y",
33
+ "28": "z",
34
+ "3": "capital",
35
+ "4": "d",
36
+ "5": "e",
37
+ "6": "f",
38
+ "7": "g",
39
+ "8": "h",
40
+ "9": "i"
41
+ },
42
+ "image_size": 224,
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 3072,
45
+ "label2id": {
46
+ "a": "0",
47
+ "b": "1",
48
+ "c": "2",
49
+ "capital": "3",
50
+ "d": "4",
51
+ "e": "5",
52
+ "f": "6",
53
+ "g": "7",
54
+ "h": "8",
55
+ "i": "9",
56
+ "j": "10",
57
+ "k": "11",
58
+ "l": "12",
59
+ "m": "13",
60
+ "n": "14",
61
+ "o": "15",
62
+ "p": "16",
63
+ "period": "17",
64
+ "q": "18",
65
+ "question%20mark": "19",
66
+ "r": "20",
67
+ "s": "21",
68
+ "t": "22",
69
+ "u": "23",
70
+ "v": "24",
71
+ "w": "25",
72
+ "x": "26",
73
+ "y": "27",
74
+ "z": "28"
75
+ },
76
+ "layer_norm_eps": 1e-12,
77
+ "model_type": "vit",
78
+ "num_attention_heads": 12,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 12,
81
+ "patch_size": 16,
82
+ "problem_type": "single_label_classification",
83
+ "qkv_bias": true,
84
+ "torch_dtype": "float32",
85
+ "transformers_version": "4.30.2"
86
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73148133251e579307cedbc6460e628f9c60344cf1f6bcc16883d36df666f0d1
3
+ size 343351725
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.1843641391623373e+18,
4
+ "train_loss": 0.6315841894596815,
5
+ "train_runtime": 473.5366,
6
+ "train_samples_per_second": 32.268,
7
+ "train_steps_per_second": 1.352
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.16443897783756256,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-640",
4
+ "epoch": 20.0,
5
+ "global_step": 640,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.31,
12
+ "learning_rate": 9.84375e-05,
13
+ "loss": 3.3252,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.62,
18
+ "learning_rate": 9.687500000000001e-05,
19
+ "loss": 3.217,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.94,
24
+ "learning_rate": 9.53125e-05,
25
+ "loss": 3.0799,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 1.25,
30
+ "learning_rate": 9.375e-05,
31
+ "loss": 2.8394,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 1.25,
36
+ "eval_accuracy": 0.4815668202764977,
37
+ "eval_loss": 2.7347912788391113,
38
+ "eval_runtime": 5.6305,
39
+ "eval_samples_per_second": 77.081,
40
+ "eval_steps_per_second": 9.768,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 1.56,
45
+ "learning_rate": 9.21875e-05,
46
+ "loss": 2.598,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 1.88,
51
+ "learning_rate": 9.062500000000001e-05,
52
+ "loss": 2.417,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 2.19,
57
+ "learning_rate": 8.90625e-05,
58
+ "loss": 2.111,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 2.5,
63
+ "learning_rate": 8.75e-05,
64
+ "loss": 1.9174,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 2.5,
69
+ "eval_accuracy": 0.8963133640552995,
70
+ "eval_loss": 1.8127654790878296,
71
+ "eval_runtime": 5.3595,
72
+ "eval_samples_per_second": 80.978,
73
+ "eval_steps_per_second": 10.262,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.81,
78
+ "learning_rate": 8.593750000000001e-05,
79
+ "loss": 1.666,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 3.12,
84
+ "learning_rate": 8.4375e-05,
85
+ "loss": 1.5159,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 3.44,
90
+ "learning_rate": 8.28125e-05,
91
+ "loss": 1.3105,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 3.75,
96
+ "learning_rate": 8.125000000000001e-05,
97
+ "loss": 1.1859,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 3.75,
102
+ "eval_accuracy": 0.9470046082949308,
103
+ "eval_loss": 1.1414676904678345,
104
+ "eval_runtime": 5.3582,
105
+ "eval_samples_per_second": 80.997,
106
+ "eval_steps_per_second": 10.265,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 4.06,
111
+ "learning_rate": 7.96875e-05,
112
+ "loss": 1.0525,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 4.38,
117
+ "learning_rate": 7.8125e-05,
118
+ "loss": 0.9085,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 4.69,
123
+ "learning_rate": 7.65625e-05,
124
+ "loss": 0.8207,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 5.0,
129
+ "learning_rate": 7.500000000000001e-05,
130
+ "loss": 0.7413,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 5.0,
135
+ "eval_accuracy": 0.9654377880184332,
136
+ "eval_loss": 0.7720305919647217,
137
+ "eval_runtime": 5.6625,
138
+ "eval_samples_per_second": 76.645,
139
+ "eval_steps_per_second": 9.713,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 5.31,
144
+ "learning_rate": 7.34375e-05,
145
+ "loss": 0.6334,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 5.62,
150
+ "learning_rate": 7.1875e-05,
151
+ "loss": 0.6058,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 5.94,
156
+ "learning_rate": 7.031250000000001e-05,
157
+ "loss": 0.5319,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 6.25,
162
+ "learning_rate": 6.875e-05,
163
+ "loss": 0.4761,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 6.25,
168
+ "eval_accuracy": 0.9838709677419355,
169
+ "eval_loss": 0.5084273815155029,
170
+ "eval_runtime": 5.4846,
171
+ "eval_samples_per_second": 79.131,
172
+ "eval_steps_per_second": 10.028,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 6.56,
177
+ "learning_rate": 6.71875e-05,
178
+ "loss": 0.4109,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 6.88,
183
+ "learning_rate": 6.562500000000001e-05,
184
+ "loss": 0.3707,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 7.19,
189
+ "learning_rate": 6.40625e-05,
190
+ "loss": 0.3536,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 7.5,
195
+ "learning_rate": 6.25e-05,
196
+ "loss": 0.3108,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 7.5,
201
+ "eval_accuracy": 0.9746543778801844,
202
+ "eval_loss": 0.36055463552474976,
203
+ "eval_runtime": 5.6696,
204
+ "eval_samples_per_second": 76.549,
205
+ "eval_steps_per_second": 9.701,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 7.81,
210
+ "learning_rate": 6.0937500000000004e-05,
211
+ "loss": 0.2789,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 8.12,
216
+ "learning_rate": 5.9375e-05,
217
+ "loss": 0.2905,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 8.44,
222
+ "learning_rate": 5.78125e-05,
223
+ "loss": 0.2462,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 8.75,
228
+ "learning_rate": 5.6250000000000005e-05,
229
+ "loss": 0.251,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 8.75,
234
+ "eval_accuracy": 0.9769585253456221,
235
+ "eval_loss": 0.2958492040634155,
236
+ "eval_runtime": 7.6402,
237
+ "eval_samples_per_second": 56.805,
238
+ "eval_steps_per_second": 7.199,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 9.06,
243
+ "learning_rate": 5.46875e-05,
244
+ "loss": 0.2171,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 9.38,
249
+ "learning_rate": 5.3125000000000004e-05,
250
+ "loss": 0.2064,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 9.69,
255
+ "learning_rate": 5.15625e-05,
256
+ "loss": 0.2116,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 10.0,
261
+ "learning_rate": 5e-05,
262
+ "loss": 0.1896,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 10.0,
267
+ "eval_accuracy": 0.9769585253456221,
268
+ "eval_loss": 0.24788345396518707,
269
+ "eval_runtime": 5.2479,
270
+ "eval_samples_per_second": 82.699,
271
+ "eval_steps_per_second": 10.48,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 10.31,
276
+ "learning_rate": 4.8437500000000005e-05,
277
+ "loss": 0.1783,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 10.62,
282
+ "learning_rate": 4.6875e-05,
283
+ "loss": 0.1859,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 10.94,
288
+ "learning_rate": 4.5312500000000004e-05,
289
+ "loss": 0.1705,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 11.25,
294
+ "learning_rate": 4.375e-05,
295
+ "loss": 0.1659,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 11.25,
300
+ "eval_accuracy": 0.9838709677419355,
301
+ "eval_loss": 0.23752211034297943,
302
+ "eval_runtime": 5.7417,
303
+ "eval_samples_per_second": 75.588,
304
+ "eval_steps_per_second": 9.579,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 11.56,
309
+ "learning_rate": 4.21875e-05,
310
+ "loss": 0.1753,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 11.88,
315
+ "learning_rate": 4.0625000000000005e-05,
316
+ "loss": 0.1509,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 12.19,
321
+ "learning_rate": 3.90625e-05,
322
+ "loss": 0.1447,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 12.5,
327
+ "learning_rate": 3.7500000000000003e-05,
328
+ "loss": 0.1401,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 12.5,
333
+ "eval_accuracy": 0.9792626728110599,
334
+ "eval_loss": 0.20333679020404816,
335
+ "eval_runtime": 5.2814,
336
+ "eval_samples_per_second": 82.175,
337
+ "eval_steps_per_second": 10.414,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 12.81,
342
+ "learning_rate": 3.59375e-05,
343
+ "loss": 0.1508,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 13.12,
348
+ "learning_rate": 3.4375e-05,
349
+ "loss": 0.1364,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 13.44,
354
+ "learning_rate": 3.2812500000000005e-05,
355
+ "loss": 0.1415,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 13.75,
360
+ "learning_rate": 3.125e-05,
361
+ "loss": 0.131,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 13.75,
366
+ "eval_accuracy": 0.9792626728110599,
367
+ "eval_loss": 0.19693893194198608,
368
+ "eval_runtime": 5.0279,
369
+ "eval_samples_per_second": 86.318,
370
+ "eval_steps_per_second": 10.939,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 14.06,
375
+ "learning_rate": 2.96875e-05,
376
+ "loss": 0.1264,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 14.38,
381
+ "learning_rate": 2.8125000000000003e-05,
382
+ "loss": 0.1376,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 14.69,
387
+ "learning_rate": 2.6562500000000002e-05,
388
+ "loss": 0.1211,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 15.0,
393
+ "learning_rate": 2.5e-05,
394
+ "loss": 0.1162,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 15.0,
399
+ "eval_accuracy": 0.9792626728110599,
400
+ "eval_loss": 0.1791529506444931,
401
+ "eval_runtime": 4.2132,
402
+ "eval_samples_per_second": 103.009,
403
+ "eval_steps_per_second": 13.054,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 15.31,
408
+ "learning_rate": 2.34375e-05,
409
+ "loss": 0.1285,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 15.62,
414
+ "learning_rate": 2.1875e-05,
415
+ "loss": 0.1136,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 15.94,
420
+ "learning_rate": 2.0312500000000002e-05,
421
+ "loss": 0.1117,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 16.25,
426
+ "learning_rate": 1.8750000000000002e-05,
427
+ "loss": 0.11,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 16.25,
432
+ "eval_accuracy": 0.9792626728110599,
433
+ "eval_loss": 0.17193575203418732,
434
+ "eval_runtime": 4.2988,
435
+ "eval_samples_per_second": 100.957,
436
+ "eval_steps_per_second": 12.794,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 16.56,
441
+ "learning_rate": 1.71875e-05,
442
+ "loss": 0.1087,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 16.88,
447
+ "learning_rate": 1.5625e-05,
448
+ "loss": 0.1065,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 17.19,
453
+ "learning_rate": 1.4062500000000001e-05,
454
+ "loss": 0.1207,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 17.5,
459
+ "learning_rate": 1.25e-05,
460
+ "loss": 0.1056,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 17.5,
465
+ "eval_accuracy": 0.9792626728110599,
466
+ "eval_loss": 0.16766877472400665,
467
+ "eval_runtime": 5.7239,
468
+ "eval_samples_per_second": 75.823,
469
+ "eval_steps_per_second": 9.609,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 17.81,
474
+ "learning_rate": 1.09375e-05,
475
+ "loss": 0.1051,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 18.12,
480
+ "learning_rate": 9.375000000000001e-06,
481
+ "loss": 0.1148,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 18.44,
486
+ "learning_rate": 7.8125e-06,
487
+ "loss": 0.1043,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 18.75,
492
+ "learning_rate": 6.25e-06,
493
+ "loss": 0.1128,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 18.75,
498
+ "eval_accuracy": 0.9723502304147466,
499
+ "eval_loss": 0.17274537682533264,
500
+ "eval_runtime": 5.5073,
501
+ "eval_samples_per_second": 78.805,
502
+ "eval_steps_per_second": 9.987,
503
+ "step": 600
504
+ },
505
+ {
506
+ "epoch": 19.06,
507
+ "learning_rate": 4.6875000000000004e-06,
508
+ "loss": 0.102,
509
+ "step": 610
510
+ },
511
+ {
512
+ "epoch": 19.38,
513
+ "learning_rate": 3.125e-06,
514
+ "loss": 0.1028,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 19.69,
519
+ "learning_rate": 1.5625e-06,
520
+ "loss": 0.1096,
521
+ "step": 630
522
+ },
523
+ {
524
+ "epoch": 20.0,
525
+ "learning_rate": 0.0,
526
+ "loss": 0.1018,
527
+ "step": 640
528
+ },
529
+ {
530
+ "epoch": 20.0,
531
+ "eval_accuracy": 0.9792626728110599,
532
+ "eval_loss": 0.16443897783756256,
533
+ "eval_runtime": 4.9159,
534
+ "eval_samples_per_second": 88.285,
535
+ "eval_steps_per_second": 11.188,
536
+ "step": 640
537
+ },
538
+ {
539
+ "epoch": 20.0,
540
+ "step": 640,
541
+ "total_flos": 1.1843641391623373e+18,
542
+ "train_loss": 0.6315841894596815,
543
+ "train_runtime": 473.5366,
544
+ "train_samples_per_second": 32.268,
545
+ "train_steps_per_second": 1.352
546
+ }
547
+ ],
548
+ "max_steps": 640,
549
+ "num_train_epochs": 20,
550
+ "total_flos": 1.1843641391623373e+18,
551
+ "trial_name": null,
552
+ "trial_params": null
553
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00db2a0e0945cff474f615b599cebc0ca0436459ecf509b6097643ca635938b
3
+ size 3899