Kontawat committed on
Commit
41c5279
1 Parent(s): 8ccbd7c
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - image-classification
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: vit-base-beans-demo-v5
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # vit-base-beans-demo-v5
17
+
18
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the diabetic-retinopathy-classification dataset.
19
+ It achieves the following results on the evaluation set (measured at the best checkpoint, step 600):
20
+ - Loss: 0.7260
21
+ - Accuracy: 0.7263
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0002
41
+ - train_batch_size: 16
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 4
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 0.9235 | 0.47 | 100 | 0.8972 | 0.6493 |
54
+ | 0.9253 | 0.95 | 200 | 0.9151 | 0.6635 |
55
+ | 0.8371 | 1.42 | 300 | 0.8071 | 0.6931 |
56
+ | 0.7355 | 1.9 | 400 | 0.7563 | 0.7073 |
57
+ | 0.6532 | 2.37 | 500 | 0.7543 | 0.6896 |
58
+ | 0.5982 | 2.84 | 600 | 0.7260 | 0.7263 |
59
+ | 0.4276 | 3.32 | 700 | 0.7346 | 0.7239 |
60
+ | 0.4935 | 3.79 | 800 | 0.7490 | 0.7133 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.26.1
66
+ - Pytorch 1.13.1+cu116
67
+ - Datasets 2.10.0
68
+ - Tokenizers 0.13.2
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.726303317535545,
4
+ "eval_loss": 0.7260138988494873,
5
+ "eval_runtime": 88.4264,
6
+ "eval_samples_per_second": 9.545,
7
+ "eval_steps_per_second": 1.199,
8
+ "total_flos": 1.0430702255838413e+18,
9
+ "train_loss": 0.674277721140622,
10
+ "train_runtime": 2407.857,
11
+ "train_samples_per_second": 5.59,
12
+ "train_steps_per_second": 0.351
13
+ }
checkpoint-600/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "0",
13
+ "1": "1",
14
+ "2": "2",
15
+ "3": "3",
16
+ "4": "4"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "0": "0",
23
+ "1": "1",
24
+ "2": "2",
25
+ "3": "3",
26
+ "4": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.26.1"
38
+ }
checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33a722383d4ffcd7da7445ebc6b7efb316dd3c0fadc74a3d83b8f1a07cfd6d40
3
+ size 686537349
checkpoint-600/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-600/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd8865690927931e71e4fe5f7cd1c001f5e3c322f7cfe9c0e0faabaf2e20126
3
+ size 343277933
checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5773d772a88e414bc446a704f98e2303c1d836815da2d0e7d245a164163118d
3
+ size 14575
checkpoint-600/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b6fea68b198450145ec317f1beb9b8d30d0d70e357ddd6fababef1f050d113
3
+ size 557
checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35b72cf0c2b08101adbc7d63fa942fdd2967fce9e5173f0ea2ef8e9ead735f9
3
+ size 627
checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7260138988494873,
3
+ "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-600",
4
+ "epoch": 2.843601895734597,
5
+ "global_step": 600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 0.00019763033175355452,
13
+ "loss": 1.4193,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 0.000195260663507109,
19
+ "loss": 1.0491,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 0.00019289099526066352,
25
+ "loss": 1.1612,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 0.000190521327014218,
31
+ "loss": 1.0393,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.24,
36
+ "learning_rate": 0.00018815165876777252,
37
+ "loss": 1.0533,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.28,
42
+ "learning_rate": 0.00018578199052132703,
43
+ "loss": 0.9506,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.33,
48
+ "learning_rate": 0.00018341232227488152,
49
+ "loss": 0.9148,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.38,
54
+ "learning_rate": 0.00018104265402843603,
55
+ "loss": 0.9735,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.43,
60
+ "learning_rate": 0.00017890995260663508,
61
+ "loss": 0.783,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.47,
66
+ "learning_rate": 0.0001765402843601896,
67
+ "loss": 0.9235,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.47,
72
+ "eval_accuracy": 0.6492890995260664,
73
+ "eval_loss": 0.8972262740135193,
74
+ "eval_runtime": 90.3527,
75
+ "eval_samples_per_second": 9.341,
76
+ "eval_steps_per_second": 1.173,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.52,
81
+ "learning_rate": 0.00017417061611374408,
82
+ "loss": 0.9931,
83
+ "step": 110
84
+ },
85
+ {
86
+ "epoch": 0.57,
87
+ "learning_rate": 0.0001718009478672986,
88
+ "loss": 1.0035,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 0.62,
93
+ "learning_rate": 0.00016943127962085308,
94
+ "loss": 0.9619,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 0.66,
99
+ "learning_rate": 0.0001670616113744076,
100
+ "loss": 0.8618,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 0.71,
105
+ "learning_rate": 0.0001646919431279621,
106
+ "loss": 0.9862,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 0.76,
111
+ "learning_rate": 0.0001623222748815166,
112
+ "loss": 0.9977,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 0.81,
117
+ "learning_rate": 0.0001599526066350711,
118
+ "loss": 0.8715,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 0.85,
123
+ "learning_rate": 0.0001575829383886256,
124
+ "loss": 0.9153,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 0.9,
129
+ "learning_rate": 0.0001552132701421801,
130
+ "loss": 0.784,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 0.95,
135
+ "learning_rate": 0.00015284360189573462,
136
+ "loss": 0.9253,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 0.95,
141
+ "eval_accuracy": 0.6635071090047393,
142
+ "eval_loss": 0.9150586128234863,
143
+ "eval_runtime": 91.0647,
144
+ "eval_samples_per_second": 9.268,
145
+ "eval_steps_per_second": 1.164,
146
+ "step": 200
147
+ },
148
+ {
149
+ "epoch": 1.0,
150
+ "learning_rate": 0.0001504739336492891,
151
+ "loss": 0.8305,
152
+ "step": 210
153
+ },
154
+ {
155
+ "epoch": 1.04,
156
+ "learning_rate": 0.0001481042654028436,
157
+ "loss": 0.8379,
158
+ "step": 220
159
+ },
160
+ {
161
+ "epoch": 1.09,
162
+ "learning_rate": 0.0001457345971563981,
163
+ "loss": 0.707,
164
+ "step": 230
165
+ },
166
+ {
167
+ "epoch": 1.14,
168
+ "learning_rate": 0.0001433649289099526,
169
+ "loss": 0.8205,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 1.18,
174
+ "learning_rate": 0.00014099526066350712,
175
+ "loss": 0.7565,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 1.23,
180
+ "learning_rate": 0.0001386255924170616,
181
+ "loss": 0.7539,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 1.28,
186
+ "learning_rate": 0.00013625592417061612,
187
+ "loss": 0.9389,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 1.33,
192
+ "learning_rate": 0.0001338862559241706,
193
+ "loss": 0.8062,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 1.37,
198
+ "learning_rate": 0.00013151658767772512,
199
+ "loss": 0.7702,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 1.42,
204
+ "learning_rate": 0.00012914691943127963,
205
+ "loss": 0.8371,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 1.42,
210
+ "eval_accuracy": 0.693127962085308,
211
+ "eval_loss": 0.8071145415306091,
212
+ "eval_runtime": 87.0572,
213
+ "eval_samples_per_second": 9.695,
214
+ "eval_steps_per_second": 1.218,
215
+ "step": 300
216
+ },
217
+ {
218
+ "epoch": 1.47,
219
+ "learning_rate": 0.00012677725118483412,
220
+ "loss": 0.7698,
221
+ "step": 310
222
+ },
223
+ {
224
+ "epoch": 1.52,
225
+ "learning_rate": 0.00012440758293838863,
226
+ "loss": 0.8412,
227
+ "step": 320
228
+ },
229
+ {
230
+ "epoch": 1.56,
231
+ "learning_rate": 0.00012203791469194314,
232
+ "loss": 0.8268,
233
+ "step": 330
234
+ },
235
+ {
236
+ "epoch": 1.61,
237
+ "learning_rate": 0.00011966824644549763,
238
+ "loss": 0.6359,
239
+ "step": 340
240
+ },
241
+ {
242
+ "epoch": 1.66,
243
+ "learning_rate": 0.00011729857819905214,
244
+ "loss": 0.6765,
245
+ "step": 350
246
+ },
247
+ {
248
+ "epoch": 1.71,
249
+ "learning_rate": 0.00011492890995260664,
250
+ "loss": 0.7455,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 1.75,
255
+ "learning_rate": 0.00011255924170616114,
256
+ "loss": 0.7108,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 1.8,
261
+ "learning_rate": 0.00011018957345971565,
262
+ "loss": 0.6456,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 1.85,
267
+ "learning_rate": 0.00010781990521327015,
268
+ "loss": 0.7036,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 1.9,
273
+ "learning_rate": 0.00010545023696682465,
274
+ "loss": 0.7355,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 1.9,
279
+ "eval_accuracy": 0.707345971563981,
280
+ "eval_loss": 0.7562642693519592,
281
+ "eval_runtime": 87.3813,
282
+ "eval_samples_per_second": 9.659,
283
+ "eval_steps_per_second": 1.213,
284
+ "step": 400
285
+ },
286
+ {
287
+ "epoch": 1.94,
288
+ "learning_rate": 0.00010308056872037915,
289
+ "loss": 0.6315,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 1.99,
294
+ "learning_rate": 0.00010071090047393366,
295
+ "loss": 0.6996,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 2.04,
300
+ "learning_rate": 9.834123222748816e-05,
301
+ "loss": 0.6379,
302
+ "step": 430
303
+ },
304
+ {
305
+ "epoch": 2.09,
306
+ "learning_rate": 9.597156398104266e-05,
307
+ "loss": 0.5765,
308
+ "step": 440
309
+ },
310
+ {
311
+ "epoch": 2.13,
312
+ "learning_rate": 9.360189573459716e-05,
313
+ "loss": 0.6309,
314
+ "step": 450
315
+ },
316
+ {
317
+ "epoch": 2.18,
318
+ "learning_rate": 9.123222748815167e-05,
319
+ "loss": 0.5836,
320
+ "step": 460
321
+ },
322
+ {
323
+ "epoch": 2.23,
324
+ "learning_rate": 8.886255924170617e-05,
325
+ "loss": 0.5075,
326
+ "step": 470
327
+ },
328
+ {
329
+ "epoch": 2.27,
330
+ "learning_rate": 8.649289099526067e-05,
331
+ "loss": 0.5817,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 2.32,
336
+ "learning_rate": 8.412322274881517e-05,
337
+ "loss": 0.6765,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 2.37,
342
+ "learning_rate": 8.175355450236967e-05,
343
+ "loss": 0.6532,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 2.37,
348
+ "eval_accuracy": 0.6895734597156398,
349
+ "eval_loss": 0.7542913556098938,
350
+ "eval_runtime": 87.8093,
351
+ "eval_samples_per_second": 9.612,
352
+ "eval_steps_per_second": 1.207,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 2.42,
357
+ "learning_rate": 7.938388625592418e-05,
358
+ "loss": 0.5203,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 2.46,
363
+ "learning_rate": 7.701421800947868e-05,
364
+ "loss": 0.4772,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 2.51,
369
+ "learning_rate": 7.464454976303318e-05,
370
+ "loss": 0.6391,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 2.56,
375
+ "learning_rate": 7.227488151658768e-05,
376
+ "loss": 0.5908,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 2.61,
381
+ "learning_rate": 6.990521327014218e-05,
382
+ "loss": 0.5459,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 2.65,
387
+ "learning_rate": 6.753554502369669e-05,
388
+ "loss": 0.6922,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 2.7,
393
+ "learning_rate": 6.516587677725119e-05,
394
+ "loss": 0.5738,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 2.75,
399
+ "learning_rate": 6.279620853080569e-05,
400
+ "loss": 0.5743,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 2.8,
405
+ "learning_rate": 6.0426540284360186e-05,
406
+ "loss": 0.5733,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 2.84,
411
+ "learning_rate": 5.829383886255925e-05,
412
+ "loss": 0.5982,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 2.84,
417
+ "eval_accuracy": 0.726303317535545,
418
+ "eval_loss": 0.7260138988494873,
419
+ "eval_runtime": 88.3787,
420
+ "eval_samples_per_second": 9.55,
421
+ "eval_steps_per_second": 1.199,
422
+ "step": 600
423
+ }
424
+ ],
425
+ "max_steps": 844,
426
+ "num_train_epochs": 4,
427
+ "total_flos": 7.422382333315031e+17,
428
+ "trial_name": null,
429
+ "trial_params": null
430
+ }
checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0decd387a5144fbf9b05b0832efcffff92fc5abe56dfe179661f63568600ca
3
+ size 3515
checkpoint-800/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "0",
13
+ "1": "1",
14
+ "2": "2",
15
+ "3": "3",
16
+ "4": "4"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "0": "0",
23
+ "1": "1",
24
+ "2": "2",
25
+ "3": "3",
26
+ "4": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.26.1"
38
+ }
checkpoint-800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a673b36cd93c3cf1114dfd1bc3a2d2c5e863d0351e84aba8e4d6d338f9997378
3
+ size 686537349
checkpoint-800/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-800/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1cda50565b33cc1eb3493f4806d1963f581e55afe1ccea67ea57b2fe7b3b3a
3
+ size 343277933
checkpoint-800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51852b425c1e6dfce8e8cd545d7a990f1379b6092095bac9bede723f445f5fe9
3
+ size 14575
checkpoint-800/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:696a3dde5542590ab7d757eac6eef227d11bc974a194df5c80657ffa6905a8b6
3
+ size 557
checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:429115b68a9bddc30ea4ab96f26b1acad0849d612b21d90baa452738ff327133
3
+ size 627
checkpoint-800/trainer_state.json ADDED
@@ -0,0 +1,568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7260138988494873,
3
+ "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-600",
4
+ "epoch": 3.7914691943127963,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 0.00019763033175355452,
13
+ "loss": 1.4193,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 0.000195260663507109,
19
+ "loss": 1.0491,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 0.00019289099526066352,
25
+ "loss": 1.1612,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 0.000190521327014218,
31
+ "loss": 1.0393,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.24,
36
+ "learning_rate": 0.00018815165876777252,
37
+ "loss": 1.0533,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.28,
42
+ "learning_rate": 0.00018578199052132703,
43
+ "loss": 0.9506,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.33,
48
+ "learning_rate": 0.00018341232227488152,
49
+ "loss": 0.9148,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.38,
54
+ "learning_rate": 0.00018104265402843603,
55
+ "loss": 0.9735,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.43,
60
+ "learning_rate": 0.00017890995260663508,
61
+ "loss": 0.783,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.47,
66
+ "learning_rate": 0.0001765402843601896,
67
+ "loss": 0.9235,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.47,
72
+ "eval_accuracy": 0.6492890995260664,
73
+ "eval_loss": 0.8972262740135193,
74
+ "eval_runtime": 90.3527,
75
+ "eval_samples_per_second": 9.341,
76
+ "eval_steps_per_second": 1.173,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.52,
81
+ "learning_rate": 0.00017417061611374408,
82
+ "loss": 0.9931,
83
+ "step": 110
84
+ },
85
+ {
86
+ "epoch": 0.57,
87
+ "learning_rate": 0.0001718009478672986,
88
+ "loss": 1.0035,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 0.62,
93
+ "learning_rate": 0.00016943127962085308,
94
+ "loss": 0.9619,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 0.66,
99
+ "learning_rate": 0.0001670616113744076,
100
+ "loss": 0.8618,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 0.71,
105
+ "learning_rate": 0.0001646919431279621,
106
+ "loss": 0.9862,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 0.76,
111
+ "learning_rate": 0.0001623222748815166,
112
+ "loss": 0.9977,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 0.81,
117
+ "learning_rate": 0.0001599526066350711,
118
+ "loss": 0.8715,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 0.85,
123
+ "learning_rate": 0.0001575829383886256,
124
+ "loss": 0.9153,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 0.9,
129
+ "learning_rate": 0.0001552132701421801,
130
+ "loss": 0.784,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 0.95,
135
+ "learning_rate": 0.00015284360189573462,
136
+ "loss": 0.9253,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 0.95,
141
+ "eval_accuracy": 0.6635071090047393,
142
+ "eval_loss": 0.9150586128234863,
143
+ "eval_runtime": 91.0647,
144
+ "eval_samples_per_second": 9.268,
145
+ "eval_steps_per_second": 1.164,
146
+ "step": 200
147
+ },
148
+ {
149
+ "epoch": 1.0,
150
+ "learning_rate": 0.0001504739336492891,
151
+ "loss": 0.8305,
152
+ "step": 210
153
+ },
154
+ {
155
+ "epoch": 1.04,
156
+ "learning_rate": 0.0001481042654028436,
157
+ "loss": 0.8379,
158
+ "step": 220
159
+ },
160
+ {
161
+ "epoch": 1.09,
162
+ "learning_rate": 0.0001457345971563981,
163
+ "loss": 0.707,
164
+ "step": 230
165
+ },
166
+ {
167
+ "epoch": 1.14,
168
+ "learning_rate": 0.0001433649289099526,
169
+ "loss": 0.8205,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 1.18,
174
+ "learning_rate": 0.00014099526066350712,
175
+ "loss": 0.7565,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 1.23,
180
+ "learning_rate": 0.0001386255924170616,
181
+ "loss": 0.7539,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 1.28,
186
+ "learning_rate": 0.00013625592417061612,
187
+ "loss": 0.9389,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 1.33,
192
+ "learning_rate": 0.0001338862559241706,
193
+ "loss": 0.8062,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 1.37,
198
+ "learning_rate": 0.00013151658767772512,
199
+ "loss": 0.7702,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 1.42,
204
+ "learning_rate": 0.00012914691943127963,
205
+ "loss": 0.8371,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 1.42,
210
+ "eval_accuracy": 0.693127962085308,
211
+ "eval_loss": 0.8071145415306091,
212
+ "eval_runtime": 87.0572,
213
+ "eval_samples_per_second": 9.695,
214
+ "eval_steps_per_second": 1.218,
215
+ "step": 300
216
+ },
217
+ {
218
+ "epoch": 1.47,
219
+ "learning_rate": 0.00012677725118483412,
220
+ "loss": 0.7698,
221
+ "step": 310
222
+ },
223
+ {
224
+ "epoch": 1.52,
225
+ "learning_rate": 0.00012440758293838863,
226
+ "loss": 0.8412,
227
+ "step": 320
228
+ },
229
+ {
230
+ "epoch": 1.56,
231
+ "learning_rate": 0.00012203791469194314,
232
+ "loss": 0.8268,
233
+ "step": 330
234
+ },
235
+ {
236
+ "epoch": 1.61,
237
+ "learning_rate": 0.00011966824644549763,
238
+ "loss": 0.6359,
239
+ "step": 340
240
+ },
241
+ {
242
+ "epoch": 1.66,
243
+ "learning_rate": 0.00011729857819905214,
244
+ "loss": 0.6765,
245
+ "step": 350
246
+ },
247
+ {
248
+ "epoch": 1.71,
249
+ "learning_rate": 0.00011492890995260664,
250
+ "loss": 0.7455,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 1.75,
255
+ "learning_rate": 0.00011255924170616114,
256
+ "loss": 0.7108,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 1.8,
261
+ "learning_rate": 0.00011018957345971565,
262
+ "loss": 0.6456,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 1.85,
267
+ "learning_rate": 0.00010781990521327015,
268
+ "loss": 0.7036,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 1.9,
273
+ "learning_rate": 0.00010545023696682465,
274
+ "loss": 0.7355,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 1.9,
279
+ "eval_accuracy": 0.707345971563981,
280
+ "eval_loss": 0.7562642693519592,
281
+ "eval_runtime": 87.3813,
282
+ "eval_samples_per_second": 9.659,
283
+ "eval_steps_per_second": 1.213,
284
+ "step": 400
285
+ },
286
+ {
287
+ "epoch": 1.94,
288
+ "learning_rate": 0.00010308056872037915,
289
+ "loss": 0.6315,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 1.99,
294
+ "learning_rate": 0.00010071090047393366,
295
+ "loss": 0.6996,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 2.04,
300
+ "learning_rate": 9.834123222748816e-05,
301
+ "loss": 0.6379,
302
+ "step": 430
303
+ },
304
+ {
305
+ "epoch": 2.09,
306
+ "learning_rate": 9.597156398104266e-05,
307
+ "loss": 0.5765,
308
+ "step": 440
309
+ },
310
+ {
311
+ "epoch": 2.13,
312
+ "learning_rate": 9.360189573459716e-05,
313
+ "loss": 0.6309,
314
+ "step": 450
315
+ },
316
+ {
317
+ "epoch": 2.18,
318
+ "learning_rate": 9.123222748815167e-05,
319
+ "loss": 0.5836,
320
+ "step": 460
321
+ },
322
+ {
323
+ "epoch": 2.23,
324
+ "learning_rate": 8.886255924170617e-05,
325
+ "loss": 0.5075,
326
+ "step": 470
327
+ },
328
+ {
329
+ "epoch": 2.27,
330
+ "learning_rate": 8.649289099526067e-05,
331
+ "loss": 0.5817,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 2.32,
336
+ "learning_rate": 8.412322274881517e-05,
337
+ "loss": 0.6765,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 2.37,
342
+ "learning_rate": 8.175355450236967e-05,
343
+ "loss": 0.6532,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 2.37,
348
+ "eval_accuracy": 0.6895734597156398,
349
+ "eval_loss": 0.7542913556098938,
350
+ "eval_runtime": 87.8093,
351
+ "eval_samples_per_second": 9.612,
352
+ "eval_steps_per_second": 1.207,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 2.42,
357
+ "learning_rate": 7.938388625592418e-05,
358
+ "loss": 0.5203,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 2.46,
363
+ "learning_rate": 7.701421800947868e-05,
364
+ "loss": 0.4772,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 2.51,
369
+ "learning_rate": 7.464454976303318e-05,
370
+ "loss": 0.6391,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 2.56,
375
+ "learning_rate": 7.227488151658768e-05,
376
+ "loss": 0.5908,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 2.61,
381
+ "learning_rate": 6.990521327014218e-05,
382
+ "loss": 0.5459,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 2.65,
387
+ "learning_rate": 6.753554502369669e-05,
388
+ "loss": 0.6922,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 2.7,
393
+ "learning_rate": 6.516587677725119e-05,
394
+ "loss": 0.5738,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 2.75,
399
+ "learning_rate": 6.279620853080569e-05,
400
+ "loss": 0.5743,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 2.8,
405
+ "learning_rate": 6.0426540284360186e-05,
406
+ "loss": 0.5733,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 2.84,
411
+ "learning_rate": 5.829383886255925e-05,
412
+ "loss": 0.5982,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 2.84,
417
+ "eval_accuracy": 0.726303317535545,
418
+ "eval_loss": 0.7260138988494873,
419
+ "eval_runtime": 88.3787,
420
+ "eval_samples_per_second": 9.55,
421
+ "eval_steps_per_second": 1.199,
422
+ "step": 600
423
+ },
424
+ {
425
+ "epoch": 2.89,
426
+ "learning_rate": 5.592417061611375e-05,
427
+ "loss": 0.6154,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 2.94,
432
+ "learning_rate": 5.355450236966825e-05,
433
+ "loss": 0.577,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 2.99,
438
+ "learning_rate": 5.1184834123222746e-05,
439
+ "loss": 0.5146,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 3.03,
444
+ "learning_rate": 4.881516587677725e-05,
445
+ "loss": 0.43,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 3.08,
450
+ "learning_rate": 4.644549763033176e-05,
451
+ "loss": 0.4428,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 3.13,
456
+ "learning_rate": 4.407582938388626e-05,
457
+ "loss": 0.4397,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 3.18,
462
+ "learning_rate": 4.1706161137440756e-05,
463
+ "loss": 0.3049,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 3.22,
468
+ "learning_rate": 3.933649289099526e-05,
469
+ "loss": 0.3993,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 3.27,
474
+ "learning_rate": 3.696682464454976e-05,
475
+ "loss": 0.4284,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 3.32,
480
+ "learning_rate": 3.4597156398104267e-05,
481
+ "loss": 0.4276,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 3.32,
486
+ "eval_accuracy": 0.7239336492890995,
487
+ "eval_loss": 0.7345680594444275,
488
+ "eval_runtime": 87.1766,
489
+ "eval_samples_per_second": 9.681,
490
+ "eval_steps_per_second": 1.216,
491
+ "step": 700
492
+ },
493
+ {
494
+ "epoch": 3.36,
495
+ "learning_rate": 3.222748815165877e-05,
496
+ "loss": 0.3917,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 3.41,
501
+ "learning_rate": 2.9857819905213268e-05,
502
+ "loss": 0.3747,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 3.46,
507
+ "learning_rate": 2.7488151658767774e-05,
508
+ "loss": 0.3327,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 3.51,
513
+ "learning_rate": 2.5118483412322273e-05,
514
+ "loss": 0.3647,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 3.55,
519
+ "learning_rate": 2.274881516587678e-05,
520
+ "loss": 0.3785,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 3.6,
525
+ "learning_rate": 2.037914691943128e-05,
526
+ "loss": 0.4262,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 3.65,
531
+ "learning_rate": 1.8009478672985784e-05,
532
+ "loss": 0.4531,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 3.7,
537
+ "learning_rate": 1.5639810426540286e-05,
538
+ "loss": 0.3191,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 3.74,
543
+ "learning_rate": 1.3270142180094788e-05,
544
+ "loss": 0.427,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 3.79,
549
+ "learning_rate": 1.0900473933649289e-05,
550
+ "loss": 0.4935,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 3.79,
555
+ "eval_accuracy": 0.7132701421800948,
556
+ "eval_loss": 0.749038577079773,
557
+ "eval_runtime": 86.4859,
558
+ "eval_samples_per_second": 9.759,
559
+ "eval_steps_per_second": 1.226,
560
+ "step": 800
561
+ }
562
+ ],
563
+ "max_steps": 844,
564
+ "num_train_epochs": 4,
565
+ "total_flos": 9.893668328401859e+17,
566
+ "trial_name": null,
567
+ "trial_params": null
568
+ }
checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0decd387a5144fbf9b05b0832efcffff92fc5abe56dfe179661f63568600ca
3
+ size 3515
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "0",
13
+ "1": "1",
14
+ "2": "2",
15
+ "3": "3",
16
+ "4": "4"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "0": "0",
23
+ "1": "1",
24
+ "2": "2",
25
+ "3": "3",
26
+ "4": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.26.1"
38
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.726303317535545,
4
+ "eval_loss": 0.7260138988494873,
5
+ "eval_runtime": 88.4264,
6
+ "eval_samples_per_second": 9.545,
7
+ "eval_steps_per_second": 1.199
8
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd8865690927931e71e4fe5f7cd1c001f5e3c322f7cfe9c0e0faabaf2e20126
3
+ size 343277933
runs/Feb24_15-21-30_27e8a3adc154/1677252096.5539162/events.out.tfevents.1677252096.27e8a3adc154.110.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd9a93da192b1acdf96db4c6aa21a8b9334ec6fce4dbbb3e87f32544da1f12f
3
+ size 5698
runs/Feb24_15-21-30_27e8a3adc154/events.out.tfevents.1677252096.27e8a3adc154.110.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e68cc72337c9ecf97632f87ebd60fac3f743aa6ace71e5318befea77cbf6118
3
+ size 19976
runs/Feb24_15-21-30_27e8a3adc154/events.out.tfevents.1677254593.27e8a3adc154.110.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dd62bef4427b3920a329013fda494769dbba0f60edf9f9d15bdbb91e8df0c1
3
+ size 363
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "total_flos": 1.0430702255838413e+18,
4
+ "train_loss": 0.674277721140622,
5
+ "train_runtime": 2407.857,
6
+ "train_samples_per_second": 5.59,
7
+ "train_steps_per_second": 0.351
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7260138988494873,
3
+ "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-600",
4
+ "epoch": 4.0,
5
+ "global_step": 844,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 0.00019763033175355452,
13
+ "loss": 1.4193,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 0.000195260663507109,
19
+ "loss": 1.0491,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 0.00019289099526066352,
25
+ "loss": 1.1612,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 0.000190521327014218,
31
+ "loss": 1.0393,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.24,
36
+ "learning_rate": 0.00018815165876777252,
37
+ "loss": 1.0533,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.28,
42
+ "learning_rate": 0.00018578199052132703,
43
+ "loss": 0.9506,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.33,
48
+ "learning_rate": 0.00018341232227488152,
49
+ "loss": 0.9148,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.38,
54
+ "learning_rate": 0.00018104265402843603,
55
+ "loss": 0.9735,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.43,
60
+ "learning_rate": 0.00017890995260663508,
61
+ "loss": 0.783,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.47,
66
+ "learning_rate": 0.0001765402843601896,
67
+ "loss": 0.9235,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.47,
72
+ "eval_accuracy": 0.6492890995260664,
73
+ "eval_loss": 0.8972262740135193,
74
+ "eval_runtime": 90.3527,
75
+ "eval_samples_per_second": 9.341,
76
+ "eval_steps_per_second": 1.173,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.52,
81
+ "learning_rate": 0.00017417061611374408,
82
+ "loss": 0.9931,
83
+ "step": 110
84
+ },
85
+ {
86
+ "epoch": 0.57,
87
+ "learning_rate": 0.0001718009478672986,
88
+ "loss": 1.0035,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 0.62,
93
+ "learning_rate": 0.00016943127962085308,
94
+ "loss": 0.9619,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 0.66,
99
+ "learning_rate": 0.0001670616113744076,
100
+ "loss": 0.8618,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 0.71,
105
+ "learning_rate": 0.0001646919431279621,
106
+ "loss": 0.9862,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 0.76,
111
+ "learning_rate": 0.0001623222748815166,
112
+ "loss": 0.9977,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 0.81,
117
+ "learning_rate": 0.0001599526066350711,
118
+ "loss": 0.8715,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 0.85,
123
+ "learning_rate": 0.0001575829383886256,
124
+ "loss": 0.9153,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 0.9,
129
+ "learning_rate": 0.0001552132701421801,
130
+ "loss": 0.784,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 0.95,
135
+ "learning_rate": 0.00015284360189573462,
136
+ "loss": 0.9253,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 0.95,
141
+ "eval_accuracy": 0.6635071090047393,
142
+ "eval_loss": 0.9150586128234863,
143
+ "eval_runtime": 91.0647,
144
+ "eval_samples_per_second": 9.268,
145
+ "eval_steps_per_second": 1.164,
146
+ "step": 200
147
+ },
148
+ {
149
+ "epoch": 1.0,
150
+ "learning_rate": 0.0001504739336492891,
151
+ "loss": 0.8305,
152
+ "step": 210
153
+ },
154
+ {
155
+ "epoch": 1.04,
156
+ "learning_rate": 0.0001481042654028436,
157
+ "loss": 0.8379,
158
+ "step": 220
159
+ },
160
+ {
161
+ "epoch": 1.09,
162
+ "learning_rate": 0.0001457345971563981,
163
+ "loss": 0.707,
164
+ "step": 230
165
+ },
166
+ {
167
+ "epoch": 1.14,
168
+ "learning_rate": 0.0001433649289099526,
169
+ "loss": 0.8205,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 1.18,
174
+ "learning_rate": 0.00014099526066350712,
175
+ "loss": 0.7565,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 1.23,
180
+ "learning_rate": 0.0001386255924170616,
181
+ "loss": 0.7539,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 1.28,
186
+ "learning_rate": 0.00013625592417061612,
187
+ "loss": 0.9389,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 1.33,
192
+ "learning_rate": 0.0001338862559241706,
193
+ "loss": 0.8062,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 1.37,
198
+ "learning_rate": 0.00013151658767772512,
199
+ "loss": 0.7702,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 1.42,
204
+ "learning_rate": 0.00012914691943127963,
205
+ "loss": 0.8371,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 1.42,
210
+ "eval_accuracy": 0.693127962085308,
211
+ "eval_loss": 0.8071145415306091,
212
+ "eval_runtime": 87.0572,
213
+ "eval_samples_per_second": 9.695,
214
+ "eval_steps_per_second": 1.218,
215
+ "step": 300
216
+ },
217
+ {
218
+ "epoch": 1.47,
219
+ "learning_rate": 0.00012677725118483412,
220
+ "loss": 0.7698,
221
+ "step": 310
222
+ },
223
+ {
224
+ "epoch": 1.52,
225
+ "learning_rate": 0.00012440758293838863,
226
+ "loss": 0.8412,
227
+ "step": 320
228
+ },
229
+ {
230
+ "epoch": 1.56,
231
+ "learning_rate": 0.00012203791469194314,
232
+ "loss": 0.8268,
233
+ "step": 330
234
+ },
235
+ {
236
+ "epoch": 1.61,
237
+ "learning_rate": 0.00011966824644549763,
238
+ "loss": 0.6359,
239
+ "step": 340
240
+ },
241
+ {
242
+ "epoch": 1.66,
243
+ "learning_rate": 0.00011729857819905214,
244
+ "loss": 0.6765,
245
+ "step": 350
246
+ },
247
+ {
248
+ "epoch": 1.71,
249
+ "learning_rate": 0.00011492890995260664,
250
+ "loss": 0.7455,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 1.75,
255
+ "learning_rate": 0.00011255924170616114,
256
+ "loss": 0.7108,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 1.8,
261
+ "learning_rate": 0.00011018957345971565,
262
+ "loss": 0.6456,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 1.85,
267
+ "learning_rate": 0.00010781990521327015,
268
+ "loss": 0.7036,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 1.9,
273
+ "learning_rate": 0.00010545023696682465,
274
+ "loss": 0.7355,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 1.9,
279
+ "eval_accuracy": 0.707345971563981,
280
+ "eval_loss": 0.7562642693519592,
281
+ "eval_runtime": 87.3813,
282
+ "eval_samples_per_second": 9.659,
283
+ "eval_steps_per_second": 1.213,
284
+ "step": 400
285
+ },
286
+ {
287
+ "epoch": 1.94,
288
+ "learning_rate": 0.00010308056872037915,
289
+ "loss": 0.6315,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 1.99,
294
+ "learning_rate": 0.00010071090047393366,
295
+ "loss": 0.6996,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 2.04,
300
+ "learning_rate": 9.834123222748816e-05,
301
+ "loss": 0.6379,
302
+ "step": 430
303
+ },
304
+ {
305
+ "epoch": 2.09,
306
+ "learning_rate": 9.597156398104266e-05,
307
+ "loss": 0.5765,
308
+ "step": 440
309
+ },
310
+ {
311
+ "epoch": 2.13,
312
+ "learning_rate": 9.360189573459716e-05,
313
+ "loss": 0.6309,
314
+ "step": 450
315
+ },
316
+ {
317
+ "epoch": 2.18,
318
+ "learning_rate": 9.123222748815167e-05,
319
+ "loss": 0.5836,
320
+ "step": 460
321
+ },
322
+ {
323
+ "epoch": 2.23,
324
+ "learning_rate": 8.886255924170617e-05,
325
+ "loss": 0.5075,
326
+ "step": 470
327
+ },
328
+ {
329
+ "epoch": 2.27,
330
+ "learning_rate": 8.649289099526067e-05,
331
+ "loss": 0.5817,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 2.32,
336
+ "learning_rate": 8.412322274881517e-05,
337
+ "loss": 0.6765,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 2.37,
342
+ "learning_rate": 8.175355450236967e-05,
343
+ "loss": 0.6532,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 2.37,
348
+ "eval_accuracy": 0.6895734597156398,
349
+ "eval_loss": 0.7542913556098938,
350
+ "eval_runtime": 87.8093,
351
+ "eval_samples_per_second": 9.612,
352
+ "eval_steps_per_second": 1.207,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 2.42,
357
+ "learning_rate": 7.938388625592418e-05,
358
+ "loss": 0.5203,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 2.46,
363
+ "learning_rate": 7.701421800947868e-05,
364
+ "loss": 0.4772,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 2.51,
369
+ "learning_rate": 7.464454976303318e-05,
370
+ "loss": 0.6391,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 2.56,
375
+ "learning_rate": 7.227488151658768e-05,
376
+ "loss": 0.5908,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 2.61,
381
+ "learning_rate": 6.990521327014218e-05,
382
+ "loss": 0.5459,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 2.65,
387
+ "learning_rate": 6.753554502369669e-05,
388
+ "loss": 0.6922,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 2.7,
393
+ "learning_rate": 6.516587677725119e-05,
394
+ "loss": 0.5738,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 2.75,
399
+ "learning_rate": 6.279620853080569e-05,
400
+ "loss": 0.5743,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 2.8,
405
+ "learning_rate": 6.0426540284360186e-05,
406
+ "loss": 0.5733,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 2.84,
411
+ "learning_rate": 5.829383886255925e-05,
412
+ "loss": 0.5982,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 2.84,
417
+ "eval_accuracy": 0.726303317535545,
418
+ "eval_loss": 0.7260138988494873,
419
+ "eval_runtime": 88.3787,
420
+ "eval_samples_per_second": 9.55,
421
+ "eval_steps_per_second": 1.199,
422
+ "step": 600
423
+ },
424
+ {
425
+ "epoch": 2.89,
426
+ "learning_rate": 5.592417061611375e-05,
427
+ "loss": 0.6154,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 2.94,
432
+ "learning_rate": 5.355450236966825e-05,
433
+ "loss": 0.577,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 2.99,
438
+ "learning_rate": 5.1184834123222746e-05,
439
+ "loss": 0.5146,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 3.03,
444
+ "learning_rate": 4.881516587677725e-05,
445
+ "loss": 0.43,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 3.08,
450
+ "learning_rate": 4.644549763033176e-05,
451
+ "loss": 0.4428,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 3.13,
456
+ "learning_rate": 4.407582938388626e-05,
457
+ "loss": 0.4397,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 3.18,
462
+ "learning_rate": 4.1706161137440756e-05,
463
+ "loss": 0.3049,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 3.22,
468
+ "learning_rate": 3.933649289099526e-05,
469
+ "loss": 0.3993,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 3.27,
474
+ "learning_rate": 3.696682464454976e-05,
475
+ "loss": 0.4284,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 3.32,
480
+ "learning_rate": 3.4597156398104267e-05,
481
+ "loss": 0.4276,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 3.32,
486
+ "eval_accuracy": 0.7239336492890995,
487
+ "eval_loss": 0.7345680594444275,
488
+ "eval_runtime": 87.1766,
489
+ "eval_samples_per_second": 9.681,
490
+ "eval_steps_per_second": 1.216,
491
+ "step": 700
492
+ },
493
+ {
494
+ "epoch": 3.36,
495
+ "learning_rate": 3.222748815165877e-05,
496
+ "loss": 0.3917,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 3.41,
501
+ "learning_rate": 2.9857819905213268e-05,
502
+ "loss": 0.3747,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 3.46,
507
+ "learning_rate": 2.7488151658767774e-05,
508
+ "loss": 0.3327,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 3.51,
513
+ "learning_rate": 2.5118483412322273e-05,
514
+ "loss": 0.3647,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 3.55,
519
+ "learning_rate": 2.274881516587678e-05,
520
+ "loss": 0.3785,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 3.6,
525
+ "learning_rate": 2.037914691943128e-05,
526
+ "loss": 0.4262,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 3.65,
531
+ "learning_rate": 1.8009478672985784e-05,
532
+ "loss": 0.4531,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 3.7,
537
+ "learning_rate": 1.5639810426540286e-05,
538
+ "loss": 0.3191,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 3.74,
543
+ "learning_rate": 1.3270142180094788e-05,
544
+ "loss": 0.427,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 3.79,
549
+ "learning_rate": 1.0900473933649289e-05,
550
+ "loss": 0.4935,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 3.79,
555
+ "eval_accuracy": 0.7132701421800948,
556
+ "eval_loss": 0.749038577079773,
557
+ "eval_runtime": 86.4859,
558
+ "eval_samples_per_second": 9.759,
559
+ "eval_steps_per_second": 1.226,
560
+ "step": 800
561
+ },
562
+ {
563
+ "epoch": 3.84,
564
+ "learning_rate": 8.530805687203793e-06,
565
+ "loss": 0.3046,
566
+ "step": 810
567
+ },
568
+ {
569
+ "epoch": 3.89,
570
+ "learning_rate": 6.161137440758294e-06,
571
+ "loss": 0.3368,
572
+ "step": 820
573
+ },
574
+ {
575
+ "epoch": 3.93,
576
+ "learning_rate": 3.791469194312797e-06,
577
+ "loss": 0.3309,
578
+ "step": 830
579
+ },
580
+ {
581
+ "epoch": 3.98,
582
+ "learning_rate": 1.4218009478672987e-06,
583
+ "loss": 0.3304,
584
+ "step": 840
585
+ },
586
+ {
587
+ "epoch": 4.0,
588
+ "step": 844,
589
+ "total_flos": 1.0430702255838413e+18,
590
+ "train_loss": 0.674277721140622,
591
+ "train_runtime": 2407.857,
592
+ "train_samples_per_second": 5.59,
593
+ "train_steps_per_second": 0.351
594
+ }
595
+ ],
596
+ "max_steps": 844,
597
+ "num_train_epochs": 4,
598
+ "total_flos": 1.0430702255838413e+18,
599
+ "trial_name": null,
600
+ "trial_params": null
601
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0decd387a5144fbf9b05b0832efcffff92fc5abe56dfe179661f63568600ca
3
+ size 3515