dima806 committed on
Commit
ce07923
1 Parent(s): 6f9d8f1

Upload folder using huggingface_hub

Browse files
checkpoint-2340/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "food",
13
+ "1": "beverage"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "beverage": 1,
20
+ "food": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "problem_type": "single_label_classification",
29
+ "qkv_bias": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.34.0"
32
+ }
checkpoint-2340/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a32eb39caf85afc1fa00fc69e5a2df8e27c5ff99b15948d1cd8701399d10992
3
+ size 686568453
checkpoint-2340/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-2340/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7690a6365b86e0a5a2c891df93896284e9a425fd219ccd1720c194bbd6a66e7d
3
+ size 343268717
checkpoint-2340/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd70d9239897b0884e8df145427e40ef0b84abc91467fbf35938bcf6b1568e7a
3
+ size 14575
checkpoint-2340/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48333eb7a27600e24022af26291022b410554f24c21a3dcc1f93c21030f8e64e
3
+ size 627
checkpoint-2340/trainer_state.json ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4006434977054596,
3
+ "best_model_checkpoint": "food_beverages_japan_image_detection/checkpoint-2340",
4
+ "epoch": 39.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2340,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6215644820295984,
14
+ "eval_loss": 0.6740179657936096,
15
+ "eval_runtime": 8.3101,
16
+ "eval_samples_per_second": 56.919,
17
+ "eval_steps_per_second": 7.22,
18
+ "step": 60
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.7019027484143763,
23
+ "eval_loss": 0.6518784165382385,
24
+ "eval_runtime": 8.3128,
25
+ "eval_samples_per_second": 56.9,
26
+ "eval_steps_per_second": 7.218,
27
+ "step": 120
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.7315010570824524,
32
+ "eval_loss": 0.6313825249671936,
33
+ "eval_runtime": 8.4056,
34
+ "eval_samples_per_second": 56.272,
35
+ "eval_steps_per_second": 7.138,
36
+ "step": 180
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_accuracy": 0.7378435517970402,
41
+ "eval_loss": 0.6076480150222778,
42
+ "eval_runtime": 8.3695,
43
+ "eval_samples_per_second": 56.515,
44
+ "eval_steps_per_second": 7.169,
45
+ "step": 240
46
+ },
47
+ {
48
+ "epoch": 5.0,
49
+ "eval_accuracy": 0.7505285412262156,
50
+ "eval_loss": 0.5873068571090698,
51
+ "eval_runtime": 8.3314,
52
+ "eval_samples_per_second": 56.773,
53
+ "eval_steps_per_second": 7.202,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 6.0,
58
+ "eval_accuracy": 0.7758985200845666,
59
+ "eval_loss": 0.5679311752319336,
60
+ "eval_runtime": 8.239,
61
+ "eval_samples_per_second": 57.41,
62
+ "eval_steps_per_second": 7.282,
63
+ "step": 360
64
+ },
65
+ {
66
+ "epoch": 7.0,
67
+ "eval_accuracy": 0.7780126849894292,
68
+ "eval_loss": 0.5512701869010925,
69
+ "eval_runtime": 7.9131,
70
+ "eval_samples_per_second": 59.775,
71
+ "eval_steps_per_second": 7.582,
72
+ "step": 420
73
+ },
74
+ {
75
+ "epoch": 8.0,
76
+ "eval_accuracy": 0.7801268498942917,
77
+ "eval_loss": 0.5363279581069946,
78
+ "eval_runtime": 7.9214,
79
+ "eval_samples_per_second": 59.712,
80
+ "eval_steps_per_second": 7.574,
81
+ "step": 480
82
+ },
83
+ {
84
+ "epoch": 8.33,
85
+ "learning_rate": 4.042553191489362e-06,
86
+ "loss": 0.5935,
87
+ "step": 500
88
+ },
89
+ {
90
+ "epoch": 9.0,
91
+ "eval_accuracy": 0.7843551797040169,
92
+ "eval_loss": 0.5246909260749817,
93
+ "eval_runtime": 7.9484,
94
+ "eval_samples_per_second": 59.509,
95
+ "eval_steps_per_second": 7.549,
96
+ "step": 540
97
+ },
98
+ {
99
+ "epoch": 10.0,
100
+ "eval_accuracy": 0.7906976744186046,
101
+ "eval_loss": 0.5121592283248901,
102
+ "eval_runtime": 7.9961,
103
+ "eval_samples_per_second": 59.154,
104
+ "eval_steps_per_second": 7.504,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 11.0,
109
+ "eval_accuracy": 0.7991543340380549,
110
+ "eval_loss": 0.4976297616958618,
111
+ "eval_runtime": 7.9409,
112
+ "eval_samples_per_second": 59.565,
113
+ "eval_steps_per_second": 7.556,
114
+ "step": 660
115
+ },
116
+ {
117
+ "epoch": 12.0,
118
+ "eval_accuracy": 0.8054968287526427,
119
+ "eval_loss": 0.48714011907577515,
120
+ "eval_runtime": 7.9777,
121
+ "eval_samples_per_second": 59.29,
122
+ "eval_steps_per_second": 7.521,
123
+ "step": 720
124
+ },
125
+ {
126
+ "epoch": 13.0,
127
+ "eval_accuracy": 0.8033826638477801,
128
+ "eval_loss": 0.47856396436691284,
129
+ "eval_runtime": 8.0373,
130
+ "eval_samples_per_second": 58.85,
131
+ "eval_steps_per_second": 7.465,
132
+ "step": 780
133
+ },
134
+ {
135
+ "epoch": 14.0,
136
+ "eval_accuracy": 0.8054968287526427,
137
+ "eval_loss": 0.4685792028903961,
138
+ "eval_runtime": 8.0022,
139
+ "eval_samples_per_second": 59.109,
140
+ "eval_steps_per_second": 7.498,
141
+ "step": 840
142
+ },
143
+ {
144
+ "epoch": 15.0,
145
+ "eval_accuracy": 0.8076109936575053,
146
+ "eval_loss": 0.4633488357067108,
147
+ "eval_runtime": 7.897,
148
+ "eval_samples_per_second": 59.896,
149
+ "eval_steps_per_second": 7.598,
150
+ "step": 900
151
+ },
152
+ {
153
+ "epoch": 16.0,
154
+ "eval_accuracy": 0.8097251585623678,
155
+ "eval_loss": 0.4541207253932953,
156
+ "eval_runtime": 7.9062,
157
+ "eval_samples_per_second": 59.826,
158
+ "eval_steps_per_second": 7.589,
159
+ "step": 960
160
+ },
161
+ {
162
+ "epoch": 16.67,
163
+ "learning_rate": 2.978723404255319e-06,
164
+ "loss": 0.3919,
165
+ "step": 1000
166
+ },
167
+ {
168
+ "epoch": 17.0,
169
+ "eval_accuracy": 0.8181818181818182,
170
+ "eval_loss": 0.44693174958229065,
171
+ "eval_runtime": 7.9465,
172
+ "eval_samples_per_second": 59.523,
173
+ "eval_steps_per_second": 7.551,
174
+ "step": 1020
175
+ },
176
+ {
177
+ "epoch": 18.0,
178
+ "eval_accuracy": 0.8202959830866807,
179
+ "eval_loss": 0.4403076171875,
180
+ "eval_runtime": 7.9729,
181
+ "eval_samples_per_second": 59.326,
182
+ "eval_steps_per_second": 7.526,
183
+ "step": 1080
184
+ },
185
+ {
186
+ "epoch": 19.0,
187
+ "eval_accuracy": 0.8181818181818182,
188
+ "eval_loss": 0.43559935688972473,
189
+ "eval_runtime": 7.8143,
190
+ "eval_samples_per_second": 60.53,
191
+ "eval_steps_per_second": 7.678,
192
+ "step": 1140
193
+ },
194
+ {
195
+ "epoch": 20.0,
196
+ "eval_accuracy": 0.8245243128964059,
197
+ "eval_loss": 0.43019139766693115,
198
+ "eval_runtime": 7.7993,
199
+ "eval_samples_per_second": 60.646,
200
+ "eval_steps_per_second": 7.693,
201
+ "step": 1200
202
+ },
203
+ {
204
+ "epoch": 21.0,
205
+ "eval_accuracy": 0.8202959830866807,
206
+ "eval_loss": 0.42576563358306885,
207
+ "eval_runtime": 7.7862,
208
+ "eval_samples_per_second": 60.749,
209
+ "eval_steps_per_second": 7.706,
210
+ "step": 1260
211
+ },
212
+ {
213
+ "epoch": 22.0,
214
+ "eval_accuracy": 0.8245243128964059,
215
+ "eval_loss": 0.4226011633872986,
216
+ "eval_runtime": 7.8089,
217
+ "eval_samples_per_second": 60.572,
218
+ "eval_steps_per_second": 7.684,
219
+ "step": 1320
220
+ },
221
+ {
222
+ "epoch": 23.0,
223
+ "eval_accuracy": 0.8266384778012685,
224
+ "eval_loss": 0.4196974039077759,
225
+ "eval_runtime": 7.7453,
226
+ "eval_samples_per_second": 61.069,
227
+ "eval_steps_per_second": 7.747,
228
+ "step": 1380
229
+ },
230
+ {
231
+ "epoch": 24.0,
232
+ "eval_accuracy": 0.8308668076109936,
233
+ "eval_loss": 0.4153010845184326,
234
+ "eval_runtime": 7.7967,
235
+ "eval_samples_per_second": 60.667,
236
+ "eval_steps_per_second": 7.696,
237
+ "step": 1440
238
+ },
239
+ {
240
+ "epoch": 25.0,
241
+ "learning_rate": 1.9148936170212767e-06,
242
+ "loss": 0.2598,
243
+ "step": 1500
244
+ },
245
+ {
246
+ "epoch": 25.0,
247
+ "eval_accuracy": 0.8245243128964059,
248
+ "eval_loss": 0.41346848011016846,
249
+ "eval_runtime": 8.0165,
250
+ "eval_samples_per_second": 59.003,
251
+ "eval_steps_per_second": 7.485,
252
+ "step": 1500
253
+ },
254
+ {
255
+ "epoch": 26.0,
256
+ "eval_accuracy": 0.828752642706131,
257
+ "eval_loss": 0.41087058186531067,
258
+ "eval_runtime": 8.2057,
259
+ "eval_samples_per_second": 57.643,
260
+ "eval_steps_per_second": 7.312,
261
+ "step": 1560
262
+ },
263
+ {
264
+ "epoch": 27.0,
265
+ "eval_accuracy": 0.8350951374207188,
266
+ "eval_loss": 0.410675048828125,
267
+ "eval_runtime": 8.1905,
268
+ "eval_samples_per_second": 57.75,
269
+ "eval_steps_per_second": 7.326,
270
+ "step": 1620
271
+ },
272
+ {
273
+ "epoch": 28.0,
274
+ "eval_accuracy": 0.8372093023255814,
275
+ "eval_loss": 0.4087086319923401,
276
+ "eval_runtime": 8.1633,
277
+ "eval_samples_per_second": 57.942,
278
+ "eval_steps_per_second": 7.35,
279
+ "step": 1680
280
+ },
281
+ {
282
+ "epoch": 29.0,
283
+ "eval_accuracy": 0.8266384778012685,
284
+ "eval_loss": 0.41004815697669983,
285
+ "eval_runtime": 8.2033,
286
+ "eval_samples_per_second": 57.66,
287
+ "eval_steps_per_second": 7.314,
288
+ "step": 1740
289
+ },
290
+ {
291
+ "epoch": 30.0,
292
+ "eval_accuracy": 0.8372093023255814,
293
+ "eval_loss": 0.40502193570137024,
294
+ "eval_runtime": 8.171,
295
+ "eval_samples_per_second": 57.888,
296
+ "eval_steps_per_second": 7.343,
297
+ "step": 1800
298
+ },
299
+ {
300
+ "epoch": 31.0,
301
+ "eval_accuracy": 0.828752642706131,
302
+ "eval_loss": 0.40703651309013367,
303
+ "eval_runtime": 8.1556,
304
+ "eval_samples_per_second": 57.997,
305
+ "eval_steps_per_second": 7.357,
306
+ "step": 1860
307
+ },
308
+ {
309
+ "epoch": 32.0,
310
+ "eval_accuracy": 0.8350951374207188,
311
+ "eval_loss": 0.40481287240982056,
312
+ "eval_runtime": 8.1436,
313
+ "eval_samples_per_second": 58.083,
314
+ "eval_steps_per_second": 7.368,
315
+ "step": 1920
316
+ },
317
+ {
318
+ "epoch": 33.0,
319
+ "eval_accuracy": 0.8329809725158562,
320
+ "eval_loss": 0.4047437608242035,
321
+ "eval_runtime": 8.1259,
322
+ "eval_samples_per_second": 58.209,
323
+ "eval_steps_per_second": 7.384,
324
+ "step": 1980
325
+ },
326
+ {
327
+ "epoch": 33.33,
328
+ "learning_rate": 8.510638297872341e-07,
329
+ "loss": 0.1847,
330
+ "step": 2000
331
+ },
332
+ {
333
+ "epoch": 34.0,
334
+ "eval_accuracy": 0.8329809725158562,
335
+ "eval_loss": 0.4028582274913788,
336
+ "eval_runtime": 8.1842,
337
+ "eval_samples_per_second": 57.795,
338
+ "eval_steps_per_second": 7.331,
339
+ "step": 2040
340
+ },
341
+ {
342
+ "epoch": 35.0,
343
+ "eval_accuracy": 0.8329809725158562,
344
+ "eval_loss": 0.4015713930130005,
345
+ "eval_runtime": 8.1082,
346
+ "eval_samples_per_second": 58.336,
347
+ "eval_steps_per_second": 7.4,
348
+ "step": 2100
349
+ },
350
+ {
351
+ "epoch": 36.0,
352
+ "eval_accuracy": 0.8329809725158562,
353
+ "eval_loss": 0.4017912745475769,
354
+ "eval_runtime": 8.0144,
355
+ "eval_samples_per_second": 59.019,
356
+ "eval_steps_per_second": 7.487,
357
+ "step": 2160
358
+ },
359
+ {
360
+ "epoch": 37.0,
361
+ "eval_accuracy": 0.8372093023255814,
362
+ "eval_loss": 0.401458203792572,
363
+ "eval_runtime": 7.7426,
364
+ "eval_samples_per_second": 61.091,
365
+ "eval_steps_per_second": 7.749,
366
+ "step": 2220
367
+ },
368
+ {
369
+ "epoch": 38.0,
370
+ "eval_accuracy": 0.8372093023255814,
371
+ "eval_loss": 0.40120792388916016,
372
+ "eval_runtime": 7.812,
373
+ "eval_samples_per_second": 60.548,
374
+ "eval_steps_per_second": 7.68,
375
+ "step": 2280
376
+ },
377
+ {
378
+ "epoch": 39.0,
379
+ "eval_accuracy": 0.8372093023255814,
380
+ "eval_loss": 0.4006434977054596,
381
+ "eval_runtime": 7.8268,
382
+ "eval_samples_per_second": 60.433,
383
+ "eval_steps_per_second": 7.666,
384
+ "step": 2340
385
+ }
386
+ ],
387
+ "logging_steps": 500,
388
+ "max_steps": 2400,
389
+ "num_train_epochs": 40,
390
+ "save_steps": 500,
391
+ "total_flos": 5.714956742086545e+18,
392
+ "trial_name": null,
393
+ "trial_params": null
394
+ }
checkpoint-2340/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1769f67a92685cc0062cf921bb60504a412ec13b229e5dc6b9f14b8af157d8c0
3
+ size 4027
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "food",
13
+ "1": "beverage"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "beverage": 1,
20
+ "food": 0
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "problem_type": "single_label_classification",
29
+ "qkv_bias": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.34.0"
32
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7690a6365b86e0a5a2c891df93896284e9a425fd219ccd1720c194bbd6a66e7d
3
+ size 343268717
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1769f67a92685cc0062cf921bb60504a412ec13b229e5dc6b9f14b8af157d8c0
3
+ size 4027