rshrott committed on
Commit
8b46159
1 Parent(s): f0464a0

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  model-index:
7
  - name: ryan_model314_3
@@ -13,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # ryan_model314_3
15
 
16
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.2818
19
- - Na Accuracy: 0.955
20
- - Ordinal Mae: 1.2378
21
 
22
  ## Model description
23
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  model-index:
8
  - name: ryan_model314_3
 
14
 
15
  # ryan_model314_3
16
 
17
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.2547
20
+ - Na Accuracy: 0.95
21
+ - Ordinal Mae: 1.2090
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.4,
3
+ "eval_loss": 0.2546972334384918,
4
+ "eval_na_accuracy": 0.95,
5
+ "eval_ordinal_mae": 1.2089859222915764,
6
+ "eval_runtime": 8.0073,
7
+ "eval_samples_per_second": 24.977,
8
+ "eval_steps_per_second": 3.122,
9
+ "train_loss": 0.25249010701974234,
10
+ "train_runtime": 688.8214,
11
+ "train_samples_per_second": 11.614,
12
+ "train_steps_per_second": 0.726
13
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.4,
3
+ "eval_loss": 0.2546972334384918,
4
+ "eval_na_accuracy": 0.95,
5
+ "eval_ordinal_mae": 1.2089859222915764,
6
+ "eval_runtime": 8.0073,
7
+ "eval_samples_per_second": 24.977,
8
+ "eval_steps_per_second": 3.122
9
+ }
runs/Mar26_22-24-03_ryanserver/events.out.tfevents.1711507021.ryanserver.1969.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13de880269b7777da1c13d179f3e6cfc0e5a3be1515fd2599041780a55ecb9a7
3
+ size 469
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.4,
3
+ "train_loss": 0.25249010701974234,
4
+ "train_runtime": 688.8214,
5
+ "train_samples_per_second": 11.614,
6
+ "train_steps_per_second": 0.726
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2546972334384918,
3
+ "best_model_checkpoint": "./ryan_model314_3/checkpoint-250",
4
+ "epoch": 2.4,
5
+ "eval_steps": 25,
6
+ "global_step": 300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.8899219036102295,
14
+ "learning_rate": 9.8e-05,
15
+ "loss": 0.6027,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.9669970273971558,
21
+ "learning_rate": 9.6e-05,
22
+ "loss": 0.4505,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "eval_loss": 0.4262112081050873,
28
+ "eval_na_accuracy": 0.9,
29
+ "eval_ordinal_mae": 1.0091899644308249,
30
+ "eval_runtime": 24.8283,
31
+ "eval_samples_per_second": 8.055,
32
+ "eval_steps_per_second": 1.007,
33
+ "step": 25
34
+ },
35
+ {
36
+ "epoch": 0.24,
37
+ "grad_norm": 1.0023255348205566,
38
+ "learning_rate": 9.4e-05,
39
+ "loss": 0.4166,
40
+ "step": 30
41
+ },
42
+ {
43
+ "epoch": 0.32,
44
+ "grad_norm": 0.7017713189125061,
45
+ "learning_rate": 9.200000000000001e-05,
46
+ "loss": 0.3632,
47
+ "step": 40
48
+ },
49
+ {
50
+ "epoch": 0.4,
51
+ "grad_norm": 1.028002142906189,
52
+ "learning_rate": 9e-05,
53
+ "loss": 0.3847,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 0.4,
58
+ "eval_loss": 0.367563396692276,
59
+ "eval_na_accuracy": 0.935,
60
+ "eval_ordinal_mae": 1.3719091470156184,
61
+ "eval_runtime": 7.6715,
62
+ "eval_samples_per_second": 26.071,
63
+ "eval_steps_per_second": 3.259,
64
+ "step": 50
65
+ },
66
+ {
67
+ "epoch": 0.48,
68
+ "grad_norm": 0.9666626453399658,
69
+ "learning_rate": 8.800000000000001e-05,
70
+ "loss": 0.3503,
71
+ "step": 60
72
+ },
73
+ {
74
+ "epoch": 0.56,
75
+ "grad_norm": 1.6747545003890991,
76
+ "learning_rate": 8.6e-05,
77
+ "loss": 0.3061,
78
+ "step": 70
79
+ },
80
+ {
81
+ "epoch": 0.6,
82
+ "eval_loss": 0.32615897059440613,
83
+ "eval_na_accuracy": 0.945,
84
+ "eval_ordinal_mae": 0.7485670199170652,
85
+ "eval_runtime": 7.5813,
86
+ "eval_samples_per_second": 26.381,
87
+ "eval_steps_per_second": 3.298,
88
+ "step": 75
89
+ },
90
+ {
91
+ "epoch": 0.64,
92
+ "grad_norm": 0.9667518138885498,
93
+ "learning_rate": 8.4e-05,
94
+ "loss": 0.3304,
95
+ "step": 80
96
+ },
97
+ {
98
+ "epoch": 0.72,
99
+ "grad_norm": 1.2316919565200806,
100
+ "learning_rate": 8.2e-05,
101
+ "loss": 0.3149,
102
+ "step": 90
103
+ },
104
+ {
105
+ "epoch": 0.8,
106
+ "grad_norm": 0.8591766953468323,
107
+ "learning_rate": 8e-05,
108
+ "loss": 0.2744,
109
+ "step": 100
110
+ },
111
+ {
112
+ "epoch": 0.8,
113
+ "eval_loss": 0.3524325489997864,
114
+ "eval_na_accuracy": 0.905,
115
+ "eval_ordinal_mae": 1.1407896330054461,
116
+ "eval_runtime": 7.8965,
117
+ "eval_samples_per_second": 25.328,
118
+ "eval_steps_per_second": 3.166,
119
+ "step": 100
120
+ },
121
+ {
122
+ "epoch": 0.88,
123
+ "grad_norm": 0.9472360014915466,
124
+ "learning_rate": 7.800000000000001e-05,
125
+ "loss": 0.3389,
126
+ "step": 110
127
+ },
128
+ {
129
+ "epoch": 0.96,
130
+ "grad_norm": 0.5021968483924866,
131
+ "learning_rate": 7.6e-05,
132
+ "loss": 0.2384,
133
+ "step": 120
134
+ },
135
+ {
136
+ "epoch": 1.0,
137
+ "eval_loss": 0.36111611127853394,
138
+ "eval_na_accuracy": 0.93,
139
+ "eval_ordinal_mae": 0.6746639459123592,
140
+ "eval_runtime": 7.827,
141
+ "eval_samples_per_second": 25.553,
142
+ "eval_steps_per_second": 3.194,
143
+ "step": 125
144
+ },
145
+ {
146
+ "epoch": 1.04,
147
+ "grad_norm": 0.4840359687805176,
148
+ "learning_rate": 7.4e-05,
149
+ "loss": 0.1954,
150
+ "step": 130
151
+ },
152
+ {
153
+ "epoch": 1.12,
154
+ "grad_norm": 1.4421360492706299,
155
+ "learning_rate": 7.2e-05,
156
+ "loss": 0.223,
157
+ "step": 140
158
+ },
159
+ {
160
+ "epoch": 1.2,
161
+ "grad_norm": 0.36317479610443115,
162
+ "learning_rate": 7e-05,
163
+ "loss": 0.2021,
164
+ "step": 150
165
+ },
166
+ {
167
+ "epoch": 1.2,
168
+ "eval_loss": 0.31051769852638245,
169
+ "eval_na_accuracy": 0.95,
170
+ "eval_ordinal_mae": 1.0440793198453366,
171
+ "eval_runtime": 8.093,
172
+ "eval_samples_per_second": 24.713,
173
+ "eval_steps_per_second": 3.089,
174
+ "step": 150
175
+ },
176
+ {
177
+ "epoch": 1.28,
178
+ "grad_norm": 0.8046426773071289,
179
+ "learning_rate": 6.800000000000001e-05,
180
+ "loss": 0.21,
181
+ "step": 160
182
+ },
183
+ {
184
+ "epoch": 1.36,
185
+ "grad_norm": 0.8122718334197998,
186
+ "learning_rate": 6.6e-05,
187
+ "loss": 0.2234,
188
+ "step": 170
189
+ },
190
+ {
191
+ "epoch": 1.4,
192
+ "eval_loss": 0.27382710576057434,
193
+ "eval_na_accuracy": 0.955,
194
+ "eval_ordinal_mae": 1.4168444083902865,
195
+ "eval_runtime": 7.6337,
196
+ "eval_samples_per_second": 26.2,
197
+ "eval_steps_per_second": 3.275,
198
+ "step": 175
199
+ },
200
+ {
201
+ "epoch": 1.44,
202
+ "grad_norm": 1.5608426332473755,
203
+ "learning_rate": 6.400000000000001e-05,
204
+ "loss": 0.2306,
205
+ "step": 180
206
+ },
207
+ {
208
+ "epoch": 1.52,
209
+ "grad_norm": 0.9130323529243469,
210
+ "learning_rate": 6.2e-05,
211
+ "loss": 0.1506,
212
+ "step": 190
213
+ },
214
+ {
215
+ "epoch": 1.6,
216
+ "grad_norm": 0.6261627674102783,
217
+ "learning_rate": 6e-05,
218
+ "loss": 0.187,
219
+ "step": 200
220
+ },
221
+ {
222
+ "epoch": 1.6,
223
+ "eval_loss": 0.26879259943962097,
224
+ "eval_na_accuracy": 0.955,
225
+ "eval_ordinal_mae": 1.365302862794827,
226
+ "eval_runtime": 7.5957,
227
+ "eval_samples_per_second": 26.331,
228
+ "eval_steps_per_second": 3.291,
229
+ "step": 200
230
+ },
231
+ {
232
+ "epoch": 1.68,
233
+ "grad_norm": 0.7222486138343811,
234
+ "learning_rate": 5.8e-05,
235
+ "loss": 0.2111,
236
+ "step": 210
237
+ },
238
+ {
239
+ "epoch": 1.76,
240
+ "grad_norm": 0.7348190546035767,
241
+ "learning_rate": 5.6000000000000006e-05,
242
+ "loss": 0.2008,
243
+ "step": 220
244
+ },
245
+ {
246
+ "epoch": 1.8,
247
+ "eval_loss": 0.2668895423412323,
248
+ "eval_na_accuracy": 0.96,
249
+ "eval_ordinal_mae": 0.8935630971378721,
250
+ "eval_runtime": 7.6573,
251
+ "eval_samples_per_second": 26.119,
252
+ "eval_steps_per_second": 3.265,
253
+ "step": 225
254
+ },
255
+ {
256
+ "epoch": 1.84,
257
+ "grad_norm": 1.3947832584381104,
258
+ "learning_rate": 5.4000000000000005e-05,
259
+ "loss": 0.2176,
260
+ "step": 230
261
+ },
262
+ {
263
+ "epoch": 1.92,
264
+ "grad_norm": 1.1390098333358765,
265
+ "learning_rate": 5.2000000000000004e-05,
266
+ "loss": 0.2037,
267
+ "step": 240
268
+ },
269
+ {
270
+ "epoch": 2.0,
271
+ "grad_norm": 0.7233979105949402,
272
+ "learning_rate": 5e-05,
273
+ "loss": 0.1541,
274
+ "step": 250
275
+ },
276
+ {
277
+ "epoch": 2.0,
278
+ "eval_loss": 0.2546972334384918,
279
+ "eval_na_accuracy": 0.95,
280
+ "eval_ordinal_mae": 1.2089859222915764,
281
+ "eval_runtime": 7.685,
282
+ "eval_samples_per_second": 26.025,
283
+ "eval_steps_per_second": 3.253,
284
+ "step": 250
285
+ },
286
+ {
287
+ "epoch": 2.08,
288
+ "grad_norm": 0.5704214572906494,
289
+ "learning_rate": 4.8e-05,
290
+ "loss": 0.1104,
291
+ "step": 260
292
+ },
293
+ {
294
+ "epoch": 2.16,
295
+ "grad_norm": 0.648725688457489,
296
+ "learning_rate": 4.600000000000001e-05,
297
+ "loss": 0.1201,
298
+ "step": 270
299
+ },
300
+ {
301
+ "epoch": 2.2,
302
+ "eval_loss": 0.2725123465061188,
303
+ "eval_na_accuracy": 0.95,
304
+ "eval_ordinal_mae": 0.7955228271403142,
305
+ "eval_runtime": 7.4815,
306
+ "eval_samples_per_second": 26.733,
307
+ "eval_steps_per_second": 3.342,
308
+ "step": 275
309
+ },
310
+ {
311
+ "epoch": 2.24,
312
+ "grad_norm": 0.5427641272544861,
313
+ "learning_rate": 4.4000000000000006e-05,
314
+ "loss": 0.096,
315
+ "step": 280
316
+ },
317
+ {
318
+ "epoch": 2.32,
319
+ "grad_norm": 1.687751293182373,
320
+ "learning_rate": 4.2e-05,
321
+ "loss": 0.155,
322
+ "step": 290
323
+ },
324
+ {
325
+ "epoch": 2.4,
326
+ "grad_norm": 0.6842709183692932,
327
+ "learning_rate": 4e-05,
328
+ "loss": 0.113,
329
+ "step": 300
330
+ },
331
+ {
332
+ "epoch": 2.4,
333
+ "eval_loss": 0.2817830443382263,
334
+ "eval_na_accuracy": 0.955,
335
+ "eval_ordinal_mae": 1.2378182741668735,
336
+ "eval_runtime": 8.0351,
337
+ "eval_samples_per_second": 24.891,
338
+ "eval_steps_per_second": 3.111,
339
+ "step": 300
340
+ },
341
+ {
342
+ "epoch": 2.4,
343
+ "step": 300,
344
+ "total_flos": 3.71974885244928e+17,
345
+ "train_loss": 0.25249010701974234,
346
+ "train_runtime": 688.8214,
347
+ "train_samples_per_second": 11.614,
348
+ "train_steps_per_second": 0.726
349
+ }
350
+ ],
351
+ "logging_steps": 10,
352
+ "max_steps": 500,
353
+ "num_input_tokens_seen": 0,
354
+ "num_train_epochs": 4,
355
+ "save_steps": 25,
356
+ "total_flos": 3.71974885244928e+17,
357
+ "train_batch_size": 16,
358
+ "trial_name": null,
359
+ "trial_params": null
360
+ }