rshrott committed on
Commit
d6a8860
1 Parent(s): a86f46f

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  model-index:
7
  - name: ryan_model3272024
@@ -13,12 +14,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # ryan_model3272024
15
 
16
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.2760
19
- - Ordinal Mae: 0.6107
20
- - Ordinal Accuracy: 0.5426
21
- - Na Accuracy: 0.7124
22
 
23
  ## Model description
24
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  model-index:
8
  - name: ryan_model3272024
 
14
 
15
  # ryan_model3272024
16
 
17
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the properties dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.2636
20
+ - Ordinal Mae: 0.5544
21
+ - Ordinal Accuracy: 0.5810
22
+ - Na Accuracy: 0.7915
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 1.46,
3
- "eval_loss": 0.3037484288215637,
4
- "eval_na_accuracy": 0.7297297120094299,
5
- "eval_ordinal_accuracy": 0.5285171270370483,
6
- "eval_ordinal_mae": 0.6723113059997559,
7
- "eval_runtime": 11.1069,
8
- "eval_samples_per_second": 27.01,
9
- "eval_steps_per_second": 3.421,
10
- "train_loss": 0.32911812565543436,
11
- "train_runtime": 761.2747,
12
- "train_samples_per_second": 15.763,
13
- "train_steps_per_second": 0.988
14
  }
 
1
  {
2
+ "epoch": 0.65,
3
+ "eval_loss": 0.26356959342956543,
4
+ "eval_na_accuracy": 0.7915058135986328,
5
+ "eval_ordinal_accuracy": 0.581045925617218,
6
+ "eval_ordinal_mae": 0.5543876886367798,
7
+ "eval_runtime": 160.2802,
8
+ "eval_samples_per_second": 24.825,
9
+ "eval_steps_per_second": 3.107,
10
+ "train_loss": 0.29669314997536794,
11
+ "train_runtime": 4786.838,
12
+ "train_samples_per_second": 28.807,
13
+ "train_steps_per_second": 1.801
14
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.46,
3
- "eval_loss": 0.3037484288215637,
4
- "eval_na_accuracy": 0.7297297120094299,
5
- "eval_ordinal_accuracy": 0.5285171270370483,
6
- "eval_ordinal_mae": 0.6723113059997559,
7
- "eval_runtime": 11.1069,
8
- "eval_samples_per_second": 27.01,
9
- "eval_steps_per_second": 3.421
10
  }
 
1
  {
2
+ "epoch": 0.65,
3
+ "eval_loss": 0.26356959342956543,
4
+ "eval_na_accuracy": 0.7915058135986328,
5
+ "eval_ordinal_accuracy": 0.581045925617218,
6
+ "eval_ordinal_mae": 0.5543876886367798,
7
+ "eval_runtime": 160.2802,
8
+ "eval_samples_per_second": 24.825,
9
+ "eval_steps_per_second": 3.107
10
  }
runs/Mar27_22-47-32_ryanserver/events.out.tfevents.1711599059.ryanserver.28085.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd8e9d9adf68ac410dc4be5a1c6abfd8c0da0c9c412c736aab1296102c953046
3
+ size 529
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 1.46,
3
- "train_loss": 0.32911812565543436,
4
- "train_runtime": 761.2747,
5
- "train_samples_per_second": 15.763,
6
- "train_steps_per_second": 0.988
7
  }
 
1
  {
2
+ "epoch": 0.65,
3
+ "train_loss": 0.29669314997536794,
4
+ "train_runtime": 4786.838,
5
+ "train_samples_per_second": 28.807,
6
+ "train_steps_per_second": 1.801
7
  }
trainer_state.json CHANGED
@@ -1,535 +1,575 @@
1
  {
2
- "best_metric": 0.3037484288215637,
3
- "best_model_checkpoint": "./ryan_model3272024/checkpoint-250",
4
- "epoch": 1.4627659574468086,
5
- "eval_steps": 25,
6
- "global_step": 275,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03,
13
- "grad_norm": 0.5357832312583923,
14
- "learning_rate": 0.00019867021276595746,
15
- "loss": 0.4838,
16
- "step": 5
17
  },
18
  {
19
- "epoch": 0.05,
20
- "grad_norm": 0.3301275372505188,
21
- "learning_rate": 0.00019734042553191489,
22
- "loss": 0.3457,
23
- "step": 10
24
  },
25
  {
26
- "epoch": 0.08,
27
- "grad_norm": 0.829723060131073,
28
- "learning_rate": 0.00019601063829787234,
29
- "loss": 0.4508,
30
- "step": 15
31
  },
32
  {
33
- "epoch": 0.11,
34
- "grad_norm": 0.43481916189193726,
35
- "learning_rate": 0.00019468085106382982,
36
- "loss": 0.3205,
37
- "step": 20
38
  },
39
  {
40
- "epoch": 0.13,
41
- "grad_norm": 0.9304245114326477,
42
- "learning_rate": 0.00019335106382978724,
43
- "loss": 0.4062,
44
- "step": 25
 
 
 
 
45
  },
46
  {
47
- "epoch": 0.13,
48
- "eval_loss": 0.3798711597919464,
49
- "eval_na_accuracy": 0.6216216087341309,
50
- "eval_ordinal_accuracy": 0.23954372107982635,
51
- "eval_ordinal_mae": 0.9244347810745239,
52
- "eval_runtime": 31.9197,
53
- "eval_samples_per_second": 9.399,
54
- "eval_steps_per_second": 1.19,
55
- "step": 25
56
  },
57
  {
58
- "epoch": 0.16,
59
- "grad_norm": 0.8552330136299133,
60
- "learning_rate": 0.0001920212765957447,
61
- "loss": 0.3029,
62
- "step": 30
63
  },
64
  {
65
- "epoch": 0.19,
66
- "grad_norm": 0.43845850229263306,
67
- "learning_rate": 0.00019069148936170214,
68
- "loss": 0.3261,
69
- "step": 35
70
  },
71
  {
72
- "epoch": 0.21,
73
- "grad_norm": 0.18216854333877563,
74
- "learning_rate": 0.00018936170212765957,
75
- "loss": 0.2895,
76
- "step": 40
77
  },
78
  {
79
- "epoch": 0.24,
80
- "grad_norm": 0.7537259459495544,
81
- "learning_rate": 0.00018829787234042554,
82
- "loss": 0.4943,
83
- "step": 45
 
 
 
 
84
  },
85
  {
86
- "epoch": 0.27,
87
- "grad_norm": 0.4183604419231415,
88
- "learning_rate": 0.000186968085106383,
89
- "loss": 0.3536,
90
- "step": 50
91
  },
92
  {
93
- "epoch": 0.27,
94
- "eval_loss": 0.3699643015861511,
95
- "eval_na_accuracy": 0.6756756901741028,
96
- "eval_ordinal_accuracy": 0.38403043150901794,
97
- "eval_ordinal_mae": 0.9066693782806396,
98
- "eval_runtime": 12.0769,
99
- "eval_samples_per_second": 24.841,
100
- "eval_steps_per_second": 3.147,
101
- "step": 50
102
  },
103
  {
104
- "epoch": 0.29,
105
- "grad_norm": 0.8079000115394592,
106
- "learning_rate": 0.00018563829787234044,
107
- "loss": 0.3601,
108
- "step": 55
109
  },
110
  {
111
- "epoch": 0.32,
112
- "grad_norm": 0.9893763065338135,
113
- "learning_rate": 0.0001843085106382979,
114
- "loss": 0.4152,
115
- "step": 60
116
  },
117
  {
118
- "epoch": 0.35,
119
- "grad_norm": 1.3993595838546753,
120
- "learning_rate": 0.00018297872340425532,
121
- "loss": 0.448,
122
- "step": 65
 
 
 
 
123
  },
124
  {
125
- "epoch": 0.37,
126
- "grad_norm": 0.45827603340148926,
127
- "learning_rate": 0.00018164893617021277,
128
- "loss": 0.3132,
129
- "step": 70
130
  },
131
  {
132
- "epoch": 0.4,
133
- "grad_norm": 1.4677202701568604,
134
- "learning_rate": 0.00018031914893617022,
135
- "loss": 0.4295,
136
- "step": 75
137
  },
138
  {
139
- "epoch": 0.4,
140
- "eval_loss": 0.34051504731178284,
141
- "eval_na_accuracy": 0.7837837934494019,
142
- "eval_ordinal_accuracy": 0.29657796025276184,
143
- "eval_ordinal_mae": 0.8797782063484192,
144
- "eval_runtime": 11.5094,
145
- "eval_samples_per_second": 26.066,
146
- "eval_steps_per_second": 3.302,
147
- "step": 75
148
  },
149
  {
150
- "epoch": 0.43,
151
- "grad_norm": 1.1136772632598877,
152
- "learning_rate": 0.00017898936170212767,
153
- "loss": 0.2444,
154
- "step": 80
155
  },
156
  {
157
- "epoch": 0.45,
158
- "grad_norm": 3.5127644538879395,
159
- "learning_rate": 0.00017765957446808512,
160
- "loss": 0.4542,
161
- "step": 85
 
 
 
 
162
  },
163
  {
164
- "epoch": 0.48,
165
- "grad_norm": 0.6393898725509644,
166
- "learning_rate": 0.00017632978723404257,
167
- "loss": 0.3411,
168
- "step": 90
169
  },
170
  {
171
- "epoch": 0.51,
172
- "grad_norm": 0.8198608160018921,
173
- "learning_rate": 0.000175,
174
- "loss": 0.3184,
175
- "step": 95
176
  },
177
  {
178
- "epoch": 0.53,
179
- "grad_norm": 1.1194807291030884,
180
- "learning_rate": 0.00017367021276595745,
181
- "loss": 0.4114,
182
- "step": 100
183
  },
184
  {
185
- "epoch": 0.53,
186
- "eval_loss": 0.39057785272598267,
187
- "eval_na_accuracy": 0.7297297120094299,
188
- "eval_ordinal_accuracy": 0.35361215472221375,
189
- "eval_ordinal_mae": 0.8806185126304626,
190
- "eval_runtime": 12.2664,
191
- "eval_samples_per_second": 24.457,
192
- "eval_steps_per_second": 3.098,
193
- "step": 100
194
  },
195
  {
196
- "epoch": 0.56,
197
- "grad_norm": 3.365068197250366,
198
- "learning_rate": 0.0001723404255319149,
199
- "loss": 0.469,
200
- "step": 105
 
 
 
 
201
  },
202
  {
203
- "epoch": 0.59,
204
- "grad_norm": 0.6197894215583801,
205
- "learning_rate": 0.00017101063829787233,
206
- "loss": 0.2738,
207
- "step": 110
208
  },
209
  {
210
- "epoch": 0.61,
211
- "grad_norm": 0.5341880917549133,
212
- "learning_rate": 0.0001696808510638298,
213
- "loss": 0.2867,
214
- "step": 115
215
  },
216
  {
217
- "epoch": 0.64,
218
- "grad_norm": 1.0934885740280151,
219
- "learning_rate": 0.00016835106382978726,
220
- "loss": 0.4105,
221
- "step": 120
222
  },
223
  {
224
- "epoch": 0.66,
225
- "grad_norm": 0.5764520764350891,
226
- "learning_rate": 0.00016702127659574468,
227
- "loss": 0.3521,
228
- "step": 125
229
  },
230
  {
231
- "epoch": 0.66,
232
- "eval_loss": 0.35300251841545105,
233
- "eval_na_accuracy": 0.8108108043670654,
234
- "eval_ordinal_accuracy": 0.42585551738739014,
235
- "eval_ordinal_mae": 0.8441764116287231,
236
- "eval_runtime": 12.3015,
237
- "eval_samples_per_second": 24.387,
238
- "eval_steps_per_second": 3.089,
239
- "step": 125
240
  },
241
  {
242
- "epoch": 0.69,
243
- "grad_norm": 0.2090584933757782,
244
- "learning_rate": 0.00016569148936170213,
245
- "loss": 0.294,
246
- "step": 130
247
  },
248
  {
249
- "epoch": 0.72,
250
- "grad_norm": 0.2995198369026184,
251
- "learning_rate": 0.00016436170212765958,
252
- "loss": 0.3099,
253
- "step": 135
254
  },
255
  {
256
- "epoch": 0.74,
257
- "grad_norm": 0.41820451617240906,
258
- "learning_rate": 0.00016303191489361703,
259
- "loss": 0.4392,
260
- "step": 140
261
  },
262
  {
263
- "epoch": 0.77,
264
- "grad_norm": 1.1886084079742432,
265
- "learning_rate": 0.00016170212765957446,
266
- "loss": 0.3725,
267
- "step": 145
268
  },
269
  {
270
- "epoch": 0.8,
271
- "grad_norm": 0.8490511178970337,
272
- "learning_rate": 0.00016037234042553194,
273
- "loss": 0.3349,
274
- "step": 150
 
 
 
 
275
  },
276
  {
277
- "epoch": 0.8,
278
- "eval_loss": 0.34123122692108154,
279
- "eval_na_accuracy": 0.7297297120094299,
280
- "eval_ordinal_accuracy": 0.4752851724624634,
281
- "eval_ordinal_mae": 0.8015652298927307,
282
- "eval_runtime": 12.509,
283
- "eval_samples_per_second": 23.983,
284
- "eval_steps_per_second": 3.038,
285
- "step": 150
286
  },
287
  {
288
- "epoch": 0.82,
289
- "grad_norm": 0.4098907709121704,
290
- "learning_rate": 0.00015904255319148936,
291
- "loss": 0.2306,
292
- "step": 155
293
  },
294
  {
295
- "epoch": 0.85,
296
- "grad_norm": 1.5454349517822266,
297
- "learning_rate": 0.0001577127659574468,
298
- "loss": 0.2382,
299
- "step": 160
300
  },
301
  {
302
- "epoch": 0.88,
303
- "grad_norm": 0.61043381690979,
304
- "learning_rate": 0.00015638297872340426,
305
- "loss": 0.3448,
306
- "step": 165
307
  },
308
  {
309
- "epoch": 0.9,
310
- "grad_norm": 0.7741652727127075,
311
- "learning_rate": 0.00015505319148936171,
312
- "loss": 0.2037,
313
- "step": 170
 
 
 
 
314
  },
315
  {
316
- "epoch": 0.93,
317
- "grad_norm": 0.5108156204223633,
318
- "learning_rate": 0.00015372340425531914,
319
- "loss": 0.4612,
320
- "step": 175
321
  },
322
  {
323
- "epoch": 0.93,
324
- "eval_loss": 0.36386463046073914,
325
- "eval_na_accuracy": 0.5405405163764954,
326
- "eval_ordinal_accuracy": 0.46768060326576233,
327
- "eval_ordinal_mae": 0.7603853940963745,
328
- "eval_runtime": 12.3807,
329
- "eval_samples_per_second": 24.231,
330
- "eval_steps_per_second": 3.069,
331
- "step": 175
332
  },
333
  {
334
- "epoch": 0.96,
335
- "grad_norm": 0.8889250159263611,
336
- "learning_rate": 0.00015239361702127662,
337
- "loss": 0.4401,
338
- "step": 180
339
  },
340
  {
341
- "epoch": 0.98,
342
- "grad_norm": 1.047706127166748,
343
- "learning_rate": 0.00015106382978723407,
344
- "loss": 0.2771,
345
- "step": 185
346
  },
347
  {
348
- "epoch": 1.01,
349
- "grad_norm": 0.6727350354194641,
350
- "learning_rate": 0.0001497340425531915,
351
- "loss": 0.3726,
352
- "step": 190
 
 
 
 
353
  },
354
  {
355
- "epoch": 1.04,
356
- "grad_norm": 0.4043642282485962,
357
- "learning_rate": 0.00014840425531914894,
358
- "loss": 0.3504,
359
- "step": 195
360
  },
361
  {
362
- "epoch": 1.06,
363
- "grad_norm": 1.089250922203064,
364
- "learning_rate": 0.0001470744680851064,
365
- "loss": 0.2424,
366
- "step": 200
367
  },
368
  {
369
- "epoch": 1.06,
370
- "eval_loss": 0.32972484827041626,
371
- "eval_na_accuracy": 0.7027027010917664,
372
- "eval_ordinal_accuracy": 0.48669201135635376,
373
- "eval_ordinal_mae": 0.7117426991462708,
374
- "eval_runtime": 11.8333,
375
- "eval_samples_per_second": 25.352,
376
- "eval_steps_per_second": 3.211,
377
- "step": 200
378
  },
379
  {
380
- "epoch": 1.09,
381
- "grad_norm": 0.6453298926353455,
382
- "learning_rate": 0.00014574468085106382,
383
- "loss": 0.2115,
384
- "step": 205
385
  },
386
  {
387
- "epoch": 1.12,
388
- "grad_norm": 0.7021524310112,
389
- "learning_rate": 0.00014441489361702127,
390
- "loss": 0.2281,
391
- "step": 210
 
 
 
 
392
  },
393
  {
394
- "epoch": 1.14,
395
- "grad_norm": 0.7665510773658752,
396
- "learning_rate": 0.00014308510638297875,
397
- "loss": 0.2048,
398
- "step": 215
399
  },
400
  {
401
- "epoch": 1.17,
402
- "grad_norm": 1.2339574098587036,
403
- "learning_rate": 0.00014175531914893617,
404
- "loss": 0.2344,
405
- "step": 220
406
  },
407
  {
408
- "epoch": 1.2,
409
- "grad_norm": 2.540107011795044,
410
- "learning_rate": 0.00014042553191489363,
411
- "loss": 0.2928,
412
- "step": 225
413
  },
414
  {
415
- "epoch": 1.2,
416
- "eval_loss": 0.3493916690349579,
417
- "eval_na_accuracy": 0.6756756901741028,
418
- "eval_ordinal_accuracy": 0.5285171270370483,
419
- "eval_ordinal_mae": 0.6955077052116394,
420
- "eval_runtime": 10.8667,
421
- "eval_samples_per_second": 27.607,
422
- "eval_steps_per_second": 3.497,
423
- "step": 225
424
  },
425
  {
426
- "epoch": 1.22,
427
- "grad_norm": 0.7262032628059387,
428
- "learning_rate": 0.00013909574468085108,
429
- "loss": 0.3973,
430
- "step": 230
 
 
 
 
431
  },
432
  {
433
- "epoch": 1.25,
434
- "grad_norm": 0.28402724862098694,
435
- "learning_rate": 0.0001377659574468085,
436
- "loss": 0.218,
437
- "step": 235
438
  },
439
  {
440
- "epoch": 1.28,
441
- "grad_norm": 1.5170676708221436,
442
- "learning_rate": 0.00013643617021276595,
443
- "loss": 0.227,
444
- "step": 240
445
  },
446
  {
447
- "epoch": 1.3,
448
- "grad_norm": 0.35739636421203613,
449
- "learning_rate": 0.0001351063829787234,
450
- "loss": 0.2275,
451
- "step": 245
452
  },
453
  {
454
- "epoch": 1.33,
455
- "grad_norm": 0.5471745133399963,
456
- "learning_rate": 0.00013377659574468086,
457
- "loss": 0.2436,
458
- "step": 250
459
  },
460
  {
461
- "epoch": 1.33,
462
- "eval_loss": 0.3037484288215637,
463
- "eval_na_accuracy": 0.7297297120094299,
464
- "eval_ordinal_accuracy": 0.5285171270370483,
465
- "eval_ordinal_mae": 0.6723113059997559,
466
- "eval_runtime": 12.062,
467
- "eval_samples_per_second": 24.871,
468
- "eval_steps_per_second": 3.15,
469
- "step": 250
470
  },
471
  {
472
- "epoch": 1.36,
473
- "grad_norm": 0.41173043847084045,
474
- "learning_rate": 0.0001324468085106383,
475
- "loss": 0.2408,
476
- "step": 255
477
  },
478
  {
479
- "epoch": 1.38,
480
- "grad_norm": 0.8529615998268127,
481
- "learning_rate": 0.00013111702127659576,
482
- "loss": 0.2935,
483
- "step": 260
484
  },
485
  {
486
- "epoch": 1.41,
487
- "grad_norm": 1.3896653652191162,
488
- "learning_rate": 0.00012978723404255318,
489
- "loss": 0.2547,
490
- "step": 265
491
  },
492
  {
493
- "epoch": 1.44,
494
- "grad_norm": 0.30819597840309143,
495
- "learning_rate": 0.00012845744680851063,
496
- "loss": 0.3372,
497
- "step": 270
498
  },
499
  {
500
- "epoch": 1.46,
501
- "grad_norm": 2.0342652797698975,
502
- "learning_rate": 0.00012712765957446809,
503
- "loss": 0.2776,
504
- "step": 275
 
 
 
 
505
  },
506
  {
507
- "epoch": 1.46,
508
- "eval_loss": 0.3365646004676819,
509
- "eval_na_accuracy": 0.5945945978164673,
510
- "eval_ordinal_accuracy": 0.517110288143158,
511
- "eval_ordinal_mae": 0.672748863697052,
512
- "eval_runtime": 11.4536,
513
- "eval_samples_per_second": 26.193,
514
- "eval_steps_per_second": 3.318,
515
- "step": 275
516
  },
517
  {
518
- "epoch": 1.46,
519
- "step": 275,
520
- "total_flos": 3.403570199991091e+17,
521
- "train_loss": 0.32911812565543436,
522
- "train_runtime": 761.2747,
523
- "train_samples_per_second": 15.763,
524
- "train_steps_per_second": 0.988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  }
526
  ],
527
- "logging_steps": 5,
528
- "max_steps": 752,
529
  "num_input_tokens_seen": 0,
530
  "num_train_epochs": 4,
531
- "save_steps": 25,
532
- "total_flos": 3.403570199991091e+17,
533
  "train_batch_size": 16,
534
  "trial_name": null,
535
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.26356959342956543,
3
+ "best_model_checkpoint": "./ryan_model3272024/checkpoint-1000",
4
+ "epoch": 0.6496519721577726,
5
+ "eval_steps": 100,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "grad_norm": 0.38699468970298767,
14
+ "learning_rate": 0.0001994199535962877,
15
+ "loss": 0.4038,
16
+ "step": 25
17
  },
18
  {
19
+ "epoch": 0.02,
20
+ "grad_norm": 0.6787680387496948,
21
+ "learning_rate": 0.00019883990719257543,
22
+ "loss": 0.4003,
23
+ "step": 50
24
  },
25
  {
26
+ "epoch": 0.03,
27
+ "grad_norm": 0.5743306279182434,
28
+ "learning_rate": 0.00019825986078886312,
29
+ "loss": 0.3591,
30
+ "step": 75
31
  },
32
  {
33
+ "epoch": 0.05,
34
+ "grad_norm": 0.41705068945884705,
35
+ "learning_rate": 0.00019767981438515082,
36
+ "loss": 0.3524,
37
+ "step": 100
38
  },
39
  {
40
+ "epoch": 0.05,
41
+ "eval_loss": 0.339992493391037,
42
+ "eval_na_accuracy": 0.7586872577667236,
43
+ "eval_ordinal_accuracy": 0.38746026158332825,
44
+ "eval_ordinal_mae": 0.8904515504837036,
45
+ "eval_runtime": 335.205,
46
+ "eval_samples_per_second": 11.87,
47
+ "eval_steps_per_second": 1.486,
48
+ "step": 100
49
  },
50
  {
51
+ "epoch": 0.06,
52
+ "grad_norm": 0.36200761795043945,
53
+ "learning_rate": 0.0001970997679814385,
54
+ "loss": 0.3071,
55
+ "step": 125
 
 
 
 
56
  },
57
  {
58
+ "epoch": 0.07,
59
+ "grad_norm": 0.24589791893959045,
60
+ "learning_rate": 0.00019651972157772623,
61
+ "loss": 0.3475,
62
+ "step": 150
63
  },
64
  {
65
+ "epoch": 0.08,
66
+ "grad_norm": 0.6089735627174377,
67
+ "learning_rate": 0.00019593967517401393,
68
+ "loss": 0.3072,
69
+ "step": 175
70
  },
71
  {
72
+ "epoch": 0.09,
73
+ "grad_norm": 0.5671761631965637,
74
+ "learning_rate": 0.00019535962877030162,
75
+ "loss": 0.2683,
76
+ "step": 200
77
  },
78
  {
79
+ "epoch": 0.09,
80
+ "eval_loss": 0.36712726950645447,
81
+ "eval_na_accuracy": 0.623552143573761,
82
+ "eval_ordinal_accuracy": 0.48916497826576233,
83
+ "eval_ordinal_mae": 0.7306416630744934,
84
+ "eval_runtime": 155.9343,
85
+ "eval_samples_per_second": 25.517,
86
+ "eval_steps_per_second": 3.194,
87
+ "step": 200
88
  },
89
  {
90
+ "epoch": 0.1,
91
+ "grad_norm": 1.2764167785644531,
92
+ "learning_rate": 0.00019477958236658932,
93
+ "loss": 0.2953,
94
+ "step": 225
95
  },
96
  {
97
+ "epoch": 0.12,
98
+ "grad_norm": 1.9076497554779053,
99
+ "learning_rate": 0.00019419953596287704,
100
+ "loss": 0.3382,
101
+ "step": 250
 
 
 
 
102
  },
103
  {
104
+ "epoch": 0.13,
105
+ "grad_norm": 0.2747127115726471,
106
+ "learning_rate": 0.00019361948955916474,
107
+ "loss": 0.2752,
108
+ "step": 275
109
  },
110
  {
111
+ "epoch": 0.14,
112
+ "grad_norm": 0.9448749423027039,
113
+ "learning_rate": 0.00019303944315545243,
114
+ "loss": 0.3314,
115
+ "step": 300
116
  },
117
  {
118
+ "epoch": 0.14,
119
+ "eval_loss": 0.3450469672679901,
120
+ "eval_na_accuracy": 0.6969112157821655,
121
+ "eval_ordinal_accuracy": 0.4013291001319885,
122
+ "eval_ordinal_mae": 0.8077224493026733,
123
+ "eval_runtime": 156.2328,
124
+ "eval_samples_per_second": 25.468,
125
+ "eval_steps_per_second": 3.188,
126
+ "step": 300
127
  },
128
  {
129
+ "epoch": 0.15,
130
+ "grad_norm": 0.2589721083641052,
131
+ "learning_rate": 0.00019245939675174015,
132
+ "loss": 0.3486,
133
+ "step": 325
134
  },
135
  {
136
+ "epoch": 0.16,
137
+ "grad_norm": 0.44286003708839417,
138
+ "learning_rate": 0.00019187935034802785,
139
+ "loss": 0.3386,
140
+ "step": 350
141
  },
142
  {
143
+ "epoch": 0.17,
144
+ "grad_norm": 0.3215602934360504,
145
+ "learning_rate": 0.00019129930394431554,
146
+ "loss": 0.3056,
147
+ "step": 375
 
 
 
 
148
  },
149
  {
150
+ "epoch": 0.19,
151
+ "grad_norm": 0.9510051012039185,
152
+ "learning_rate": 0.00019071925754060324,
153
+ "loss": 0.2747,
154
+ "step": 400
155
  },
156
  {
157
+ "epoch": 0.19,
158
+ "eval_loss": 0.28132036328315735,
159
+ "eval_na_accuracy": 0.7895752787590027,
160
+ "eval_ordinal_accuracy": 0.5423288345336914,
161
+ "eval_ordinal_mae": 0.6105712056159973,
162
+ "eval_runtime": 155.1965,
163
+ "eval_samples_per_second": 25.638,
164
+ "eval_steps_per_second": 3.209,
165
+ "step": 400
166
  },
167
  {
168
+ "epoch": 0.2,
169
+ "grad_norm": 0.5417093634605408,
170
+ "learning_rate": 0.00019013921113689096,
171
+ "loss": 0.2522,
172
+ "step": 425
173
  },
174
  {
175
+ "epoch": 0.21,
176
+ "grad_norm": 1.405881643295288,
177
+ "learning_rate": 0.00018955916473317868,
178
+ "loss": 0.3589,
179
+ "step": 450
180
  },
181
  {
182
+ "epoch": 0.22,
183
+ "grad_norm": 0.8319898843765259,
184
+ "learning_rate": 0.00018897911832946638,
185
+ "loss": 0.2991,
186
+ "step": 475
187
  },
188
  {
189
+ "epoch": 0.23,
190
+ "grad_norm": 1.9455621242523193,
191
+ "learning_rate": 0.00018839907192575407,
192
+ "loss": 0.3247,
193
+ "step": 500
 
 
 
 
194
  },
195
  {
196
+ "epoch": 0.23,
197
+ "eval_loss": 0.3143959045410156,
198
+ "eval_na_accuracy": 0.7104247212409973,
199
+ "eval_ordinal_accuracy": 0.4524703919887543,
200
+ "eval_ordinal_mae": 0.7256373763084412,
201
+ "eval_runtime": 157.1141,
202
+ "eval_samples_per_second": 25.326,
203
+ "eval_steps_per_second": 3.17,
204
+ "step": 500
205
  },
206
  {
207
+ "epoch": 0.24,
208
+ "grad_norm": 0.6339251399040222,
209
+ "learning_rate": 0.00018781902552204177,
210
+ "loss": 0.303,
211
+ "step": 525
212
  },
213
  {
214
+ "epoch": 0.26,
215
+ "grad_norm": 0.3713740408420563,
216
+ "learning_rate": 0.0001872389791183295,
217
+ "loss": 0.3035,
218
+ "step": 550
219
  },
220
  {
221
+ "epoch": 0.27,
222
+ "grad_norm": 0.7050974369049072,
223
+ "learning_rate": 0.00018665893271461718,
224
+ "loss": 0.2609,
225
+ "step": 575
226
  },
227
  {
228
+ "epoch": 0.28,
229
+ "grad_norm": 0.791477620601654,
230
+ "learning_rate": 0.00018607888631090488,
231
+ "loss": 0.3612,
232
+ "step": 600
233
  },
234
  {
235
+ "epoch": 0.28,
236
+ "eval_loss": 0.3074879050254822,
237
+ "eval_na_accuracy": 0.7586872577667236,
238
+ "eval_ordinal_accuracy": 0.4984108507633209,
239
+ "eval_ordinal_mae": 0.6415887475013733,
240
+ "eval_runtime": 154.2538,
241
+ "eval_samples_per_second": 25.795,
242
+ "eval_steps_per_second": 3.228,
243
+ "step": 600
244
  },
245
  {
246
+ "epoch": 0.29,
247
+ "grad_norm": 0.39196524024009705,
248
+ "learning_rate": 0.0001854988399071926,
249
+ "loss": 0.31,
250
+ "step": 625
251
  },
252
  {
253
+ "epoch": 0.3,
254
+ "grad_norm": 1.0753191709518433,
255
+ "learning_rate": 0.0001849187935034803,
256
+ "loss": 0.2722,
257
+ "step": 650
258
  },
259
  {
260
+ "epoch": 0.31,
261
+ "grad_norm": 0.8922611474990845,
262
+ "learning_rate": 0.000184338747099768,
263
+ "loss": 0.3132,
264
+ "step": 675
265
  },
266
  {
267
+ "epoch": 0.32,
268
+ "grad_norm": 0.6866246461868286,
269
+ "learning_rate": 0.0001837587006960557,
270
+ "loss": 0.3031,
271
+ "step": 700
272
  },
273
  {
274
+ "epoch": 0.32,
275
+ "eval_loss": 0.2784635126590729,
276
+ "eval_na_accuracy": 0.7895752787590027,
277
+ "eval_ordinal_accuracy": 0.5556197762489319,
278
+ "eval_ordinal_mae": 0.5720168352127075,
279
+ "eval_runtime": 154.421,
280
+ "eval_samples_per_second": 25.767,
281
+ "eval_steps_per_second": 3.225,
282
+ "step": 700
283
  },
284
  {
285
+ "epoch": 0.34,
286
+ "grad_norm": 1.713051676750183,
287
+ "learning_rate": 0.0001831786542923434,
288
+ "loss": 0.337,
289
+ "step": 725
 
 
 
 
290
  },
291
  {
292
+ "epoch": 0.35,
293
+ "grad_norm": 1.0872548818588257,
294
+ "learning_rate": 0.0001825986078886311,
295
+ "loss": 0.2918,
296
+ "step": 750
297
  },
298
  {
299
+ "epoch": 0.36,
300
+ "grad_norm": 1.5099256038665771,
301
+ "learning_rate": 0.0001820185614849188,
302
+ "loss": 0.2509,
303
+ "step": 775
304
  },
305
  {
306
+ "epoch": 0.37,
307
+ "grad_norm": 0.5774210691452026,
308
+ "learning_rate": 0.0001814385150812065,
309
+ "loss": 0.2866,
310
+ "step": 800
311
  },
312
  {
313
+ "epoch": 0.37,
314
+ "eval_loss": 0.28780511021614075,
315
+ "eval_na_accuracy": 0.7335907220840454,
316
+ "eval_ordinal_accuracy": 0.5775787234306335,
317
+ "eval_ordinal_mae": 0.5347856879234314,
318
+ "eval_runtime": 154.6062,
319
+ "eval_samples_per_second": 25.736,
320
+ "eval_steps_per_second": 3.221,
321
+ "step": 800
322
  },
323
  {
324
+ "epoch": 0.38,
325
+ "grad_norm": 0.33059367537498474,
326
+ "learning_rate": 0.00018085846867749422,
327
+ "loss": 0.2626,
328
+ "step": 825
329
  },
330
  {
331
+ "epoch": 0.39,
332
+ "grad_norm": 1.45087730884552,
333
+ "learning_rate": 0.0001802784222737819,
334
+ "loss": 0.3485,
335
+ "step": 850
 
 
 
 
336
  },
337
  {
338
+ "epoch": 0.41,
339
+ "grad_norm": 1.195901870727539,
340
+ "learning_rate": 0.0001796983758700696,
341
+ "loss": 0.3007,
342
+ "step": 875
343
  },
344
  {
345
+ "epoch": 0.42,
346
+ "grad_norm": 0.26779890060424805,
347
+ "learning_rate": 0.00017911832946635733,
348
+ "loss": 0.2927,
349
+ "step": 900
350
  },
351
  {
352
+ "epoch": 0.42,
353
+ "eval_loss": 0.2688673734664917,
354
+ "eval_na_accuracy": 0.7972972989082336,
355
+ "eval_ordinal_accuracy": 0.5573533773422241,
356
+ "eval_ordinal_mae": 0.5855077505111694,
357
+ "eval_runtime": 154.5178,
358
+ "eval_samples_per_second": 25.751,
359
+ "eval_steps_per_second": 3.223,
360
+ "step": 900
361
  },
362
  {
363
+ "epoch": 0.43,
364
+ "grad_norm": 0.5635965466499329,
365
+ "learning_rate": 0.00017853828306264502,
366
+ "loss": 0.269,
367
+ "step": 925
368
  },
369
  {
370
+ "epoch": 0.44,
371
+ "grad_norm": 2.8135786056518555,
372
+ "learning_rate": 0.00017795823665893272,
373
+ "loss": 0.2677,
374
+ "step": 950
375
  },
376
  {
377
+ "epoch": 0.45,
378
+ "grad_norm": 0.49396631121635437,
379
+ "learning_rate": 0.0001773781902552204,
380
+ "loss": 0.3069,
381
+ "step": 975
 
 
 
 
382
  },
383
  {
384
+ "epoch": 0.46,
385
+ "grad_norm": 1.3267723321914673,
386
+ "learning_rate": 0.00017679814385150814,
387
+ "loss": 0.3003,
388
+ "step": 1000
389
  },
390
  {
391
+ "epoch": 0.46,
392
+ "eval_loss": 0.26356959342956543,
393
+ "eval_na_accuracy": 0.7915058135986328,
394
+ "eval_ordinal_accuracy": 0.581045925617218,
395
+ "eval_ordinal_mae": 0.5543876886367798,
396
+ "eval_runtime": 157.946,
397
+ "eval_samples_per_second": 25.192,
398
+ "eval_steps_per_second": 3.153,
399
+ "step": 1000
400
  },
401
  {
402
+ "epoch": 0.48,
403
+ "grad_norm": 0.9938157200813293,
404
+ "learning_rate": 0.00017621809744779583,
405
+ "loss": 0.2521,
406
+ "step": 1025
407
  },
408
  {
409
+ "epoch": 0.49,
410
+ "grad_norm": 0.45715010166168213,
411
+ "learning_rate": 0.00017563805104408353,
412
+ "loss": 0.2926,
413
+ "step": 1050
414
  },
415
  {
416
+ "epoch": 0.5,
417
+ "grad_norm": 2.9666409492492676,
418
+ "learning_rate": 0.00017505800464037122,
419
+ "loss": 0.2581,
420
+ "step": 1075
421
  },
422
  {
423
+ "epoch": 0.51,
424
+ "grad_norm": 2.5301055908203125,
425
+ "learning_rate": 0.00017447795823665894,
426
+ "loss": 0.2522,
427
+ "step": 1100
 
 
 
 
428
  },
429
  {
430
+ "epoch": 0.51,
431
+ "eval_loss": 0.3009192943572998,
432
+ "eval_na_accuracy": 0.8571428656578064,
433
+ "eval_ordinal_accuracy": 0.54435133934021,
434
+ "eval_ordinal_mae": 0.5650931596755981,
435
+ "eval_runtime": 159.1216,
436
+ "eval_samples_per_second": 25.006,
437
+ "eval_steps_per_second": 3.13,
438
+ "step": 1100
439
  },
440
  {
441
+ "epoch": 0.52,
442
+ "grad_norm": 0.8192782998085022,
443
+ "learning_rate": 0.00017389791183294664,
444
+ "loss": 0.3584,
445
+ "step": 1125
446
  },
447
  {
448
+ "epoch": 0.53,
449
+ "grad_norm": 2.0657265186309814,
450
+ "learning_rate": 0.00017331786542923433,
451
+ "loss": 0.2547,
452
+ "step": 1150
453
  },
454
  {
455
+ "epoch": 0.55,
456
+ "grad_norm": 0.5887840390205383,
457
+ "learning_rate": 0.00017273781902552203,
458
+ "loss": 0.2335,
459
+ "step": 1175
460
  },
461
  {
462
+ "epoch": 0.56,
463
+ "grad_norm": 0.8169906735420227,
464
+ "learning_rate": 0.00017215777262180975,
465
+ "loss": 0.262,
466
+ "step": 1200
467
  },
468
  {
469
+ "epoch": 0.56,
470
+ "eval_loss": 0.279022216796875,
471
+ "eval_na_accuracy": 0.8301158547401428,
472
+ "eval_ordinal_accuracy": 0.5801791548728943,
473
+ "eval_ordinal_mae": 0.5203233361244202,
474
+ "eval_runtime": 159.9167,
475
+ "eval_samples_per_second": 24.882,
476
+ "eval_steps_per_second": 3.114,
477
+ "step": 1200
478
  },
479
  {
480
+ "epoch": 0.57,
481
+ "grad_norm": 2.5461835861206055,
482
+ "learning_rate": 0.00017157772621809744,
483
+ "loss": 0.2387,
484
+ "step": 1225
485
  },
486
  {
487
+ "epoch": 0.58,
488
+ "grad_norm": 0.7304142117500305,
489
+ "learning_rate": 0.00017099767981438517,
490
+ "loss": 0.2366,
491
+ "step": 1250
492
  },
493
  {
494
+ "epoch": 0.59,
495
+ "grad_norm": 1.3845186233520508,
496
+ "learning_rate": 0.00017041763341067286,
497
+ "loss": 0.2309,
498
+ "step": 1275
499
  },
500
  {
501
+ "epoch": 0.6,
502
+ "grad_norm": 0.5202885270118713,
503
+ "learning_rate": 0.00016983758700696058,
504
+ "loss": 0.2139,
505
+ "step": 1300
506
  },
507
  {
508
+ "epoch": 0.6,
509
+ "eval_loss": 0.2653418481349945,
510
+ "eval_na_accuracy": 0.7509652376174927,
511
+ "eval_ordinal_accuracy": 0.5492632389068604,
512
+ "eval_ordinal_mae": 0.562603771686554,
513
+ "eval_runtime": 158.9921,
514
+ "eval_samples_per_second": 25.026,
515
+ "eval_steps_per_second": 3.132,
516
+ "step": 1300
517
  },
518
  {
519
+ "epoch": 0.61,
520
+ "grad_norm": 1.6506483554840088,
521
+ "learning_rate": 0.00016925754060324828,
522
+ "loss": 0.3071,
523
+ "step": 1325
 
 
 
 
524
  },
525
  {
526
+ "epoch": 0.63,
527
+ "grad_norm": 0.5789369940757751,
528
+ "learning_rate": 0.00016867749419953597,
529
+ "loss": 0.2689,
530
+ "step": 1350
531
+ },
532
+ {
533
+ "epoch": 0.64,
534
+ "grad_norm": 0.5665389895439148,
535
+ "learning_rate": 0.00016809744779582367,
536
+ "loss": 0.2598,
537
+ "step": 1375
538
+ },
539
+ {
540
+ "epoch": 0.65,
541
+ "grad_norm": 0.6937847137451172,
542
+ "learning_rate": 0.0001675174013921114,
543
+ "loss": 0.2655,
544
+ "step": 1400
545
+ },
546
+ {
547
+ "epoch": 0.65,
548
+ "eval_loss": 0.2760397493839264,
549
+ "eval_na_accuracy": 0.7123551964759827,
550
+ "eval_ordinal_accuracy": 0.5426177382469177,
551
+ "eval_ordinal_mae": 0.6106911897659302,
552
+ "eval_runtime": 160.1635,
553
+ "eval_samples_per_second": 24.843,
554
+ "eval_steps_per_second": 3.109,
555
+ "step": 1400
556
+ },
557
+ {
558
+ "epoch": 0.65,
559
+ "step": 1400,
560
+ "total_flos": 1.735882797809664e+18,
561
+ "train_loss": 0.29669314997536794,
562
+ "train_runtime": 4786.838,
563
+ "train_samples_per_second": 28.807,
564
+ "train_steps_per_second": 1.801
565
  }
566
  ],
567
+ "logging_steps": 25,
568
+ "max_steps": 8620,
569
  "num_input_tokens_seen": 0,
570
  "num_train_epochs": 4,
571
+ "save_steps": 100,
572
+ "total_flos": 1.735882797809664e+18,
573
  "train_batch_size": 16,
574
  "trial_name": null,
575
  "trial_params": null