rshrott committed on
Commit
d0c239a
1 Parent(s): 6eed9bf

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  model-index:
7
  - name: ryan_model314_3
@@ -13,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # ryan_model314_3
15
 
16
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.2783
19
- - Na Accuracy: 0.9389
20
- - Ordinal Mae: 0.8154
21
 
22
  ## Model description
23
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  model-index:
8
  - name: ryan_model314_3
 
14
 
15
  # ryan_model314_3
16
 
17
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.2652
20
+ - Na Accuracy: 0.9372
21
+ - Ordinal Mae: 1.0212
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 2.4,
3
- "eval_loss": 0.2546972334384918,
4
- "eval_na_accuracy": 0.95,
5
- "eval_ordinal_mae": 1.2089859222915764,
6
- "eval_runtime": 8.0073,
7
- "eval_samples_per_second": 24.977,
8
- "eval_steps_per_second": 3.122,
9
- "train_loss": 0.25249010701974234,
10
- "train_runtime": 688.8214,
11
- "train_samples_per_second": 11.614,
12
- "train_steps_per_second": 0.726
13
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "eval_loss": 0.2651739716529846,
4
+ "eval_na_accuracy": 0.9371701432520734,
5
+ "eval_ordinal_mae": 1.021151511080242,
6
+ "eval_runtime": 162.3376,
7
+ "eval_samples_per_second": 24.511,
8
+ "eval_steps_per_second": 3.068,
9
+ "train_loss": 0.29898834055120294,
10
+ "train_runtime": 4048.1416,
11
+ "train_samples_per_second": 34.064,
12
+ "train_steps_per_second": 2.129
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.4,
3
- "eval_loss": 0.2546972334384918,
4
- "eval_na_accuracy": 0.95,
5
- "eval_ordinal_mae": 1.2089859222915764,
6
- "eval_runtime": 8.0073,
7
- "eval_samples_per_second": 24.977,
8
- "eval_steps_per_second": 3.122
9
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "eval_loss": 0.2651739716529846,
4
+ "eval_na_accuracy": 0.9371701432520734,
5
+ "eval_ordinal_mae": 1.021151511080242,
6
+ "eval_runtime": 162.3376,
7
+ "eval_samples_per_second": 24.511,
8
+ "eval_steps_per_second": 3.068
9
  }
runs/Mar26_23-38-05_ryanserver/events.out.tfevents.1711514950.ryanserver.18298.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdf06391994cdf0c1812f9860c0e3be96047ea32feaa533d718d042da3d933e
3
+ size 469
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 2.4,
3
- "train_loss": 0.25249010701974234,
4
- "train_runtime": 688.8214,
5
- "train_samples_per_second": 11.614,
6
- "train_steps_per_second": 0.726
7
  }
 
1
  {
2
+ "epoch": 0.51,
3
+ "train_loss": 0.29898834055120294,
4
+ "train_runtime": 4048.1416,
5
+ "train_samples_per_second": 34.064,
6
+ "train_steps_per_second": 2.129
7
  }
trainer_state.json CHANGED
@@ -1,359 +1,216 @@
1
  {
2
- "best_metric": 0.2546972334384918,
3
- "best_model_checkpoint": "./ryan_model314_3/checkpoint-250",
4
- "epoch": 2.4,
5
- "eval_steps": 25,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.08,
13
- "grad_norm": 0.8899219036102295,
14
- "learning_rate": 9.8e-05,
15
- "loss": 0.6027,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.16,
20
- "grad_norm": 0.9669970273971558,
21
- "learning_rate": 9.6e-05,
22
- "loss": 0.4505,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.2,
27
- "eval_loss": 0.4262112081050873,
28
- "eval_na_accuracy": 0.9,
29
- "eval_ordinal_mae": 1.0091899644308249,
30
- "eval_runtime": 24.8283,
31
- "eval_samples_per_second": 8.055,
32
- "eval_steps_per_second": 1.007,
33
- "step": 25
34
- },
35
- {
36
- "epoch": 0.24,
37
- "grad_norm": 1.0023255348205566,
38
- "learning_rate": 9.4e-05,
39
- "loss": 0.4166,
40
- "step": 30
41
- },
42
- {
43
- "epoch": 0.32,
44
- "grad_norm": 0.7017713189125061,
45
- "learning_rate": 9.200000000000001e-05,
46
- "loss": 0.3632,
47
- "step": 40
48
- },
49
- {
50
- "epoch": 0.4,
51
- "grad_norm": 1.028002142906189,
52
- "learning_rate": 9e-05,
53
- "loss": 0.3847,
54
- "step": 50
55
- },
56
- {
57
- "epoch": 0.4,
58
- "eval_loss": 0.367563396692276,
59
- "eval_na_accuracy": 0.935,
60
- "eval_ordinal_mae": 1.3719091470156184,
61
- "eval_runtime": 7.6715,
62
- "eval_samples_per_second": 26.071,
63
- "eval_steps_per_second": 3.259,
64
- "step": 50
65
- },
66
- {
67
- "epoch": 0.48,
68
- "grad_norm": 0.9666626453399658,
69
- "learning_rate": 8.800000000000001e-05,
70
- "loss": 0.3503,
71
- "step": 60
72
- },
73
- {
74
- "epoch": 0.56,
75
- "grad_norm": 1.6747545003890991,
76
- "learning_rate": 8.6e-05,
77
- "loss": 0.3061,
78
- "step": 70
79
- },
80
- {
81
- "epoch": 0.6,
82
- "eval_loss": 0.32615897059440613,
83
- "eval_na_accuracy": 0.945,
84
- "eval_ordinal_mae": 0.7485670199170652,
85
- "eval_runtime": 7.5813,
86
- "eval_samples_per_second": 26.381,
87
- "eval_steps_per_second": 3.298,
88
- "step": 75
89
- },
90
- {
91
- "epoch": 0.64,
92
- "grad_norm": 0.9667518138885498,
93
- "learning_rate": 8.4e-05,
94
- "loss": 0.3304,
95
- "step": 80
96
- },
97
- {
98
- "epoch": 0.72,
99
- "grad_norm": 1.2316919565200806,
100
- "learning_rate": 8.2e-05,
101
- "loss": 0.3149,
102
- "step": 90
103
- },
104
- {
105
- "epoch": 0.8,
106
- "grad_norm": 0.8591766953468323,
107
- "learning_rate": 8e-05,
108
- "loss": 0.2744,
109
  "step": 100
110
  },
111
  {
112
- "epoch": 0.8,
113
- "eval_loss": 0.3524325489997864,
114
- "eval_na_accuracy": 0.905,
115
- "eval_ordinal_mae": 1.1407896330054461,
116
- "eval_runtime": 7.8965,
117
- "eval_samples_per_second": 25.328,
118
- "eval_steps_per_second": 3.166,
119
  "step": 100
120
  },
121
  {
122
- "epoch": 0.88,
123
- "grad_norm": 0.9472360014915466,
124
- "learning_rate": 7.800000000000001e-05,
125
- "loss": 0.3389,
126
- "step": 110
127
- },
128
- {
129
- "epoch": 0.96,
130
- "grad_norm": 0.5021968483924866,
131
- "learning_rate": 7.6e-05,
132
- "loss": 0.2384,
133
- "step": 120
134
- },
135
- {
136
- "epoch": 1.0,
137
- "eval_loss": 0.36111611127853394,
138
- "eval_na_accuracy": 0.93,
139
- "eval_ordinal_mae": 0.6746639459123592,
140
- "eval_runtime": 7.827,
141
- "eval_samples_per_second": 25.553,
142
- "eval_steps_per_second": 3.194,
143
- "step": 125
144
- },
145
- {
146
- "epoch": 1.04,
147
- "grad_norm": 0.4840359687805176,
148
- "learning_rate": 7.4e-05,
149
- "loss": 0.1954,
150
- "step": 130
151
- },
152
- {
153
- "epoch": 1.12,
154
- "grad_norm": 1.4421360492706299,
155
- "learning_rate": 7.2e-05,
156
- "loss": 0.223,
157
- "step": 140
158
- },
159
- {
160
- "epoch": 1.2,
161
- "grad_norm": 0.36317479610443115,
162
- "learning_rate": 7e-05,
163
- "loss": 0.2021,
164
- "step": 150
165
- },
166
- {
167
- "epoch": 1.2,
168
- "eval_loss": 0.31051769852638245,
169
- "eval_na_accuracy": 0.95,
170
- "eval_ordinal_mae": 1.0440793198453366,
171
- "eval_runtime": 8.093,
172
- "eval_samples_per_second": 24.713,
173
- "eval_steps_per_second": 3.089,
174
- "step": 150
175
- },
176
- {
177
- "epoch": 1.28,
178
- "grad_norm": 0.8046426773071289,
179
- "learning_rate": 6.800000000000001e-05,
180
- "loss": 0.21,
181
- "step": 160
182
- },
183
- {
184
- "epoch": 1.36,
185
- "grad_norm": 0.8122718334197998,
186
- "learning_rate": 6.6e-05,
187
- "loss": 0.2234,
188
- "step": 170
189
- },
190
- {
191
- "epoch": 1.4,
192
- "eval_loss": 0.27382710576057434,
193
- "eval_na_accuracy": 0.955,
194
- "eval_ordinal_mae": 1.4168444083902865,
195
- "eval_runtime": 7.6337,
196
- "eval_samples_per_second": 26.2,
197
- "eval_steps_per_second": 3.275,
198
- "step": 175
199
- },
200
- {
201
- "epoch": 1.44,
202
- "grad_norm": 1.5608426332473755,
203
- "learning_rate": 6.400000000000001e-05,
204
- "loss": 0.2306,
205
- "step": 180
206
- },
207
- {
208
- "epoch": 1.52,
209
- "grad_norm": 0.9130323529243469,
210
- "learning_rate": 6.2e-05,
211
- "loss": 0.1506,
212
- "step": 190
213
- },
214
- {
215
- "epoch": 1.6,
216
- "grad_norm": 0.6261627674102783,
217
- "learning_rate": 6e-05,
218
- "loss": 0.187,
219
  "step": 200
220
  },
221
  {
222
- "epoch": 1.6,
223
- "eval_loss": 0.26879259943962097,
224
- "eval_na_accuracy": 0.955,
225
- "eval_ordinal_mae": 1.365302862794827,
226
- "eval_runtime": 7.5957,
227
- "eval_samples_per_second": 26.331,
228
- "eval_steps_per_second": 3.291,
229
  "step": 200
230
  },
231
  {
232
- "epoch": 1.68,
233
- "grad_norm": 0.7222486138343811,
234
- "learning_rate": 5.8e-05,
235
- "loss": 0.2111,
236
- "step": 210
237
- },
238
- {
239
- "epoch": 1.76,
240
- "grad_norm": 0.7348190546035767,
241
- "learning_rate": 5.6000000000000006e-05,
242
- "loss": 0.2008,
243
- "step": 220
244
- },
245
- {
246
- "epoch": 1.8,
247
- "eval_loss": 0.2668895423412323,
248
- "eval_na_accuracy": 0.96,
249
- "eval_ordinal_mae": 0.8935630971378721,
250
- "eval_runtime": 7.6573,
251
- "eval_samples_per_second": 26.119,
252
- "eval_steps_per_second": 3.265,
253
- "step": 225
254
- },
255
- {
256
- "epoch": 1.84,
257
- "grad_norm": 1.3947832584381104,
258
- "learning_rate": 5.4000000000000005e-05,
259
- "loss": 0.2176,
260
- "step": 230
261
- },
262
- {
263
- "epoch": 1.92,
264
- "grad_norm": 1.1390098333358765,
265
- "learning_rate": 5.2000000000000004e-05,
266
- "loss": 0.2037,
267
- "step": 240
268
- },
269
- {
270
- "epoch": 2.0,
271
- "grad_norm": 0.7233979105949402,
272
- "learning_rate": 5e-05,
273
- "loss": 0.1541,
274
- "step": 250
275
  },
276
  {
277
- "epoch": 2.0,
278
- "eval_loss": 0.2546972334384918,
279
- "eval_na_accuracy": 0.95,
280
- "eval_ordinal_mae": 1.2089859222915764,
281
- "eval_runtime": 7.685,
282
- "eval_samples_per_second": 26.025,
283
- "eval_steps_per_second": 3.253,
284
- "step": 250
285
  },
286
  {
287
- "epoch": 2.08,
288
- "grad_norm": 0.5704214572906494,
289
- "learning_rate": 4.8e-05,
290
- "loss": 0.1104,
291
- "step": 260
292
  },
293
  {
294
- "epoch": 2.16,
295
- "grad_norm": 0.648725688457489,
296
- "learning_rate": 4.600000000000001e-05,
297
- "loss": 0.1201,
298
- "step": 270
 
 
 
299
  },
300
  {
301
- "epoch": 2.2,
302
- "eval_loss": 0.2725123465061188,
303
- "eval_na_accuracy": 0.95,
304
- "eval_ordinal_mae": 0.7955228271403142,
305
- "eval_runtime": 7.4815,
306
- "eval_samples_per_second": 26.733,
307
- "eval_steps_per_second": 3.342,
308
- "step": 275
309
  },
310
  {
311
- "epoch": 2.24,
312
- "grad_norm": 0.5427641272544861,
313
- "learning_rate": 4.4000000000000006e-05,
314
- "loss": 0.096,
315
- "step": 280
 
 
 
316
  },
317
  {
318
- "epoch": 2.32,
319
- "grad_norm": 1.687751293182373,
320
- "learning_rate": 4.2e-05,
321
- "loss": 0.155,
322
- "step": 290
323
  },
324
  {
325
- "epoch": 2.4,
326
- "grad_norm": 0.6842709183692932,
327
- "learning_rate": 4e-05,
328
- "loss": 0.113,
329
- "step": 300
 
 
 
330
  },
331
  {
332
- "epoch": 2.4,
333
- "eval_loss": 0.2817830443382263,
334
- "eval_na_accuracy": 0.955,
335
- "eval_ordinal_mae": 1.2378182741668735,
336
- "eval_runtime": 8.0351,
337
- "eval_samples_per_second": 24.891,
338
- "eval_steps_per_second": 3.111,
339
- "step": 300
340
  },
341
  {
342
- "epoch": 2.4,
343
- "step": 300,
344
- "total_flos": 3.71974885244928e+17,
345
- "train_loss": 0.25249010701974234,
346
- "train_runtime": 688.8214,
347
- "train_samples_per_second": 11.614,
348
- "train_steps_per_second": 0.726
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  }
350
  ],
351
- "logging_steps": 10,
352
- "max_steps": 500,
353
  "num_input_tokens_seen": 0,
354
  "num_train_epochs": 4,
355
- "save_steps": 25,
356
- "total_flos": 3.71974885244928e+17,
357
  "train_batch_size": 16,
358
  "trial_name": null,
359
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2651739716529846,
3
+ "best_model_checkpoint": "./ryan_model314_3/checkpoint-1000",
4
+ "epoch": 0.5104408352668214,
5
+ "eval_steps": 100,
6
+ "global_step": 1100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.05,
13
+ "grad_norm": 1.358393669128418,
14
+ "learning_rate": 0.00019767981438515082,
15
+ "loss": 0.3676,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.05,
20
+ "eval_loss": 0.34232085943222046,
21
+ "eval_na_accuracy": 0.9273686855993968,
22
+ "eval_ordinal_mae": 1.1293019706063574,
23
+ "eval_runtime": 381.3744,
24
+ "eval_samples_per_second": 10.433,
25
+ "eval_steps_per_second": 1.306,
26
  "step": 100
27
  },
28
  {
29
+ "epoch": 0.09,
30
+ "grad_norm": 0.9884235262870789,
31
+ "learning_rate": 0.00019535962877030162,
32
+ "loss": 0.3329,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "step": 200
34
  },
35
  {
36
+ "epoch": 0.09,
37
+ "eval_loss": 0.31356191635131836,
38
+ "eval_na_accuracy": 0.9313897964312642,
39
+ "eval_ordinal_mae": 1.070580373245832,
40
+ "eval_runtime": 164.3074,
41
+ "eval_samples_per_second": 24.217,
42
+ "eval_steps_per_second": 3.031,
43
  "step": 200
44
  },
45
  {
46
+ "epoch": 0.14,
47
+ "grad_norm": 1.2422517538070679,
48
+ "learning_rate": 0.00019303944315545243,
49
+ "loss": 0.3134,
50
+ "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  },
52
  {
53
+ "epoch": 0.14,
54
+ "eval_loss": 0.3302006423473358,
55
+ "eval_na_accuracy": 0.9165619502387534,
56
+ "eval_ordinal_mae": 1.1219553900231423,
57
+ "eval_runtime": 164.3869,
58
+ "eval_samples_per_second": 24.205,
59
+ "eval_steps_per_second": 3.029,
60
+ "step": 300
61
  },
62
  {
63
+ "epoch": 0.19,
64
+ "grad_norm": 1.1815475225448608,
65
+ "learning_rate": 0.00019071925754060324,
66
+ "loss": 0.314,
67
+ "step": 400
68
  },
69
  {
70
+ "epoch": 0.19,
71
+ "eval_loss": 0.2992381155490875,
72
+ "eval_na_accuracy": 0.9256094496104549,
73
+ "eval_ordinal_mae": 0.820222080006495,
74
+ "eval_runtime": 165.1522,
75
+ "eval_samples_per_second": 24.093,
76
+ "eval_steps_per_second": 3.015,
77
+ "step": 400
78
  },
79
  {
80
+ "epoch": 0.23,
81
+ "grad_norm": 0.47366032004356384,
82
+ "learning_rate": 0.00018839907192575407,
83
+ "loss": 0.2965,
84
+ "step": 500
 
 
 
85
  },
86
  {
87
+ "epoch": 0.23,
88
+ "eval_loss": 0.319803923368454,
89
+ "eval_na_accuracy": 0.9248554913294798,
90
+ "eval_ordinal_mae": 1.2210362517446196,
91
+ "eval_runtime": 163.23,
92
+ "eval_samples_per_second": 24.377,
93
+ "eval_steps_per_second": 3.051,
94
+ "step": 500
95
  },
96
  {
97
+ "epoch": 0.28,
98
+ "grad_norm": 1.8707951307296753,
99
+ "learning_rate": 0.00018607888631090488,
100
+ "loss": 0.3068,
101
+ "step": 600
102
  },
103
  {
104
+ "epoch": 0.28,
105
+ "eval_loss": 0.26730772852897644,
106
+ "eval_na_accuracy": 0.9371701432520734,
107
+ "eval_ordinal_mae": 1.103637127990878,
108
+ "eval_runtime": 160.1368,
109
+ "eval_samples_per_second": 24.848,
110
+ "eval_steps_per_second": 3.11,
111
+ "step": 600
112
  },
113
  {
114
+ "epoch": 0.32,
115
+ "grad_norm": 1.1020443439483643,
116
+ "learning_rate": 0.0001837587006960557,
117
+ "loss": 0.2824,
118
+ "step": 700
 
 
 
119
  },
120
  {
121
+ "epoch": 0.32,
122
+ "eval_loss": 0.2921823561191559,
123
+ "eval_na_accuracy": 0.9371701432520734,
124
+ "eval_ordinal_mae": 1.4977284913306896,
125
+ "eval_runtime": 163.7628,
126
+ "eval_samples_per_second": 24.297,
127
+ "eval_steps_per_second": 3.041,
128
+ "step": 700
129
+ },
130
+ {
131
+ "epoch": 0.37,
132
+ "grad_norm": 1.2855117321014404,
133
+ "learning_rate": 0.0001814385150812065,
134
+ "loss": 0.2914,
135
+ "step": 800
136
+ },
137
+ {
138
+ "epoch": 0.37,
139
+ "eval_loss": 0.2798122763633728,
140
+ "eval_na_accuracy": 0.9384267403870319,
141
+ "eval_ordinal_mae": 0.7788859930601368,
142
+ "eval_runtime": 163.524,
143
+ "eval_samples_per_second": 24.333,
144
+ "eval_steps_per_second": 3.045,
145
+ "step": 800
146
+ },
147
+ {
148
+ "epoch": 0.42,
149
+ "grad_norm": 1.4634666442871094,
150
+ "learning_rate": 0.00017911832946635733,
151
+ "loss": 0.2968,
152
+ "step": 900
153
+ },
154
+ {
155
+ "epoch": 0.42,
156
+ "eval_loss": 0.27096793055534363,
157
+ "eval_na_accuracy": 0.9369188238250816,
158
+ "eval_ordinal_mae": 0.9694435305190251,
159
+ "eval_runtime": 163.2761,
160
+ "eval_samples_per_second": 24.37,
161
+ "eval_steps_per_second": 3.05,
162
+ "step": 900
163
+ },
164
+ {
165
+ "epoch": 0.46,
166
+ "grad_norm": 0.3547471761703491,
167
+ "learning_rate": 0.00017679814385150814,
168
+ "loss": 0.2433,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 0.46,
173
+ "eval_loss": 0.2651739716529846,
174
+ "eval_na_accuracy": 0.9371701432520734,
175
+ "eval_ordinal_mae": 1.021151511080242,
176
+ "eval_runtime": 165.2163,
177
+ "eval_samples_per_second": 24.084,
178
+ "eval_steps_per_second": 3.014,
179
+ "step": 1000
180
+ },
181
+ {
182
+ "epoch": 0.51,
183
+ "grad_norm": 0.6242479085922241,
184
+ "learning_rate": 0.00017447795823665894,
185
+ "loss": 0.2438,
186
+ "step": 1100
187
+ },
188
+ {
189
+ "epoch": 0.51,
190
+ "eval_loss": 0.2782880365848541,
191
+ "eval_na_accuracy": 0.9389293792410154,
192
+ "eval_ordinal_mae": 0.8154305260353155,
193
+ "eval_runtime": 161.9425,
194
+ "eval_samples_per_second": 24.57,
195
+ "eval_steps_per_second": 3.075,
196
+ "step": 1100
197
+ },
198
+ {
199
+ "epoch": 0.51,
200
+ "step": 1100,
201
+ "total_flos": 1.363907912564736e+18,
202
+ "train_loss": 0.29898834055120294,
203
+ "train_runtime": 4048.1416,
204
+ "train_samples_per_second": 34.064,
205
+ "train_steps_per_second": 2.129
206
  }
207
  ],
208
+ "logging_steps": 100,
209
+ "max_steps": 8620,
210
  "num_input_tokens_seen": 0,
211
  "num_train_epochs": 4,
212
+ "save_steps": 100,
213
+ "total_flos": 1.363907912564736e+18,
214
  "train_batch_size": 16,
215
  "trial_name": null,
216
  "trial_params": null