rshrott committed on
Commit
f03688f
1 Parent(s): abbbd2f

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
  - renovation
@@ -14,7 +15,7 @@ model-index:
14
  name: Image Classification
15
  type: image-classification
16
  dataset:
17
- name: renovation
18
  type: renovation
19
  config: default
20
  split: validation
@@ -22,7 +23,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.6986301369863014
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +31,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # vit-base-beans-demo-v5
32
 
33
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the renovation dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.0644
36
- - Accuracy: 0.6986
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  datasets:
8
  - renovation
 
15
  name: Image Classification
16
  type: image-classification
17
  dataset:
18
+ name: beans
19
  type: renovation
20
  config: default
21
  split: validation
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.634703196347032
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  # vit-base-beans-demo-v5
33
 
34
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.9295
37
+ - Accuracy: 0.6347
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.6575342465753424,
4
- "eval_loss": 0.7924718260765076,
5
- "eval_runtime": 8.3551,
6
- "eval_samples_per_second": 26.211,
7
- "eval_steps_per_second": 3.351,
8
  "total_flos": 6.10974224738132e+17,
9
- "train_loss": 0.575864625193419,
10
- "train_runtime": 402.9871,
11
- "train_samples_per_second": 19.564,
12
- "train_steps_per_second": 1.231
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.634703196347032,
4
+ "eval_loss": 0.929534375667572,
5
+ "eval_runtime": 8.1809,
6
+ "eval_samples_per_second": 26.77,
7
+ "eval_steps_per_second": 3.423,
8
  "total_flos": 6.10974224738132e+17,
9
+ "train_loss": 0.25425288126233125,
10
+ "train_runtime": 387.3536,
11
+ "train_samples_per_second": 20.353,
12
+ "train_steps_per_second": 1.28
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.6575342465753424,
4
- "eval_loss": 0.7924718260765076,
5
- "eval_runtime": 8.3551,
6
- "eval_samples_per_second": 26.211,
7
- "eval_steps_per_second": 3.351
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.634703196347032,
4
+ "eval_loss": 0.929534375667572,
5
+ "eval_runtime": 8.1809,
6
+ "eval_samples_per_second": 26.77,
7
+ "eval_steps_per_second": 3.423
8
  }
runs/Mar23_01-10-51_3ec072334407/events.out.tfevents.1711156669.3ec072334407.4424.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96afcb793b77d2098984a2dd0f40df476315a1abd58cc3e95b750d11c3ec1f2e
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.10974224738132e+17,
4
- "train_loss": 0.575864625193419,
5
- "train_runtime": 402.9871,
6
- "train_samples_per_second": 19.564,
7
- "train_steps_per_second": 1.231
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.10974224738132e+17,
4
+ "train_loss": 0.25425288126233125,
5
+ "train_runtime": 387.3536,
6
+ "train_samples_per_second": 20.353,
7
+ "train_steps_per_second": 1.28
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7924718260765076,
3
- "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-200",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
  "global_step": 496,
@@ -10,391 +10,391 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.08,
13
- "grad_norm": 2.0731146335601807,
14
  "learning_rate": 0.00019596774193548388,
15
- "loss": 1.5546,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.16,
20
- "grad_norm": 1.9955084323883057,
21
  "learning_rate": 0.00019193548387096775,
22
- "loss": 1.3431,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.24,
27
- "grad_norm": 1.4386166334152222,
28
  "learning_rate": 0.00018790322580645164,
29
- "loss": 1.1702,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.32,
34
- "grad_norm": 2.1243135929107666,
35
  "learning_rate": 0.00018387096774193548,
36
- "loss": 1.1703,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.4,
41
- "grad_norm": 1.4760087728500366,
42
  "learning_rate": 0.00017983870967741935,
43
- "loss": 1.0635,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.48,
48
- "grad_norm": 1.3573176860809326,
49
  "learning_rate": 0.00017580645161290325,
50
- "loss": 1.0524,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.56,
55
- "grad_norm": 3.1192474365234375,
56
  "learning_rate": 0.00017177419354838711,
57
- "loss": 1.0872,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.65,
62
- "grad_norm": 2.4728469848632812,
63
  "learning_rate": 0.00016774193548387098,
64
- "loss": 0.87,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.73,
69
- "grad_norm": 3.819819688796997,
70
  "learning_rate": 0.00016370967741935485,
71
- "loss": 1.2173,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.81,
76
- "grad_norm": 1.585181713104248,
77
  "learning_rate": 0.00015967741935483872,
78
- "loss": 1.1711,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.81,
83
- "eval_accuracy": 0.5981735159817352,
84
- "eval_loss": 1.0254943370819092,
85
- "eval_runtime": 7.7395,
86
- "eval_samples_per_second": 28.296,
87
- "eval_steps_per_second": 3.618,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.89,
92
- "grad_norm": 1.5606015920639038,
93
  "learning_rate": 0.0001556451612903226,
94
- "loss": 0.9609,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.97,
99
- "grad_norm": 2.0005290508270264,
100
  "learning_rate": 0.00015161290322580646,
101
- "loss": 0.8673,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 1.05,
106
- "grad_norm": 1.7934025526046753,
107
  "learning_rate": 0.00014758064516129032,
108
- "loss": 0.7807,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 1.13,
113
- "grad_norm": 2.0995261669158936,
114
  "learning_rate": 0.00014354838709677422,
115
- "loss": 0.7211,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 1.21,
120
- "grad_norm": 1.2384763956069946,
121
- "learning_rate": 0.0001395161290322581,
122
- "loss": 0.8051,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 1.29,
127
- "grad_norm": 3.001422643661499,
128
- "learning_rate": 0.00013548387096774193,
129
- "loss": 0.9052,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 1.37,
134
- "grad_norm": 1.8868207931518555,
135
- "learning_rate": 0.0001314516129032258,
136
- "loss": 0.7217,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.45,
141
- "grad_norm": 1.7666105031967163,
142
- "learning_rate": 0.0001274193548387097,
143
- "loss": 0.728,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.53,
148
- "grad_norm": 2.378178358078003,
149
- "learning_rate": 0.00012338709677419356,
150
- "loss": 0.7242,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.61,
155
- "grad_norm": 2.4882402420043945,
156
- "learning_rate": 0.00011935483870967743,
157
- "loss": 0.7083,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.61,
162
  "eval_accuracy": 0.6575342465753424,
163
- "eval_loss": 0.7924718260765076,
164
- "eval_runtime": 7.9712,
165
- "eval_samples_per_second": 27.474,
166
- "eval_steps_per_second": 3.513,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.69,
171
- "grad_norm": 3.762314796447754,
172
- "learning_rate": 0.00011532258064516131,
173
- "loss": 0.6794,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.77,
178
- "grad_norm": 1.8527878522872925,
179
- "learning_rate": 0.00011129032258064515,
180
- "loss": 0.5424,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.85,
185
- "grad_norm": 2.1447601318359375,
186
- "learning_rate": 0.00010725806451612903,
187
- "loss": 0.7186,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.94,
192
- "grad_norm": 2.2273943424224854,
193
- "learning_rate": 0.0001032258064516129,
194
- "loss": 0.7786,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 2.02,
199
- "grad_norm": 1.1381219625473022,
200
- "learning_rate": 9.919354838709678e-05,
201
- "loss": 0.5616,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 2.1,
206
- "grad_norm": 1.41716730594635,
207
- "learning_rate": 9.516129032258065e-05,
208
- "loss": 0.3615,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 2.18,
213
- "grad_norm": 1.193400263786316,
214
- "learning_rate": 9.112903225806452e-05,
215
- "loss": 0.3589,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 2.26,
220
- "grad_norm": 1.1476421356201172,
221
- "learning_rate": 8.709677419354839e-05,
222
- "loss": 0.302,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 2.34,
227
- "grad_norm": 2.02689790725708,
228
- "learning_rate": 8.306451612903227e-05,
229
- "loss": 0.4161,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 2.42,
234
- "grad_norm": 1.2040166854858398,
235
- "learning_rate": 7.903225806451613e-05,
236
- "loss": 0.2479,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 2.42,
241
- "eval_accuracy": 0.6940639269406392,
242
- "eval_loss": 0.871184766292572,
243
- "eval_runtime": 7.1874,
244
- "eval_samples_per_second": 30.47,
245
- "eval_steps_per_second": 3.896,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 2.5,
250
- "grad_norm": 4.465162754058838,
251
- "learning_rate": 7.500000000000001e-05,
252
- "loss": 0.2949,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 2.58,
257
- "grad_norm": 2.3733272552490234,
258
- "learning_rate": 7.096774193548388e-05,
259
- "loss": 0.2599,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 2.66,
264
- "grad_norm": 3.527358293533325,
265
- "learning_rate": 6.693548387096774e-05,
266
- "loss": 0.4337,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 2.74,
271
- "grad_norm": 2.8637776374816895,
272
- "learning_rate": 6.290322580645161e-05,
273
- "loss": 0.3137,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 2.82,
278
- "grad_norm": 4.198659896850586,
279
- "learning_rate": 5.887096774193549e-05,
280
- "loss": 0.3192,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.9,
285
- "grad_norm": 4.358771324157715,
286
- "learning_rate": 5.4838709677419355e-05,
287
- "loss": 0.3796,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.98,
292
- "grad_norm": 3.1412558555603027,
293
- "learning_rate": 5.080645161290323e-05,
294
- "loss": 0.2622,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 3.06,
299
- "grad_norm": 1.4319863319396973,
300
- "learning_rate": 4.67741935483871e-05,
301
- "loss": 0.2016,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 3.15,
306
- "grad_norm": 0.5633509755134583,
307
- "learning_rate": 4.2741935483870973e-05,
308
- "loss": 0.1276,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 3.23,
313
- "grad_norm": 0.31856054067611694,
314
- "learning_rate": 3.870967741935484e-05,
315
- "loss": 0.127,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 3.23,
320
- "eval_accuracy": 0.6940639269406392,
321
- "eval_loss": 0.8440136313438416,
322
- "eval_runtime": 7.6031,
323
- "eval_samples_per_second": 28.804,
324
- "eval_steps_per_second": 3.683,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 3.31,
329
- "grad_norm": 0.2085038274526596,
330
- "learning_rate": 3.467741935483872e-05,
331
- "loss": 0.1289,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 3.39,
336
- "grad_norm": 2.471668004989624,
337
- "learning_rate": 3.0645161290322585e-05,
338
- "loss": 0.1495,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 3.47,
343
- "grad_norm": 1.026688575744629,
344
- "learning_rate": 2.661290322580645e-05,
345
- "loss": 0.1389,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 3.55,
350
- "grad_norm": 4.408120155334473,
351
- "learning_rate": 2.258064516129032e-05,
352
- "loss": 0.1799,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 3.63,
357
- "grad_norm": 1.7386049032211304,
358
- "learning_rate": 1.8548387096774193e-05,
359
- "loss": 0.0727,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 3.71,
364
- "grad_norm": 0.9766908884048462,
365
- "learning_rate": 1.4516129032258066e-05,
366
- "loss": 0.1062,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 3.79,
371
- "grad_norm": 0.2413896918296814,
372
- "learning_rate": 1.0483870967741936e-05,
373
- "loss": 0.1303,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 3.87,
378
- "grad_norm": 1.249718427658081,
379
- "learning_rate": 6.451612903225806e-06,
380
- "loss": 0.1025,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 3.95,
385
- "grad_norm": 3.0604677200317383,
386
- "learning_rate": 2.4193548387096776e-06,
387
- "loss": 0.1515,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 4.0,
392
  "step": 496,
393
  "total_flos": 6.10974224738132e+17,
394
- "train_loss": 0.575864625193419,
395
- "train_runtime": 402.9871,
396
- "train_samples_per_second": 19.564,
397
- "train_steps_per_second": 1.231
398
  }
399
  ],
400
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.929534375667572,
3
+ "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-100",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
  "global_step": 496,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.08,
13
+ "grad_norm": 1.9071108102798462,
14
  "learning_rate": 0.00019596774193548388,
15
+ "loss": 0.7806,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.16,
20
+ "grad_norm": 2.2380499839782715,
21
  "learning_rate": 0.00019193548387096775,
22
+ "loss": 0.7214,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.24,
27
+ "grad_norm": 1.4890930652618408,
28
  "learning_rate": 0.00018790322580645164,
29
+ "loss": 0.6215,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.32,
34
+ "grad_norm": 3.2323720455169678,
35
  "learning_rate": 0.00018387096774193548,
36
+ "loss": 0.6378,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.4,
41
+ "grad_norm": 2.838930606842041,
42
  "learning_rate": 0.00017983870967741935,
43
+ "loss": 0.7502,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.48,
48
+ "grad_norm": 3.2034356594085693,
49
  "learning_rate": 0.00017580645161290325,
50
+ "loss": 0.5904,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.56,
55
+ "grad_norm": 3.1891825199127197,
56
  "learning_rate": 0.00017177419354838711,
57
+ "loss": 0.5718,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.65,
62
+ "grad_norm": 2.0921356678009033,
63
  "learning_rate": 0.00016774193548387098,
64
+ "loss": 0.3783,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.73,
69
+ "grad_norm": 2.864804983139038,
70
  "learning_rate": 0.00016370967741935485,
71
+ "loss": 0.6002,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.81,
76
+ "grad_norm": 3.1752126216888428,
77
  "learning_rate": 0.00015967741935483872,
78
+ "loss": 0.6438,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.81,
83
+ "eval_accuracy": 0.634703196347032,
84
+ "eval_loss": 0.929534375667572,
85
+ "eval_runtime": 7.2962,
86
+ "eval_samples_per_second": 30.016,
87
+ "eval_steps_per_second": 3.838,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.89,
92
+ "grad_norm": 2.728193521499634,
93
  "learning_rate": 0.0001556451612903226,
94
+ "loss": 0.5441,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.97,
99
+ "grad_norm": 2.140393018722534,
100
  "learning_rate": 0.00015161290322580646,
101
+ "loss": 0.4403,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 1.05,
106
+ "grad_norm": 0.6765386462211609,
107
  "learning_rate": 0.00014758064516129032,
108
+ "loss": 0.3251,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 1.13,
113
+ "grad_norm": 0.9497590661048889,
114
  "learning_rate": 0.00014354838709677422,
115
+ "loss": 0.2046,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 1.21,
120
+ "grad_norm": 4.010074615478516,
121
+ "learning_rate": 0.00013991935483870967,
122
+ "loss": 0.3276,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 1.29,
127
+ "grad_norm": 3.7631189823150635,
128
+ "learning_rate": 0.00013588709677419357,
129
+ "loss": 0.2937,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 1.37,
134
+ "grad_norm": 0.5803029537200928,
135
+ "learning_rate": 0.00013185483870967743,
136
+ "loss": 0.1906,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.45,
141
+ "grad_norm": 5.088043212890625,
142
+ "learning_rate": 0.0001278225806451613,
143
+ "loss": 0.2207,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.53,
148
+ "grad_norm": 2.3816022872924805,
149
+ "learning_rate": 0.00012379032258064514,
150
+ "loss": 0.1919,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.61,
155
+ "grad_norm": 5.558553218841553,
156
+ "learning_rate": 0.00011975806451612903,
157
+ "loss": 0.3105,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.61,
162
  "eval_accuracy": 0.6575342465753424,
163
+ "eval_loss": 0.9350173473358154,
164
+ "eval_runtime": 7.7793,
165
+ "eval_samples_per_second": 28.152,
166
+ "eval_steps_per_second": 3.599,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.69,
171
+ "grad_norm": 3.439823865890503,
172
+ "learning_rate": 0.00011572580645161291,
173
+ "loss": 0.3714,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.77,
178
+ "grad_norm": 2.6023850440979004,
179
+ "learning_rate": 0.00011169354838709678,
180
+ "loss": 0.2869,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.85,
185
+ "grad_norm": 3.2238519191741943,
186
+ "learning_rate": 0.00010766129032258066,
187
+ "loss": 0.4462,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.94,
192
+ "grad_norm": 1.0531260967254639,
193
+ "learning_rate": 0.00010362903225806453,
194
+ "loss": 0.3634,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 2.02,
199
+ "grad_norm": 0.5729889869689941,
200
+ "learning_rate": 9.95967741935484e-05,
201
+ "loss": 0.2624,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 2.1,
206
+ "grad_norm": 0.1924820989370346,
207
+ "learning_rate": 9.556451612903226e-05,
208
+ "loss": 0.0999,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 2.18,
213
+ "grad_norm": 0.39775505661964417,
214
+ "learning_rate": 9.153225806451613e-05,
215
+ "loss": 0.0938,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 2.26,
220
+ "grad_norm": 0.22179947793483734,
221
+ "learning_rate": 8.75e-05,
222
+ "loss": 0.1017,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 2.34,
227
+ "grad_norm": 1.6249357461929321,
228
+ "learning_rate": 8.346774193548388e-05,
229
+ "loss": 0.1745,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 2.42,
234
+ "grad_norm": 0.34801536798477173,
235
+ "learning_rate": 7.943548387096774e-05,
236
+ "loss": 0.0634,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 2.42,
241
+ "eval_accuracy": 0.6894977168949772,
242
+ "eval_loss": 1.0781886577606201,
243
+ "eval_runtime": 7.6715,
244
+ "eval_samples_per_second": 28.547,
245
+ "eval_steps_per_second": 3.65,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 2.5,
250
+ "grad_norm": 2.6541597843170166,
251
+ "learning_rate": 7.540322580645162e-05,
252
+ "loss": 0.0772,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 2.58,
257
+ "grad_norm": 0.1635380983352661,
258
+ "learning_rate": 7.137096774193549e-05,
259
+ "loss": 0.1042,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 2.66,
264
+ "grad_norm": 1.126976490020752,
265
+ "learning_rate": 6.733870967741935e-05,
266
+ "loss": 0.1643,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 2.74,
271
+ "grad_norm": 0.2140628844499588,
272
+ "learning_rate": 6.330645161290322e-05,
273
+ "loss": 0.0479,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 2.82,
278
+ "grad_norm": 0.14856065809726715,
279
+ "learning_rate": 5.9274193548387104e-05,
280
+ "loss": 0.0606,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.9,
285
+ "grad_norm": 1.9021470546722412,
286
+ "learning_rate": 5.5241935483870966e-05,
287
+ "loss": 0.0576,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.98,
292
+ "grad_norm": 0.488421767950058,
293
+ "learning_rate": 5.120967741935484e-05,
294
+ "loss": 0.1573,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 3.06,
299
+ "grad_norm": 0.27475953102111816,
300
+ "learning_rate": 4.7177419354838716e-05,
301
+ "loss": 0.0264,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 3.15,
306
+ "grad_norm": 0.08814023435115814,
307
+ "learning_rate": 4.3145161290322584e-05,
308
+ "loss": 0.0197,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 3.23,
313
+ "grad_norm": 0.10707065463066101,
314
+ "learning_rate": 3.911290322580645e-05,
315
+ "loss": 0.0257,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 3.23,
320
+ "eval_accuracy": 0.6986301369863014,
321
+ "eval_loss": 1.06435227394104,
322
+ "eval_runtime": 7.0971,
323
+ "eval_samples_per_second": 30.858,
324
+ "eval_steps_per_second": 3.945,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 3.31,
329
+ "grad_norm": 0.06996390968561172,
330
+ "learning_rate": 3.508064516129033e-05,
331
+ "loss": 0.0192,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 3.39,
336
+ "grad_norm": 1.358115315437317,
337
+ "learning_rate": 3.1048387096774195e-05,
338
+ "loss": 0.0431,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 3.47,
343
+ "grad_norm": 0.4962191581726074,
344
+ "learning_rate": 2.7016129032258064e-05,
345
+ "loss": 0.0573,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 3.55,
350
+ "grad_norm": 0.08283121138811111,
351
+ "learning_rate": 2.2983870967741935e-05,
352
+ "loss": 0.0216,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 3.63,
357
+ "grad_norm": 0.06285007297992706,
358
+ "learning_rate": 1.8951612903225807e-05,
359
+ "loss": 0.0169,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 3.71,
364
+ "grad_norm": 0.10198648273944855,
365
+ "learning_rate": 1.4919354838709679e-05,
366
+ "loss": 0.0188,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 3.79,
371
+ "grad_norm": 1.5539321899414062,
372
+ "learning_rate": 1.0887096774193549e-05,
373
+ "loss": 0.0227,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 3.87,
378
+ "grad_norm": 0.06271003931760788,
379
+ "learning_rate": 6.854838709677419e-06,
380
+ "loss": 0.0212,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 3.95,
385
+ "grad_norm": 0.1244824230670929,
386
+ "learning_rate": 2.82258064516129e-06,
387
+ "loss": 0.0183,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 4.0,
392
  "step": 496,
393
  "total_flos": 6.10974224738132e+17,
394
+ "train_loss": 0.25425288126233125,
395
+ "train_runtime": 387.3536,
396
+ "train_samples_per_second": 20.353,
397
+ "train_steps_per_second": 1.28
398
  }
399
  ],
400
  "logging_steps": 10,