mmervecerit committed on
Commit
b964fbd
·
verified ·
1 Parent(s): ecfdf3f

cheers again

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,10 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # vit-base-beans-tutorial
17
 
18
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.1777
21
- - Accuracy: 0.9850
22
 
23
  ## Model description
24
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
16
 
17
  # vit-base-beans-tutorial
18
 
19
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the beans dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5143
22
+ - Accuracy: 0.9609
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9924812030075187,
4
- "eval_loss": 0.07803888618946075,
5
- "eval_runtime": 2.0235,
6
- "eval_samples_per_second": 65.728,
7
- "eval_steps_per_second": 8.401,
8
  "total_flos": 4.006371770595533e+17,
9
- "train_loss": 2.7056336402893067e-05,
10
- "train_runtime": 203.0893,
11
- "train_samples_per_second": 25.457,
12
- "train_steps_per_second": 3.201
13
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9849624060150376,
4
+ "eval_loss": 0.16576917469501495,
5
+ "eval_runtime": 2.0958,
6
+ "eval_samples_per_second": 63.46,
7
+ "eval_steps_per_second": 8.111,
8
  "total_flos": 4.006371770595533e+17,
9
+ "train_loss": 3.2931016008100974e-06,
10
+ "train_runtime": 143.2513,
11
+ "train_samples_per_second": 36.09,
12
+ "train_steps_per_second": 2.269
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9924812030075187,
4
- "eval_loss": 0.07803888618946075,
5
- "eval_runtime": 2.0235,
6
- "eval_samples_per_second": 65.728,
7
- "eval_steps_per_second": 8.401
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9849624060150376,
4
+ "eval_loss": 0.16576917469501495,
5
+ "eval_runtime": 2.0958,
6
+ "eval_samples_per_second": 63.46,
7
+ "eval_steps_per_second": 8.111
8
  }
runs/Jan24_01-31-50_579caf070ae1/events.out.tfevents.1706060139.579caf070ae1.3225.12 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca9c618977c4937a7160dc15ff5974824a77c21eceac11ba2b07382ad9e54de
3
+ size 734
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 4.006371770595533e+17,
4
- "train_loss": 2.7056336402893067e-05,
5
- "train_runtime": 203.0893,
6
- "train_samples_per_second": 25.457,
7
- "train_steps_per_second": 3.201
8
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 4.006371770595533e+17,
4
+ "train_loss": 3.2931016008100974e-06,
5
+ "train_runtime": 143.2513,
6
+ "train_samples_per_second": 36.09,
7
+ "train_steps_per_second": 2.269
8
  }
trainer_state.json CHANGED
@@ -1,469 +1,244 @@
1
  {
2
- "best_metric": 0.07803888618946075,
3
  "best_model_checkpoint": "./vit-base-beans-tutorial/checkpoint-100",
4
  "epoch": 5.0,
5
  "eval_steps": 100,
6
- "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.08,
13
- "learning_rate": 0.00019692307692307696,
14
- "loss": 0.0004,
15
- "step": 10
16
- },
17
  {
18
  "epoch": 0.15,
19
  "learning_rate": 0.00019384615384615385,
20
- "loss": 0.0002,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.23,
25
- "learning_rate": 0.0001907692307692308,
26
- "loss": 0.0002,
27
- "step": 30
28
  },
29
  {
30
  "epoch": 0.31,
31
  "learning_rate": 0.0001876923076923077,
32
- "loss": 0.0001,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.38,
37
- "learning_rate": 0.00018461538461538463,
38
- "loss": 0.0001,
39
- "step": 50
40
  },
41
  {
42
  "epoch": 0.46,
43
  "learning_rate": 0.00018153846153846155,
44
- "loss": 0.0001,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.54,
49
- "learning_rate": 0.00017846153846153847,
50
- "loss": 0.0001,
51
- "step": 70
52
  },
53
  {
54
  "epoch": 0.62,
55
  "learning_rate": 0.0001753846153846154,
56
  "loss": 0.0,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.69,
61
- "learning_rate": 0.00017230769230769234,
62
- "loss": 0.0,
63
- "step": 90
64
  },
65
  {
66
  "epoch": 0.77,
67
  "learning_rate": 0.00016923076923076923,
68
  "loss": 0.0,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.77,
73
- "eval_accuracy": 0.9924812030075187,
74
- "eval_loss": 0.07803888618946075,
75
- "eval_runtime": 2.3471,
76
- "eval_samples_per_second": 56.666,
77
- "eval_steps_per_second": 7.243,
78
- "step": 100
79
- },
80
- {
81
- "epoch": 0.85,
82
- "learning_rate": 0.00016615384615384617,
83
- "loss": 0.0,
84
- "step": 110
85
  },
86
  {
87
  "epoch": 0.92,
88
  "learning_rate": 0.0001630769230769231,
89
  "loss": 0.0,
90
- "step": 120
91
- },
92
- {
93
- "epoch": 1.0,
94
- "learning_rate": 0.00016,
95
- "loss": 0.0,
96
- "step": 130
97
  },
98
  {
99
  "epoch": 1.08,
100
  "learning_rate": 0.00015692307692307693,
101
  "loss": 0.0,
102
- "step": 140
103
- },
104
- {
105
- "epoch": 1.15,
106
- "learning_rate": 0.00015384615384615385,
107
- "loss": 0.0,
108
- "step": 150
109
  },
110
  {
111
  "epoch": 1.23,
112
  "learning_rate": 0.00015076923076923077,
113
  "loss": 0.0,
114
- "step": 160
115
- },
116
- {
117
- "epoch": 1.31,
118
- "learning_rate": 0.00014769230769230772,
119
- "loss": 0.0,
120
- "step": 170
121
  },
122
  {
123
  "epoch": 1.38,
124
  "learning_rate": 0.0001446153846153846,
125
  "loss": 0.0,
126
- "step": 180
127
- },
128
- {
129
- "epoch": 1.46,
130
- "learning_rate": 0.00014153846153846156,
131
- "loss": 0.0,
132
- "step": 190
133
  },
134
  {
135
  "epoch": 1.54,
136
  "learning_rate": 0.00013846153846153847,
137
  "loss": 0.0,
138
- "step": 200
139
  },
140
  {
141
  "epoch": 1.54,
142
- "eval_accuracy": 0.9774436090225563,
143
- "eval_loss": 0.1820073425769806,
144
- "eval_runtime": 1.9046,
145
- "eval_samples_per_second": 69.83,
146
- "eval_steps_per_second": 8.926,
147
- "step": 200
148
- },
149
- {
150
- "epoch": 1.62,
151
- "learning_rate": 0.0001353846153846154,
152
- "loss": 0.0,
153
- "step": 210
154
  },
155
  {
156
  "epoch": 1.69,
157
  "learning_rate": 0.0001323076923076923,
158
  "loss": 0.0,
159
- "step": 220
160
- },
161
- {
162
- "epoch": 1.77,
163
- "learning_rate": 0.00012923076923076923,
164
- "loss": 0.0,
165
- "step": 230
166
  },
167
  {
168
  "epoch": 1.85,
169
  "learning_rate": 0.00012615384615384615,
170
  "loss": 0.0,
171
- "step": 240
172
- },
173
- {
174
- "epoch": 1.92,
175
- "learning_rate": 0.0001230769230769231,
176
- "loss": 0.0,
177
- "step": 250
178
  },
179
  {
180
  "epoch": 2.0,
181
  "learning_rate": 0.00012,
182
  "loss": 0.0,
183
- "step": 260
184
- },
185
- {
186
- "epoch": 2.08,
187
- "learning_rate": 0.00011692307692307694,
188
- "loss": 0.0,
189
- "step": 270
190
  },
191
  {
192
  "epoch": 2.15,
193
  "learning_rate": 0.00011384615384615384,
194
  "loss": 0.0,
195
- "step": 280
196
- },
197
- {
198
- "epoch": 2.23,
199
- "learning_rate": 0.00011076923076923077,
200
- "loss": 0.0,
201
- "step": 290
202
  },
203
  {
204
  "epoch": 2.31,
205
  "learning_rate": 0.0001076923076923077,
206
  "loss": 0.0,
207
- "step": 300
208
- },
209
- {
210
- "epoch": 2.31,
211
- "eval_accuracy": 0.9774436090225563,
212
- "eval_loss": 0.1845720261335373,
213
- "eval_runtime": 1.7918,
214
- "eval_samples_per_second": 74.226,
215
- "eval_steps_per_second": 9.488,
216
- "step": 300
217
- },
218
- {
219
- "epoch": 2.38,
220
- "learning_rate": 0.00010461538461538463,
221
- "loss": 0.0,
222
- "step": 310
223
  },
224
  {
225
  "epoch": 2.46,
226
  "learning_rate": 0.00010153846153846153,
227
  "loss": 0.0,
228
- "step": 320
229
- },
230
- {
231
- "epoch": 2.54,
232
- "learning_rate": 9.846153846153848e-05,
233
- "loss": 0.0,
234
- "step": 330
235
  },
236
  {
237
  "epoch": 2.62,
238
  "learning_rate": 9.53846153846154e-05,
239
  "loss": 0.0,
240
- "step": 340
241
- },
242
- {
243
- "epoch": 2.69,
244
- "learning_rate": 9.230769230769232e-05,
245
- "loss": 0.0,
246
- "step": 350
247
  },
248
  {
249
  "epoch": 2.77,
250
  "learning_rate": 8.923076923076924e-05,
251
  "loss": 0.0,
252
- "step": 360
253
- },
254
- {
255
- "epoch": 2.85,
256
- "learning_rate": 8.615384615384617e-05,
257
- "loss": 0.0,
258
- "step": 370
259
  },
260
  {
261
  "epoch": 2.92,
262
  "learning_rate": 8.307692307692309e-05,
263
  "loss": 0.0,
264
- "step": 380
265
- },
266
- {
267
- "epoch": 3.0,
268
- "learning_rate": 8e-05,
269
- "loss": 0.0,
270
- "step": 390
271
  },
272
  {
273
  "epoch": 3.08,
274
  "learning_rate": 7.692307692307693e-05,
275
  "loss": 0.0,
276
- "step": 400
277
  },
278
  {
279
  "epoch": 3.08,
280
- "eval_accuracy": 0.9774436090225563,
281
- "eval_loss": 0.1543329805135727,
282
- "eval_runtime": 1.7915,
283
- "eval_samples_per_second": 74.24,
284
- "eval_steps_per_second": 9.489,
285
- "step": 400
286
- },
287
- {
288
- "epoch": 3.15,
289
- "learning_rate": 7.384615384615386e-05,
290
- "loss": 0.0,
291
- "step": 410
292
  },
293
  {
294
  "epoch": 3.23,
295
  "learning_rate": 7.076923076923078e-05,
296
  "loss": 0.0,
297
- "step": 420
298
- },
299
- {
300
- "epoch": 3.31,
301
- "learning_rate": 6.76923076923077e-05,
302
- "loss": 0.0,
303
- "step": 430
304
  },
305
  {
306
  "epoch": 3.38,
307
  "learning_rate": 6.461538461538462e-05,
308
  "loss": 0.0,
309
- "step": 440
310
- },
311
- {
312
- "epoch": 3.46,
313
- "learning_rate": 6.153846153846155e-05,
314
- "loss": 0.0,
315
- "step": 450
316
  },
317
  {
318
  "epoch": 3.54,
319
  "learning_rate": 5.846153846153847e-05,
320
  "loss": 0.0,
321
- "step": 460
322
- },
323
- {
324
- "epoch": 3.62,
325
- "learning_rate": 5.538461538461539e-05,
326
- "loss": 0.0,
327
- "step": 470
328
  },
329
  {
330
  "epoch": 3.69,
331
  "learning_rate": 5.230769230769231e-05,
332
  "loss": 0.0,
333
- "step": 480
334
- },
335
- {
336
- "epoch": 3.77,
337
- "learning_rate": 4.923076923076924e-05,
338
- "loss": 0.0,
339
- "step": 490
340
  },
341
  {
342
  "epoch": 3.85,
343
  "learning_rate": 4.615384615384616e-05,
344
  "loss": 0.0,
345
- "step": 500
346
- },
347
- {
348
- "epoch": 3.85,
349
- "eval_accuracy": 0.9849624060150376,
350
- "eval_loss": 0.13960087299346924,
351
- "eval_runtime": 1.797,
352
- "eval_samples_per_second": 74.011,
353
- "eval_steps_per_second": 9.46,
354
- "step": 500
355
- },
356
- {
357
- "epoch": 3.92,
358
- "learning_rate": 4.3076923076923084e-05,
359
- "loss": 0.0,
360
- "step": 510
361
  },
362
  {
363
  "epoch": 4.0,
364
  "learning_rate": 4e-05,
365
  "loss": 0.0,
366
- "step": 520
367
- },
368
- {
369
- "epoch": 4.08,
370
- "learning_rate": 3.692307692307693e-05,
371
- "loss": 0.0,
372
- "step": 530
373
  },
374
  {
375
  "epoch": 4.15,
376
  "learning_rate": 3.384615384615385e-05,
377
  "loss": 0.0,
378
- "step": 540
379
- },
380
- {
381
- "epoch": 4.23,
382
- "learning_rate": 3.0769230769230774e-05,
383
- "loss": 0.0,
384
- "step": 550
385
  },
386
  {
387
  "epoch": 4.31,
388
  "learning_rate": 2.7692307692307694e-05,
389
  "loss": 0.0,
390
- "step": 560
391
- },
392
- {
393
- "epoch": 4.38,
394
- "learning_rate": 2.461538461538462e-05,
395
- "loss": 0.0,
396
- "step": 570
397
  },
398
  {
399
  "epoch": 4.46,
400
  "learning_rate": 2.1538461538461542e-05,
401
  "loss": 0.0,
402
- "step": 580
403
- },
404
- {
405
- "epoch": 4.54,
406
- "learning_rate": 1.8461538461538465e-05,
407
- "loss": 0.0,
408
- "step": 590
409
  },
410
  {
411
  "epoch": 4.62,
412
  "learning_rate": 1.5384615384615387e-05,
413
  "loss": 0.0,
414
- "step": 600
415
  },
416
  {
417
  "epoch": 4.62,
418
  "eval_accuracy": 0.9849624060150376,
419
- "eval_loss": 0.14361034333705902,
420
- "eval_runtime": 1.8005,
421
- "eval_samples_per_second": 73.867,
422
- "eval_steps_per_second": 9.442,
423
- "step": 600
424
- },
425
- {
426
- "epoch": 4.69,
427
- "learning_rate": 1.230769230769231e-05,
428
- "loss": 0.0,
429
- "step": 610
430
  },
431
  {
432
  "epoch": 4.77,
433
  "learning_rate": 9.230769230769232e-06,
434
  "loss": 0.0,
435
- "step": 620
436
- },
437
- {
438
- "epoch": 4.85,
439
- "learning_rate": 6.153846153846155e-06,
440
- "loss": 0.0,
441
- "step": 630
442
  },
443
  {
444
  "epoch": 4.92,
445
  "learning_rate": 3.0769230769230774e-06,
446
  "loss": 0.0,
447
- "step": 640
448
- },
449
- {
450
- "epoch": 5.0,
451
- "learning_rate": 0.0,
452
- "loss": 0.0,
453
- "step": 650
454
  },
455
  {
456
  "epoch": 5.0,
457
- "step": 650,
458
  "total_flos": 4.006371770595533e+17,
459
- "train_loss": 2.7056336402893067e-05,
460
- "train_runtime": 203.0893,
461
- "train_samples_per_second": 25.457,
462
- "train_steps_per_second": 3.201
463
  }
464
  ],
465
  "logging_steps": 10,
466
- "max_steps": 650,
467
  "num_train_epochs": 5,
468
  "save_steps": 100,
469
  "total_flos": 4.006371770595533e+17,
 
1
  {
2
+ "best_metric": 0.16576917469501495,
3
  "best_model_checkpoint": "./vit-base-beans-tutorial/checkpoint-100",
4
  "epoch": 5.0,
5
  "eval_steps": 100,
6
+ "global_step": 325,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
11
  {
12
  "epoch": 0.15,
13
  "learning_rate": 0.00019384615384615385,
14
+ "loss": 0.0,
15
+ "step": 10
 
 
 
 
 
 
16
  },
17
  {
18
  "epoch": 0.31,
19
  "learning_rate": 0.0001876923076923077,
20
+ "loss": 0.0,
21
+ "step": 20
 
 
 
 
 
 
22
  },
23
  {
24
  "epoch": 0.46,
25
  "learning_rate": 0.00018153846153846155,
26
+ "loss": 0.0,
27
+ "step": 30
 
 
 
 
 
 
28
  },
29
  {
30
  "epoch": 0.62,
31
  "learning_rate": 0.0001753846153846154,
32
  "loss": 0.0,
33
+ "step": 40
 
 
 
 
 
 
34
  },
35
  {
36
  "epoch": 0.77,
37
  "learning_rate": 0.00016923076923076923,
38
  "loss": 0.0,
39
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  },
41
  {
42
  "epoch": 0.92,
43
  "learning_rate": 0.0001630769230769231,
44
  "loss": 0.0,
45
+ "step": 60
 
 
 
 
 
 
46
  },
47
  {
48
  "epoch": 1.08,
49
  "learning_rate": 0.00015692307692307693,
50
  "loss": 0.0,
51
+ "step": 70
 
 
 
 
 
 
52
  },
53
  {
54
  "epoch": 1.23,
55
  "learning_rate": 0.00015076923076923077,
56
  "loss": 0.0,
57
+ "step": 80
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 1.38,
61
  "learning_rate": 0.0001446153846153846,
62
  "loss": 0.0,
63
+ "step": 90
 
 
 
 
 
 
64
  },
65
  {
66
  "epoch": 1.54,
67
  "learning_rate": 0.00013846153846153847,
68
  "loss": 0.0,
69
+ "step": 100
70
  },
71
  {
72
  "epoch": 1.54,
73
+ "eval_accuracy": 0.9849624060150376,
74
+ "eval_loss": 0.16576917469501495,
75
+ "eval_runtime": 2.163,
76
+ "eval_samples_per_second": 61.49,
77
+ "eval_steps_per_second": 7.86,
78
+ "step": 100
 
 
 
 
 
 
79
  },
80
  {
81
  "epoch": 1.69,
82
  "learning_rate": 0.0001323076923076923,
83
  "loss": 0.0,
84
+ "step": 110
 
 
 
 
 
 
85
  },
86
  {
87
  "epoch": 1.85,
88
  "learning_rate": 0.00012615384615384615,
89
  "loss": 0.0,
90
+ "step": 120
 
 
 
 
 
 
91
  },
92
  {
93
  "epoch": 2.0,
94
  "learning_rate": 0.00012,
95
  "loss": 0.0,
96
+ "step": 130
 
 
 
 
 
 
97
  },
98
  {
99
  "epoch": 2.15,
100
  "learning_rate": 0.00011384615384615384,
101
  "loss": 0.0,
102
+ "step": 140
 
 
 
 
 
 
103
  },
104
  {
105
  "epoch": 2.31,
106
  "learning_rate": 0.0001076923076923077,
107
  "loss": 0.0,
108
+ "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  {
111
  "epoch": 2.46,
112
  "learning_rate": 0.00010153846153846153,
113
  "loss": 0.0,
114
+ "step": 160
 
 
 
 
 
 
115
  },
116
  {
117
  "epoch": 2.62,
118
  "learning_rate": 9.53846153846154e-05,
119
  "loss": 0.0,
120
+ "step": 170
 
 
 
 
 
 
121
  },
122
  {
123
  "epoch": 2.77,
124
  "learning_rate": 8.923076923076924e-05,
125
  "loss": 0.0,
126
+ "step": 180
 
 
 
 
 
 
127
  },
128
  {
129
  "epoch": 2.92,
130
  "learning_rate": 8.307692307692309e-05,
131
  "loss": 0.0,
132
+ "step": 190
 
 
 
 
 
 
133
  },
134
  {
135
  "epoch": 3.08,
136
  "learning_rate": 7.692307692307693e-05,
137
  "loss": 0.0,
138
+ "step": 200
139
  },
140
  {
141
  "epoch": 3.08,
142
+ "eval_accuracy": 0.9849624060150376,
143
+ "eval_loss": 0.17416749894618988,
144
+ "eval_runtime": 1.7693,
145
+ "eval_samples_per_second": 75.171,
146
+ "eval_steps_per_second": 9.608,
147
+ "step": 200
 
 
 
 
 
 
148
  },
149
  {
150
  "epoch": 3.23,
151
  "learning_rate": 7.076923076923078e-05,
152
  "loss": 0.0,
153
+ "step": 210
 
 
 
 
 
 
154
  },
155
  {
156
  "epoch": 3.38,
157
  "learning_rate": 6.461538461538462e-05,
158
  "loss": 0.0,
159
+ "step": 220
 
 
 
 
 
 
160
  },
161
  {
162
  "epoch": 3.54,
163
  "learning_rate": 5.846153846153847e-05,
164
  "loss": 0.0,
165
+ "step": 230
 
 
 
 
 
 
166
  },
167
  {
168
  "epoch": 3.69,
169
  "learning_rate": 5.230769230769231e-05,
170
  "loss": 0.0,
171
+ "step": 240
 
 
 
 
 
 
172
  },
173
  {
174
  "epoch": 3.85,
175
  "learning_rate": 4.615384615384616e-05,
176
  "loss": 0.0,
177
+ "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  },
179
  {
180
  "epoch": 4.0,
181
  "learning_rate": 4e-05,
182
  "loss": 0.0,
183
+ "step": 260
 
 
 
 
 
 
184
  },
185
  {
186
  "epoch": 4.15,
187
  "learning_rate": 3.384615384615385e-05,
188
  "loss": 0.0,
189
+ "step": 270
 
 
 
 
 
 
190
  },
191
  {
192
  "epoch": 4.31,
193
  "learning_rate": 2.7692307692307694e-05,
194
  "loss": 0.0,
195
+ "step": 280
 
 
 
 
 
 
196
  },
197
  {
198
  "epoch": 4.46,
199
  "learning_rate": 2.1538461538461542e-05,
200
  "loss": 0.0,
201
+ "step": 290
 
 
 
 
 
 
202
  },
203
  {
204
  "epoch": 4.62,
205
  "learning_rate": 1.5384615384615387e-05,
206
  "loss": 0.0,
207
+ "step": 300
208
  },
209
  {
210
  "epoch": 4.62,
211
  "eval_accuracy": 0.9849624060150376,
212
+ "eval_loss": 0.17769162356853485,
213
+ "eval_runtime": 1.7753,
214
+ "eval_samples_per_second": 74.918,
215
+ "eval_steps_per_second": 9.576,
216
+ "step": 300
 
 
 
 
 
 
217
  },
218
  {
219
  "epoch": 4.77,
220
  "learning_rate": 9.230769230769232e-06,
221
  "loss": 0.0,
222
+ "step": 310
 
 
 
 
 
 
223
  },
224
  {
225
  "epoch": 4.92,
226
  "learning_rate": 3.0769230769230774e-06,
227
  "loss": 0.0,
228
+ "step": 320
 
 
 
 
 
 
229
  },
230
  {
231
  "epoch": 5.0,
232
+ "step": 325,
233
  "total_flos": 4.006371770595533e+17,
234
+ "train_loss": 3.2931016008100974e-06,
235
+ "train_runtime": 143.2513,
236
+ "train_samples_per_second": 36.09,
237
+ "train_steps_per_second": 2.269
238
  }
239
  ],
240
  "logging_steps": 10,
241
+ "max_steps": 325,
242
  "num_train_epochs": 5,
243
  "save_steps": 100,
244
  "total_flos": 4.006371770595533e+17,