bansilp committed on
Commit
13f3ca0
1 Parent(s): ad0b9c8

Model save

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.9092592592592592
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5359
36
- - Accuracy: 0.9093
37
 
38
  ## Model description
39
 
@@ -52,7 +52,7 @@ More information needed
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
- - learning_rate: 0.0002
56
  - train_batch_size: 48
57
  - eval_batch_size: 8
58
  - seed: 42
@@ -65,10 +65,10 @@ The following hyperparameters were used during training:
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
- | 0.0273 | 1.11 | 100 | 0.7637 | 0.8741 |
69
- | 0.0314 | 2.22 | 200 | 0.6027 | 0.8824 |
70
- | 0.0019 | 3.33 | 300 | 0.5983 | 0.9037 |
71
- | 0.0019 | 4.44 | 400 | 0.5359 | 0.9093 |
72
 
73
 
74
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9009259259259259
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5926
36
+ - Accuracy: 0.9009
37
 
38
  ## Model description
39
 
 
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
+ - learning_rate: 0.0003
56
  - train_batch_size: 48
57
  - eval_batch_size: 8
58
  - seed: 42
 
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | 0.128 | 1.11 | 100 | 0.7718 | 0.8685 |
69
+ | 0.0236 | 2.22 | 200 | 0.6526 | 0.8852 |
70
+ | 0.004 | 3.33 | 300 | 0.5478 | 0.9009 |
71
+ | 0.0024 | 4.44 | 400 | 0.5926 | 0.9009 |
72
 
73
 
74
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9101851851851852,
4
- "eval_loss": 0.5122644305229187,
5
- "eval_runtime": 14.2588,
6
- "eval_samples_per_second": 75.743,
7
- "eval_steps_per_second": 9.468,
8
- "total_flos": 3.347863979094835e+18,
9
- "train_loss": 0.017901433989819554,
10
- "train_runtime": 786.561,
11
- "train_samples_per_second": 54.923,
12
- "train_steps_per_second": 1.144
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9092592592592592,
4
+ "eval_loss": 0.5364564061164856,
5
+ "eval_runtime": 14.0587,
6
+ "eval_samples_per_second": 76.821,
7
+ "eval_steps_per_second": 9.603,
8
+ "total_flos": 1.6739319895474176e+18,
9
+ "train_loss": 0.02156418908904824,
10
+ "train_runtime": 434.3833,
11
+ "train_samples_per_second": 49.726,
12
+ "train_steps_per_second": 1.036
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9101851851851852,
4
- "eval_loss": 0.5122644305229187,
5
- "eval_runtime": 14.2588,
6
- "eval_samples_per_second": 75.743,
7
- "eval_steps_per_second": 9.468
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9092592592592592,
4
+ "eval_loss": 0.5364564061164856,
5
+ "eval_runtime": 14.0587,
6
+ "eval_samples_per_second": 76.821,
7
+ "eval_steps_per_second": 9.603
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8bf9f4739b7ef3476528dcf336f2b268bba1671e46fa9a4877a1ca8d71b8c6d
3
  size 343245508
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:195c22084a8777f5cc7b698a55f748752ec59dfc255766ae2fbf09e121c968ee
3
  size 343245508
runs/Dec15_02-29-03_d2a76fcee09b/events.out.tfevents.1702607830.d2a76fcee09b.2614.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9070a007c35af5e503973c97c7485e886ab8530c8580b33e0f324622f4f4c95c
3
+ size 411
runs/Dec15_02-37-22_d2a76fcee09b/events.out.tfevents.1702607847.d2a76fcee09b.2614.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dbbcad38ec4549bf2e46a47945b4a4bee4f0ec8c542c362b70039596412f7ce
3
+ size 13189
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 3.347863979094835e+18,
4
- "train_loss": 0.017901433989819554,
5
- "train_runtime": 786.561,
6
- "train_samples_per_second": 54.923,
7
- "train_steps_per_second": 1.144
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.6739319895474176e+18,
4
+ "train_loss": 0.02156418908904824,
5
+ "train_runtime": 434.3833,
6
+ "train_samples_per_second": 49.726,
7
+ "train_steps_per_second": 1.036
8
  }
trainer_state.json CHANGED
@@ -1,577 +1,334 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "eval_steps": 500,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
- "learning_rate": 0.00019777777777777778,
14
- "loss": 0.0793,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
- "learning_rate": 0.00019555555555555556,
20
- "loss": 0.104,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
- "learning_rate": 0.00019333333333333333,
26
- "loss": 0.065,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.44,
31
- "learning_rate": 0.00019111111111111114,
32
- "loss": 0.0836,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.56,
37
- "learning_rate": 0.00018888888888888888,
38
- "loss": 0.1303,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.67,
43
- "learning_rate": 0.0001866666666666667,
44
- "loss": 0.0363,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.78,
49
- "learning_rate": 0.00018444444444444446,
50
- "loss": 0.0567,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.89,
55
- "learning_rate": 0.00018222222222222224,
56
- "loss": 0.059,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 1.0,
61
- "learning_rate": 0.00018,
62
- "loss": 0.1205,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.11,
67
- "learning_rate": 0.00017777777777777779,
68
- "loss": 0.0774,
 
 
 
 
 
 
 
 
 
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.22,
73
- "learning_rate": 0.00017555555555555556,
74
- "loss": 0.0807,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 1.33,
79
- "learning_rate": 0.00017333333333333334,
80
- "loss": 0.0377,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 1.44,
85
- "learning_rate": 0.0001711111111111111,
86
- "loss": 0.0621,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 1.56,
91
- "learning_rate": 0.00016888888888888889,
92
- "loss": 0.0076,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 1.67,
97
- "learning_rate": 0.0001666666666666667,
98
- "loss": 0.0098,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 1.78,
103
- "learning_rate": 0.00016444444444444444,
104
- "loss": 0.0595,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 1.89,
109
- "learning_rate": 0.00016222222222222224,
110
- "loss": 0.0179,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 2.0,
115
- "learning_rate": 0.00016,
116
- "loss": 0.0269,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 2.11,
121
- "learning_rate": 0.0001577777777777778,
122
- "loss": 0.0074,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 2.22,
127
- "learning_rate": 0.00015555555555555556,
128
- "loss": 0.0246,
 
 
 
 
 
 
 
 
 
129
  "step": 200
130
  },
131
  {
132
  "epoch": 2.33,
133
- "learning_rate": 0.00015333333333333334,
134
- "loss": 0.0481,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 2.44,
139
- "learning_rate": 0.0001511111111111111,
140
- "loss": 0.0281,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 2.56,
145
- "learning_rate": 0.0001488888888888889,
146
- "loss": 0.0037,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 2.67,
151
- "learning_rate": 0.00014666666666666666,
152
- "loss": 0.0099,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 2.78,
157
- "learning_rate": 0.00014444444444444444,
158
- "loss": 0.0202,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 2.89,
163
- "learning_rate": 0.00014222222222222224,
164
- "loss": 0.0082,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 3.0,
169
- "learning_rate": 0.00014,
170
- "loss": 0.0221,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 3.11,
175
- "learning_rate": 0.0001377777777777778,
176
- "loss": 0.008,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 3.22,
181
- "learning_rate": 0.00013555555555555556,
182
- "loss": 0.0314,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 3.33,
187
- "learning_rate": 0.00013333333333333334,
188
- "loss": 0.0227,
 
 
 
 
 
 
 
 
 
189
  "step": 300
190
  },
191
  {
192
  "epoch": 3.44,
193
- "learning_rate": 0.00013111111111111111,
194
- "loss": 0.0465,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 3.56,
199
- "learning_rate": 0.00012888888888888892,
200
- "loss": 0.026,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 3.67,
205
- "learning_rate": 0.00012666666666666666,
206
- "loss": 0.0058,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 3.78,
211
- "learning_rate": 0.00012444444444444444,
212
- "loss": 0.0071,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 3.89,
217
- "learning_rate": 0.00012222222222222224,
218
- "loss": 0.0256,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 4.0,
223
- "learning_rate": 0.00012,
224
- "loss": 0.0099,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 4.11,
229
- "learning_rate": 0.00011777777777777779,
230
- "loss": 0.0152,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 4.22,
235
- "learning_rate": 0.00011555555555555555,
236
- "loss": 0.0057,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 4.33,
241
- "learning_rate": 0.00011333333333333334,
242
- "loss": 0.0009,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 4.44,
247
- "learning_rate": 0.00011111111111111112,
248
- "loss": 0.0122,
 
 
 
 
 
 
 
 
 
249
  "step": 400
250
  },
251
  {
252
  "epoch": 4.56,
253
- "learning_rate": 0.00010888888888888889,
254
- "loss": 0.0014,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 4.67,
259
- "learning_rate": 0.00010666666666666667,
260
- "loss": 0.0035,
261
  "step": 420
262
  },
263
  {
264
  "epoch": 4.78,
265
- "learning_rate": 0.00010444444444444445,
266
- "loss": 0.0054,
267
  "step": 430
268
  },
269
  {
270
  "epoch": 4.89,
271
- "learning_rate": 0.00010222222222222222,
272
- "loss": 0.0036,
273
  "step": 440
274
  },
275
  {
276
  "epoch": 5.0,
277
- "learning_rate": 0.0001,
278
- "loss": 0.0009,
279
- "step": 450
280
- },
281
- {
282
- "epoch": 5.11,
283
- "learning_rate": 9.777777777777778e-05,
284
- "loss": 0.0012,
285
- "step": 460
286
- },
287
- {
288
- "epoch": 5.22,
289
- "learning_rate": 9.555555555555557e-05,
290
- "loss": 0.0006,
291
- "step": 470
292
- },
293
- {
294
- "epoch": 5.33,
295
- "learning_rate": 9.333333333333334e-05,
296
- "loss": 0.0041,
297
- "step": 480
298
- },
299
- {
300
- "epoch": 5.44,
301
- "learning_rate": 9.111111111111112e-05,
302
- "loss": 0.0019,
303
- "step": 490
304
- },
305
- {
306
- "epoch": 5.56,
307
- "learning_rate": 8.888888888888889e-05,
308
- "loss": 0.0017,
309
- "step": 500
310
- },
311
- {
312
- "epoch": 5.56,
313
- "eval_accuracy": 0.9101851851851852,
314
- "eval_loss": 0.5352890491485596,
315
- "eval_runtime": 12.7384,
316
- "eval_samples_per_second": 84.783,
317
- "eval_steps_per_second": 10.598,
318
- "step": 500
319
- },
320
- {
321
- "epoch": 5.67,
322
- "learning_rate": 8.666666666666667e-05,
323
- "loss": 0.0091,
324
- "step": 510
325
- },
326
- {
327
- "epoch": 5.78,
328
- "learning_rate": 8.444444444444444e-05,
329
- "loss": 0.0015,
330
- "step": 520
331
- },
332
- {
333
- "epoch": 5.89,
334
- "learning_rate": 8.222222222222222e-05,
335
- "loss": 0.0059,
336
- "step": 530
337
- },
338
- {
339
- "epoch": 6.0,
340
- "learning_rate": 8e-05,
341
- "loss": 0.0028,
342
- "step": 540
343
- },
344
- {
345
- "epoch": 6.11,
346
- "learning_rate": 7.777777777777778e-05,
347
- "loss": 0.0004,
348
- "step": 550
349
- },
350
- {
351
- "epoch": 6.22,
352
- "learning_rate": 7.555555555555556e-05,
353
- "loss": 0.0024,
354
- "step": 560
355
- },
356
- {
357
- "epoch": 6.33,
358
- "learning_rate": 7.333333333333333e-05,
359
- "loss": 0.0004,
360
- "step": 570
361
- },
362
- {
363
- "epoch": 6.44,
364
- "learning_rate": 7.111111111111112e-05,
365
- "loss": 0.0024,
366
- "step": 580
367
- },
368
- {
369
- "epoch": 6.56,
370
- "learning_rate": 6.88888888888889e-05,
371
- "loss": 0.0004,
372
- "step": 590
373
- },
374
- {
375
- "epoch": 6.67,
376
- "learning_rate": 6.666666666666667e-05,
377
- "loss": 0.0019,
378
- "step": 600
379
- },
380
- {
381
- "epoch": 6.78,
382
- "learning_rate": 6.444444444444446e-05,
383
- "loss": 0.0005,
384
- "step": 610
385
- },
386
- {
387
- "epoch": 6.89,
388
- "learning_rate": 6.222222222222222e-05,
389
- "loss": 0.004,
390
- "step": 620
391
- },
392
- {
393
- "epoch": 7.0,
394
- "learning_rate": 6e-05,
395
- "loss": 0.0047,
396
- "step": 630
397
- },
398
- {
399
- "epoch": 7.11,
400
- "learning_rate": 5.7777777777777776e-05,
401
- "loss": 0.0004,
402
- "step": 640
403
- },
404
- {
405
- "epoch": 7.22,
406
- "learning_rate": 5.555555555555556e-05,
407
- "loss": 0.003,
408
- "step": 650
409
- },
410
- {
411
- "epoch": 7.33,
412
- "learning_rate": 5.333333333333333e-05,
413
- "loss": 0.0027,
414
- "step": 660
415
- },
416
- {
417
- "epoch": 7.44,
418
- "learning_rate": 5.111111111111111e-05,
419
- "loss": 0.0022,
420
- "step": 670
421
- },
422
- {
423
- "epoch": 7.56,
424
- "learning_rate": 4.888888888888889e-05,
425
- "loss": 0.0004,
426
- "step": 680
427
- },
428
- {
429
- "epoch": 7.67,
430
- "learning_rate": 4.666666666666667e-05,
431
- "loss": 0.0031,
432
- "step": 690
433
- },
434
- {
435
- "epoch": 7.78,
436
- "learning_rate": 4.4444444444444447e-05,
437
- "loss": 0.002,
438
- "step": 700
439
- },
440
- {
441
- "epoch": 7.89,
442
- "learning_rate": 4.222222222222222e-05,
443
- "loss": 0.0004,
444
- "step": 710
445
- },
446
- {
447
- "epoch": 8.0,
448
- "learning_rate": 4e-05,
449
- "loss": 0.0021,
450
- "step": 720
451
- },
452
- {
453
- "epoch": 8.11,
454
- "learning_rate": 3.777777777777778e-05,
455
- "loss": 0.0019,
456
- "step": 730
457
- },
458
- {
459
- "epoch": 8.22,
460
- "learning_rate": 3.555555555555556e-05,
461
- "loss": 0.0033,
462
- "step": 740
463
- },
464
- {
465
- "epoch": 8.33,
466
- "learning_rate": 3.3333333333333335e-05,
467
- "loss": 0.0022,
468
- "step": 750
469
- },
470
- {
471
- "epoch": 8.44,
472
- "learning_rate": 3.111111111111111e-05,
473
- "loss": 0.0018,
474
- "step": 760
475
- },
476
- {
477
- "epoch": 8.56,
478
- "learning_rate": 2.8888888888888888e-05,
479
- "loss": 0.0017,
480
- "step": 770
481
- },
482
- {
483
- "epoch": 8.67,
484
- "learning_rate": 2.6666666666666667e-05,
485
- "loss": 0.002,
486
- "step": 780
487
- },
488
- {
489
- "epoch": 8.78,
490
- "learning_rate": 2.4444444444444445e-05,
491
- "loss": 0.0004,
492
- "step": 790
493
- },
494
- {
495
- "epoch": 8.89,
496
- "learning_rate": 2.2222222222222223e-05,
497
- "loss": 0.0003,
498
- "step": 800
499
- },
500
- {
501
- "epoch": 9.0,
502
- "learning_rate": 2e-05,
503
- "loss": 0.0022,
504
- "step": 810
505
- },
506
- {
507
- "epoch": 9.11,
508
- "learning_rate": 1.777777777777778e-05,
509
- "loss": 0.0017,
510
- "step": 820
511
- },
512
- {
513
- "epoch": 9.22,
514
- "learning_rate": 1.5555555555555555e-05,
515
- "loss": 0.0018,
516
- "step": 830
517
- },
518
- {
519
- "epoch": 9.33,
520
- "learning_rate": 1.3333333333333333e-05,
521
- "loss": 0.0032,
522
- "step": 840
523
- },
524
- {
525
- "epoch": 9.44,
526
- "learning_rate": 1.1111111111111112e-05,
527
- "loss": 0.0032,
528
- "step": 850
529
- },
530
- {
531
- "epoch": 9.56,
532
- "learning_rate": 8.88888888888889e-06,
533
- "loss": 0.0003,
534
- "step": 860
535
- },
536
- {
537
- "epoch": 9.67,
538
- "learning_rate": 6.666666666666667e-06,
539
- "loss": 0.0021,
540
- "step": 870
541
- },
542
- {
543
- "epoch": 9.78,
544
- "learning_rate": 4.444444444444445e-06,
545
- "loss": 0.0003,
546
- "step": 880
547
- },
548
- {
549
- "epoch": 9.89,
550
- "learning_rate": 2.2222222222222225e-06,
551
- "loss": 0.0003,
552
- "step": 890
553
- },
554
- {
555
- "epoch": 10.0,
556
  "learning_rate": 0.0,
557
- "loss": 0.002,
558
- "step": 900
559
  },
560
  {
561
- "epoch": 10.0,
562
- "step": 900,
563
- "total_flos": 3.347863979094835e+18,
564
- "train_loss": 0.017901433989819554,
565
- "train_runtime": 786.561,
566
- "train_samples_per_second": 54.923,
567
- "train_steps_per_second": 1.144
568
  }
569
  ],
570
  "logging_steps": 10,
571
- "max_steps": 900,
572
- "num_train_epochs": 10,
573
  "save_steps": 1000,
574
- "total_flos": 3.347863979094835e+18,
575
  "trial_name": null,
576
  "trial_params": null
577
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 100,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.11,
13
+ "learning_rate": 0.00019555555555555556,
14
+ "loss": 0.0015,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.22,
19
+ "learning_rate": 0.00019111111111111114,
20
+ "loss": 0.0402,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.33,
25
+ "learning_rate": 0.0001866666666666667,
26
+ "loss": 0.0814,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.44,
31
+ "learning_rate": 0.00018222222222222224,
32
+ "loss": 0.0893,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.56,
37
+ "learning_rate": 0.00017777777777777779,
38
+ "loss": 0.1203,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.67,
43
+ "learning_rate": 0.00017333333333333334,
44
+ "loss": 0.0505,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.78,
49
+ "learning_rate": 0.00016888888888888889,
50
+ "loss": 0.0448,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.89,
55
+ "learning_rate": 0.00016444444444444444,
56
+ "loss": 0.073,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 1.0,
61
+ "learning_rate": 0.00016,
62
+ "loss": 0.0954,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 1.11,
67
+ "learning_rate": 0.00015555555555555556,
68
+ "loss": 0.0273,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 1.11,
73
+ "eval_accuracy": 0.8740740740740741,
74
+ "eval_loss": 0.7636905312538147,
75
+ "eval_runtime": 12.8677,
76
+ "eval_samples_per_second": 83.931,
77
+ "eval_steps_per_second": 10.491,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 1.22,
82
+ "learning_rate": 0.0001511111111111111,
83
+ "loss": 0.0246,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 1.33,
88
+ "learning_rate": 0.00014666666666666666,
89
+ "loss": 0.0223,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 1.44,
94
+ "learning_rate": 0.00014222222222222224,
95
+ "loss": 0.0129,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 1.56,
100
+ "learning_rate": 0.0001377777777777778,
101
+ "loss": 0.0401,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 1.67,
106
+ "learning_rate": 0.00013333333333333334,
107
+ "loss": 0.0285,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 1.78,
112
+ "learning_rate": 0.00012888888888888892,
113
+ "loss": 0.0261,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 1.89,
118
+ "learning_rate": 0.00012444444444444444,
119
+ "loss": 0.0456,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 2.0,
124
+ "learning_rate": 0.00012,
125
+ "loss": 0.0198,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 2.11,
130
+ "learning_rate": 0.00011555555555555555,
131
+ "loss": 0.0051,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 2.22,
136
+ "learning_rate": 0.00011111111111111112,
137
+ "loss": 0.0314,
138
+ "step": 200
139
+ },
140
+ {
141
+ "epoch": 2.22,
142
+ "eval_accuracy": 0.8824074074074074,
143
+ "eval_loss": 0.6027200818061829,
144
+ "eval_runtime": 13.4571,
145
+ "eval_samples_per_second": 80.255,
146
+ "eval_steps_per_second": 10.032,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 2.33,
151
+ "learning_rate": 0.00010666666666666667,
152
+ "loss": 0.0061,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 2.44,
157
+ "learning_rate": 0.00010222222222222222,
158
+ "loss": 0.0103,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 2.56,
163
+ "learning_rate": 9.777777777777778e-05,
164
+ "loss": 0.0119,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 2.67,
169
+ "learning_rate": 9.333333333333334e-05,
170
+ "loss": 0.0121,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 2.78,
175
+ "learning_rate": 8.888888888888889e-05,
176
+ "loss": 0.0109,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 2.89,
181
+ "learning_rate": 8.444444444444444e-05,
182
+ "loss": 0.0017,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 3.0,
187
+ "learning_rate": 8e-05,
188
+ "loss": 0.0043,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 3.11,
193
+ "learning_rate": 7.555555555555556e-05,
194
+ "loss": 0.0005,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 3.22,
199
+ "learning_rate": 7.111111111111112e-05,
200
+ "loss": 0.0004,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 3.33,
205
+ "learning_rate": 6.666666666666667e-05,
206
+ "loss": 0.0019,
207
+ "step": 300
208
+ },
209
+ {
210
+ "epoch": 3.33,
211
+ "eval_accuracy": 0.9037037037037037,
212
+ "eval_loss": 0.5983412861824036,
213
+ "eval_runtime": 12.7475,
214
+ "eval_samples_per_second": 84.723,
215
+ "eval_steps_per_second": 10.59,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 3.44,
220
+ "learning_rate": 6.222222222222222e-05,
221
+ "loss": 0.0037,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 3.56,
226
+ "learning_rate": 5.7777777777777776e-05,
227
+ "loss": 0.0025,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 3.67,
232
+ "learning_rate": 5.333333333333333e-05,
233
+ "loss": 0.0026,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 3.78,
238
+ "learning_rate": 4.888888888888889e-05,
239
+ "loss": 0.0006,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 3.89,
244
+ "learning_rate": 4.4444444444444447e-05,
245
+ "loss": 0.0053,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 4.0,
250
+ "learning_rate": 4e-05,
251
+ "loss": 0.0006,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 4.11,
256
+ "learning_rate": 3.555555555555556e-05,
257
+ "loss": 0.0003,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 4.22,
262
+ "learning_rate": 3.111111111111111e-05,
263
+ "loss": 0.0019,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 4.33,
268
+ "learning_rate": 2.6666666666666667e-05,
269
+ "loss": 0.0004,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 4.44,
274
+ "learning_rate": 2.2222222222222223e-05,
275
+ "loss": 0.0019,
276
+ "step": 400
277
+ },
278
+ {
279
+ "epoch": 4.44,
280
+ "eval_accuracy": 0.9092592592592592,
281
+ "eval_loss": 0.5358805060386658,
282
+ "eval_runtime": 12.6258,
283
+ "eval_samples_per_second": 85.539,
284
+ "eval_steps_per_second": 10.692,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 4.56,
289
+ "learning_rate": 1.777777777777778e-05,
290
+ "loss": 0.0003,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 4.67,
295
+ "learning_rate": 1.3333333333333333e-05,
296
+ "loss": 0.002,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 4.78,
301
+ "learning_rate": 8.88888888888889e-06,
302
+ "loss": 0.0046,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 4.89,
307
+ "learning_rate": 4.444444444444445e-06,
308
+ "loss": 0.0032,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 5.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  "learning_rate": 0.0,
314
+ "loss": 0.0003,
315
+ "step": 450
316
  },
317
  {
318
+ "epoch": 5.0,
319
+ "step": 450,
320
+ "total_flos": 1.6739319895474176e+18,
321
+ "train_loss": 0.02156418908904824,
322
+ "train_runtime": 434.3833,
323
+ "train_samples_per_second": 49.726,
324
+ "train_steps_per_second": 1.036
325
  }
326
  ],
327
  "logging_steps": 10,
328
+ "max_steps": 450,
329
+ "num_train_epochs": 5,
330
  "save_steps": 1000,
331
+ "total_flos": 1.6739319895474176e+18,
332
  "trial_name": null,
333
  "trial_params": null
334
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd1730b66d0a10c6ae60145a64d1a6d9117e02a26a443fa7777efa08ddb33be4
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff0144e19c345d1cafd5108586fa9401bae1c146468e7f549fa127ec77535ac0
3
  size 4536