JorgeGIT commited on
Commit
dc24622
1 Parent(s): b8c1436

Model save

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.981203007518797
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0595
36
- - Accuracy: 0.9812
37
 
38
  ## Model description
39
 
@@ -58,19 +58,30 @@ The following hyperparameters were used during training:
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
60
  - lr_scheduler_type: linear
61
- - num_epochs: 20
62
  - mixed_precision_training: Native AMP
63
 
64
  ### Training results
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
- | 0.2926 | 2.94 | 100 | 0.1524 | 0.9436 |
69
- | 0.1844 | 5.88 | 200 | 0.1718 | 0.9436 |
70
- | 0.1189 | 8.82 | 300 | 0.0886 | 0.9662 |
71
- | 0.0682 | 11.76 | 400 | 0.0978 | 0.9699 |
72
- | 0.0439 | 14.71 | 500 | 0.0812 | 0.9737 |
73
- | 0.0544 | 17.65 | 600 | 0.0595 | 0.9812 |
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9887218045112782
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.0732
36
+ - Accuracy: 0.9887
37
 
38
  ## Model description
39
 
 
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
60
  - lr_scheduler_type: linear
61
+ - num_epochs: 50
62
  - mixed_precision_training: Native AMP
63
 
64
  ### Training results
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | 0.2908 | 2.94 | 100 | 0.1524 | 0.9511 |
69
+ | 0.1806 | 5.88 | 200 | 0.1269 | 0.9586 |
70
+ | 0.1135 | 8.82 | 300 | 0.0720 | 0.9774 |
71
+ | 0.1061 | 11.76 | 400 | 0.1519 | 0.9624 |
72
+ | 0.0816 | 14.71 | 500 | 0.1845 | 0.9398 |
73
+ | 0.0815 | 17.65 | 600 | 0.0966 | 0.9737 |
74
+ | 0.0741 | 20.59 | 700 | 0.1029 | 0.9812 |
75
+ | 0.0423 | 23.53 | 800 | 0.1519 | 0.9699 |
76
+ | 0.0468 | 26.47 | 900 | 0.0757 | 0.9850 |
77
+ | 0.0249 | 29.41 | 1000 | 0.0859 | 0.9850 |
78
+ | 0.0443 | 32.35 | 1100 | 0.0878 | 0.9774 |
79
+ | 0.0291 | 35.29 | 1200 | 0.0487 | 0.9887 |
80
+ | 0.0263 | 38.24 | 1300 | 0.0643 | 0.9887 |
81
+ | 0.0239 | 41.18 | 1400 | 0.1042 | 0.9774 |
82
+ | 0.0331 | 44.12 | 1500 | 0.0679 | 0.9887 |
83
+ | 0.0103 | 47.06 | 1600 | 0.0723 | 0.9887 |
84
+ | 0.0131 | 50.0 | 1700 | 0.0732 | 0.9887 |
85
 
86
 
87
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9624060150375939,
4
- "eval_loss": 0.0946127399802208,
5
- "eval_runtime": 2.633,
6
- "eval_samples_per_second": 101.024,
7
- "eval_steps_per_second": 12.913,
8
- "total_flos": 1.6434612963230515e+18,
9
- "train_loss": 0.5515072485103326,
10
- "train_runtime": 465.2023,
11
- "train_samples_per_second": 45.658,
12
- "train_steps_per_second": 1.462
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.015037593984962405,
4
+ "eval_loss": 2.07399845123291,
5
+ "eval_runtime": 3.7505,
6
+ "eval_samples_per_second": 70.923,
7
+ "eval_steps_per_second": 9.065,
8
+ "total_flos": 5.337848001643315e+17,
9
+ "train_loss": 0.17142059330773704,
10
+ "train_runtime": 289.6781,
11
+ "train_samples_per_second": 73.323,
12
+ "train_steps_per_second": 2.347
13
  }
eval_results.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 0.9624060150375939,
4
- "eval_loss": 0.0946127399802208,
5
- "eval_runtime": 2.633,
6
- "eval_samples_per_second": 101.024,
7
- "eval_steps_per_second": 12.913
8
  }
 
1
  {
2
+ "eval_accuracy": 0.015037593984962405,
3
+ "eval_loss": 2.07399845123291,
4
+ "eval_runtime": 3.7505,
5
+ "eval_samples_per_second": 70.923,
6
+ "eval_steps_per_second": 9.065
 
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fa14359c275d65dc97a7f3b269406a2a20534033e6cd3c3319d98d6a78eba7f
3
  size 111323316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a925714ef2935d8483bd6ac2cc822604c804a1a4e0e9f6c1009b76d6af332472
3
  size 111323316
runs/Dec01_13-41-24_610b2a9400b8/events.out.tfevents.1701438092.610b2a9400b8.3653.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44278824c45c5f66cfc1d834fe8aa9456abef81f190b89f154cc7d318c6c92ec
3
+ size 37403
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 1.6434612963230515e+18,
4
- "train_loss": 0.5515072485103326,
5
- "train_runtime": 465.2023,
6
- "train_samples_per_second": 45.658,
7
- "train_steps_per_second": 1.462
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 5.337848001643315e+17,
4
+ "train_loss": 0.17142059330773704,
5
+ "train_runtime": 289.6781,
6
+ "train_samples_per_second": 73.323,
7
+ "train_steps_per_second": 2.347
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.0946127399802208,
3
  "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-600",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
@@ -10,481 +10,481 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
- "learning_rate": 0.00019794117647058826,
14
- "loss": 1.9655,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
- "learning_rate": 0.000195,
20
- "loss": 1.7836,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
- "learning_rate": 0.0001920588235294118,
26
- "loss": 1.5744,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
- "learning_rate": 0.00018911764705882353,
32
- "loss": 1.3903,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
- "learning_rate": 0.00018617647058823532,
38
- "loss": 1.3596,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
- "learning_rate": 0.00018323529411764706,
44
- "loss": 1.3915,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
  "learning_rate": 0.00018029411764705885,
50
- "loss": 1.2525,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
  "learning_rate": 0.00017735294117647059,
56
- "loss": 1.167,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
  "learning_rate": 0.00017441176470588235,
62
- "loss": 1.0339,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
  "learning_rate": 0.00017147058823529412,
68
- "loss": 0.9733,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
- "eval_accuracy": 0.7255639097744361,
74
- "eval_loss": 0.8894439339637756,
75
- "eval_runtime": 2.491,
76
- "eval_samples_per_second": 106.783,
77
- "eval_steps_per_second": 13.649,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
  "learning_rate": 0.00016852941176470588,
83
- "loss": 0.9561,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
  "learning_rate": 0.00016558823529411765,
89
- "loss": 0.9493,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
  "learning_rate": 0.0001626470588235294,
95
- "loss": 0.8258,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
  "learning_rate": 0.0001597058823529412,
101
- "loss": 0.8111,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
  "learning_rate": 0.00015676470588235294,
107
- "loss": 0.9361,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
  "learning_rate": 0.00015382352941176473,
113
- "loss": 0.7733,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
  "learning_rate": 0.00015088235294117647,
119
- "loss": 0.5113,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
  "learning_rate": 0.00014794117647058826,
125
- "loss": 0.7088,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
  "learning_rate": 0.000145,
131
- "loss": 0.6652,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
  "learning_rate": 0.00014205882352941177,
137
- "loss": 0.7184,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
- "eval_accuracy": 0.7293233082706767,
143
- "eval_loss": 0.7875903248786926,
144
- "eval_runtime": 2.5205,
145
- "eval_samples_per_second": 105.533,
146
- "eval_steps_per_second": 13.489,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
  "learning_rate": 0.00013911764705882353,
152
- "loss": 0.9143,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
  "learning_rate": 0.0001361764705882353,
158
- "loss": 0.7709,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
  "learning_rate": 0.00013323529411764706,
164
- "loss": 0.5797,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
  "learning_rate": 0.00013029411764705883,
170
- "loss": 0.5424,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
  "learning_rate": 0.0001273529411764706,
176
- "loss": 0.6525,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
  "learning_rate": 0.00012441176470588236,
182
- "loss": 0.3516,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
  "learning_rate": 0.00012147058823529412,
188
- "loss": 0.4829,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
  "learning_rate": 0.00011852941176470589,
194
- "loss": 0.5154,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
  "learning_rate": 0.00011558823529411764,
200
- "loss": 0.3485,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
  "learning_rate": 0.00011264705882352942,
206
- "loss": 0.5299,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
- "eval_accuracy": 0.8609022556390977,
212
- "eval_loss": 0.5183172821998596,
213
- "eval_runtime": 2.8712,
214
- "eval_samples_per_second": 92.644,
215
- "eval_steps_per_second": 11.842,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
  "learning_rate": 0.0001097058823529412,
221
- "loss": 0.5263,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
  "learning_rate": 0.00010676470588235295,
227
- "loss": 0.4048,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
  "learning_rate": 0.00010382352941176472,
233
- "loss": 0.4386,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
  "learning_rate": 0.00010088235294117648,
239
- "loss": 0.4728,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 10.29,
244
  "learning_rate": 9.794117647058824e-05,
245
- "loss": 0.379,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 10.59,
250
  "learning_rate": 9.5e-05,
251
- "loss": 0.4361,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 10.88,
256
  "learning_rate": 9.205882352941177e-05,
257
- "loss": 0.2929,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 11.18,
262
  "learning_rate": 8.911764705882354e-05,
263
- "loss": 0.2848,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 11.47,
268
  "learning_rate": 8.61764705882353e-05,
269
- "loss": 0.4282,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 11.76,
274
  "learning_rate": 8.323529411764707e-05,
275
- "loss": 0.3991,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 11.76,
280
- "eval_accuracy": 0.8947368421052632,
281
- "eval_loss": 0.31206753849983215,
282
- "eval_runtime": 2.9634,
283
- "eval_samples_per_second": 89.761,
284
- "eval_steps_per_second": 11.473,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 12.06,
289
  "learning_rate": 8.029411764705883e-05,
290
- "loss": 0.3552,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 12.35,
295
  "learning_rate": 7.73529411764706e-05,
296
- "loss": 0.2886,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 12.65,
301
  "learning_rate": 7.441176470588236e-05,
302
- "loss": 0.2809,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 12.94,
307
  "learning_rate": 7.147058823529412e-05,
308
- "loss": 0.3627,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 13.24,
313
  "learning_rate": 6.852941176470589e-05,
314
- "loss": 0.3137,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 13.53,
319
  "learning_rate": 6.558823529411765e-05,
320
- "loss": 0.198,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 13.82,
325
  "learning_rate": 6.264705882352942e-05,
326
- "loss": 0.3268,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 14.12,
331
  "learning_rate": 5.970588235294118e-05,
332
- "loss": 0.256,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 14.41,
337
  "learning_rate": 5.676470588235294e-05,
338
- "loss": 0.2544,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 14.71,
343
  "learning_rate": 5.382352941176471e-05,
344
- "loss": 0.2263,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 14.71,
349
- "eval_accuracy": 0.9548872180451128,
350
- "eval_loss": 0.13367173075675964,
351
- "eval_runtime": 2.4729,
352
- "eval_samples_per_second": 107.566,
353
- "eval_steps_per_second": 13.749,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 15.0,
358
  "learning_rate": 5.088235294117647e-05,
359
- "loss": 0.4011,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 15.29,
364
  "learning_rate": 4.794117647058824e-05,
365
- "loss": 0.2051,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 15.59,
370
  "learning_rate": 4.5e-05,
371
- "loss": 0.2563,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 15.88,
376
  "learning_rate": 4.205882352941177e-05,
377
- "loss": 0.2428,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 16.18,
382
  "learning_rate": 3.911764705882353e-05,
383
- "loss": 0.189,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 16.47,
388
  "learning_rate": 3.61764705882353e-05,
389
- "loss": 0.19,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 16.76,
394
  "learning_rate": 3.3235294117647056e-05,
395
- "loss": 0.1696,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 17.06,
400
  "learning_rate": 3.0294117647058824e-05,
401
- "loss": 0.1336,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 17.35,
406
  "learning_rate": 2.7352941176470593e-05,
407
- "loss": 0.1532,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 17.65,
412
  "learning_rate": 2.4411764705882354e-05,
413
- "loss": 0.1782,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 17.65,
418
- "eval_accuracy": 0.9624060150375939,
419
- "eval_loss": 0.0946127399802208,
420
- "eval_runtime": 2.8201,
421
- "eval_samples_per_second": 94.322,
422
- "eval_steps_per_second": 12.056,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 17.94,
427
  "learning_rate": 2.1470588235294116e-05,
428
- "loss": 0.1511,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 18.24,
433
  "learning_rate": 1.8529411764705884e-05,
434
- "loss": 0.0837,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 18.53,
439
  "learning_rate": 1.558823529411765e-05,
440
- "loss": 0.1084,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 18.82,
445
  "learning_rate": 1.2647058823529412e-05,
446
- "loss": 0.1313,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 19.12,
451
  "learning_rate": 9.705882352941177e-06,
452
- "loss": 0.1135,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 19.41,
457
  "learning_rate": 6.7647058823529414e-06,
458
- "loss": 0.0844,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 19.71,
463
  "learning_rate": 3.823529411764706e-06,
464
- "loss": 0.1765,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 20.0,
469
  "learning_rate": 8.823529411764706e-07,
470
- "loss": 0.1745,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 20.0,
475
  "step": 680,
476
- "total_flos": 1.6434612963230515e+18,
477
- "train_loss": 0.5515072485103326,
478
- "train_runtime": 465.2023,
479
- "train_samples_per_second": 45.658,
480
- "train_steps_per_second": 1.462
481
  }
482
  ],
483
  "logging_steps": 10,
484
  "max_steps": 680,
485
  "num_train_epochs": 20,
486
  "save_steps": 100,
487
- "total_flos": 1.6434612963230515e+18,
488
  "trial_name": null,
489
  "trial_params": null
490
  }
 
1
  {
2
+ "best_metric": 0.05951835587620735,
3
  "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-600",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
+ "learning_rate": 0.00019705882352941177,
14
+ "loss": 1.5799,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
+ "learning_rate": 0.00019441176470588235,
20
+ "loss": 1.1614,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
+ "learning_rate": 0.00019147058823529414,
26
+ "loss": 0.8677,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
+ "learning_rate": 0.00018882352941176472,
32
+ "loss": 0.7303,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
+ "learning_rate": 0.00018588235294117648,
38
+ "loss": 0.4837,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
+ "learning_rate": 0.00018294117647058825,
44
+ "loss": 0.4625,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
  "learning_rate": 0.00018029411764705885,
50
+ "loss": 0.3693,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
  "learning_rate": 0.00017735294117647059,
56
+ "loss": 0.331,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
  "learning_rate": 0.00017441176470588235,
62
+ "loss": 0.2914,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
  "learning_rate": 0.00017147058823529412,
68
+ "loss": 0.2926,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
+ "eval_accuracy": 0.943609022556391,
74
+ "eval_loss": 0.15237735211849213,
75
+ "eval_runtime": 1.9186,
76
+ "eval_samples_per_second": 138.643,
77
+ "eval_steps_per_second": 17.721,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
  "learning_rate": 0.00016852941176470588,
83
+ "loss": 0.2343,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
  "learning_rate": 0.00016558823529411765,
89
+ "loss": 0.2078,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
  "learning_rate": 0.0001626470588235294,
95
+ "loss": 0.2448,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
  "learning_rate": 0.0001597058823529412,
101
+ "loss": 0.2146,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
  "learning_rate": 0.00015676470588235294,
107
+ "loss": 0.2086,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
  "learning_rate": 0.00015382352941176473,
113
+ "loss": 0.1924,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
  "learning_rate": 0.00015088235294117647,
119
+ "loss": 0.0882,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
  "learning_rate": 0.00014794117647058826,
125
+ "loss": 0.1726,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
  "learning_rate": 0.000145,
131
+ "loss": 0.1193,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
  "learning_rate": 0.00014205882352941177,
137
+ "loss": 0.1844,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
+ "eval_accuracy": 0.943609022556391,
143
+ "eval_loss": 0.1717682033777237,
144
+ "eval_runtime": 2.2294,
145
+ "eval_samples_per_second": 119.314,
146
+ "eval_steps_per_second": 15.251,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
  "learning_rate": 0.00013911764705882353,
152
+ "loss": 0.31,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
  "learning_rate": 0.0001361764705882353,
158
+ "loss": 0.1387,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
  "learning_rate": 0.00013323529411764706,
164
+ "loss": 0.2172,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
  "learning_rate": 0.00013029411764705883,
170
+ "loss": 0.1062,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
  "learning_rate": 0.0001273529411764706,
176
+ "loss": 0.1055,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
  "learning_rate": 0.00012441176470588236,
182
+ "loss": 0.087,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
  "learning_rate": 0.00012147058823529412,
188
+ "loss": 0.0817,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
  "learning_rate": 0.00011852941176470589,
194
+ "loss": 0.0727,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
  "learning_rate": 0.00011558823529411764,
200
+ "loss": 0.0561,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
  "learning_rate": 0.00011264705882352942,
206
+ "loss": 0.1189,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
+ "eval_accuracy": 0.9661654135338346,
212
+ "eval_loss": 0.08863785862922668,
213
+ "eval_runtime": 1.8695,
214
+ "eval_samples_per_second": 142.285,
215
+ "eval_steps_per_second": 18.187,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
  "learning_rate": 0.0001097058823529412,
221
+ "loss": 0.0705,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
  "learning_rate": 0.00010676470588235295,
227
+ "loss": 0.147,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
  "learning_rate": 0.00010382352941176472,
233
+ "loss": 0.0612,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
  "learning_rate": 0.00010088235294117648,
239
+ "loss": 0.0926,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 10.29,
244
  "learning_rate": 9.794117647058824e-05,
245
+ "loss": 0.0977,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 10.59,
250
  "learning_rate": 9.5e-05,
251
+ "loss": 0.1104,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 10.88,
256
  "learning_rate": 9.205882352941177e-05,
257
+ "loss": 0.0834,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 11.18,
262
  "learning_rate": 8.911764705882354e-05,
263
+ "loss": 0.0831,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 11.47,
268
  "learning_rate": 8.61764705882353e-05,
269
+ "loss": 0.0464,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 11.76,
274
  "learning_rate": 8.323529411764707e-05,
275
+ "loss": 0.0682,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 11.76,
280
+ "eval_accuracy": 0.9699248120300752,
281
+ "eval_loss": 0.09778111428022385,
282
+ "eval_runtime": 2.3942,
283
+ "eval_samples_per_second": 111.103,
284
+ "eval_steps_per_second": 14.201,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 12.06,
289
  "learning_rate": 8.029411764705883e-05,
290
+ "loss": 0.0504,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 12.35,
295
  "learning_rate": 7.73529411764706e-05,
296
+ "loss": 0.0379,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 12.65,
301
  "learning_rate": 7.441176470588236e-05,
302
+ "loss": 0.0261,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 12.94,
307
  "learning_rate": 7.147058823529412e-05,
308
+ "loss": 0.0838,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 13.24,
313
  "learning_rate": 6.852941176470589e-05,
314
+ "loss": 0.0262,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 13.53,
319
  "learning_rate": 6.558823529411765e-05,
320
+ "loss": 0.0232,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 13.82,
325
  "learning_rate": 6.264705882352942e-05,
326
+ "loss": 0.0403,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 14.12,
331
  "learning_rate": 5.970588235294118e-05,
332
+ "loss": 0.049,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 14.41,
337
  "learning_rate": 5.676470588235294e-05,
338
+ "loss": 0.0606,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 14.71,
343
  "learning_rate": 5.382352941176471e-05,
344
+ "loss": 0.0439,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 14.71,
349
+ "eval_accuracy": 0.9736842105263158,
350
+ "eval_loss": 0.08120405673980713,
351
+ "eval_runtime": 1.897,
352
+ "eval_samples_per_second": 140.22,
353
+ "eval_steps_per_second": 17.923,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 15.0,
358
  "learning_rate": 5.088235294117647e-05,
359
+ "loss": 0.0495,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 15.29,
364
  "learning_rate": 4.794117647058824e-05,
365
+ "loss": 0.0337,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 15.59,
370
  "learning_rate": 4.5e-05,
371
+ "loss": 0.0987,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 15.88,
376
  "learning_rate": 4.205882352941177e-05,
377
+ "loss": 0.0218,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 16.18,
382
  "learning_rate": 3.911764705882353e-05,
383
+ "loss": 0.0324,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 16.47,
388
  "learning_rate": 3.61764705882353e-05,
389
+ "loss": 0.0364,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 16.76,
394
  "learning_rate": 3.3235294117647056e-05,
395
+ "loss": 0.0284,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 17.06,
400
  "learning_rate": 3.0294117647058824e-05,
401
+ "loss": 0.0306,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 17.35,
406
  "learning_rate": 2.7352941176470593e-05,
407
+ "loss": 0.0535,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 17.65,
412
  "learning_rate": 2.4411764705882354e-05,
413
+ "loss": 0.0544,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 17.65,
418
+ "eval_accuracy": 0.981203007518797,
419
+ "eval_loss": 0.05951835587620735,
420
+ "eval_runtime": 1.8559,
421
+ "eval_samples_per_second": 143.327,
422
+ "eval_steps_per_second": 18.32,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 17.94,
427
  "learning_rate": 2.1470588235294116e-05,
428
+ "loss": 0.0121,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 18.24,
433
  "learning_rate": 1.8529411764705884e-05,
434
+ "loss": 0.0049,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 18.53,
439
  "learning_rate": 1.558823529411765e-05,
440
+ "loss": 0.0086,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 18.82,
445
  "learning_rate": 1.2647058823529412e-05,
446
+ "loss": 0.0102,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 19.12,
451
  "learning_rate": 9.705882352941177e-06,
452
+ "loss": 0.0342,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 19.41,
457
  "learning_rate": 6.7647058823529414e-06,
458
+ "loss": 0.0367,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 19.71,
463
  "learning_rate": 3.823529411764706e-06,
464
+ "loss": 0.0313,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 20.0,
469
  "learning_rate": 8.823529411764706e-07,
470
+ "loss": 0.0465,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 20.0,
475
  "step": 680,
476
+ "total_flos": 5.337848001643315e+17,
477
+ "train_loss": 0.17142059330773704,
478
+ "train_runtime": 289.6781,
479
+ "train_samples_per_second": 73.323,
480
+ "train_steps_per_second": 2.347
481
  }
482
  ],
483
  "logging_steps": 10,
484
  "max_steps": 680,
485
  "num_train_epochs": 20,
486
  "save_steps": 100,
487
+ "total_flos": 5.337848001643315e+17,
488
  "trial_name": null,
489
  "trial_params": null
490
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7453b4d852d9cd2e5e3323177f837efc3e7f9c2f92828be34bc2ab59e6d6d25a
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68354e0c5b01561e938df0004ef20ef191e70a1505432fac27a2b4cc3d56354e
3
  size 4600