dhritic99 committed on
Commit
519064f
1 Parent(s): acc3019

dhritic99/model99123

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.2613
21
- - Accuracy: 0.9508
22
 
23
  ## Model description
24
 
@@ -37,7 +37,7 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 3e-05
41
  - train_batch_size: 16
42
  - eval_batch_size: 8
43
  - seed: 42
@@ -53,13 +53,13 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
55
  |:-------------:|:-------:|:----:|:---------------:|:--------:|
56
- | 0.4758 | 7.8125 | 500 | 0.5695 | 0.7939 |
57
- | 0.1137 | 15.625 | 1000 | 0.4398 | 0.8711 |
58
- | 0.0466 | 23.4375 | 1500 | 0.4086 | 0.9023 |
59
- | 0.0086 | 31.25 | 2000 | 0.2433 | 0.9463 |
60
- | 0.0034 | 39.0625 | 2500 | 0.1636 | 0.9688 |
61
- | 0.002 | 46.875 | 3000 | 0.1739 | 0.9707 |
62
- | 0.0014 | 54.6875 | 3500 | 0.1818 | 0.9707 |
63
 
64
 
65
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3419
21
+ - Accuracy: 0.9406
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
  - train_batch_size: 16
42
  - eval_batch_size: 8
43
  - seed: 42
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
55
  |:-------------:|:-------:|:----:|:---------------:|:--------:|
56
+ | 0.0048 | 7.8125 | 500 | 0.2337 | 0.9473 |
57
+ | 0.0012 | 15.625 | 1000 | 0.1950 | 0.9531 |
58
+ | 0.0007 | 23.4375 | 1500 | 0.1927 | 0.9580 |
59
+ | 0.0004 | 31.25 | 2000 | 0.1970 | 0.9629 |
60
+ | 0.0003 | 39.0625 | 2500 | 0.2040 | 0.9629 |
61
+ | 0.0002 | 46.875 | 3000 | 0.2114 | 0.9629 |
62
+ | 0.0002 | 54.6875 | 3500 | 0.2171 | 0.9648 |
63
 
64
 
65
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_accuracy": 0.95078125,
4
- "eval_loss": 0.26134949922561646,
5
- "eval_runtime": 6.7187,
6
- "eval_samples_per_second": 190.513,
7
- "eval_steps_per_second": 23.814,
8
  "total_flos": 1.904477274611122e+19,
9
- "train_loss": 0.14902369955088943,
10
- "train_runtime": 2785.8591,
11
- "train_samples_per_second": 88.217,
12
- "train_steps_per_second": 1.378
13
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_accuracy": 0.940625,
4
+ "eval_loss": 0.3418883681297302,
5
+ "eval_runtime": 6.7246,
6
+ "eval_samples_per_second": 190.345,
7
+ "eval_steps_per_second": 23.793,
8
  "total_flos": 1.904477274611122e+19,
9
+ "train_loss": 0.0010260362852325974,
10
+ "train_runtime": 2806.5312,
11
+ "train_samples_per_second": 87.567,
12
+ "train_steps_per_second": 1.368
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_accuracy": 0.95078125,
4
- "eval_loss": 0.26134949922561646,
5
- "eval_runtime": 6.7187,
6
- "eval_samples_per_second": 190.513,
7
- "eval_steps_per_second": 23.814
8
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_accuracy": 0.940625,
4
+ "eval_loss": 0.3418883681297302,
5
+ "eval_runtime": 6.7246,
6
+ "eval_samples_per_second": 190.345,
7
+ "eval_steps_per_second": 23.793
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3158ce4921cc7de61a99fa5043ead77c62382fbae720ec9b3a76b25a7120d00c
3
  size 343230128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9f5e271ad1d8119de83579c28db38d081d9cf64bf984cb94bcef6fec2881b1
3
  size 343230128
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.904477274611122e+19,
4
- "train_loss": 0.14902369955088943,
5
- "train_runtime": 2785.8591,
6
- "train_samples_per_second": 88.217,
7
- "train_steps_per_second": 1.378
8
  }
 
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.904477274611122e+19,
4
+ "train_loss": 0.0010260362852325974,
5
+ "train_runtime": 2806.5312,
6
+ "train_samples_per_second": 87.567,
7
+ "train_steps_per_second": 1.368
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.16363227367401123,
3
- "best_model_checkpoint": "./vit-base-brain-tumor-detection3/checkpoint-2500",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
  "global_step": 3840,
@@ -10,607 +10,607 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.78125,
13
- "grad_norm": 1.8165547847747803,
14
- "learning_rate": 1.5e-06,
15
- "loss": 1.3842,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 1.5625,
20
- "grad_norm": 1.1670353412628174,
21
- "learning_rate": 3e-06,
22
- "loss": 1.2128,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 2.34375,
27
- "grad_norm": 1.4207779169082642,
28
- "learning_rate": 4.5e-06,
29
- "loss": 1.0365,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 3.125,
34
- "grad_norm": 3.359316825866699,
35
- "learning_rate": 6e-06,
36
- "loss": 0.9528,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 3.90625,
41
- "grad_norm": 1.4744346141815186,
42
- "learning_rate": 7.5e-06,
43
- "loss": 0.8693,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 4.6875,
48
- "grad_norm": 2.3251893520355225,
49
- "learning_rate": 9e-06,
50
- "loss": 0.8014,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 5.46875,
55
- "grad_norm": 3.080569267272949,
56
- "learning_rate": 1.05e-05,
57
- "loss": 0.7156,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 6.25,
62
- "grad_norm": 2.417529582977295,
63
- "learning_rate": 1.2e-05,
64
- "loss": 0.6215,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 7.03125,
69
- "grad_norm": 2.6332898139953613,
70
- "learning_rate": 1.3500000000000001e-05,
71
- "loss": 0.5781,
72
  "step": 450
73
  },
74
  {
75
  "epoch": 7.8125,
76
- "grad_norm": 4.613404750823975,
77
- "learning_rate": 1.5e-05,
78
- "loss": 0.4758,
79
  "step": 500
80
  },
81
  {
82
  "epoch": 7.8125,
83
- "eval_accuracy": 0.7939453125,
84
- "eval_loss": 0.5694867372512817,
85
- "eval_runtime": 6.0709,
86
- "eval_samples_per_second": 168.674,
87
- "eval_steps_per_second": 21.084,
88
  "step": 500
89
  },
90
  {
91
  "epoch": 8.59375,
92
- "grad_norm": 3.2593910694122314,
93
- "learning_rate": 1.65e-05,
94
- "loss": 0.3951,
95
  "step": 550
96
  },
97
  {
98
  "epoch": 9.375,
99
- "grad_norm": 4.835229396820068,
100
- "learning_rate": 1.8e-05,
101
- "loss": 0.278,
102
  "step": 600
103
  },
104
  {
105
  "epoch": 10.15625,
106
- "grad_norm": 2.339912176132202,
107
- "learning_rate": 1.95e-05,
108
- "loss": 0.2564,
109
  "step": 650
110
  },
111
  {
112
  "epoch": 10.9375,
113
- "grad_norm": 1.6154627799987793,
114
- "learning_rate": 2.1e-05,
115
- "loss": 0.2094,
116
  "step": 700
117
  },
118
  {
119
  "epoch": 11.71875,
120
- "grad_norm": 3.8827977180480957,
121
- "learning_rate": 2.25e-05,
122
- "loss": 0.1495,
123
  "step": 750
124
  },
125
  {
126
  "epoch": 12.5,
127
- "grad_norm": 6.941565036773682,
128
- "learning_rate": 2.4e-05,
129
- "loss": 0.1284,
130
  "step": 800
131
  },
132
  {
133
  "epoch": 13.28125,
134
- "grad_norm": 2.415818691253662,
135
- "learning_rate": 2.55e-05,
136
- "loss": 0.1136,
137
  "step": 850
138
  },
139
  {
140
  "epoch": 14.0625,
141
- "grad_norm": 1.2879343032836914,
142
- "learning_rate": 2.7000000000000002e-05,
143
- "loss": 0.0863,
144
  "step": 900
145
  },
146
  {
147
  "epoch": 14.84375,
148
- "grad_norm": 14.791470527648926,
149
- "learning_rate": 2.8499999999999998e-05,
150
- "loss": 0.1258,
151
  "step": 950
152
  },
153
  {
154
  "epoch": 15.625,
155
- "grad_norm": 13.995768547058105,
156
- "learning_rate": 3e-05,
157
- "loss": 0.1137,
158
  "step": 1000
159
  },
160
  {
161
  "epoch": 15.625,
162
- "eval_accuracy": 0.87109375,
163
- "eval_loss": 0.43984174728393555,
164
- "eval_runtime": 5.4017,
165
- "eval_samples_per_second": 189.569,
166
- "eval_steps_per_second": 23.696,
167
  "step": 1000
168
  },
169
  {
170
  "epoch": 16.40625,
171
- "grad_norm": 24.01154327392578,
172
- "learning_rate": 2.989554317548747e-05,
173
- "loss": 0.0756,
174
  "step": 1050
175
  },
176
  {
177
  "epoch": 17.1875,
178
- "grad_norm": 0.24175554513931274,
179
- "learning_rate": 2.979108635097493e-05,
180
- "loss": 0.0933,
181
  "step": 1100
182
  },
183
  {
184
  "epoch": 17.96875,
185
- "grad_norm": 0.25062623620033264,
186
- "learning_rate": 2.96866295264624e-05,
187
- "loss": 0.0676,
188
  "step": 1150
189
  },
190
  {
191
  "epoch": 18.75,
192
- "grad_norm": 0.775455892086029,
193
- "learning_rate": 2.958217270194986e-05,
194
- "loss": 0.081,
195
  "step": 1200
196
  },
197
  {
198
  "epoch": 19.53125,
199
- "grad_norm": 0.12767118215560913,
200
- "learning_rate": 2.947771587743733e-05,
201
- "loss": 0.0756,
202
  "step": 1250
203
  },
204
  {
205
  "epoch": 20.3125,
206
- "grad_norm": 0.17824232578277588,
207
- "learning_rate": 2.937325905292479e-05,
208
- "loss": 0.0571,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 21.09375,
213
- "grad_norm": 0.1250143200159073,
214
- "learning_rate": 2.926880222841226e-05,
215
- "loss": 0.0462,
216
  "step": 1350
217
  },
218
  {
219
  "epoch": 21.875,
220
- "grad_norm": 1.6465438604354858,
221
- "learning_rate": 2.916434540389972e-05,
222
- "loss": 0.0346,
223
  "step": 1400
224
  },
225
  {
226
  "epoch": 22.65625,
227
- "grad_norm": 9.335956573486328,
228
- "learning_rate": 2.905988857938719e-05,
229
- "loss": 0.046,
230
  "step": 1450
231
  },
232
  {
233
  "epoch": 23.4375,
234
- "grad_norm": 0.20395609736442566,
235
- "learning_rate": 2.895543175487465e-05,
236
- "loss": 0.0466,
237
  "step": 1500
238
  },
239
  {
240
  "epoch": 23.4375,
241
- "eval_accuracy": 0.90234375,
242
- "eval_loss": 0.4086352288722992,
243
- "eval_runtime": 5.3678,
244
- "eval_samples_per_second": 190.768,
245
- "eval_steps_per_second": 23.846,
246
  "step": 1500
247
  },
248
  {
249
  "epoch": 24.21875,
250
- "grad_norm": 0.06884710490703583,
251
- "learning_rate": 2.885097493036212e-05,
252
- "loss": 0.044,
253
  "step": 1550
254
  },
255
  {
256
  "epoch": 25.0,
257
- "grad_norm": 0.2089349329471588,
258
- "learning_rate": 2.8746518105849583e-05,
259
- "loss": 0.0382,
260
  "step": 1600
261
  },
262
  {
263
  "epoch": 25.78125,
264
- "grad_norm": 0.09039656072854996,
265
- "learning_rate": 2.8642061281337048e-05,
266
- "loss": 0.0279,
267
  "step": 1650
268
  },
269
  {
270
  "epoch": 26.5625,
271
- "grad_norm": 0.05787573382258415,
272
- "learning_rate": 2.8537604456824513e-05,
273
- "loss": 0.0257,
274
  "step": 1700
275
  },
276
  {
277
  "epoch": 27.34375,
278
- "grad_norm": 0.04917814955115318,
279
- "learning_rate": 2.8433147632311978e-05,
280
- "loss": 0.0229,
281
  "step": 1750
282
  },
283
  {
284
  "epoch": 28.125,
285
- "grad_norm": 0.06560017913579941,
286
- "learning_rate": 2.8328690807799443e-05,
287
- "loss": 0.0191,
288
  "step": 1800
289
  },
290
  {
291
  "epoch": 28.90625,
292
- "grad_norm": 0.03920649737119675,
293
- "learning_rate": 2.8224233983286908e-05,
294
- "loss": 0.0114,
295
  "step": 1850
296
  },
297
  {
298
  "epoch": 29.6875,
299
- "grad_norm": 0.9064533114433289,
300
- "learning_rate": 2.8119777158774373e-05,
301
- "loss": 0.0165,
302
  "step": 1900
303
  },
304
  {
305
  "epoch": 30.46875,
306
- "grad_norm": 0.03491423651576042,
307
- "learning_rate": 2.8015320334261838e-05,
308
- "loss": 0.0078,
309
  "step": 1950
310
  },
311
  {
312
  "epoch": 31.25,
313
- "grad_norm": 0.029768764972686768,
314
- "learning_rate": 2.7910863509749306e-05,
315
- "loss": 0.0086,
316
  "step": 2000
317
  },
318
  {
319
  "epoch": 31.25,
320
- "eval_accuracy": 0.9462890625,
321
- "eval_loss": 0.2432650774717331,
322
- "eval_runtime": 5.6297,
323
- "eval_samples_per_second": 181.891,
324
- "eval_steps_per_second": 22.736,
325
  "step": 2000
326
  },
327
  {
328
  "epoch": 32.03125,
329
- "grad_norm": 0.02953988127410412,
330
- "learning_rate": 2.780640668523677e-05,
331
- "loss": 0.0062,
332
  "step": 2050
333
  },
334
  {
335
  "epoch": 32.8125,
336
- "grad_norm": 0.025788016617298126,
337
- "learning_rate": 2.7701949860724236e-05,
338
- "loss": 0.0057,
339
  "step": 2100
340
  },
341
  {
342
  "epoch": 33.59375,
343
- "grad_norm": 0.03053743578493595,
344
- "learning_rate": 2.75974930362117e-05,
345
- "loss": 0.0053,
346
  "step": 2150
347
  },
348
  {
349
  "epoch": 34.375,
350
- "grad_norm": 0.021916454657912254,
351
- "learning_rate": 2.7493036211699166e-05,
352
- "loss": 0.0049,
353
  "step": 2200
354
  },
355
  {
356
  "epoch": 35.15625,
357
- "grad_norm": 0.021212272346019745,
358
- "learning_rate": 2.738857938718663e-05,
359
- "loss": 0.0045,
360
  "step": 2250
361
  },
362
  {
363
  "epoch": 35.9375,
364
- "grad_norm": 0.020344305783510208,
365
- "learning_rate": 2.7284122562674096e-05,
366
- "loss": 0.0043,
367
  "step": 2300
368
  },
369
  {
370
  "epoch": 36.71875,
371
- "grad_norm": 0.018891936168074608,
372
- "learning_rate": 2.717966573816156e-05,
373
- "loss": 0.004,
374
  "step": 2350
375
  },
376
  {
377
  "epoch": 37.5,
378
- "grad_norm": 0.017234979197382927,
379
- "learning_rate": 2.7075208913649025e-05,
380
- "loss": 0.0038,
381
  "step": 2400
382
  },
383
  {
384
  "epoch": 38.28125,
385
- "grad_norm": 0.016466792672872543,
386
- "learning_rate": 2.697075208913649e-05,
387
- "loss": 0.0035,
388
  "step": 2450
389
  },
390
  {
391
  "epoch": 39.0625,
392
- "grad_norm": 0.017314311116933823,
393
- "learning_rate": 2.6866295264623955e-05,
394
- "loss": 0.0034,
395
  "step": 2500
396
  },
397
  {
398
  "epoch": 39.0625,
399
- "eval_accuracy": 0.96875,
400
- "eval_loss": 0.16363227367401123,
401
- "eval_runtime": 6.1162,
402
- "eval_samples_per_second": 167.424,
403
- "eval_steps_per_second": 20.928,
404
  "step": 2500
405
  },
406
  {
407
  "epoch": 39.84375,
408
- "grad_norm": 0.01642526686191559,
409
- "learning_rate": 2.676183844011142e-05,
410
- "loss": 0.0032,
411
  "step": 2550
412
  },
413
  {
414
  "epoch": 40.625,
415
- "grad_norm": 0.013880325481295586,
416
- "learning_rate": 2.665738161559889e-05,
417
- "loss": 0.003,
418
  "step": 2600
419
  },
420
  {
421
  "epoch": 41.40625,
422
- "grad_norm": 0.01303493045270443,
423
- "learning_rate": 2.655292479108635e-05,
424
- "loss": 0.0028,
425
  "step": 2650
426
  },
427
  {
428
  "epoch": 42.1875,
429
- "grad_norm": 0.013205628842115402,
430
- "learning_rate": 2.644846796657382e-05,
431
- "loss": 0.0027,
432
  "step": 2700
433
  },
434
  {
435
  "epoch": 42.96875,
436
- "grad_norm": 0.011895690113306046,
437
- "learning_rate": 2.634401114206128e-05,
438
- "loss": 0.0026,
439
  "step": 2750
440
  },
441
  {
442
  "epoch": 43.75,
443
- "grad_norm": 0.011271192692220211,
444
- "learning_rate": 2.6239554317548748e-05,
445
- "loss": 0.0024,
446
  "step": 2800
447
  },
448
  {
449
  "epoch": 44.53125,
450
- "grad_norm": 0.011179978027939796,
451
- "learning_rate": 2.613509749303621e-05,
452
- "loss": 0.0024,
453
  "step": 2850
454
  },
455
  {
456
  "epoch": 45.3125,
457
- "grad_norm": 0.010614069178700447,
458
- "learning_rate": 2.6030640668523678e-05,
459
- "loss": 0.0022,
460
  "step": 2900
461
  },
462
  {
463
  "epoch": 46.09375,
464
- "grad_norm": 0.009998313151299953,
465
- "learning_rate": 2.5926183844011143e-05,
466
- "loss": 0.0021,
467
  "step": 2950
468
  },
469
  {
470
  "epoch": 46.875,
471
- "grad_norm": 0.009446458891034126,
472
- "learning_rate": 2.5821727019498608e-05,
473
- "loss": 0.002,
474
  "step": 3000
475
  },
476
  {
477
  "epoch": 46.875,
478
- "eval_accuracy": 0.970703125,
479
- "eval_loss": 0.17385585606098175,
480
- "eval_runtime": 5.8806,
481
- "eval_samples_per_second": 174.131,
482
- "eval_steps_per_second": 21.766,
483
  "step": 3000
484
  },
485
  {
486
  "epoch": 47.65625,
487
- "grad_norm": 0.009701834060251713,
488
- "learning_rate": 2.5717270194986073e-05,
489
- "loss": 0.002,
490
  "step": 3050
491
  },
492
  {
493
  "epoch": 48.4375,
494
- "grad_norm": 0.009180723689496517,
495
- "learning_rate": 2.5612813370473538e-05,
496
- "loss": 0.0019,
497
  "step": 3100
498
  },
499
  {
500
  "epoch": 49.21875,
501
- "grad_norm": 0.008364294655621052,
502
- "learning_rate": 2.5508356545961006e-05,
503
- "loss": 0.0018,
504
  "step": 3150
505
  },
506
  {
507
  "epoch": 50.0,
508
- "grad_norm": 0.008195644244551659,
509
- "learning_rate": 2.5403899721448468e-05,
510
- "loss": 0.0017,
511
  "step": 3200
512
  },
513
  {
514
  "epoch": 50.78125,
515
- "grad_norm": 0.008112799376249313,
516
- "learning_rate": 2.5299442896935936e-05,
517
- "loss": 0.0016,
518
  "step": 3250
519
  },
520
  {
521
  "epoch": 51.5625,
522
- "grad_norm": 0.007568549830466509,
523
- "learning_rate": 2.5194986072423398e-05,
524
- "loss": 0.0016,
525
  "step": 3300
526
  },
527
  {
528
  "epoch": 52.34375,
529
- "grad_norm": 0.007013232912868261,
530
- "learning_rate": 2.5090529247910866e-05,
531
- "loss": 0.0015,
532
  "step": 3350
533
  },
534
  {
535
  "epoch": 53.125,
536
- "grad_norm": 0.006883300840854645,
537
- "learning_rate": 2.4986072423398327e-05,
538
- "loss": 0.0014,
539
  "step": 3400
540
  },
541
  {
542
  "epoch": 53.90625,
543
- "grad_norm": 0.006791520398110151,
544
- "learning_rate": 2.4881615598885796e-05,
545
- "loss": 0.0014,
546
  "step": 3450
547
  },
548
  {
549
  "epoch": 54.6875,
550
- "grad_norm": 0.008187716826796532,
551
- "learning_rate": 2.4777158774373257e-05,
552
- "loss": 0.0014,
553
  "step": 3500
554
  },
555
  {
556
  "epoch": 54.6875,
557
- "eval_accuracy": 0.970703125,
558
- "eval_loss": 0.1817573606967926,
559
- "eval_runtime": 5.8237,
560
- "eval_samples_per_second": 175.833,
561
- "eval_steps_per_second": 21.979,
562
  "step": 3500
563
  },
564
  {
565
  "epoch": 55.46875,
566
- "grad_norm": 0.0065140994265675545,
567
- "learning_rate": 2.4672701949860726e-05,
568
- "loss": 0.0013,
569
  "step": 3550
570
  },
571
  {
572
  "epoch": 56.25,
573
- "grad_norm": 0.007060408126562834,
574
- "learning_rate": 2.456824512534819e-05,
575
- "loss": 0.0012,
576
  "step": 3600
577
  },
578
  {
579
  "epoch": 57.03125,
580
- "grad_norm": 0.0056546530686318874,
581
- "learning_rate": 2.4463788300835655e-05,
582
- "loss": 0.0012,
583
  "step": 3650
584
  },
585
  {
586
  "epoch": 57.8125,
587
- "grad_norm": 0.006707963068038225,
588
- "learning_rate": 2.435933147632312e-05,
589
- "loss": 0.0012,
590
  "step": 3700
591
  },
592
  {
593
  "epoch": 58.59375,
594
- "grad_norm": 0.006301193963736296,
595
- "learning_rate": 2.4254874651810585e-05,
596
- "loss": 0.0011,
597
  "step": 3750
598
  },
599
  {
600
  "epoch": 59.375,
601
- "grad_norm": 0.005066621117293835,
602
- "learning_rate": 2.415041782729805e-05,
603
- "loss": 0.0011,
604
  "step": 3800
605
  },
606
  {
607
  "epoch": 60.0,
608
  "step": 3840,
609
  "total_flos": 1.904477274611122e+19,
610
- "train_loss": 0.14902369955088943,
611
- "train_runtime": 2785.8591,
612
- "train_samples_per_second": 88.217,
613
- "train_steps_per_second": 1.378
614
  }
615
  ],
616
  "logging_steps": 50,
 
1
  {
2
+ "best_metric": 0.1927209496498108,
3
+ "best_model_checkpoint": "./vit-base-brain-tumor-detection3/checkpoint-1500",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
  "global_step": 3840,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.78125,
13
+ "grad_norm": 0.014016176573932171,
14
+ "learning_rate": 5.000000000000001e-07,
15
+ "loss": 0.0028,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 1.5625,
20
+ "grad_norm": 0.013060510158538818,
21
+ "learning_rate": 1.0000000000000002e-06,
22
+ "loss": 0.0028,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 2.34375,
27
+ "grad_norm": 0.013337934389710426,
28
+ "learning_rate": 1.5e-06,
29
+ "loss": 0.0028,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 3.125,
34
+ "grad_norm": 0.013658256269991398,
35
+ "learning_rate": 2.0000000000000003e-06,
36
+ "loss": 0.0027,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 3.90625,
41
+ "grad_norm": 0.012454299256205559,
42
+ "learning_rate": 2.5e-06,
43
+ "loss": 0.0026,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 4.6875,
48
+ "grad_norm": 0.013191607780754566,
49
+ "learning_rate": 3e-06,
50
+ "loss": 0.0026,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 5.46875,
55
+ "grad_norm": 0.0118486937135458,
56
+ "learning_rate": 3.5e-06,
57
+ "loss": 0.0025,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 6.25,
62
+ "grad_norm": 0.0345335379242897,
63
+ "learning_rate": 4.000000000000001e-06,
64
+ "loss": 0.0024,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 7.03125,
69
+ "grad_norm": 0.013763554394245148,
70
+ "learning_rate": 4.5e-06,
71
+ "loss": 0.0137,
72
  "step": 450
73
  },
74
  {
75
  "epoch": 7.8125,
76
+ "grad_norm": 0.014735482633113861,
77
+ "learning_rate": 5e-06,
78
+ "loss": 0.0048,
79
  "step": 500
80
  },
81
  {
82
  "epoch": 7.8125,
83
+ "eval_accuracy": 0.947265625,
84
+ "eval_loss": 0.2336536943912506,
85
+ "eval_runtime": 5.8997,
86
+ "eval_samples_per_second": 173.569,
87
+ "eval_steps_per_second": 21.696,
88
  "step": 500
89
  },
90
  {
91
  "epoch": 8.59375,
92
+ "grad_norm": 0.013056355528533459,
93
+ "learning_rate": 5.500000000000001e-06,
94
+ "loss": 0.0022,
95
  "step": 550
96
  },
97
  {
98
  "epoch": 9.375,
99
+ "grad_norm": 0.009187333285808563,
100
+ "learning_rate": 6e-06,
101
+ "loss": 0.0021,
102
  "step": 600
103
  },
104
  {
105
  "epoch": 10.15625,
106
+ "grad_norm": 0.0087556978687644,
107
+ "learning_rate": 6.5000000000000004e-06,
108
+ "loss": 0.0019,
109
  "step": 650
110
  },
111
  {
112
  "epoch": 10.9375,
113
+ "grad_norm": 0.008410913869738579,
114
+ "learning_rate": 7e-06,
115
+ "loss": 0.0018,
116
  "step": 700
117
  },
118
  {
119
  "epoch": 11.71875,
120
+ "grad_norm": 0.008203917182981968,
121
+ "learning_rate": 7.500000000000001e-06,
122
+ "loss": 0.0017,
123
  "step": 750
124
  },
125
  {
126
  "epoch": 12.5,
127
+ "grad_norm": 0.007246215827763081,
128
+ "learning_rate": 8.000000000000001e-06,
129
+ "loss": 0.0016,
130
  "step": 800
131
  },
132
  {
133
  "epoch": 13.28125,
134
+ "grad_norm": 0.006727874744683504,
135
+ "learning_rate": 8.5e-06,
136
+ "loss": 0.0015,
137
  "step": 850
138
  },
139
  {
140
  "epoch": 14.0625,
141
+ "grad_norm": 0.007697463966906071,
142
+ "learning_rate": 9e-06,
143
+ "loss": 0.0014,
144
  "step": 900
145
  },
146
  {
147
  "epoch": 14.84375,
148
+ "grad_norm": 0.005949131678789854,
149
+ "learning_rate": 9.5e-06,
150
+ "loss": 0.0013,
151
  "step": 950
152
  },
153
  {
154
  "epoch": 15.625,
155
+ "grad_norm": 0.0054717655293643475,
156
+ "learning_rate": 1e-05,
157
+ "loss": 0.0012,
158
  "step": 1000
159
  },
160
  {
161
  "epoch": 15.625,
162
+ "eval_accuracy": 0.953125,
163
+ "eval_loss": 0.19501826167106628,
164
+ "eval_runtime": 5.9147,
165
+ "eval_samples_per_second": 173.128,
166
+ "eval_steps_per_second": 21.641,
167
  "step": 1000
168
  },
169
  {
170
  "epoch": 16.40625,
171
+ "grad_norm": 0.005219893530011177,
172
+ "learning_rate": 9.965181058495823e-06,
173
+ "loss": 0.0011,
174
  "step": 1050
175
  },
176
  {
177
  "epoch": 17.1875,
178
+ "grad_norm": 0.004757468122988939,
179
+ "learning_rate": 9.930362116991644e-06,
180
+ "loss": 0.0011,
181
  "step": 1100
182
  },
183
  {
184
  "epoch": 17.96875,
185
+ "grad_norm": 0.004971610382199287,
186
+ "learning_rate": 9.895543175487466e-06,
187
+ "loss": 0.001,
188
  "step": 1150
189
  },
190
  {
191
  "epoch": 18.75,
192
+ "grad_norm": 0.0046828743070364,
193
+ "learning_rate": 9.860724233983288e-06,
194
+ "loss": 0.0009,
195
  "step": 1200
196
  },
197
  {
198
  "epoch": 19.53125,
199
+ "grad_norm": 0.004280711989849806,
200
+ "learning_rate": 9.82590529247911e-06,
201
+ "loss": 0.0009,
202
  "step": 1250
203
  },
204
  {
205
  "epoch": 20.3125,
206
+ "grad_norm": 0.004425444174557924,
207
+ "learning_rate": 9.79108635097493e-06,
208
+ "loss": 0.0008,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 21.09375,
213
+ "grad_norm": 0.0037732652854174376,
214
+ "learning_rate": 9.756267409470753e-06,
215
+ "loss": 0.0008,
216
  "step": 1350
217
  },
218
  {
219
  "epoch": 21.875,
220
+ "grad_norm": 0.0033754699397832155,
221
+ "learning_rate": 9.721448467966575e-06,
222
+ "loss": 0.0007,
223
  "step": 1400
224
  },
225
  {
226
  "epoch": 22.65625,
227
+ "grad_norm": 0.003637350630015135,
228
+ "learning_rate": 9.686629526462397e-06,
229
+ "loss": 0.0007,
230
  "step": 1450
231
  },
232
  {
233
  "epoch": 23.4375,
234
+ "grad_norm": 0.003412399208173156,
235
+ "learning_rate": 9.651810584958218e-06,
236
+ "loss": 0.0007,
237
  "step": 1500
238
  },
239
  {
240
  "epoch": 23.4375,
241
+ "eval_accuracy": 0.9580078125,
242
+ "eval_loss": 0.1927209496498108,
243
+ "eval_runtime": 5.2401,
244
+ "eval_samples_per_second": 195.416,
245
+ "eval_steps_per_second": 24.427,
246
  "step": 1500
247
  },
248
  {
249
  "epoch": 24.21875,
250
+ "grad_norm": 0.002839893801137805,
251
+ "learning_rate": 9.61699164345404e-06,
252
+ "loss": 0.0006,
253
  "step": 1550
254
  },
255
  {
256
  "epoch": 25.0,
257
+ "grad_norm": 0.0031008291989564896,
258
+ "learning_rate": 9.58217270194986e-06,
259
+ "loss": 0.0006,
260
  "step": 1600
261
  },
262
  {
263
  "epoch": 25.78125,
264
+ "grad_norm": 0.002541514113545418,
265
+ "learning_rate": 9.547353760445683e-06,
266
+ "loss": 0.0006,
267
  "step": 1650
268
  },
269
  {
270
  "epoch": 26.5625,
271
+ "grad_norm": 0.0025104843080043793,
272
+ "learning_rate": 9.512534818941505e-06,
273
+ "loss": 0.0005,
274
  "step": 1700
275
  },
276
  {
277
  "epoch": 27.34375,
278
+ "grad_norm": 0.0023143806029111147,
279
+ "learning_rate": 9.477715877437327e-06,
280
+ "loss": 0.0005,
281
  "step": 1750
282
  },
283
  {
284
  "epoch": 28.125,
285
+ "grad_norm": 0.0023780674673616886,
286
+ "learning_rate": 9.442896935933148e-06,
287
+ "loss": 0.0005,
288
  "step": 1800
289
  },
290
  {
291
  "epoch": 28.90625,
292
+ "grad_norm": 0.002274406375363469,
293
+ "learning_rate": 9.40807799442897e-06,
294
+ "loss": 0.0005,
295
  "step": 1850
296
  },
297
  {
298
  "epoch": 29.6875,
299
+ "grad_norm": 0.002076026052236557,
300
+ "learning_rate": 9.373259052924792e-06,
301
+ "loss": 0.0005,
302
  "step": 1900
303
  },
304
  {
305
  "epoch": 30.46875,
306
+ "grad_norm": 0.0024436817038804293,
307
+ "learning_rate": 9.338440111420614e-06,
308
+ "loss": 0.0004,
309
  "step": 1950
310
  },
311
  {
312
  "epoch": 31.25,
313
+ "grad_norm": 0.0018446892499923706,
314
+ "learning_rate": 9.303621169916436e-06,
315
+ "loss": 0.0004,
316
  "step": 2000
317
  },
318
  {
319
  "epoch": 31.25,
320
+ "eval_accuracy": 0.962890625,
321
+ "eval_loss": 0.1969820261001587,
322
+ "eval_runtime": 5.2387,
323
+ "eval_samples_per_second": 195.469,
324
+ "eval_steps_per_second": 24.434,
325
  "step": 2000
326
  },
327
  {
328
  "epoch": 32.03125,
329
+ "grad_norm": 0.0020159403793513775,
330
+ "learning_rate": 9.268802228412257e-06,
331
+ "loss": 0.0004,
332
  "step": 2050
333
  },
334
  {
335
  "epoch": 32.8125,
336
+ "grad_norm": 0.0019202978583052754,
337
+ "learning_rate": 9.23398328690808e-06,
338
+ "loss": 0.0004,
339
  "step": 2100
340
  },
341
  {
342
  "epoch": 33.59375,
343
+ "grad_norm": 0.0030681404750794172,
344
+ "learning_rate": 9.1991643454039e-06,
345
+ "loss": 0.0004,
346
  "step": 2150
347
  },
348
  {
349
  "epoch": 34.375,
350
+ "grad_norm": 0.0016341815935447812,
351
+ "learning_rate": 9.164345403899722e-06,
352
+ "loss": 0.0004,
353
  "step": 2200
354
  },
355
  {
356
  "epoch": 35.15625,
357
+ "grad_norm": 0.0016691142227500677,
358
+ "learning_rate": 9.129526462395544e-06,
359
+ "loss": 0.0003,
360
  "step": 2250
361
  },
362
  {
363
  "epoch": 35.9375,
364
+ "grad_norm": 0.0017921621911227703,
365
+ "learning_rate": 9.094707520891366e-06,
366
+ "loss": 0.0003,
367
  "step": 2300
368
  },
369
  {
370
  "epoch": 36.71875,
371
+ "grad_norm": 0.00160547427367419,
372
+ "learning_rate": 9.059888579387187e-06,
373
+ "loss": 0.0003,
374
  "step": 2350
375
  },
376
  {
377
  "epoch": 37.5,
378
+ "grad_norm": 0.0014217059360817075,
379
+ "learning_rate": 9.025069637883009e-06,
380
+ "loss": 0.0003,
381
  "step": 2400
382
  },
383
  {
384
  "epoch": 38.28125,
385
+ "grad_norm": 0.001448018359951675,
386
+ "learning_rate": 8.990250696378831e-06,
387
+ "loss": 0.0003,
388
  "step": 2450
389
  },
390
  {
391
  "epoch": 39.0625,
392
+ "grad_norm": 0.0017675248673185706,
393
+ "learning_rate": 8.955431754874653e-06,
394
+ "loss": 0.0003,
395
  "step": 2500
396
  },
397
  {
398
  "epoch": 39.0625,
399
+ "eval_accuracy": 0.962890625,
400
+ "eval_loss": 0.20403626561164856,
401
+ "eval_runtime": 5.1962,
402
+ "eval_samples_per_second": 197.067,
403
+ "eval_steps_per_second": 24.633,
404
  "step": 2500
405
  },
406
  {
407
  "epoch": 39.84375,
408
+ "grad_norm": 0.0017623680178076029,
409
+ "learning_rate": 8.920612813370474e-06,
410
+ "loss": 0.0003,
411
  "step": 2550
412
  },
413
  {
414
  "epoch": 40.625,
415
+ "grad_norm": 0.0011810092255473137,
416
+ "learning_rate": 8.885793871866296e-06,
417
+ "loss": 0.0003,
418
  "step": 2600
419
  },
420
  {
421
  "epoch": 41.40625,
422
+ "grad_norm": 0.001152553828433156,
423
+ "learning_rate": 8.850974930362117e-06,
424
+ "loss": 0.0003,
425
  "step": 2650
426
  },
427
  {
428
  "epoch": 42.1875,
429
+ "grad_norm": 0.0012170104309916496,
430
+ "learning_rate": 8.816155988857939e-06,
431
+ "loss": 0.0003,
432
  "step": 2700
433
  },
434
  {
435
  "epoch": 42.96875,
436
+ "grad_norm": 0.0010642099659889936,
437
+ "learning_rate": 8.781337047353761e-06,
438
+ "loss": 0.0002,
439
  "step": 2750
440
  },
441
  {
442
  "epoch": 43.75,
443
+ "grad_norm": 0.0010462955106049776,
444
+ "learning_rate": 8.746518105849583e-06,
445
+ "loss": 0.0002,
446
  "step": 2800
447
  },
448
  {
449
  "epoch": 44.53125,
450
+ "grad_norm": 0.0010893407743424177,
451
+ "learning_rate": 8.711699164345404e-06,
452
+ "loss": 0.0002,
453
  "step": 2850
454
  },
455
  {
456
  "epoch": 45.3125,
457
+ "grad_norm": 0.0010920371860265732,
458
+ "learning_rate": 8.676880222841226e-06,
459
+ "loss": 0.0002,
460
  "step": 2900
461
  },
462
  {
463
  "epoch": 46.09375,
464
+ "grad_norm": 0.0010040885536000133,
465
+ "learning_rate": 8.642061281337048e-06,
466
+ "loss": 0.0002,
467
  "step": 2950
468
  },
469
  {
470
  "epoch": 46.875,
471
+ "grad_norm": 0.0009422469302080572,
472
+ "learning_rate": 8.60724233983287e-06,
473
+ "loss": 0.0002,
474
  "step": 3000
475
  },
476
  {
477
  "epoch": 46.875,
478
+ "eval_accuracy": 0.962890625,
479
+ "eval_loss": 0.21138769388198853,
480
+ "eval_runtime": 5.8076,
481
+ "eval_samples_per_second": 176.32,
482
+ "eval_steps_per_second": 22.04,
483
  "step": 3000
484
  },
485
  {
486
  "epoch": 47.65625,
487
+ "grad_norm": 0.0011073002824559808,
488
+ "learning_rate": 8.572423398328693e-06,
489
+ "loss": 0.0002,
490
  "step": 3050
491
  },
492
  {
493
  "epoch": 48.4375,
494
+ "grad_norm": 0.000991741195321083,
495
+ "learning_rate": 8.537604456824513e-06,
496
+ "loss": 0.0002,
497
  "step": 3100
498
  },
499
  {
500
  "epoch": 49.21875,
501
+ "grad_norm": 0.0008712337585166097,
502
+ "learning_rate": 8.502785515320335e-06,
503
+ "loss": 0.0002,
504
  "step": 3150
505
  },
506
  {
507
  "epoch": 50.0,
508
+ "grad_norm": 0.0008826220873743296,
509
+ "learning_rate": 8.467966573816156e-06,
510
+ "loss": 0.0002,
511
  "step": 3200
512
  },
513
  {
514
  "epoch": 50.78125,
515
+ "grad_norm": 0.0009179635089822114,
516
+ "learning_rate": 8.433147632311978e-06,
517
+ "loss": 0.0002,
518
  "step": 3250
519
  },
520
  {
521
  "epoch": 51.5625,
522
+ "grad_norm": 0.0008320676279254258,
523
+ "learning_rate": 8.3983286908078e-06,
524
+ "loss": 0.0002,
525
  "step": 3300
526
  },
527
  {
528
  "epoch": 52.34375,
529
+ "grad_norm": 0.0007437244057655334,
530
+ "learning_rate": 8.363509749303623e-06,
531
+ "loss": 0.0002,
532
  "step": 3350
533
  },
534
  {
535
  "epoch": 53.125,
536
+ "grad_norm": 0.0007439731853082776,
537
+ "learning_rate": 8.328690807799443e-06,
538
+ "loss": 0.0002,
539
  "step": 3400
540
  },
541
  {
542
  "epoch": 53.90625,
543
+ "grad_norm": 0.0007023093639872968,
544
+ "learning_rate": 8.293871866295265e-06,
545
+ "loss": 0.0002,
546
  "step": 3450
547
  },
548
  {
549
  "epoch": 54.6875,
550
+ "grad_norm": 0.0011785700917243958,
551
+ "learning_rate": 8.259052924791087e-06,
552
+ "loss": 0.0002,
553
  "step": 3500
554
  },
555
  {
556
  "epoch": 54.6875,
557
+ "eval_accuracy": 0.96484375,
558
+ "eval_loss": 0.217063769698143,
559
+ "eval_runtime": 5.3451,
560
+ "eval_samples_per_second": 191.577,
561
+ "eval_steps_per_second": 23.947,
562
  "step": 3500
563
  },
564
  {
565
  "epoch": 55.46875,
566
+ "grad_norm": 0.0007988162687979639,
567
+ "learning_rate": 8.22423398328691e-06,
568
+ "loss": 0.0001,
569
  "step": 3550
570
  },
571
  {
572
  "epoch": 56.25,
573
+ "grad_norm": 0.0009737128275446594,
574
+ "learning_rate": 8.18941504178273e-06,
575
+ "loss": 0.0001,
576
  "step": 3600
577
  },
578
  {
579
  "epoch": 57.03125,
580
+ "grad_norm": 0.0006344786379486322,
581
+ "learning_rate": 8.154596100278552e-06,
582
+ "loss": 0.0001,
583
  "step": 3650
584
  },
585
  {
586
  "epoch": 57.8125,
587
+ "grad_norm": 0.0009238629718311131,
588
+ "learning_rate": 8.119777158774373e-06,
589
+ "loss": 0.0001,
590
  "step": 3700
591
  },
592
  {
593
  "epoch": 58.59375,
594
+ "grad_norm": 0.000863746739923954,
595
+ "learning_rate": 8.084958217270195e-06,
596
+ "loss": 0.0001,
597
  "step": 3750
598
  },
599
  {
600
  "epoch": 59.375,
601
+ "grad_norm": 0.0005797584308311343,
602
+ "learning_rate": 8.050139275766017e-06,
603
+ "loss": 0.0001,
604
  "step": 3800
605
  },
606
  {
607
  "epoch": 60.0,
608
  "step": 3840,
609
  "total_flos": 1.904477274611122e+19,
610
+ "train_loss": 0.0010260362852325974,
611
+ "train_runtime": 2806.5312,
612
+ "train_samples_per_second": 87.567,
613
+ "train_steps_per_second": 1.368
614
  }
615
  ],
616
  "logging_steps": 50,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84173542b25ab0ff865d0db2a4e5d9b4838d6312ff6d86d248cd2347a190daf4
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c6bd5896a6c68059187803c69a823434e3456afd1c4beb716bf45d591574b6c
3
  size 5112