abdulelahagr committed on
Commit
a07926d
1 Parent(s): f5076cb

initial commit

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.7081218274111675
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [google/vit-large-patch32-224-in21k](https://huggingface.co/google/vit-large-patch32-224-in21k) on the sartajbhuvaji/Brain-Tumor-Classification dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 1.0935
37
- - Accuracy: 0.7081
38
 
39
  ## Model description
40
 
@@ -66,13 +66,13 @@ The following hyperparameters were used during training:
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
68
  |:-------------:|:------:|:----:|:---------------:|:--------:|
69
- | 0.2144 | 0.5556 | 100 | 1.2679 | 0.6269 |
70
- | 0.1091 | 1.1111 | 200 | 1.0935 | 0.7081 |
71
- | 0.1078 | 1.6667 | 300 | 1.1237 | 0.7589 |
72
- | 0.016 | 2.2222 | 400 | 1.2356 | 0.7563 |
73
- | 0.0095 | 2.7778 | 500 | 1.2316 | 0.7589 |
74
- | 0.0066 | 3.3333 | 600 | 1.3165 | 0.7589 |
75
- | 0.0161 | 3.8889 | 700 | 1.3412 | 0.7614 |
76
 
77
 
78
  ### Framework versions
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.7741116751269036
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [google/vit-large-patch32-224-in21k](https://huggingface.co/google/vit-large-patch32-224-in21k) on the sartajbhuvaji/Brain-Tumor-Classification dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.9050
37
+ - Accuracy: 0.7741
38
 
39
  ## Model description
40
 
 
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
68
  |:-------------:|:------:|:----:|:---------------:|:--------:|
69
+ | 0.352 | 0.5556 | 100 | 1.2267 | 0.6294 |
70
+ | 0.1612 | 1.1111 | 200 | 1.0895 | 0.7538 |
71
+ | 0.0473 | 1.6667 | 300 | 0.9050 | 0.7741 |
72
+ | 0.0525 | 2.2222 | 400 | 1.0663 | 0.7690 |
73
+ | 0.0123 | 2.7778 | 500 | 1.2450 | 0.7462 |
74
+ | 0.0066 | 3.3333 | 600 | 1.1283 | 0.7817 |
75
+ | 0.0126 | 3.8889 | 700 | 1.1717 | 0.7843 |
76
 
77
 
78
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7081218274111675,
4
- "eval_loss": 1.0935020446777344,
5
- "eval_runtime": 7.0813,
6
- "eval_samples_per_second": 55.64,
7
- "eval_steps_per_second": 7.061,
8
  "total_flos": 3.16768696086528e+18,
9
- "train_loss": 0.15098576029348704,
10
- "train_runtime": 924.305,
11
- "train_samples_per_second": 12.42,
12
- "train_steps_per_second": 0.779
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7741116751269036,
4
+ "eval_loss": 0.9049533605575562,
5
+ "eval_runtime": 6.5562,
6
+ "eval_samples_per_second": 60.096,
7
+ "eval_steps_per_second": 7.626,
8
  "total_flos": 3.16768696086528e+18,
9
+ "train_loss": 0.16034429804939362,
10
+ "train_runtime": 1070.1524,
11
+ "train_samples_per_second": 10.727,
12
+ "train_steps_per_second": 0.673
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.7081218274111675,
4
- "eval_loss": 1.0935020446777344,
5
- "eval_runtime": 7.0813,
6
- "eval_samples_per_second": 55.64,
7
- "eval_steps_per_second": 7.061
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.7741116751269036,
4
+ "eval_loss": 0.9049533605575562,
5
+ "eval_runtime": 6.5562,
6
+ "eval_samples_per_second": 60.096,
7
+ "eval_steps_per_second": 7.626
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f20682313c689880b290f25df7f7285c02008199174ac41de0f2eff2748cc23
3
  size 1222104568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe26c731c183216b927e1ce0b6f662aab1cefb27ec9b8a9be42fd07a7c6eb23
3
  size 1222104568
runs/May01_18-09-22_e5fd9b370bfd/events.out.tfevents.1714586963.e5fd9b370bfd.8190.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf87b52d8a80ef0a15dba296f126c9dd2b3831e3da9ca245e22305477f7356a
3
+ size 4810
runs/May01_18-27-41_e5fd9b370bfd/events.out.tfevents.1714588064.e5fd9b370bfd.8190.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa0d6375f444aca8fb66acfe45a77dfcb2aad5239e72af858e31655f0d83a5a
3
+ size 8280
runs/May01_18-28-50_e5fd9b370bfd/events.out.tfevents.1714588131.e5fd9b370bfd.17713.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a630185358ab232804b9446b10a6182b03da53b403c346476bfdef71c754d8
3
+ size 22564
runs/May01_18-28-50_e5fd9b370bfd/events.out.tfevents.1714589250.e5fd9b370bfd.17713.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d9f02977269441d4a3178c448d5e1fc91fa22705f7b1199e2403267f68ab98
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 3.16768696086528e+18,
4
- "train_loss": 0.15098576029348704,
5
- "train_runtime": 924.305,
6
- "train_samples_per_second": 12.42,
7
- "train_steps_per_second": 0.779
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 3.16768696086528e+18,
4
+ "train_loss": 0.16034429804939362,
5
+ "train_runtime": 1070.1524,
6
+ "train_samples_per_second": 10.727,
7
+ "train_steps_per_second": 0.673
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 1.0935020446777344,
3
- "best_model_checkpoint": "./vit-large-brain-xray/checkpoint-200",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
  "global_step": 720,
@@ -10,579 +10,579 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.05555555555555555,
13
- "grad_norm": 1.083911657333374,
14
  "learning_rate": 0.00019722222222222225,
15
- "loss": 1.3206,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.1111111111111111,
20
- "grad_norm": 1.5310566425323486,
21
  "learning_rate": 0.00019444444444444446,
22
- "loss": 0.9361,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.16666666666666666,
27
- "grad_norm": 3.7486021518707275,
28
  "learning_rate": 0.00019166666666666667,
29
- "loss": 0.5999,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.2222222222222222,
34
- "grad_norm": 3.448930025100708,
35
  "learning_rate": 0.00018888888888888888,
36
- "loss": 0.5138,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.2777777777777778,
41
- "grad_norm": 1.6228857040405273,
42
  "learning_rate": 0.00018611111111111112,
43
- "loss": 0.4566,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.3333333333333333,
48
- "grad_norm": 2.215758800506592,
49
  "learning_rate": 0.00018333333333333334,
50
- "loss": 0.4068,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.3888888888888889,
55
- "grad_norm": 1.5400978326797485,
56
  "learning_rate": 0.00018055555555555557,
57
- "loss": 0.3305,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.4444444444444444,
62
- "grad_norm": 1.4063529968261719,
63
  "learning_rate": 0.00017777777777777779,
64
- "loss": 0.4062,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.5,
69
- "grad_norm": 0.7729310393333435,
70
- "learning_rate": 0.00017527777777777778,
71
- "loss": 0.2615,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.5555555555555556,
76
- "grad_norm": 6.676679611206055,
77
- "learning_rate": 0.00017250000000000002,
78
- "loss": 0.2144,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.5555555555555556,
83
- "eval_accuracy": 0.6269035532994924,
84
- "eval_loss": 1.2678815126419067,
85
- "eval_runtime": 7.3748,
86
- "eval_samples_per_second": 53.425,
87
- "eval_steps_per_second": 6.78,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.6111111111111112,
92
- "grad_norm": 1.6695863008499146,
93
- "learning_rate": 0.00016972222222222223,
94
- "loss": 0.3772,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.6666666666666666,
99
- "grad_norm": 0.457354336977005,
100
- "learning_rate": 0.00016694444444444447,
101
- "loss": 0.261,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 0.7222222222222222,
106
- "grad_norm": 6.663379192352295,
107
- "learning_rate": 0.00016416666666666668,
108
- "loss": 0.2832,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 0.7777777777777778,
113
- "grad_norm": 0.6463492512702942,
114
- "learning_rate": 0.0001613888888888889,
115
- "loss": 0.1613,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 0.8333333333333334,
120
- "grad_norm": 5.534128189086914,
121
- "learning_rate": 0.0001586111111111111,
122
- "loss": 0.452,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 0.8888888888888888,
127
- "grad_norm": 1.487303614616394,
128
- "learning_rate": 0.00015583333333333334,
129
- "loss": 0.3549,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 0.9444444444444444,
134
- "grad_norm": 8.732577323913574,
135
- "learning_rate": 0.00015305555555555556,
136
- "loss": 0.26,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.0,
141
- "grad_norm": 0.3070247173309326,
142
- "learning_rate": 0.0001502777777777778,
143
- "loss": 0.1787,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.0555555555555556,
148
- "grad_norm": 0.15718808770179749,
149
- "learning_rate": 0.0001475,
150
- "loss": 0.156,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.1111111111111112,
155
- "grad_norm": 1.4413491487503052,
156
- "learning_rate": 0.00014472222222222222,
157
- "loss": 0.1091,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.1111111111111112,
162
- "eval_accuracy": 0.7081218274111675,
163
- "eval_loss": 1.0935020446777344,
164
- "eval_runtime": 6.8075,
165
- "eval_samples_per_second": 57.877,
166
- "eval_steps_per_second": 7.345,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.1666666666666667,
171
- "grad_norm": 1.1086454391479492,
172
- "learning_rate": 0.00014194444444444446,
173
- "loss": 0.1243,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.2222222222222223,
178
- "grad_norm": 0.09483372420072556,
179
- "learning_rate": 0.00013916666666666667,
180
- "loss": 0.1712,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.2777777777777777,
185
- "grad_norm": 2.7763671875,
186
- "learning_rate": 0.0001363888888888889,
187
- "loss": 0.2156,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.3333333333333333,
192
- "grad_norm": 1.6603256464004517,
193
- "learning_rate": 0.00013361111111111112,
194
- "loss": 0.1186,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 1.3888888888888888,
199
- "grad_norm": 5.325634479522705,
200
- "learning_rate": 0.00013083333333333333,
201
- "loss": 0.1065,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 1.4444444444444444,
206
- "grad_norm": 0.07357333600521088,
207
- "learning_rate": 0.00012805555555555555,
208
- "loss": 0.1192,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 1.5,
213
- "grad_norm": 3.8914055824279785,
214
- "learning_rate": 0.00012527777777777778,
215
- "loss": 0.0622,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 1.5555555555555556,
220
- "grad_norm": 0.37973469495773315,
221
- "learning_rate": 0.00012250000000000002,
222
- "loss": 0.2217,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 1.6111111111111112,
227
- "grad_norm": 0.08939097821712494,
228
- "learning_rate": 0.00011972222222222222,
229
- "loss": 0.1868,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 1.6666666666666665,
234
- "grad_norm": 0.29244643449783325,
235
- "learning_rate": 0.00011694444444444446,
236
- "loss": 0.1078,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 1.6666666666666665,
241
- "eval_accuracy": 0.7588832487309645,
242
- "eval_loss": 1.1237390041351318,
243
- "eval_runtime": 6.6691,
244
- "eval_samples_per_second": 59.078,
245
- "eval_steps_per_second": 7.497,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 1.7222222222222223,
250
- "grad_norm": 2.0514800548553467,
251
- "learning_rate": 0.00011416666666666667,
252
- "loss": 0.0995,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 1.7777777777777777,
257
- "grad_norm": 0.3739979863166809,
258
- "learning_rate": 0.0001113888888888889,
259
- "loss": 0.0752,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 1.8333333333333335,
264
- "grad_norm": 0.07262101769447327,
265
- "learning_rate": 0.00010861111111111111,
266
- "loss": 0.1004,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 1.8888888888888888,
271
- "grad_norm": 1.6550071239471436,
272
- "learning_rate": 0.00010583333333333334,
273
- "loss": 0.0593,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 1.9444444444444444,
278
- "grad_norm": 1.8396694660186768,
279
- "learning_rate": 0.00010305555555555555,
280
- "loss": 0.1425,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.0,
285
- "grad_norm": 0.061273444443941116,
286
- "learning_rate": 0.00010027777777777779,
287
- "loss": 0.0944,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.0555555555555554,
292
- "grad_norm": 1.0921915769577026,
293
- "learning_rate": 9.75e-05,
294
- "loss": 0.118,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 2.111111111111111,
299
- "grad_norm": 0.16066594421863556,
300
- "learning_rate": 9.472222222222222e-05,
301
- "loss": 0.0479,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 2.1666666666666665,
306
- "grad_norm": 0.06149543076753616,
307
- "learning_rate": 9.194444444444445e-05,
308
- "loss": 0.0161,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 2.2222222222222223,
313
- "grad_norm": 0.10219839215278625,
314
- "learning_rate": 8.916666666666667e-05,
315
- "loss": 0.016,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 2.2222222222222223,
320
- "eval_accuracy": 0.7563451776649747,
321
- "eval_loss": 1.235643982887268,
322
- "eval_runtime": 6.0001,
323
- "eval_samples_per_second": 65.666,
324
- "eval_steps_per_second": 8.333,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 2.2777777777777777,
329
- "grad_norm": 0.36931440234184265,
330
- "learning_rate": 8.63888888888889e-05,
331
- "loss": 0.0674,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 2.3333333333333335,
336
- "grad_norm": 0.05050282925367355,
337
- "learning_rate": 8.361111111111111e-05,
338
- "loss": 0.0404,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 2.388888888888889,
343
- "grad_norm": 0.0446784570813179,
344
- "learning_rate": 8.083333333333334e-05,
345
- "loss": 0.0205,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 2.4444444444444446,
350
- "grad_norm": 0.04102100059390068,
351
- "learning_rate": 7.805555555555556e-05,
352
- "loss": 0.0207,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 2.5,
357
- "grad_norm": 4.75399112701416,
358
- "learning_rate": 7.527777777777777e-05,
359
- "loss": 0.0526,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 2.5555555555555554,
364
- "grad_norm": 0.046476561576128006,
365
- "learning_rate": 7.25e-05,
366
- "loss": 0.0461,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 2.611111111111111,
371
- "grad_norm": 5.1403632164001465,
372
- "learning_rate": 6.972222222222223e-05,
373
- "loss": 0.0424,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 2.6666666666666665,
378
- "grad_norm": 0.04179733246564865,
379
- "learning_rate": 6.694444444444444e-05,
380
- "loss": 0.0831,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 2.7222222222222223,
385
- "grad_norm": 0.0394105389714241,
386
- "learning_rate": 6.416666666666668e-05,
387
- "loss": 0.0102,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 2.7777777777777777,
392
- "grad_norm": 0.039878394454717636,
393
- "learning_rate": 6.13888888888889e-05,
394
- "loss": 0.0095,
395
  "step": 500
396
  },
397
  {
398
  "epoch": 2.7777777777777777,
399
- "eval_accuracy": 0.7588832487309645,
400
- "eval_loss": 1.2315524816513062,
401
- "eval_runtime": 6.6635,
402
- "eval_samples_per_second": 59.128,
403
- "eval_steps_per_second": 7.504,
404
  "step": 500
405
  },
406
  {
407
  "epoch": 2.8333333333333335,
408
- "grad_norm": 3.2955222129821777,
409
- "learning_rate": 5.8611111111111114e-05,
410
- "loss": 0.0223,
411
  "step": 510
412
  },
413
  {
414
  "epoch": 2.888888888888889,
415
- "grad_norm": 0.03517436608672142,
416
- "learning_rate": 5.583333333333334e-05,
417
- "loss": 0.0083,
418
  "step": 520
419
  },
420
  {
421
  "epoch": 2.9444444444444446,
422
- "grad_norm": 0.03378378599882126,
423
- "learning_rate": 5.305555555555556e-05,
424
- "loss": 0.017,
425
  "step": 530
426
  },
427
  {
428
  "epoch": 3.0,
429
- "grad_norm": 0.029930729418992996,
430
- "learning_rate": 5.027777777777778e-05,
431
- "loss": 0.0076,
432
  "step": 540
433
  },
434
  {
435
  "epoch": 3.0555555555555554,
436
- "grad_norm": 0.030685801059007645,
437
- "learning_rate": 4.75e-05,
438
- "loss": 0.0082,
439
  "step": 550
440
  },
441
  {
442
  "epoch": 3.111111111111111,
443
- "grad_norm": 0.029904644936323166,
444
- "learning_rate": 4.472222222222223e-05,
445
- "loss": 0.0073,
446
  "step": 560
447
  },
448
  {
449
  "epoch": 3.1666666666666665,
450
- "grad_norm": 0.02878139540553093,
451
- "learning_rate": 4.194444444444445e-05,
452
- "loss": 0.0069,
453
  "step": 570
454
  },
455
  {
456
  "epoch": 3.2222222222222223,
457
- "grad_norm": 0.028465483337640762,
458
- "learning_rate": 3.9166666666666665e-05,
459
- "loss": 0.0067,
460
  "step": 580
461
  },
462
  {
463
  "epoch": 3.2777777777777777,
464
- "grad_norm": 0.026676874607801437,
465
- "learning_rate": 3.638888888888889e-05,
466
- "loss": 0.0067,
467
  "step": 590
468
  },
469
  {
470
  "epoch": 3.3333333333333335,
471
- "grad_norm": 0.028720058500766754,
472
- "learning_rate": 3.3611111111111116e-05,
473
  "loss": 0.0066,
474
  "step": 600
475
  },
476
  {
477
  "epoch": 3.3333333333333335,
478
- "eval_accuracy": 0.7588832487309645,
479
- "eval_loss": 1.3164998292922974,
480
- "eval_runtime": 6.9479,
481
- "eval_samples_per_second": 56.707,
482
- "eval_steps_per_second": 7.196,
483
  "step": 600
484
  },
485
  {
486
  "epoch": 3.388888888888889,
487
- "grad_norm": 0.030909936875104904,
488
- "learning_rate": 3.0833333333333335e-05,
489
- "loss": 0.0066,
490
  "step": 610
491
  },
492
  {
493
  "epoch": 3.4444444444444446,
494
- "grad_norm": 0.02741164341568947,
495
- "learning_rate": 2.8055555555555557e-05,
496
- "loss": 0.0429,
497
  "step": 620
498
  },
499
  {
500
  "epoch": 3.5,
501
- "grad_norm": 0.028826788067817688,
502
- "learning_rate": 2.527777777777778e-05,
503
- "loss": 0.0079,
504
  "step": 630
505
  },
506
  {
507
  "epoch": 3.5555555555555554,
508
- "grad_norm": 0.02735409140586853,
509
- "learning_rate": 2.25e-05,
510
- "loss": 0.0066,
511
  "step": 640
512
  },
513
  {
514
  "epoch": 3.611111111111111,
515
- "grad_norm": 0.10283248126506805,
516
- "learning_rate": 1.9722222222222224e-05,
517
- "loss": 0.0159,
518
  "step": 650
519
  },
520
  {
521
  "epoch": 3.6666666666666665,
522
- "grad_norm": 0.028924111276865005,
523
- "learning_rate": 1.6944444444444446e-05,
524
- "loss": 0.0064,
525
  "step": 660
526
  },
527
  {
528
  "epoch": 3.7222222222222223,
529
- "grad_norm": 0.025118501856923103,
530
- "learning_rate": 1.4166666666666668e-05,
531
  "loss": 0.0062,
532
  "step": 670
533
  },
534
  {
535
  "epoch": 3.7777777777777777,
536
- "grad_norm": 0.05767374858260155,
537
- "learning_rate": 1.138888888888889e-05,
538
- "loss": 0.0065,
539
  "step": 680
540
  },
541
  {
542
  "epoch": 3.8333333333333335,
543
- "grad_norm": 0.0323575958609581,
544
- "learning_rate": 8.611111111111112e-06,
545
- "loss": 0.0064,
546
  "step": 690
547
  },
548
  {
549
  "epoch": 3.888888888888889,
550
- "grad_norm": 0.026371095329523087,
551
- "learning_rate": 5.833333333333334e-06,
552
- "loss": 0.0161,
553
  "step": 700
554
  },
555
  {
556
  "epoch": 3.888888888888889,
557
- "eval_accuracy": 0.7614213197969543,
558
- "eval_loss": 1.341164469718933,
559
- "eval_runtime": 5.8873,
560
- "eval_samples_per_second": 66.924,
561
- "eval_steps_per_second": 8.493,
562
  "step": 700
563
  },
564
  {
565
  "epoch": 3.9444444444444446,
566
- "grad_norm": 0.025476330891251564,
567
- "learning_rate": 3.0555555555555556e-06,
568
- "loss": 0.0062,
569
  "step": 710
570
  },
571
  {
572
  "epoch": 4.0,
573
- "grad_norm": 0.041727062314748764,
574
- "learning_rate": 2.777777777777778e-07,
575
- "loss": 0.0097,
576
  "step": 720
577
  },
578
  {
579
  "epoch": 4.0,
580
  "step": 720,
581
  "total_flos": 3.16768696086528e+18,
582
- "train_loss": 0.15098576029348704,
583
- "train_runtime": 924.305,
584
- "train_samples_per_second": 12.42,
585
- "train_steps_per_second": 0.779
586
  }
587
  ],
588
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9049533605575562,
3
+ "best_model_checkpoint": "./vit-large-brain-xray/checkpoint-300",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
  "global_step": 720,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.05555555555555555,
13
+ "grad_norm": 0.6702606678009033,
14
  "learning_rate": 0.00019722222222222225,
15
+ "loss": 1.3662,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.1111111111111111,
20
+ "grad_norm": 1.5160739421844482,
21
  "learning_rate": 0.00019444444444444446,
22
+ "loss": 1.2341,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.16666666666666666,
27
+ "grad_norm": 3.668222665786743,
28
  "learning_rate": 0.00019166666666666667,
29
+ "loss": 0.9384,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.2222222222222222,
34
+ "grad_norm": 2.245746612548828,
35
  "learning_rate": 0.00018888888888888888,
36
+ "loss": 0.6538,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.2777777777777778,
41
+ "grad_norm": 2.4727349281311035,
42
  "learning_rate": 0.00018611111111111112,
43
+ "loss": 0.4997,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.3333333333333333,
48
+ "grad_norm": 3.7319023609161377,
49
  "learning_rate": 0.00018333333333333334,
50
+ "loss": 0.3898,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.3888888888888889,
55
+ "grad_norm": 0.6027088165283203,
56
  "learning_rate": 0.00018055555555555557,
57
+ "loss": 0.3304,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.4444444444444444,
62
+ "grad_norm": 0.37244492769241333,
63
  "learning_rate": 0.00017777777777777779,
64
+ "loss": 0.2543,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.5,
69
+ "grad_norm": 1.6335736513137817,
70
+ "learning_rate": 0.000175,
71
+ "loss": 0.3538,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.5555555555555556,
76
+ "grad_norm": 1.6272715330123901,
77
+ "learning_rate": 0.00017222222222222224,
78
+ "loss": 0.352,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.5555555555555556,
83
+ "eval_accuracy": 0.6294416243654822,
84
+ "eval_loss": 1.2266901731491089,
85
+ "eval_runtime": 8.2003,
86
+ "eval_samples_per_second": 48.047,
87
+ "eval_steps_per_second": 6.097,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.6111111111111112,
92
+ "grad_norm": 1.5688170194625854,
93
+ "learning_rate": 0.00016944444444444445,
94
+ "loss": 0.3773,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.6666666666666666,
99
+ "grad_norm": 0.8265367150306702,
100
+ "learning_rate": 0.0001666666666666667,
101
+ "loss": 0.226,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 0.7222222222222222,
106
+ "grad_norm": 2.6476309299468994,
107
+ "learning_rate": 0.0001638888888888889,
108
+ "loss": 0.3515,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 0.7777777777777778,
113
+ "grad_norm": 0.8978700637817383,
114
+ "learning_rate": 0.0001611111111111111,
115
+ "loss": 0.2367,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 0.8333333333333334,
120
+ "grad_norm": 1.1820647716522217,
121
+ "learning_rate": 0.00015833333333333332,
122
+ "loss": 0.3112,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 0.8888888888888888,
127
+ "grad_norm": 2.701751708984375,
128
+ "learning_rate": 0.00015555555555555556,
129
+ "loss": 0.2741,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 0.9444444444444444,
134
+ "grad_norm": 1.249694585800171,
135
+ "learning_rate": 0.00015277777777777777,
136
+ "loss": 0.2529,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.0,
141
+ "grad_norm": 0.23323917388916016,
142
+ "learning_rate": 0.00015000000000000001,
143
+ "loss": 0.2239,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.0555555555555556,
148
+ "grad_norm": 0.4952305853366852,
149
+ "learning_rate": 0.00014722222222222223,
150
+ "loss": 0.1749,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.1111111111111112,
155
+ "grad_norm": 0.2073395550251007,
156
+ "learning_rate": 0.00014444444444444444,
157
+ "loss": 0.1612,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.1111111111111112,
162
+ "eval_accuracy": 0.7538071065989848,
163
+ "eval_loss": 1.0894657373428345,
164
+ "eval_runtime": 7.609,
165
+ "eval_samples_per_second": 51.781,
166
+ "eval_steps_per_second": 6.571,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.1666666666666667,
171
+ "grad_norm": 0.16568297147750854,
172
+ "learning_rate": 0.00014166666666666668,
173
+ "loss": 0.0718,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.2222222222222223,
178
+ "grad_norm": 0.0984596461057663,
179
+ "learning_rate": 0.0001388888888888889,
180
+ "loss": 0.1395,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.2777777777777777,
185
+ "grad_norm": 0.6732985973358154,
186
+ "learning_rate": 0.00013611111111111113,
187
+ "loss": 0.1421,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.3333333333333333,
192
+ "grad_norm": 2.5877740383148193,
193
+ "learning_rate": 0.00013333333333333334,
194
+ "loss": 0.1118,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 1.3888888888888888,
199
+ "grad_norm": 3.4756956100463867,
200
+ "learning_rate": 0.00013055555555555555,
201
+ "loss": 0.1562,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 1.4444444444444444,
206
+ "grad_norm": 0.09811172634363174,
207
+ "learning_rate": 0.00012777777777777776,
208
+ "loss": 0.1463,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 1.5,
213
+ "grad_norm": 3.1999194622039795,
214
+ "learning_rate": 0.000125,
215
+ "loss": 0.1179,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 1.5555555555555556,
220
+ "grad_norm": 0.45012134313583374,
221
+ "learning_rate": 0.00012222222222222224,
222
+ "loss": 0.0929,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 1.6111111111111112,
227
+ "grad_norm": 1.862654209136963,
228
+ "learning_rate": 0.00011944444444444445,
229
+ "loss": 0.186,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 1.6666666666666665,
234
+ "grad_norm": 1.39608895778656,
235
+ "learning_rate": 0.00011666666666666668,
236
+ "loss": 0.0473,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 1.6666666666666665,
241
+ "eval_accuracy": 0.7741116751269036,
242
+ "eval_loss": 0.9049533605575562,
243
+ "eval_runtime": 6.1753,
244
+ "eval_samples_per_second": 63.803,
245
+ "eval_steps_per_second": 8.097,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 1.7222222222222223,
250
+ "grad_norm": 0.0962180569767952,
251
+ "learning_rate": 0.00011388888888888889,
252
+ "loss": 0.0858,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 1.7777777777777777,
257
+ "grad_norm": 0.08585009723901749,
258
+ "learning_rate": 0.00011111111111111112,
259
+ "loss": 0.0519,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 1.8333333333333335,
264
+ "grad_norm": 0.06303343176841736,
265
+ "learning_rate": 0.00010833333333333333,
266
+ "loss": 0.0221,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 1.8888888888888888,
271
+ "grad_norm": 5.68204402923584,
272
+ "learning_rate": 0.00010555555555555557,
273
+ "loss": 0.0589,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 1.9444444444444444,
278
+ "grad_norm": 5.385427474975586,
279
+ "learning_rate": 0.00010277777777777778,
280
+ "loss": 0.1881,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.0,
285
+ "grad_norm": 3.201244831085205,
286
+ "learning_rate": 0.0001,
287
+ "loss": 0.0758,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.0555555555555554,
292
+ "grad_norm": 1.2587229013442993,
293
+ "learning_rate": 9.722222222222223e-05,
294
+ "loss": 0.0461,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 2.111111111111111,
299
+ "grad_norm": 0.07729563117027283,
300
+ "learning_rate": 9.444444444444444e-05,
301
+ "loss": 0.0155,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 2.1666666666666665,
306
+ "grad_norm": 5.077848434448242,
307
+ "learning_rate": 9.166666666666667e-05,
308
+ "loss": 0.0724,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 2.2222222222222223,
313
+ "grad_norm": 0.05171338841319084,
314
+ "learning_rate": 8.888888888888889e-05,
315
+ "loss": 0.0525,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 2.2222222222222223,
320
+ "eval_accuracy": 0.7690355329949239,
321
+ "eval_loss": 1.0663037300109863,
322
+ "eval_runtime": 6.6314,
323
+ "eval_samples_per_second": 59.415,
324
+ "eval_steps_per_second": 7.54,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 2.2777777777777777,
329
+ "grad_norm": 0.056168586015701294,
330
+ "learning_rate": 8.611111111111112e-05,
331
+ "loss": 0.1214,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 2.3333333333333335,
336
+ "grad_norm": 0.2696777582168579,
337
+ "learning_rate": 8.333333333333334e-05,
338
+ "loss": 0.0148,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 2.388888888888889,
343
+ "grad_norm": 0.045138537883758545,
344
+ "learning_rate": 8.055555555555556e-05,
345
+ "loss": 0.0175,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 2.4444444444444446,
350
+ "grad_norm": 0.03756405785679817,
351
+ "learning_rate": 7.777777777777778e-05,
352
+ "loss": 0.0264,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 2.5,
357
+ "grad_norm": 0.17634020745754242,
358
+ "learning_rate": 7.500000000000001e-05,
359
+ "loss": 0.0101,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 2.5555555555555554,
364
+ "grad_norm": 0.037890926003456116,
365
+ "learning_rate": 7.222222222222222e-05,
366
+ "loss": 0.0484,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 2.611111111111111,
371
+ "grad_norm": 4.751524448394775,
372
+ "learning_rate": 6.944444444444444e-05,
373
+ "loss": 0.0525,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 2.6666666666666665,
378
+ "grad_norm": 0.15853020548820496,
379
+ "learning_rate": 6.666666666666667e-05,
380
+ "loss": 0.0719,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 2.7222222222222223,
385
+ "grad_norm": 0.039081115275621414,
386
+ "learning_rate": 6.388888888888888e-05,
387
+ "loss": 0.0085,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 2.7777777777777777,
392
+ "grad_norm": 0.4480770230293274,
393
+ "learning_rate": 6.111111111111112e-05,
394
+ "loss": 0.0123,
395
  "step": 500
396
  },
397
  {
398
  "epoch": 2.7777777777777777,
399
+ "eval_accuracy": 0.7461928934010152,
400
+ "eval_loss": 1.2449774742126465,
401
+ "eval_runtime": 5.9167,
402
+ "eval_samples_per_second": 66.591,
403
+ "eval_steps_per_second": 8.451,
404
  "step": 500
405
  },
406
  {
407
  "epoch": 2.8333333333333335,
408
+ "grad_norm": 7.5741801261901855,
409
+ "learning_rate": 5.833333333333334e-05,
410
+ "loss": 0.0278,
411
  "step": 510
412
  },
413
  {
414
  "epoch": 2.888888888888889,
415
+ "grad_norm": 0.1201184019446373,
416
+ "learning_rate": 5.555555555555556e-05,
417
+ "loss": 0.0091,
418
  "step": 520
419
  },
420
  {
421
  "epoch": 2.9444444444444446,
422
+ "grad_norm": 0.032710809260606766,
423
+ "learning_rate": 5.2777777777777784e-05,
424
+ "loss": 0.0077,
425
  "step": 530
426
  },
427
  {
428
  "epoch": 3.0,
429
+ "grad_norm": 0.03236711025238037,
430
+ "learning_rate": 5e-05,
431
+ "loss": 0.0557,
432
  "step": 540
433
  },
434
  {
435
  "epoch": 3.0555555555555554,
436
+ "grad_norm": 0.034722838550806046,
437
+ "learning_rate": 4.722222222222222e-05,
438
+ "loss": 0.0191,
439
  "step": 550
440
  },
441
  {
442
  "epoch": 3.111111111111111,
443
+ "grad_norm": 4.018179416656494,
444
+ "learning_rate": 4.4444444444444447e-05,
445
+ "loss": 0.0176,
446
  "step": 560
447
  },
448
  {
449
  "epoch": 3.1666666666666665,
450
+ "grad_norm": 0.5732712745666504,
451
+ "learning_rate": 4.166666666666667e-05,
452
+ "loss": 0.0087,
453
  "step": 570
454
  },
455
  {
456
  "epoch": 3.2222222222222223,
457
+ "grad_norm": 0.027404414489865303,
458
+ "learning_rate": 3.888888888888889e-05,
459
+ "loss": 0.0079,
460
  "step": 580
461
  },
462
  {
463
  "epoch": 3.2777777777777777,
464
+ "grad_norm": 0.02965979278087616,
465
+ "learning_rate": 3.611111111111111e-05,
466
+ "loss": 0.0068,
467
  "step": 590
468
  },
469
  {
470
  "epoch": 3.3333333333333335,
471
+ "grad_norm": 0.026871565729379654,
472
+ "learning_rate": 3.3333333333333335e-05,
473
  "loss": 0.0066,
474
  "step": 600
475
  },
476
  {
477
  "epoch": 3.3333333333333335,
478
+ "eval_accuracy": 0.7817258883248731,
479
+ "eval_loss": 1.1282514333724976,
480
+ "eval_runtime": 6.7045,
481
+ "eval_samples_per_second": 58.767,
482
+ "eval_steps_per_second": 7.458,
483
  "step": 600
484
  },
485
  {
486
  "epoch": 3.388888888888889,
487
+ "grad_norm": 0.03278065472841263,
488
+ "learning_rate": 3.055555555555556e-05,
489
+ "loss": 0.0086,
490
  "step": 610
491
  },
492
  {
493
  "epoch": 3.4444444444444446,
494
+ "grad_norm": 0.07111264020204544,
495
+ "learning_rate": 2.777777777777778e-05,
496
+ "loss": 0.0295,
497
  "step": 620
498
  },
499
  {
500
  "epoch": 3.5,
501
+ "grad_norm": 0.028257286176085472,
502
+ "learning_rate": 2.5e-05,
503
+ "loss": 0.0065,
504
  "step": 630
505
  },
506
  {
507
  "epoch": 3.5555555555555554,
508
+ "grad_norm": 0.02719848044216633,
509
+ "learning_rate": 2.2222222222222223e-05,
510
+ "loss": 0.0418,
511
  "step": 640
512
  },
513
  {
514
  "epoch": 3.611111111111111,
515
+ "grad_norm": 0.026137089356780052,
516
+ "learning_rate": 1.9444444444444445e-05,
517
+ "loss": 0.0073,
518
  "step": 650
519
  },
520
  {
521
  "epoch": 3.6666666666666665,
522
+ "grad_norm": 0.030431417748332024,
523
+ "learning_rate": 1.6666666666666667e-05,
524
+ "loss": 0.0101,
525
  "step": 660
526
  },
527
  {
528
  "epoch": 3.7222222222222223,
529
+ "grad_norm": 0.025364473462104797,
530
+ "learning_rate": 1.388888888888889e-05,
531
  "loss": 0.0062,
532
  "step": 670
533
  },
534
  {
535
  "epoch": 3.7777777777777777,
536
+ "grad_norm": 0.02630157209932804,
537
+ "learning_rate": 1.1111111111111112e-05,
538
+ "loss": 0.0076,
539
  "step": 680
540
  },
541
  {
542
  "epoch": 3.8333333333333335,
543
+ "grad_norm": 0.025917503982782364,
544
+ "learning_rate": 8.333333333333334e-06,
545
+ "loss": 0.0062,
546
  "step": 690
547
  },
548
  {
549
  "epoch": 3.888888888888889,
550
+ "grad_norm": 6.676637649536133,
551
+ "learning_rate": 5.555555555555556e-06,
552
+ "loss": 0.0126,
553
  "step": 700
554
  },
555
  {
556
  "epoch": 3.888888888888889,
557
+ "eval_accuracy": 0.7842639593908629,
558
+ "eval_loss": 1.1716859340667725,
559
+ "eval_runtime": 6.745,
560
+ "eval_samples_per_second": 58.414,
561
+ "eval_steps_per_second": 7.413,
562
  "step": 700
563
  },
564
  {
565
  "epoch": 3.9444444444444446,
566
+ "grad_norm": 0.025717712938785553,
567
+ "learning_rate": 2.777777777777778e-06,
568
+ "loss": 0.008,
569
  "step": 710
570
  },
571
  {
572
  "epoch": 4.0,
573
+ "grad_norm": 0.07326529920101166,
574
+ "learning_rate": 0.0,
575
+ "loss": 0.0065,
576
  "step": 720
577
  },
578
  {
579
  "epoch": 4.0,
580
  "step": 720,
581
  "total_flos": 3.16768696086528e+18,
582
+ "train_loss": 0.16034429804939362,
583
+ "train_runtime": 1070.1524,
584
+ "train_samples_per_second": 10.727,
585
+ "train_steps_per_second": 0.673
586
  }
587
  ],
588
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e70ceac62f8179510d4ddd273ceca649f5752a19a00c2cbb59c7f9eabae6d13
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce6746ccd146e07105259c2e2e4af9d52f0db85b3fc814d35a71a4f602c62a9
3
  size 4984