JorgeGIT committed on
Commit
b8c1436
1 Parent(s): b9c0f1c

Model save

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: microsoft/beit-base-patch16-224-pt22k
4
  tags:
5
  - generated_from_trainer
6
  datasets:
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.9624060150375939
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # finetuned-Leukemia-cell
32
 
33
- This model is a fine-tuned version of [microsoft/beit-base-patch16-224-pt22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0946
36
- - Accuracy: 0.9624
37
 
38
  ## Model description
39
 
@@ -65,12 +65,12 @@ The following hyperparameters were used during training:
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
- | 0.9733 | 2.94 | 100 | 0.8894 | 0.7256 |
69
- | 0.7184 | 5.88 | 200 | 0.7876 | 0.7293 |
70
- | 0.5299 | 8.82 | 300 | 0.5183 | 0.8609 |
71
- | 0.3991 | 11.76 | 400 | 0.3121 | 0.8947 |
72
- | 0.2263 | 14.71 | 500 | 0.1337 | 0.9549 |
73
- | 0.1782 | 17.65 | 600 | 0.0946 | 0.9624 |
74
 
75
 
76
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: facebook/convnext-tiny-224
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.981203007518797
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # finetuned-Leukemia-cell
32
 
33
+ This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.0595
36
+ - Accuracy: 0.9812
37
 
38
  ## Model description
39
 
 
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | 0.2926 | 2.94 | 100 | 0.1524 | 0.9436 |
69
+ | 0.1844 | 5.88 | 200 | 0.1718 | 0.9436 |
70
+ | 0.1189 | 8.82 | 300 | 0.0886 | 0.9662 |
71
+ | 0.0682 | 11.76 | 400 | 0.0978 | 0.9699 |
72
+ | 0.0439 | 14.71 | 500 | 0.0812 | 0.9737 |
73
+ | 0.0544 | 17.65 | 600 | 0.0595 | 0.9812 |
74
 
75
 
76
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9661654135338346,
4
- "eval_loss": 0.09556835144758224,
5
- "eval_runtime": 3.1465,
6
- "eval_samples_per_second": 84.539,
7
- "eval_steps_per_second": 10.806,
8
- "total_flos": 1.646033084688384e+18,
9
- "train_loss": 0.19354178275474732,
10
- "train_runtime": 444.6785,
11
- "train_samples_per_second": 47.765,
12
- "train_steps_per_second": 1.529
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9624060150375939,
4
+ "eval_loss": 0.0946127399802208,
5
+ "eval_runtime": 2.633,
6
+ "eval_samples_per_second": 101.024,
7
+ "eval_steps_per_second": 12.913,
8
+ "total_flos": 1.6434612963230515e+18,
9
+ "train_loss": 0.5515072485103326,
10
+ "train_runtime": 465.2023,
11
+ "train_samples_per_second": 45.658,
12
+ "train_steps_per_second": 1.462
13
  }
config.json CHANGED
@@ -1,17 +1,22 @@
1
  {
2
- "_name_or_path": "microsoft/beit-base-patch16-224-pt22k",
3
  "architectures": [
4
- "BeitForImageClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.0,
7
- "auxiliary_channels": 256,
8
- "auxiliary_concat_input": false,
9
- "auxiliary_loss_weight": 0.4,
10
- "auxiliary_num_convs": 1,
11
- "drop_path_rate": 0.1,
 
12
  "hidden_act": "gelu",
13
- "hidden_dropout_prob": 0.0,
14
- "hidden_size": 768,
 
 
 
 
15
  "id2label": {
16
  "0": "LLA",
17
  "1": "folicular",
@@ -23,7 +28,6 @@
23
  },
24
  "image_size": 224,
25
  "initializer_range": 0.02,
26
- "intermediate_size": 3072,
27
  "label2id": {
28
  "LLA": "0",
29
  "folicular": "1",
@@ -34,33 +38,25 @@
34
  "trico": "6"
35
  },
36
  "layer_norm_eps": 1e-12,
37
- "layer_scale_init_value": 0.1,
38
- "model_type": "beit",
39
- "num_attention_heads": 12,
40
  "num_channels": 3,
41
- "num_hidden_layers": 12,
42
- "out_indices": [
43
- 3,
44
- 5,
45
- 7,
46
- 11
47
  ],
48
- "patch_size": 16,
49
- "pool_scales": [
50
- 1,
51
- 2,
52
- 3,
53
- 6
54
  ],
 
55
  "problem_type": "single_label_classification",
56
- "semantic_loss_ignore_index": 255,
 
 
 
 
 
 
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.35.2",
59
- "use_absolute_position_embeddings": false,
60
- "use_auxiliary_head": true,
61
- "use_mask_token": true,
62
- "use_mean_pooling": true,
63
- "use_relative_position_bias": false,
64
- "use_shared_relative_position_bias": true,
65
- "vocab_size": 8192
66
  }
 
1
  {
2
+ "_name_or_path": "facebook/convnext-tiny-224",
3
  "architectures": [
4
+ "ConvNextForImageClassification"
5
  ],
6
+ "depths": [
7
+ 3,
8
+ 3,
9
+ 9,
10
+ 3
11
+ ],
12
+ "drop_path_rate": 0.0,
13
  "hidden_act": "gelu",
14
+ "hidden_sizes": [
15
+ 96,
16
+ 192,
17
+ 384,
18
+ 768
19
+ ],
20
  "id2label": {
21
  "0": "LLA",
22
  "1": "folicular",
 
28
  },
29
  "image_size": 224,
30
  "initializer_range": 0.02,
 
31
  "label2id": {
32
  "LLA": "0",
33
  "folicular": "1",
 
38
  "trico": "6"
39
  },
40
  "layer_norm_eps": 1e-12,
41
+ "layer_scale_init_value": 1e-06,
42
+ "model_type": "convnext",
 
43
  "num_channels": 3,
44
+ "num_stages": 4,
45
+ "out_features": [
46
+ "stage4"
 
 
 
47
  ],
48
+ "out_indices": [
49
+ 4
 
 
 
 
50
  ],
51
+ "patch_size": 4,
52
  "problem_type": "single_label_classification",
53
+ "stage_names": [
54
+ "stem",
55
+ "stage1",
56
+ "stage2",
57
+ "stage3",
58
+ "stage4"
59
+ ],
60
  "torch_dtype": "float32",
61
+ "transformers_version": "4.35.2"
 
 
 
 
 
 
 
62
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9661654135338346,
4
- "eval_loss": 0.09556835144758224,
5
- "eval_runtime": 3.1465,
6
- "eval_samples_per_second": 84.539,
7
- "eval_steps_per_second": 10.806
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9624060150375939,
4
+ "eval_loss": 0.0946127399802208,
5
+ "eval_runtime": 2.633,
6
+ "eval_samples_per_second": 101.024,
7
+ "eval_steps_per_second": 12.913
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae4a6b3a2562aa63e1abd081ac90bbba7f0bef4b41d3240d4d8e328a08ced51d
3
- size 342710540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa14359c275d65dc97a7f3b269406a2a20534033e6cd3c3319d98d6a78eba7f
3
+ size 111323316
preprocessor_config.json CHANGED
@@ -1,29 +1,23 @@
1
  {
2
- "crop_size": {
3
- "height": 224,
4
- "width": 224
5
- },
6
- "do_center_crop": false,
7
  "do_normalize": true,
8
- "do_reduce_labels": false,
9
  "do_rescale": true,
10
  "do_resize": true,
11
- "feature_extractor_type": "BeitFeatureExtractor",
12
  "image_mean": [
13
- 0.5,
14
- 0.5,
15
- 0.5
16
  ],
17
- "image_processor_type": "BeitImageProcessor",
18
  "image_std": [
19
- 0.5,
20
- 0.5,
21
- 0.5
22
  ],
23
- "resample": 2,
24
  "rescale_factor": 0.00392156862745098,
25
  "size": {
26
- "height": 224,
27
- "width": 224
28
  }
29
  }
 
1
  {
2
+ "crop_pct": 0.875,
 
 
 
 
3
  "do_normalize": true,
 
4
  "do_rescale": true,
5
  "do_resize": true,
6
+ "feature_extractor_type": "ConvNextFeatureExtractor",
7
  "image_mean": [
8
+ 0.485,
9
+ 0.456,
10
+ 0.406
11
  ],
12
+ "image_processor_type": "ConvNextFeatureExtractor",
13
  "image_std": [
14
+ 0.229,
15
+ 0.224,
16
+ 0.225
17
  ],
18
+ "resample": 3,
19
  "rescale_factor": 0.00392156862745098,
20
  "size": {
21
+ "shortest_edge": 224
 
22
  }
23
  }
runs/Dec01_13-11-56_610b2a9400b8/events.out.tfevents.1701436839.610b2a9400b8.3653.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced1ed782ae47e34565d004bd2516bfc4697edd2f6ad6e28ebc884121a0ac21b
3
+ size 411
runs/Dec01_13-35-54_610b2a9400b8/events.out.tfevents.1701437772.610b2a9400b8.3653.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f36524b90caf446e74df5af5bd286d28e0acec054d7d94fea880ae46be56f921
3
+ size 17529
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 1.646033084688384e+18,
4
- "train_loss": 0.19354178275474732,
5
- "train_runtime": 444.6785,
6
- "train_samples_per_second": 47.765,
7
- "train_steps_per_second": 1.529
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 1.6434612963230515e+18,
4
+ "train_loss": 0.5515072485103326,
5
+ "train_runtime": 465.2023,
6
+ "train_samples_per_second": 45.658,
7
+ "train_steps_per_second": 1.462
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.09556835144758224,
3
  "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-600",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
@@ -10,481 +10,481 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
- "learning_rate": 0.00019764705882352942,
14
- "loss": 1.7578,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
- "learning_rate": 0.0001947058823529412,
20
- "loss": 1.016,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
- "learning_rate": 0.00019176470588235295,
26
- "loss": 0.9006,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
- "learning_rate": 0.00018882352941176472,
32
- "loss": 0.7129,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
- "learning_rate": 0.00018588235294117648,
38
- "loss": 0.3781,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
- "learning_rate": 0.00018294117647058825,
44
- "loss": 0.4442,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
- "learning_rate": 0.00018,
50
- "loss": 0.3716,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
- "learning_rate": 0.00017705882352941178,
56
- "loss": 0.371,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
- "learning_rate": 0.00017411764705882354,
62
- "loss": 0.3183,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
- "learning_rate": 0.0001711764705882353,
68
- "loss": 0.3464,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
- "eval_accuracy": 0.9285714285714286,
74
- "eval_loss": 0.2377004772424698,
75
- "eval_runtime": 103.2565,
76
- "eval_samples_per_second": 2.576,
77
- "eval_steps_per_second": 0.329,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
- "learning_rate": 0.00016823529411764707,
83
- "loss": 0.2195,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
- "learning_rate": 0.0001652941176470588,
89
- "loss": 0.3354,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
- "learning_rate": 0.0001623529411764706,
95
- "loss": 0.3365,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
- "learning_rate": 0.00015941176470588237,
101
- "loss": 0.2152,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
- "learning_rate": 0.00015647058823529413,
107
- "loss": 0.3078,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
- "learning_rate": 0.0001535294117647059,
113
- "loss": 0.2712,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
- "learning_rate": 0.00015058823529411766,
119
- "loss": 0.133,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
- "learning_rate": 0.00014764705882352943,
125
- "loss": 0.3753,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
- "learning_rate": 0.0001447058823529412,
131
- "loss": 0.2242,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
- "learning_rate": 0.00014176470588235296,
137
- "loss": 0.153,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
- "eval_accuracy": 0.9060150375939849,
143
- "eval_loss": 0.2518657147884369,
144
- "eval_runtime": 2.626,
145
- "eval_samples_per_second": 101.294,
146
- "eval_steps_per_second": 12.947,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
- "learning_rate": 0.00013882352941176472,
152
- "loss": 0.2496,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
- "learning_rate": 0.00013588235294117649,
158
- "loss": 0.2584,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
- "learning_rate": 0.00013294117647058822,
164
- "loss": 0.1437,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
- "learning_rate": 0.00013000000000000002,
170
- "loss": 0.1991,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
- "learning_rate": 0.00012705882352941175,
176
- "loss": 0.1776,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
- "learning_rate": 0.00012411764705882355,
182
- "loss": 0.133,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
- "learning_rate": 0.0001211764705882353,
188
- "loss": 0.1119,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
- "learning_rate": 0.00011823529411764706,
194
- "loss": 0.087,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
- "learning_rate": 0.00011529411764705881,
200
- "loss": 0.0795,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
- "learning_rate": 0.00011235294117647059,
206
- "loss": 0.1663,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
- "eval_accuracy": 0.9398496240601504,
212
- "eval_loss": 0.1752895563840866,
213
- "eval_runtime": 2.5567,
214
- "eval_samples_per_second": 104.042,
215
- "eval_steps_per_second": 13.299,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
- "learning_rate": 0.00010941176470588237,
221
- "loss": 0.1149,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
- "learning_rate": 0.00010647058823529412,
227
- "loss": 0.1415,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
- "learning_rate": 0.0001035294117647059,
233
- "loss": 0.0998,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
- "learning_rate": 0.00010058823529411765,
239
- "loss": 0.1299,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 10.29,
244
- "learning_rate": 9.764705882352942e-05,
245
- "loss": 0.1028,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 10.59,
250
- "learning_rate": 9.470588235294118e-05,
251
- "loss": 0.0636,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 10.88,
256
- "learning_rate": 9.176470588235295e-05,
257
- "loss": 0.1064,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 11.18,
262
- "learning_rate": 8.882352941176471e-05,
263
- "loss": 0.1138,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 11.47,
268
- "learning_rate": 8.588235294117646e-05,
269
- "loss": 0.0469,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 11.76,
274
- "learning_rate": 8.294117647058824e-05,
275
- "loss": 0.0962,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 11.76,
280
- "eval_accuracy": 0.9398496240601504,
281
- "eval_loss": 0.20087337493896484,
282
- "eval_runtime": 2.1402,
283
- "eval_samples_per_second": 124.286,
284
- "eval_steps_per_second": 15.886,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 12.06,
289
- "learning_rate": 8e-05,
290
- "loss": 0.1099,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 12.35,
295
- "learning_rate": 7.705882352941177e-05,
296
- "loss": 0.0475,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 12.65,
301
- "learning_rate": 7.411764705882354e-05,
302
- "loss": 0.0448,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 12.94,
307
- "learning_rate": 7.11764705882353e-05,
308
- "loss": 0.0782,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 13.24,
313
- "learning_rate": 6.823529411764707e-05,
314
- "loss": 0.0543,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 13.53,
319
- "learning_rate": 6.529411764705883e-05,
320
- "loss": 0.0252,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 13.82,
325
- "learning_rate": 6.23529411764706e-05,
326
- "loss": 0.0517,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 14.12,
331
- "learning_rate": 5.9411764705882355e-05,
332
- "loss": 0.0179,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 14.41,
337
- "learning_rate": 5.647058823529412e-05,
338
- "loss": 0.0661,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 14.71,
343
- "learning_rate": 5.3529411764705884e-05,
344
- "loss": 0.0612,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 14.71,
349
- "eval_accuracy": 0.9586466165413534,
350
- "eval_loss": 0.13481324911117554,
351
- "eval_runtime": 2.1446,
352
- "eval_samples_per_second": 124.03,
353
- "eval_steps_per_second": 15.853,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 15.0,
358
- "learning_rate": 5.058823529411765e-05,
359
- "loss": 0.0611,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 15.29,
364
- "learning_rate": 4.7647058823529414e-05,
365
- "loss": 0.0208,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 15.59,
370
- "learning_rate": 4.470588235294118e-05,
371
- "loss": 0.0838,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 15.88,
376
- "learning_rate": 4.1764705882352944e-05,
377
- "loss": 0.0558,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 16.18,
382
- "learning_rate": 3.882352941176471e-05,
383
- "loss": 0.0544,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 16.47,
388
- "learning_rate": 3.5882352941176474e-05,
389
- "loss": 0.0729,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 16.76,
394
- "learning_rate": 3.294117647058824e-05,
395
- "loss": 0.0799,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 17.06,
400
- "learning_rate": 3e-05,
401
- "loss": 0.011,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 17.35,
406
- "learning_rate": 2.7058823529411766e-05,
407
- "loss": 0.0749,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 17.65,
412
- "learning_rate": 2.411764705882353e-05,
413
- "loss": 0.0341,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 17.65,
418
- "eval_accuracy": 0.9661654135338346,
419
- "eval_loss": 0.09556835144758224,
420
- "eval_runtime": 2.4514,
421
- "eval_samples_per_second": 108.508,
422
- "eval_steps_per_second": 13.869,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 17.94,
427
- "learning_rate": 2.1176470588235296e-05,
428
- "loss": 0.0134,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 18.24,
433
- "learning_rate": 1.8235294117647057e-05,
434
- "loss": 0.0339,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 18.53,
439
- "learning_rate": 1.5294117647058826e-05,
440
- "loss": 0.0383,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 18.82,
445
- "learning_rate": 1.2352941176470589e-05,
446
- "loss": 0.0209,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 19.12,
451
- "learning_rate": 9.411764705882354e-06,
452
- "loss": 0.0418,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 19.41,
457
- "learning_rate": 6.470588235294119e-06,
458
- "loss": 0.0431,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 19.71,
463
- "learning_rate": 3.5294117647058825e-06,
464
- "loss": 0.0334,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 20.0,
469
- "learning_rate": 5.882352941176471e-07,
470
- "loss": 0.0208,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 20.0,
475
  "step": 680,
476
- "total_flos": 1.646033084688384e+18,
477
- "train_loss": 0.19354178275474732,
478
- "train_runtime": 444.6785,
479
- "train_samples_per_second": 47.765,
480
- "train_steps_per_second": 1.529
481
  }
482
  ],
483
  "logging_steps": 10,
484
  "max_steps": 680,
485
  "num_train_epochs": 20,
486
  "save_steps": 100,
487
- "total_flos": 1.646033084688384e+18,
488
  "trial_name": null,
489
  "trial_params": null
490
  }
 
1
  {
2
+ "best_metric": 0.0946127399802208,
3
  "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-600",
4
  "epoch": 20.0,
5
  "eval_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
+ "learning_rate": 0.00019794117647058826,
14
+ "loss": 1.9655,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
+ "learning_rate": 0.000195,
20
+ "loss": 1.7836,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
+ "learning_rate": 0.0001920588235294118,
26
+ "loss": 1.5744,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
+ "learning_rate": 0.00018911764705882353,
32
+ "loss": 1.3903,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
+ "learning_rate": 0.00018617647058823532,
38
+ "loss": 1.3596,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
+ "learning_rate": 0.00018323529411764706,
44
+ "loss": 1.3915,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
+ "learning_rate": 0.00018029411764705885,
50
+ "loss": 1.2525,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
+ "learning_rate": 0.00017735294117647059,
56
+ "loss": 1.167,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
+ "learning_rate": 0.00017441176470588235,
62
+ "loss": 1.0339,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
+ "learning_rate": 0.00017147058823529412,
68
+ "loss": 0.9733,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
+ "eval_accuracy": 0.7255639097744361,
74
+ "eval_loss": 0.8894439339637756,
75
+ "eval_runtime": 2.491,
76
+ "eval_samples_per_second": 106.783,
77
+ "eval_steps_per_second": 13.649,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
+ "learning_rate": 0.00016852941176470588,
83
+ "loss": 0.9561,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
+ "learning_rate": 0.00016558823529411765,
89
+ "loss": 0.9493,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
+ "learning_rate": 0.0001626470588235294,
95
+ "loss": 0.8258,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
+ "learning_rate": 0.0001597058823529412,
101
+ "loss": 0.8111,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
+ "learning_rate": 0.00015676470588235294,
107
+ "loss": 0.9361,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
+ "learning_rate": 0.00015382352941176473,
113
+ "loss": 0.7733,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
+ "learning_rate": 0.00015088235294117647,
119
+ "loss": 0.5113,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
+ "learning_rate": 0.00014794117647058826,
125
+ "loss": 0.7088,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
+ "learning_rate": 0.000145,
131
+ "loss": 0.6652,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
+ "learning_rate": 0.00014205882352941177,
137
+ "loss": 0.7184,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
+ "eval_accuracy": 0.7293233082706767,
143
+ "eval_loss": 0.7875903248786926,
144
+ "eval_runtime": 2.5205,
145
+ "eval_samples_per_second": 105.533,
146
+ "eval_steps_per_second": 13.489,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
+ "learning_rate": 0.00013911764705882353,
152
+ "loss": 0.9143,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
+ "learning_rate": 0.0001361764705882353,
158
+ "loss": 0.7709,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
+ "learning_rate": 0.00013323529411764706,
164
+ "loss": 0.5797,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
+ "learning_rate": 0.00013029411764705883,
170
+ "loss": 0.5424,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
+ "learning_rate": 0.0001273529411764706,
176
+ "loss": 0.6525,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
+ "learning_rate": 0.00012441176470588236,
182
+ "loss": 0.3516,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
+ "learning_rate": 0.00012147058823529412,
188
+ "loss": 0.4829,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
+ "learning_rate": 0.00011852941176470589,
194
+ "loss": 0.5154,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
+ "learning_rate": 0.00011558823529411764,
200
+ "loss": 0.3485,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
+ "learning_rate": 0.00011264705882352942,
206
+ "loss": 0.5299,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
+ "eval_accuracy": 0.8609022556390977,
212
+ "eval_loss": 0.5183172821998596,
213
+ "eval_runtime": 2.8712,
214
+ "eval_samples_per_second": 92.644,
215
+ "eval_steps_per_second": 11.842,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
+ "learning_rate": 0.0001097058823529412,
221
+ "loss": 0.5263,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
+ "learning_rate": 0.00010676470588235295,
227
+ "loss": 0.4048,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
+ "learning_rate": 0.00010382352941176472,
233
+ "loss": 0.4386,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
+ "learning_rate": 0.00010088235294117648,
239
+ "loss": 0.4728,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 10.29,
244
+ "learning_rate": 9.794117647058824e-05,
245
+ "loss": 0.379,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 10.59,
250
+ "learning_rate": 9.5e-05,
251
+ "loss": 0.4361,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 10.88,
256
+ "learning_rate": 9.205882352941177e-05,
257
+ "loss": 0.2929,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 11.18,
262
+ "learning_rate": 8.911764705882354e-05,
263
+ "loss": 0.2848,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 11.47,
268
+ "learning_rate": 8.61764705882353e-05,
269
+ "loss": 0.4282,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 11.76,
274
+ "learning_rate": 8.323529411764707e-05,
275
+ "loss": 0.3991,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 11.76,
280
+ "eval_accuracy": 0.8947368421052632,
281
+ "eval_loss": 0.31206753849983215,
282
+ "eval_runtime": 2.9634,
283
+ "eval_samples_per_second": 89.761,
284
+ "eval_steps_per_second": 11.473,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 12.06,
289
+ "learning_rate": 8.029411764705883e-05,
290
+ "loss": 0.3552,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 12.35,
295
+ "learning_rate": 7.73529411764706e-05,
296
+ "loss": 0.2886,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 12.65,
301
+ "learning_rate": 7.441176470588236e-05,
302
+ "loss": 0.2809,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 12.94,
307
+ "learning_rate": 7.147058823529412e-05,
308
+ "loss": 0.3627,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 13.24,
313
+ "learning_rate": 6.852941176470589e-05,
314
+ "loss": 0.3137,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 13.53,
319
+ "learning_rate": 6.558823529411765e-05,
320
+ "loss": 0.198,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 13.82,
325
+ "learning_rate": 6.264705882352942e-05,
326
+ "loss": 0.3268,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 14.12,
331
+ "learning_rate": 5.970588235294118e-05,
332
+ "loss": 0.256,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 14.41,
337
+ "learning_rate": 5.676470588235294e-05,
338
+ "loss": 0.2544,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 14.71,
343
+ "learning_rate": 5.382352941176471e-05,
344
+ "loss": 0.2263,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 14.71,
349
+ "eval_accuracy": 0.9548872180451128,
350
+ "eval_loss": 0.13367173075675964,
351
+ "eval_runtime": 2.4729,
352
+ "eval_samples_per_second": 107.566,
353
+ "eval_steps_per_second": 13.749,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 15.0,
358
+ "learning_rate": 5.088235294117647e-05,
359
+ "loss": 0.4011,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 15.29,
364
+ "learning_rate": 4.794117647058824e-05,
365
+ "loss": 0.2051,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 15.59,
370
+ "learning_rate": 4.5e-05,
371
+ "loss": 0.2563,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 15.88,
376
+ "learning_rate": 4.205882352941177e-05,
377
+ "loss": 0.2428,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 16.18,
382
+ "learning_rate": 3.911764705882353e-05,
383
+ "loss": 0.189,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 16.47,
388
+ "learning_rate": 3.61764705882353e-05,
389
+ "loss": 0.19,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 16.76,
394
+ "learning_rate": 3.3235294117647056e-05,
395
+ "loss": 0.1696,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 17.06,
400
+ "learning_rate": 3.0294117647058824e-05,
401
+ "loss": 0.1336,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 17.35,
406
+ "learning_rate": 2.7352941176470593e-05,
407
+ "loss": 0.1532,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 17.65,
412
+ "learning_rate": 2.4411764705882354e-05,
413
+ "loss": 0.1782,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 17.65,
418
+ "eval_accuracy": 0.9624060150375939,
419
+ "eval_loss": 0.0946127399802208,
420
+ "eval_runtime": 2.8201,
421
+ "eval_samples_per_second": 94.322,
422
+ "eval_steps_per_second": 12.056,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 17.94,
427
+ "learning_rate": 2.1470588235294116e-05,
428
+ "loss": 0.1511,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 18.24,
433
+ "learning_rate": 1.8529411764705884e-05,
434
+ "loss": 0.0837,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 18.53,
439
+ "learning_rate": 1.558823529411765e-05,
440
+ "loss": 0.1084,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 18.82,
445
+ "learning_rate": 1.2647058823529412e-05,
446
+ "loss": 0.1313,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 19.12,
451
+ "learning_rate": 9.705882352941177e-06,
452
+ "loss": 0.1135,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 19.41,
457
+ "learning_rate": 6.7647058823529414e-06,
458
+ "loss": 0.0844,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 19.71,
463
+ "learning_rate": 3.823529411764706e-06,
464
+ "loss": 0.1765,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 20.0,
469
+ "learning_rate": 8.823529411764706e-07,
470
+ "loss": 0.1745,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 20.0,
475
  "step": 680,
476
+ "total_flos": 1.6434612963230515e+18,
477
+ "train_loss": 0.5515072485103326,
478
+ "train_runtime": 465.2023,
479
+ "train_samples_per_second": 45.658,
480
+ "train_steps_per_second": 1.462
481
  }
482
  ],
483
  "logging_steps": 10,
484
  "max_steps": 680,
485
  "num_train_epochs": 20,
486
  "save_steps": 100,
487
+ "total_flos": 1.6434612963230515e+18,
488
  "trial_name": null,
489
  "trial_params": null
490
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22a2c928be698f630b880bb20978029c7af153c01476125f0fdff83c1eb2efab
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7453b4d852d9cd2e5e3323177f837efc3e7f9c2f92828be34bc2ab59e6d6d25a
3
  size 4600