JorgeGIT committed on
Commit
b9c0f1c
1 Parent(s): 86af773

Model save

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/deit-base-distilled-patch16-224
4
  tags:
5
  - generated_from_trainer
6
  datasets:
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.9661654135338346
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # finetuned-Leukemia-cell
32
 
33
- This model is a fine-tuned version of [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0956
36
- - Accuracy: 0.9662
37
 
38
  ## Model description
39
 
@@ -65,12 +65,12 @@ The following hyperparameters were used during training:
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
- | 0.3464 | 2.94 | 100 | 0.2377 | 0.9286 |
69
- | 0.153 | 5.88 | 200 | 0.2519 | 0.9060 |
70
- | 0.1663 | 8.82 | 300 | 0.1753 | 0.9398 |
71
- | 0.0962 | 11.76 | 400 | 0.2009 | 0.9398 |
72
- | 0.0612 | 14.71 | 500 | 0.1348 | 0.9586 |
73
- | 0.0341 | 17.65 | 600 | 0.0956 | 0.9662 |
74
 
75
 
76
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: microsoft/beit-base-patch16-224-pt22k
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9624060150375939
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # finetuned-Leukemia-cell
32
 
33
+ This model is a fine-tuned version of [microsoft/beit-base-patch16-224-pt22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.0946
36
+ - Accuracy: 0.9624
37
 
38
  ## Model description
39
 
 
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
68
+ | 0.9733 | 2.94 | 100 | 0.8894 | 0.7256 |
69
+ | 0.7184 | 5.88 | 200 | 0.7876 | 0.7293 |
70
+ | 0.5299 | 8.82 | 300 | 0.5183 | 0.8609 |
71
+ | 0.3991 | 11.76 | 400 | 0.3121 | 0.8947 |
72
+ | 0.2263 | 14.71 | 500 | 0.1337 | 0.9549 |
73
+ | 0.1782 | 17.65 | 600 | 0.0946 | 0.9624 |
74
 
75
 
76
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9774436090225563,
4
- "eval_loss": 0.08701933920383453,
5
- "eval_runtime": 6.074,
6
- "eval_samples_per_second": 43.794,
7
- "eval_steps_per_second": 5.598,
8
- "total_flos": 8.230018095787622e+17,
9
- "train_loss": 0.1465345554492053,
10
- "train_runtime": 221.0307,
11
- "train_samples_per_second": 48.048,
12
- "train_steps_per_second": 1.538
13
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9661654135338346,
4
+ "eval_loss": 0.09556835144758224,
5
+ "eval_runtime": 3.1465,
6
+ "eval_samples_per_second": 84.539,
7
+ "eval_steps_per_second": 10.806,
8
+ "total_flos": 1.646033084688384e+18,
9
+ "train_loss": 0.19354178275474732,
10
+ "train_runtime": 444.6785,
11
+ "train_samples_per_second": 47.765,
12
+ "train_steps_per_second": 1.529
13
  }
config.json CHANGED
@@ -1,10 +1,14 @@
1
  {
2
- "_name_or_path": "facebook/deit-base-distilled-patch16-224",
3
  "architectures": [
4
- "DeiTForImageClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
- "encoder_stride": 16,
 
 
 
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
@@ -30,13 +34,33 @@
30
  "trico": "6"
31
  },
32
  "layer_norm_eps": 1e-12,
33
- "model_type": "deit",
 
34
  "num_attention_heads": 12,
35
  "num_channels": 3,
36
  "num_hidden_layers": 12,
 
 
 
 
 
 
37
  "patch_size": 16,
 
 
 
 
 
 
38
  "problem_type": "single_label_classification",
39
- "qkv_bias": true,
40
  "torch_dtype": "float32",
41
- "transformers_version": "4.35.2"
 
 
 
 
 
 
 
42
  }
 
1
  {
2
+ "_name_or_path": "microsoft/beit-base-patch16-224-pt22k",
3
  "architectures": [
4
+ "BeitForImageClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_channels": 256,
8
+ "auxiliary_concat_input": false,
9
+ "auxiliary_loss_weight": 0.4,
10
+ "auxiliary_num_convs": 1,
11
+ "drop_path_rate": 0.1,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.0,
14
  "hidden_size": 768,
 
34
  "trico": "6"
35
  },
36
  "layer_norm_eps": 1e-12,
37
+ "layer_scale_init_value": 0.1,
38
+ "model_type": "beit",
39
  "num_attention_heads": 12,
40
  "num_channels": 3,
41
  "num_hidden_layers": 12,
42
+ "out_indices": [
43
+ 3,
44
+ 5,
45
+ 7,
46
+ 11
47
+ ],
48
  "patch_size": 16,
49
+ "pool_scales": [
50
+ 1,
51
+ 2,
52
+ 3,
53
+ 6
54
+ ],
55
  "problem_type": "single_label_classification",
56
+ "semantic_loss_ignore_index": 255,
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.35.2",
59
+ "use_absolute_position_embeddings": false,
60
+ "use_auxiliary_head": true,
61
+ "use_mask_token": true,
62
+ "use_mean_pooling": true,
63
+ "use_relative_position_bias": false,
64
+ "use_shared_relative_position_bias": true,
65
+ "vocab_size": 8192
66
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9774436090225563,
4
- "eval_loss": 0.08701933920383453,
5
- "eval_runtime": 6.074,
6
- "eval_samples_per_second": 43.794,
7
- "eval_steps_per_second": 5.598
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9661654135338346,
4
+ "eval_loss": 0.09556835144758224,
5
+ "eval_runtime": 3.1465,
6
+ "eval_samples_per_second": 84.539,
7
+ "eval_steps_per_second": 10.806
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6776e9b0b9e3c3eb96201cecbd325ce233c78bbcd9bb45487d90adf02d5aa37c
3
- size 343245796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae4a6b3a2562aa63e1abd081ac90bbba7f0bef4b41d3240d4d8e328a08ced51d
3
+ size 342710540
preprocessor_config.json CHANGED
@@ -3,26 +3,27 @@
3
  "height": 224,
4
  "width": 224
5
  },
6
- "do_center_crop": true,
7
  "do_normalize": true,
 
8
  "do_rescale": true,
9
  "do_resize": true,
10
- "feature_extractor_type": "DeiTFeatureExtractor",
11
  "image_mean": [
12
- 0.485,
13
- 0.456,
14
- 0.406
15
  ],
16
- "image_processor_type": "DeiTFeatureExtractor",
17
  "image_std": [
18
- 0.229,
19
- 0.224,
20
- 0.225
21
  ],
22
- "resample": 3,
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
- "height": 256,
26
- "width": 256
27
  }
28
  }
 
3
  "height": 224,
4
  "width": 224
5
  },
6
+ "do_center_crop": false,
7
  "do_normalize": true,
8
+ "do_reduce_labels": false,
9
  "do_rescale": true,
10
  "do_resize": true,
11
+ "feature_extractor_type": "BeitFeatureExtractor",
12
  "image_mean": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
  ],
17
+ "image_processor_type": "BeitImageProcessor",
18
  "image_std": [
19
+ 0.5,
20
+ 0.5,
21
+ 0.5
22
  ],
23
+ "resample": 2,
24
  "rescale_factor": 0.00392156862745098,
25
  "size": {
26
+ "height": 224,
27
+ "width": 224
28
  }
29
  }
runs/Dec01_13-00-42_610b2a9400b8/events.out.tfevents.1701436123.610b2a9400b8.3653.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcda7f2804c809ef20157f7ba1b65ba909b365c9a571bcbba6a9c1770079a26
3
+ size 411
runs/Dec01_13-11-56_610b2a9400b8/events.out.tfevents.1701436344.610b2a9400b8.3653.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2baa2cec02fcb787ee9a6c9dadaaf45a1a7ec0f1e0b33a8d1d013e7496cba707
3
+ size 17923
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 8.230018095787622e+17,
4
- "train_loss": 0.1465345554492053,
5
- "train_runtime": 221.0307,
6
- "train_samples_per_second": 48.048,
7
- "train_steps_per_second": 1.538
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.646033084688384e+18,
4
+ "train_loss": 0.19354178275474732,
5
+ "train_runtime": 444.6785,
6
+ "train_samples_per_second": 47.765,
7
+ "train_steps_per_second": 1.529
8
  }
trainer_state.json CHANGED
@@ -1,259 +1,490 @@
1
  {
2
- "best_metric": 0.08701933920383453,
3
- "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-300",
4
- "epoch": 10.0,
5
  "eval_steps": 100,
6
- "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
- "learning_rate": 0.00019411764705882354,
14
- "loss": 0.5837,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
- "learning_rate": 0.00018823529411764707,
20
- "loss": 0.2956,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
- "learning_rate": 0.0001823529411764706,
26
- "loss": 0.279,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
- "learning_rate": 0.00017647058823529413,
32
- "loss": 0.1566,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
- "learning_rate": 0.00017058823529411766,
38
- "loss": 0.1042,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
- "learning_rate": 0.0001647058823529412,
44
- "loss": 0.176,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
- "learning_rate": 0.0001588235294117647,
50
- "loss": 0.1231,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
- "learning_rate": 0.00015294117647058822,
56
- "loss": 0.0952,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
- "learning_rate": 0.00014705882352941178,
62
- "loss": 0.1003,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
- "learning_rate": 0.0001411764705882353,
68
- "loss": 0.1606,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
- "eval_accuracy": 0.9511278195488722,
74
- "eval_loss": 0.18294382095336914,
75
- "eval_runtime": 2.8739,
76
- "eval_samples_per_second": 92.556,
77
- "eval_steps_per_second": 11.83,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
- "learning_rate": 0.00013529411764705884,
83
- "loss": 0.2204,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
- "learning_rate": 0.00012941176470588237,
89
- "loss": 0.2845,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
- "learning_rate": 0.0001235294117647059,
95
- "loss": 0.2104,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
- "learning_rate": 0.00011764705882352942,
101
- "loss": 0.2029,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
- "learning_rate": 0.00011176470588235294,
107
- "loss": 0.1105,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
- "learning_rate": 0.00010588235294117647,
113
- "loss": 0.177,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
- "learning_rate": 0.0001,
119
- "loss": 0.1881,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
- "learning_rate": 9.411764705882353e-05,
125
- "loss": 0.1445,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
- "learning_rate": 8.823529411764706e-05,
131
- "loss": 0.0891,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
- "learning_rate": 8.23529411764706e-05,
137
- "loss": 0.1895,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
- "eval_accuracy": 0.9661654135338346,
143
- "eval_loss": 0.14408943057060242,
144
- "eval_runtime": 2.3757,
145
- "eval_samples_per_second": 111.968,
146
- "eval_steps_per_second": 14.312,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
- "learning_rate": 7.647058823529411e-05,
152
- "loss": 0.1095,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
- "learning_rate": 7.058823529411765e-05,
158
- "loss": 0.1908,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
- "learning_rate": 6.470588235294118e-05,
164
- "loss": 0.1118,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
- "learning_rate": 5.882352941176471e-05,
170
- "loss": 0.0461,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
- "learning_rate": 5.294117647058824e-05,
176
- "loss": 0.1002,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
- "learning_rate": 4.705882352941177e-05,
182
- "loss": 0.082,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
- "learning_rate": 4.11764705882353e-05,
188
- "loss": 0.0712,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
- "learning_rate": 3.529411764705883e-05,
194
- "loss": 0.0565,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
- "learning_rate": 2.9411764705882354e-05,
200
- "loss": 0.0702,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
- "learning_rate": 2.3529411764705884e-05,
206
- "loss": 0.0423,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
- "eval_accuracy": 0.9774436090225563,
212
- "eval_loss": 0.08701933920383453,
213
- "eval_runtime": 2.414,
214
- "eval_samples_per_second": 110.19,
215
- "eval_steps_per_second": 14.084,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
- "learning_rate": 1.7647058823529414e-05,
221
- "loss": 0.0652,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
- "learning_rate": 1.1764705882352942e-05,
227
- "loss": 0.027,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
- "learning_rate": 5.882352941176471e-06,
233
- "loss": 0.0633,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
- "learning_rate": 0.0,
239
- "loss": 0.055,
240
  "step": 340
241
  },
242
  {
243
- "epoch": 10.0,
244
- "step": 340,
245
- "total_flos": 8.230018095787622e+17,
246
- "train_loss": 0.1465345554492053,
247
- "train_runtime": 221.0307,
248
- "train_samples_per_second": 48.048,
249
- "train_steps_per_second": 1.538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  }
251
  ],
252
  "logging_steps": 10,
253
- "max_steps": 340,
254
- "num_train_epochs": 10,
255
  "save_steps": 100,
256
- "total_flos": 8.230018095787622e+17,
257
  "trial_name": null,
258
  "trial_params": null
259
  }
 
1
  {
2
+ "best_metric": 0.09556835144758224,
3
+ "best_model_checkpoint": "finetuned-Leukemia-cell/checkpoint-600",
4
+ "epoch": 20.0,
5
  "eval_steps": 100,
6
+ "global_step": 680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.29,
13
+ "learning_rate": 0.00019764705882352942,
14
+ "loss": 1.7578,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.59,
19
+ "learning_rate": 0.0001947058823529412,
20
+ "loss": 1.016,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.88,
25
+ "learning_rate": 0.00019176470588235295,
26
+ "loss": 0.9006,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 1.18,
31
+ "learning_rate": 0.00018882352941176472,
32
+ "loss": 0.7129,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 1.47,
37
+ "learning_rate": 0.00018588235294117648,
38
+ "loss": 0.3781,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.76,
43
+ "learning_rate": 0.00018294117647058825,
44
+ "loss": 0.4442,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 2.06,
49
+ "learning_rate": 0.00018,
50
+ "loss": 0.3716,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 2.35,
55
+ "learning_rate": 0.00017705882352941178,
56
+ "loss": 0.371,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 2.65,
61
+ "learning_rate": 0.00017411764705882354,
62
+ "loss": 0.3183,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 2.94,
67
+ "learning_rate": 0.0001711764705882353,
68
+ "loss": 0.3464,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 2.94,
73
+ "eval_accuracy": 0.9285714285714286,
74
+ "eval_loss": 0.2377004772424698,
75
+ "eval_runtime": 103.2565,
76
+ "eval_samples_per_second": 2.576,
77
+ "eval_steps_per_second": 0.329,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 3.24,
82
+ "learning_rate": 0.00016823529411764707,
83
+ "loss": 0.2195,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 3.53,
88
+ "learning_rate": 0.0001652941176470588,
89
+ "loss": 0.3354,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 3.82,
94
+ "learning_rate": 0.0001623529411764706,
95
+ "loss": 0.3365,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 4.12,
100
+ "learning_rate": 0.00015941176470588237,
101
+ "loss": 0.2152,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 4.41,
106
+ "learning_rate": 0.00015647058823529413,
107
+ "loss": 0.3078,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 4.71,
112
+ "learning_rate": 0.0001535294117647059,
113
+ "loss": 0.2712,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 5.0,
118
+ "learning_rate": 0.00015058823529411766,
119
+ "loss": 0.133,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 5.29,
124
+ "learning_rate": 0.00014764705882352943,
125
+ "loss": 0.3753,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 5.59,
130
+ "learning_rate": 0.0001447058823529412,
131
+ "loss": 0.2242,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 5.88,
136
+ "learning_rate": 0.00014176470588235296,
137
+ "loss": 0.153,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 5.88,
142
+ "eval_accuracy": 0.9060150375939849,
143
+ "eval_loss": 0.2518657147884369,
144
+ "eval_runtime": 2.626,
145
+ "eval_samples_per_second": 101.294,
146
+ "eval_steps_per_second": 12.947,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 6.18,
151
+ "learning_rate": 0.00013882352941176472,
152
+ "loss": 0.2496,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 6.47,
157
+ "learning_rate": 0.00013588235294117649,
158
+ "loss": 0.2584,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 6.76,
163
+ "learning_rate": 0.00013294117647058822,
164
+ "loss": 0.1437,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 7.06,
169
+ "learning_rate": 0.00013000000000000002,
170
+ "loss": 0.1991,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 7.35,
175
+ "learning_rate": 0.00012705882352941175,
176
+ "loss": 0.1776,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 7.65,
181
+ "learning_rate": 0.00012411764705882355,
182
+ "loss": 0.133,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 7.94,
187
+ "learning_rate": 0.0001211764705882353,
188
+ "loss": 0.1119,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 8.24,
193
+ "learning_rate": 0.00011823529411764706,
194
+ "loss": 0.087,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 8.53,
199
+ "learning_rate": 0.00011529411764705881,
200
+ "loss": 0.0795,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 8.82,
205
+ "learning_rate": 0.00011235294117647059,
206
+ "loss": 0.1663,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 8.82,
211
+ "eval_accuracy": 0.9398496240601504,
212
+ "eval_loss": 0.1752895563840866,
213
+ "eval_runtime": 2.5567,
214
+ "eval_samples_per_second": 104.042,
215
+ "eval_steps_per_second": 13.299,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 9.12,
220
+ "learning_rate": 0.00010941176470588237,
221
+ "loss": 0.1149,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 9.41,
226
+ "learning_rate": 0.00010647058823529412,
227
+ "loss": 0.1415,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 9.71,
232
+ "learning_rate": 0.0001035294117647059,
233
+ "loss": 0.0998,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 10.0,
238
+ "learning_rate": 0.00010058823529411765,
239
+ "loss": 0.1299,
240
  "step": 340
241
  },
242
  {
243
+ "epoch": 10.29,
244
+ "learning_rate": 9.764705882352942e-05,
245
+ "loss": 0.1028,
246
+ "step": 350
247
+ },
248
+ {
249
+ "epoch": 10.59,
250
+ "learning_rate": 9.470588235294118e-05,
251
+ "loss": 0.0636,
252
+ "step": 360
253
+ },
254
+ {
255
+ "epoch": 10.88,
256
+ "learning_rate": 9.176470588235295e-05,
257
+ "loss": 0.1064,
258
+ "step": 370
259
+ },
260
+ {
261
+ "epoch": 11.18,
262
+ "learning_rate": 8.882352941176471e-05,
263
+ "loss": 0.1138,
264
+ "step": 380
265
+ },
266
+ {
267
+ "epoch": 11.47,
268
+ "learning_rate": 8.588235294117646e-05,
269
+ "loss": 0.0469,
270
+ "step": 390
271
+ },
272
+ {
273
+ "epoch": 11.76,
274
+ "learning_rate": 8.294117647058824e-05,
275
+ "loss": 0.0962,
276
+ "step": 400
277
+ },
278
+ {
279
+ "epoch": 11.76,
280
+ "eval_accuracy": 0.9398496240601504,
281
+ "eval_loss": 0.20087337493896484,
282
+ "eval_runtime": 2.1402,
283
+ "eval_samples_per_second": 124.286,
284
+ "eval_steps_per_second": 15.886,
285
+ "step": 400
286
+ },
287
+ {
288
+ "epoch": 12.06,
289
+ "learning_rate": 8e-05,
290
+ "loss": 0.1099,
291
+ "step": 410
292
+ },
293
+ {
294
+ "epoch": 12.35,
295
+ "learning_rate": 7.705882352941177e-05,
296
+ "loss": 0.0475,
297
+ "step": 420
298
+ },
299
+ {
300
+ "epoch": 12.65,
301
+ "learning_rate": 7.411764705882354e-05,
302
+ "loss": 0.0448,
303
+ "step": 430
304
+ },
305
+ {
306
+ "epoch": 12.94,
307
+ "learning_rate": 7.11764705882353e-05,
308
+ "loss": 0.0782,
309
+ "step": 440
310
+ },
311
+ {
312
+ "epoch": 13.24,
313
+ "learning_rate": 6.823529411764707e-05,
314
+ "loss": 0.0543,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 13.53,
319
+ "learning_rate": 6.529411764705883e-05,
320
+ "loss": 0.0252,
321
+ "step": 460
322
+ },
323
+ {
324
+ "epoch": 13.82,
325
+ "learning_rate": 6.23529411764706e-05,
326
+ "loss": 0.0517,
327
+ "step": 470
328
+ },
329
+ {
330
+ "epoch": 14.12,
331
+ "learning_rate": 5.9411764705882355e-05,
332
+ "loss": 0.0179,
333
+ "step": 480
334
+ },
335
+ {
336
+ "epoch": 14.41,
337
+ "learning_rate": 5.647058823529412e-05,
338
+ "loss": 0.0661,
339
+ "step": 490
340
+ },
341
+ {
342
+ "epoch": 14.71,
343
+ "learning_rate": 5.3529411764705884e-05,
344
+ "loss": 0.0612,
345
+ "step": 500
346
+ },
347
+ {
348
+ "epoch": 14.71,
349
+ "eval_accuracy": 0.9586466165413534,
350
+ "eval_loss": 0.13481324911117554,
351
+ "eval_runtime": 2.1446,
352
+ "eval_samples_per_second": 124.03,
353
+ "eval_steps_per_second": 15.853,
354
+ "step": 500
355
+ },
356
+ {
357
+ "epoch": 15.0,
358
+ "learning_rate": 5.058823529411765e-05,
359
+ "loss": 0.0611,
360
+ "step": 510
361
+ },
362
+ {
363
+ "epoch": 15.29,
364
+ "learning_rate": 4.7647058823529414e-05,
365
+ "loss": 0.0208,
366
+ "step": 520
367
+ },
368
+ {
369
+ "epoch": 15.59,
370
+ "learning_rate": 4.470588235294118e-05,
371
+ "loss": 0.0838,
372
+ "step": 530
373
+ },
374
+ {
375
+ "epoch": 15.88,
376
+ "learning_rate": 4.1764705882352944e-05,
377
+ "loss": 0.0558,
378
+ "step": 540
379
+ },
380
+ {
381
+ "epoch": 16.18,
382
+ "learning_rate": 3.882352941176471e-05,
383
+ "loss": 0.0544,
384
+ "step": 550
385
+ },
386
+ {
387
+ "epoch": 16.47,
388
+ "learning_rate": 3.5882352941176474e-05,
389
+ "loss": 0.0729,
390
+ "step": 560
391
+ },
392
+ {
393
+ "epoch": 16.76,
394
+ "learning_rate": 3.294117647058824e-05,
395
+ "loss": 0.0799,
396
+ "step": 570
397
+ },
398
+ {
399
+ "epoch": 17.06,
400
+ "learning_rate": 3e-05,
401
+ "loss": 0.011,
402
+ "step": 580
403
+ },
404
+ {
405
+ "epoch": 17.35,
406
+ "learning_rate": 2.7058823529411766e-05,
407
+ "loss": 0.0749,
408
+ "step": 590
409
+ },
410
+ {
411
+ "epoch": 17.65,
412
+ "learning_rate": 2.411764705882353e-05,
413
+ "loss": 0.0341,
414
+ "step": 600
415
+ },
416
+ {
417
+ "epoch": 17.65,
418
+ "eval_accuracy": 0.9661654135338346,
419
+ "eval_loss": 0.09556835144758224,
420
+ "eval_runtime": 2.4514,
421
+ "eval_samples_per_second": 108.508,
422
+ "eval_steps_per_second": 13.869,
423
+ "step": 600
424
+ },
425
+ {
426
+ "epoch": 17.94,
427
+ "learning_rate": 2.1176470588235296e-05,
428
+ "loss": 0.0134,
429
+ "step": 610
430
+ },
431
+ {
432
+ "epoch": 18.24,
433
+ "learning_rate": 1.8235294117647057e-05,
434
+ "loss": 0.0339,
435
+ "step": 620
436
+ },
437
+ {
438
+ "epoch": 18.53,
439
+ "learning_rate": 1.5294117647058826e-05,
440
+ "loss": 0.0383,
441
+ "step": 630
442
+ },
443
+ {
444
+ "epoch": 18.82,
445
+ "learning_rate": 1.2352941176470589e-05,
446
+ "loss": 0.0209,
447
+ "step": 640
448
+ },
449
+ {
450
+ "epoch": 19.12,
451
+ "learning_rate": 9.411764705882354e-06,
452
+ "loss": 0.0418,
453
+ "step": 650
454
+ },
455
+ {
456
+ "epoch": 19.41,
457
+ "learning_rate": 6.470588235294119e-06,
458
+ "loss": 0.0431,
459
+ "step": 660
460
+ },
461
+ {
462
+ "epoch": 19.71,
463
+ "learning_rate": 3.5294117647058825e-06,
464
+ "loss": 0.0334,
465
+ "step": 670
466
+ },
467
+ {
468
+ "epoch": 20.0,
469
+ "learning_rate": 5.882352941176471e-07,
470
+ "loss": 0.0208,
471
+ "step": 680
472
+ },
473
+ {
474
+ "epoch": 20.0,
475
+ "step": 680,
476
+ "total_flos": 1.646033084688384e+18,
477
+ "train_loss": 0.19354178275474732,
478
+ "train_runtime": 444.6785,
479
+ "train_samples_per_second": 47.765,
480
+ "train_steps_per_second": 1.529
481
  }
482
  ],
483
  "logging_steps": 10,
484
+ "max_steps": 680,
485
+ "num_train_epochs": 20,
486
  "save_steps": 100,
487
+ "total_flos": 1.646033084688384e+18,
488
  "trial_name": null,
489
  "trial_params": null
490
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dacbff944cbe4f5a09b4fac9f1c809f6343455d2e3919516f15e7fd6ecdc5c74
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a2c928be698f630b880bb20978029c7af153c01476125f0fdff83c1eb2efab
3
  size 4600