Raihan004 committed on
Commit
d607ad5
1 Parent(s): 755cb7c

Model save

Browse files
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/vit-base-patch16-224-in21k
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: Action_all_10_class
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # Action_all_10_class
17
+
18
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.3598
21
+ - Accuracy: 0.9017
22
+
23
+ ## Model description
24
+
25
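+ A Vision Transformer (ViT-base, 16×16 patches, 224×224 input) image classifier fine-tuned from `google/vit-base-patch16-224-in21k` for single-label recognition of 10 human-action classes. Class labels are in Bengali (see `id2label` in `config.json`): talking, using a computer, eating, playing, sleeping, reading, drinking, cooking, writing, walking.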
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0002
41
+ - train_batch_size: 16
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 5
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 1.1524 | 0.4 | 100 | 1.0612 | 0.6952 |
54
+ | 0.9818 | 0.8 | 200 | 0.7399 | 0.7877 |
55
+ | 0.844 | 1.2 | 300 | 0.7196 | 0.7692 |
56
+ | 0.7992 | 1.61 | 400 | 0.5383 | 0.8376 |
57
+ | 0.7203 | 2.01 | 500 | 0.5222 | 0.8390 |
58
+ | 0.5527 | 2.41 | 600 | 0.5394 | 0.8376 |
59
+ | 0.5682 | 2.81 | 700 | 0.4943 | 0.8462 |
60
+ | 0.4238 | 3.21 | 800 | 0.4441 | 0.8675 |
61
+ | 0.4684 | 3.61 | 900 | 0.4113 | 0.8846 |
62
+ | 0.4239 | 4.02 | 1000 | 0.3587 | 0.8946 |
63
+ | 0.4083 | 4.42 | 1100 | 0.3768 | 0.8832 |
64
+ | 0.3541 | 4.82 | 1200 | 0.3598 | 0.9017 |
65
+
66
+
67
+ ### Framework versions
68
+
69
+ - Transformers 4.35.0
70
+ - Pytorch 2.1.0+cu118
71
+ - Datasets 2.14.6
72
+ - Tokenizers 0.14.1
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.539101261655982e+18,
4
+ "train_loss": 0.6643937945844658,
5
+ "train_runtime": 748.7697,
6
+ "train_samples_per_second": 26.524,
7
+ "train_steps_per_second": 1.663
8
+ }
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "\u0995\u09a5\u09be_\u09ac\u09b2\u09be",
13
+ "1": "\u0995\u09ae\u09cd\u09aa\u09bf\u0989\u099f\u09be\u09b0_\u09ac\u09cd\u09af\u09ac\u09b9\u09be\u09b0_\u0995\u09b0\u09be",
14
+ "2": "\u0996\u09be\u0993\u09af\u09bc\u09be",
15
+ "3": "\u0996\u09c7\u09b2\u09be_\u0995\u09b0\u09be",
16
+ "4": "\u0998\u09c1\u09ae\u09be\u09a8\u09c7\u09be",
17
+ "5": "\u09aa\u09a1\u09bc\u09be",
18
+ "6": "\u09aa\u09be\u09a8_\u0995\u09b0\u09be",
19
+ "7": "\u09b0\u09be\u09a8\u09cd\u09a8\u09be_\u0995\u09b0\u09be",
20
+ "8": "\u09b2\u09c7\u0996\u09be",
21
+ "9": "\u09b9\u09be\u0981\u099f\u09be"
22
+ },
23
+ "image_size": 224,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 3072,
26
+ "label2id": {
27
+ "\u0995\u09a5\u09be_\u09ac\u09b2\u09be": "0",
28
+ "\u0995\u09ae\u09cd\u09aa\u09bf\u0989\u099f\u09be\u09b0_\u09ac\u09cd\u09af\u09ac\u09b9\u09be\u09b0_\u0995\u09b0\u09be": "1",
29
+ "\u0996\u09be\u0993\u09af\u09bc\u09be": "2",
30
+ "\u0996\u09c7\u09b2\u09be_\u0995\u09b0\u09be": "3",
31
+ "\u0998\u09c1\u09ae\u09be\u09a8\u09c7\u09be": "4",
32
+ "\u09aa\u09a1\u09bc\u09be": "5",
33
+ "\u09aa\u09be\u09a8_\u0995\u09b0\u09be": "6",
34
+ "\u09b0\u09be\u09a8\u09cd\u09a8\u09be_\u0995\u09b0\u09be": "7",
35
+ "\u09b2\u09c7\u0996\u09be": "8",
36
+ "\u09b9\u09be\u0981\u099f\u09be": "9"
37
+ },
38
+ "layer_norm_eps": 1e-12,
39
+ "model_type": "vit",
40
+ "num_attention_heads": 12,
41
+ "num_channels": 3,
42
+ "num_hidden_layers": 12,
43
+ "patch_size": 16,
44
+ "problem_type": "single_label_classification",
45
+ "qkv_bias": true,
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.35.0"
48
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6363d9138987ea57aac64eafb945b3ac0b14d984633e1fb7767c771d2c14ce60
3
+ size 343248584
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
runs/Nov06_15-28-47_dfb0c3665f64/events.out.tfevents.1699284576.dfb0c3665f64.13158.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d107ba01bc126303e3030ec6601d7fd8529baef4d485825e1c3bb5a1d42913e4
3
+ size 28930
runs/Nov06_15-48-14_dfb0c3665f64/events.out.tfevents.1699285717.dfb0c3665f64.18450.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef7591ced63d1eb31098d3cf612684c3e577dfaea1ff781856b4c86a6805e7c
3
+ size 28930
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.539101261655982e+18,
4
+ "train_loss": 0.6643937945844658,
5
+ "train_runtime": 748.7697,
6
+ "train_samples_per_second": 26.524,
7
+ "train_steps_per_second": 1.663
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,880 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.39916983246803284,
3
+ "best_model_checkpoint": "Action_all_10_class/checkpoint-1200",
4
+ "epoch": 5.0,
5
+ "eval_steps": 100,
6
+ "global_step": 1245,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "learning_rate": 0.00019839357429718877,
14
+ "loss": 2.2294,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.08,
19
+ "learning_rate": 0.00019678714859437752,
20
+ "loss": 2.1036,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.12,
25
+ "learning_rate": 0.00019518072289156628,
26
+ "loss": 1.9019,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.16,
31
+ "learning_rate": 0.00019357429718875504,
32
+ "loss": 1.6396,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.2,
37
+ "learning_rate": 0.00019196787148594377,
38
+ "loss": 1.5942,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.24,
43
+ "learning_rate": 0.00019036144578313252,
44
+ "loss": 1.3722,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.28,
49
+ "learning_rate": 0.00018875502008032128,
50
+ "loss": 1.2927,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.32,
55
+ "learning_rate": 0.00018714859437751004,
56
+ "loss": 1.2947,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.36,
61
+ "learning_rate": 0.0001855421686746988,
62
+ "loss": 1.2353,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.4,
67
+ "learning_rate": 0.00018393574297188755,
68
+ "loss": 1.1348,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.4,
73
+ "eval_accuracy": 0.698005698005698,
74
+ "eval_loss": 1.0964313745498657,
75
+ "eval_runtime": 13.3411,
76
+ "eval_samples_per_second": 52.619,
77
+ "eval_steps_per_second": 6.596,
78
+ "step": 100
79
+ },
80
+ {
81
+ "epoch": 0.44,
82
+ "learning_rate": 0.0001823293172690763,
83
+ "loss": 1.0305,
84
+ "step": 110
85
+ },
86
+ {
87
+ "epoch": 0.48,
88
+ "learning_rate": 0.00018072289156626507,
89
+ "loss": 1.0352,
90
+ "step": 120
91
+ },
92
+ {
93
+ "epoch": 0.52,
94
+ "learning_rate": 0.00017911646586345382,
95
+ "loss": 1.0957,
96
+ "step": 130
97
+ },
98
+ {
99
+ "epoch": 0.56,
100
+ "learning_rate": 0.00017751004016064258,
101
+ "loss": 1.1747,
102
+ "step": 140
103
+ },
104
+ {
105
+ "epoch": 0.6,
106
+ "learning_rate": 0.00017590361445783134,
107
+ "loss": 1.0863,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 0.64,
112
+ "learning_rate": 0.0001742971887550201,
113
+ "loss": 1.0802,
114
+ "step": 160
115
+ },
116
+ {
117
+ "epoch": 0.68,
118
+ "learning_rate": 0.00017269076305220885,
119
+ "loss": 1.0912,
120
+ "step": 170
121
+ },
122
+ {
123
+ "epoch": 0.72,
124
+ "learning_rate": 0.0001710843373493976,
125
+ "loss": 1.0056,
126
+ "step": 180
127
+ },
128
+ {
129
+ "epoch": 0.76,
130
+ "learning_rate": 0.00016947791164658636,
131
+ "loss": 0.9641,
132
+ "step": 190
133
+ },
134
+ {
135
+ "epoch": 0.8,
136
+ "learning_rate": 0.00016787148594377512,
137
+ "loss": 0.9992,
138
+ "step": 200
139
+ },
140
+ {
141
+ "epoch": 0.8,
142
+ "eval_accuracy": 0.7948717948717948,
143
+ "eval_loss": 0.7362096309661865,
144
+ "eval_runtime": 13.2093,
145
+ "eval_samples_per_second": 53.145,
146
+ "eval_steps_per_second": 6.662,
147
+ "step": 200
148
+ },
149
+ {
150
+ "epoch": 0.84,
151
+ "learning_rate": 0.00016626506024096388,
152
+ "loss": 0.9851,
153
+ "step": 210
154
+ },
155
+ {
156
+ "epoch": 0.88,
157
+ "learning_rate": 0.00016465863453815263,
158
+ "loss": 0.886,
159
+ "step": 220
160
+ },
161
+ {
162
+ "epoch": 0.92,
163
+ "learning_rate": 0.0001630522088353414,
164
+ "loss": 0.9604,
165
+ "step": 230
166
+ },
167
+ {
168
+ "epoch": 0.96,
169
+ "learning_rate": 0.00016144578313253015,
170
+ "loss": 0.7707,
171
+ "step": 240
172
+ },
173
+ {
174
+ "epoch": 1.0,
175
+ "learning_rate": 0.00016,
176
+ "loss": 0.9153,
177
+ "step": 250
178
+ },
179
+ {
180
+ "epoch": 1.04,
181
+ "learning_rate": 0.00015839357429718874,
182
+ "loss": 0.7581,
183
+ "step": 260
184
+ },
185
+ {
186
+ "epoch": 1.08,
187
+ "learning_rate": 0.0001567871485943775,
188
+ "loss": 0.6622,
189
+ "step": 270
190
+ },
191
+ {
192
+ "epoch": 1.12,
193
+ "learning_rate": 0.00015518072289156626,
194
+ "loss": 0.7251,
195
+ "step": 280
196
+ },
197
+ {
198
+ "epoch": 1.16,
199
+ "learning_rate": 0.00015357429718875501,
200
+ "loss": 0.7955,
201
+ "step": 290
202
+ },
203
+ {
204
+ "epoch": 1.2,
205
+ "learning_rate": 0.00015196787148594377,
206
+ "loss": 0.8314,
207
+ "step": 300
208
+ },
209
+ {
210
+ "epoch": 1.2,
211
+ "eval_accuracy": 0.8205128205128205,
212
+ "eval_loss": 0.6410418748855591,
213
+ "eval_runtime": 12.6655,
214
+ "eval_samples_per_second": 55.426,
215
+ "eval_steps_per_second": 6.948,
216
+ "step": 300
217
+ },
218
+ {
219
+ "epoch": 1.24,
220
+ "learning_rate": 0.00015036144578313253,
221
+ "loss": 0.845,
222
+ "step": 310
223
+ },
224
+ {
225
+ "epoch": 1.29,
226
+ "learning_rate": 0.00014875502008032128,
227
+ "loss": 0.7132,
228
+ "step": 320
229
+ },
230
+ {
231
+ "epoch": 1.33,
232
+ "learning_rate": 0.00014714859437751004,
233
+ "loss": 0.7161,
234
+ "step": 330
235
+ },
236
+ {
237
+ "epoch": 1.37,
238
+ "learning_rate": 0.0001455421686746988,
239
+ "loss": 0.7773,
240
+ "step": 340
241
+ },
242
+ {
243
+ "epoch": 1.41,
244
+ "learning_rate": 0.00014393574297188756,
245
+ "loss": 0.7676,
246
+ "step": 350
247
+ },
248
+ {
249
+ "epoch": 1.45,
250
+ "learning_rate": 0.0001423293172690763,
251
+ "loss": 0.8516,
252
+ "step": 360
253
+ },
254
+ {
255
+ "epoch": 1.49,
256
+ "learning_rate": 0.00014072289156626507,
257
+ "loss": 0.6597,
258
+ "step": 370
259
+ },
260
+ {
261
+ "epoch": 1.53,
262
+ "learning_rate": 0.00013911646586345383,
263
+ "loss": 0.6998,
264
+ "step": 380
265
+ },
266
+ {
267
+ "epoch": 1.57,
268
+ "learning_rate": 0.00013751004016064258,
269
+ "loss": 0.8191,
270
+ "step": 390
271
+ },
272
+ {
273
+ "epoch": 1.61,
274
+ "learning_rate": 0.00013590361445783134,
275
+ "loss": 0.7359,
276
+ "step": 400
277
+ },
278
+ {
279
+ "epoch": 1.61,
280
+ "eval_accuracy": 0.8247863247863247,
281
+ "eval_loss": 0.5803518891334534,
282
+ "eval_runtime": 12.2034,
283
+ "eval_samples_per_second": 57.525,
284
+ "eval_steps_per_second": 7.211,
285
+ "step": 400
286
+ },
287
+ {
288
+ "epoch": 1.65,
289
+ "learning_rate": 0.0001342971887550201,
290
+ "loss": 0.7354,
291
+ "step": 410
292
+ },
293
+ {
294
+ "epoch": 1.69,
295
+ "learning_rate": 0.00013269076305220885,
296
+ "loss": 0.7507,
297
+ "step": 420
298
+ },
299
+ {
300
+ "epoch": 1.73,
301
+ "learning_rate": 0.0001310843373493976,
302
+ "loss": 0.8088,
303
+ "step": 430
304
+ },
305
+ {
306
+ "epoch": 1.77,
307
+ "learning_rate": 0.00012947791164658637,
308
+ "loss": 0.7279,
309
+ "step": 440
310
+ },
311
+ {
312
+ "epoch": 1.81,
313
+ "learning_rate": 0.00012787148594377512,
314
+ "loss": 0.7254,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 1.85,
319
+ "learning_rate": 0.00012626506024096385,
320
+ "loss": 0.6707,
321
+ "step": 460
322
+ },
323
+ {
324
+ "epoch": 1.89,
325
+ "learning_rate": 0.0001246586345381526,
326
+ "loss": 0.6482,
327
+ "step": 470
328
+ },
329
+ {
330
+ "epoch": 1.93,
331
+ "learning_rate": 0.00012305220883534137,
332
+ "loss": 0.7238,
333
+ "step": 480
334
+ },
335
+ {
336
+ "epoch": 1.97,
337
+ "learning_rate": 0.00012144578313253012,
338
+ "loss": 0.6626,
339
+ "step": 490
340
+ },
341
+ {
342
+ "epoch": 2.01,
343
+ "learning_rate": 0.00011983935742971888,
344
+ "loss": 0.776,
345
+ "step": 500
346
+ },
347
+ {
348
+ "epoch": 2.01,
349
+ "eval_accuracy": 0.8376068376068376,
350
+ "eval_loss": 0.5489934086799622,
351
+ "eval_runtime": 12.4349,
352
+ "eval_samples_per_second": 56.454,
353
+ "eval_steps_per_second": 7.077,
354
+ "step": 500
355
+ },
356
+ {
357
+ "epoch": 2.05,
358
+ "learning_rate": 0.00011823293172690764,
359
+ "loss": 0.8069,
360
+ "step": 510
361
+ },
362
+ {
363
+ "epoch": 2.09,
364
+ "learning_rate": 0.0001166265060240964,
365
+ "loss": 0.5448,
366
+ "step": 520
367
+ },
368
+ {
369
+ "epoch": 2.13,
370
+ "learning_rate": 0.00011502008032128515,
371
+ "loss": 0.7318,
372
+ "step": 530
373
+ },
374
+ {
375
+ "epoch": 2.17,
376
+ "learning_rate": 0.00011341365461847391,
377
+ "loss": 0.5083,
378
+ "step": 540
379
+ },
380
+ {
381
+ "epoch": 2.21,
382
+ "learning_rate": 0.00011180722891566267,
383
+ "loss": 0.6493,
384
+ "step": 550
385
+ },
386
+ {
387
+ "epoch": 2.25,
388
+ "learning_rate": 0.0001102008032128514,
389
+ "loss": 0.4928,
390
+ "step": 560
391
+ },
392
+ {
393
+ "epoch": 2.29,
394
+ "learning_rate": 0.00010859437751004015,
395
+ "loss": 0.5395,
396
+ "step": 570
397
+ },
398
+ {
399
+ "epoch": 2.33,
400
+ "learning_rate": 0.00010698795180722891,
401
+ "loss": 0.5588,
402
+ "step": 580
403
+ },
404
+ {
405
+ "epoch": 2.37,
406
+ "learning_rate": 0.00010538152610441767,
407
+ "loss": 0.5892,
408
+ "step": 590
409
+ },
410
+ {
411
+ "epoch": 2.41,
412
+ "learning_rate": 0.00010377510040160642,
413
+ "loss": 0.614,
414
+ "step": 600
415
+ },
416
+ {
417
+ "epoch": 2.41,
418
+ "eval_accuracy": 0.8504273504273504,
419
+ "eval_loss": 0.5006864070892334,
420
+ "eval_runtime": 12.2103,
421
+ "eval_samples_per_second": 57.493,
422
+ "eval_steps_per_second": 7.207,
423
+ "step": 600
424
+ },
425
+ {
426
+ "epoch": 2.45,
427
+ "learning_rate": 0.00010216867469879518,
428
+ "loss": 0.7658,
429
+ "step": 610
430
+ },
431
+ {
432
+ "epoch": 2.49,
433
+ "learning_rate": 0.00010056224899598394,
434
+ "loss": 0.5152,
435
+ "step": 620
436
+ },
437
+ {
438
+ "epoch": 2.53,
439
+ "learning_rate": 9.89558232931727e-05,
440
+ "loss": 0.4941,
441
+ "step": 630
442
+ },
443
+ {
444
+ "epoch": 2.57,
445
+ "learning_rate": 9.734939759036145e-05,
446
+ "loss": 0.6831,
447
+ "step": 640
448
+ },
449
+ {
450
+ "epoch": 2.61,
451
+ "learning_rate": 9.574297188755021e-05,
452
+ "loss": 0.5971,
453
+ "step": 650
454
+ },
455
+ {
456
+ "epoch": 2.65,
457
+ "learning_rate": 9.413654618473896e-05,
458
+ "loss": 0.5842,
459
+ "step": 660
460
+ },
461
+ {
462
+ "epoch": 2.69,
463
+ "learning_rate": 9.253012048192772e-05,
464
+ "loss": 0.5976,
465
+ "step": 670
466
+ },
467
+ {
468
+ "epoch": 2.73,
469
+ "learning_rate": 9.092369477911648e-05,
470
+ "loss": 0.5051,
471
+ "step": 680
472
+ },
473
+ {
474
+ "epoch": 2.77,
475
+ "learning_rate": 8.931726907630522e-05,
476
+ "loss": 0.4737,
477
+ "step": 690
478
+ },
479
+ {
480
+ "epoch": 2.81,
481
+ "learning_rate": 8.771084337349398e-05,
482
+ "loss": 0.5484,
483
+ "step": 700
484
+ },
485
+ {
486
+ "epoch": 2.81,
487
+ "eval_accuracy": 0.8333333333333334,
488
+ "eval_loss": 0.5322346091270447,
489
+ "eval_runtime": 12.3228,
490
+ "eval_samples_per_second": 56.968,
491
+ "eval_steps_per_second": 7.141,
492
+ "step": 700
493
+ },
494
+ {
495
+ "epoch": 2.85,
496
+ "learning_rate": 8.610441767068274e-05,
497
+ "loss": 0.4436,
498
+ "step": 710
499
+ },
500
+ {
501
+ "epoch": 2.89,
502
+ "learning_rate": 8.449799196787149e-05,
503
+ "loss": 0.6452,
504
+ "step": 720
505
+ },
506
+ {
507
+ "epoch": 2.93,
508
+ "learning_rate": 8.289156626506025e-05,
509
+ "loss": 0.5724,
510
+ "step": 730
511
+ },
512
+ {
513
+ "epoch": 2.97,
514
+ "learning_rate": 8.128514056224899e-05,
515
+ "loss": 0.3933,
516
+ "step": 740
517
+ },
518
+ {
519
+ "epoch": 3.01,
520
+ "learning_rate": 7.967871485943775e-05,
521
+ "loss": 0.5753,
522
+ "step": 750
523
+ },
524
+ {
525
+ "epoch": 3.05,
526
+ "learning_rate": 7.80722891566265e-05,
527
+ "loss": 0.4426,
528
+ "step": 760
529
+ },
530
+ {
531
+ "epoch": 3.09,
532
+ "learning_rate": 7.646586345381526e-05,
533
+ "loss": 0.5442,
534
+ "step": 770
535
+ },
536
+ {
537
+ "epoch": 3.13,
538
+ "learning_rate": 7.485943775100402e-05,
539
+ "loss": 0.4839,
540
+ "step": 780
541
+ },
542
+ {
543
+ "epoch": 3.17,
544
+ "learning_rate": 7.325301204819278e-05,
545
+ "loss": 0.3711,
546
+ "step": 790
547
+ },
548
+ {
549
+ "epoch": 3.21,
550
+ "learning_rate": 7.164658634538153e-05,
551
+ "loss": 0.3844,
552
+ "step": 800
553
+ },
554
+ {
555
+ "epoch": 3.21,
556
+ "eval_accuracy": 0.8518518518518519,
557
+ "eval_loss": 0.5012323260307312,
558
+ "eval_runtime": 12.9006,
559
+ "eval_samples_per_second": 54.416,
560
+ "eval_steps_per_second": 6.821,
561
+ "step": 800
562
+ },
563
+ {
564
+ "epoch": 3.25,
565
+ "learning_rate": 7.004016064257029e-05,
566
+ "loss": 0.513,
567
+ "step": 810
568
+ },
569
+ {
570
+ "epoch": 3.29,
571
+ "learning_rate": 6.843373493975905e-05,
572
+ "loss": 0.3505,
573
+ "step": 820
574
+ },
575
+ {
576
+ "epoch": 3.33,
577
+ "learning_rate": 6.68273092369478e-05,
578
+ "loss": 0.4022,
579
+ "step": 830
580
+ },
581
+ {
582
+ "epoch": 3.37,
583
+ "learning_rate": 6.522088353413655e-05,
584
+ "loss": 0.3574,
585
+ "step": 840
586
+ },
587
+ {
588
+ "epoch": 3.41,
589
+ "learning_rate": 6.36144578313253e-05,
590
+ "loss": 0.5703,
591
+ "step": 850
592
+ },
593
+ {
594
+ "epoch": 3.45,
595
+ "learning_rate": 6.200803212851406e-05,
596
+ "loss": 0.4585,
597
+ "step": 860
598
+ },
599
+ {
600
+ "epoch": 3.49,
601
+ "learning_rate": 6.040160642570282e-05,
602
+ "loss": 0.3114,
603
+ "step": 870
604
+ },
605
+ {
606
+ "epoch": 3.53,
607
+ "learning_rate": 5.8795180722891576e-05,
608
+ "loss": 0.3508,
609
+ "step": 880
610
+ },
611
+ {
612
+ "epoch": 3.57,
613
+ "learning_rate": 5.718875502008032e-05,
614
+ "loss": 0.3614,
615
+ "step": 890
616
+ },
617
+ {
618
+ "epoch": 3.61,
619
+ "learning_rate": 5.5582329317269076e-05,
620
+ "loss": 0.5681,
621
+ "step": 900
622
+ },
623
+ {
624
+ "epoch": 3.61,
625
+ "eval_accuracy": 0.8589743589743589,
626
+ "eval_loss": 0.49228861927986145,
627
+ "eval_runtime": 13.1999,
628
+ "eval_samples_per_second": 53.182,
629
+ "eval_steps_per_second": 6.667,
630
+ "step": 900
631
+ },
632
+ {
633
+ "epoch": 3.65,
634
+ "learning_rate": 5.397590361445783e-05,
635
+ "loss": 0.5074,
636
+ "step": 910
637
+ },
638
+ {
639
+ "epoch": 3.69,
640
+ "learning_rate": 5.236947791164659e-05,
641
+ "loss": 0.3371,
642
+ "step": 920
643
+ },
644
+ {
645
+ "epoch": 3.73,
646
+ "learning_rate": 5.076305220883535e-05,
647
+ "loss": 0.5226,
648
+ "step": 930
649
+ },
650
+ {
651
+ "epoch": 3.78,
652
+ "learning_rate": 4.9156626506024104e-05,
653
+ "loss": 0.4462,
654
+ "step": 940
655
+ },
656
+ {
657
+ "epoch": 3.82,
658
+ "learning_rate": 4.7550200803212854e-05,
659
+ "loss": 0.4781,
660
+ "step": 950
661
+ },
662
+ {
663
+ "epoch": 3.86,
664
+ "learning_rate": 4.594377510040161e-05,
665
+ "loss": 0.4001,
666
+ "step": 960
667
+ },
668
+ {
669
+ "epoch": 3.9,
670
+ "learning_rate": 4.433734939759036e-05,
671
+ "loss": 0.4788,
672
+ "step": 970
673
+ },
674
+ {
675
+ "epoch": 3.94,
676
+ "learning_rate": 4.273092369477912e-05,
677
+ "loss": 0.4561,
678
+ "step": 980
679
+ },
680
+ {
681
+ "epoch": 3.98,
682
+ "learning_rate": 4.1124497991967875e-05,
683
+ "loss": 0.2563,
684
+ "step": 990
685
+ },
686
+ {
687
+ "epoch": 4.02,
688
+ "learning_rate": 3.9518072289156625e-05,
689
+ "loss": 0.4315,
690
+ "step": 1000
691
+ },
692
+ {
693
+ "epoch": 4.02,
694
+ "eval_accuracy": 0.8774928774928775,
695
+ "eval_loss": 0.42575880885124207,
696
+ "eval_runtime": 13.0968,
697
+ "eval_samples_per_second": 53.601,
698
+ "eval_steps_per_second": 6.719,
699
+ "step": 1000
700
+ },
701
+ {
702
+ "epoch": 4.06,
703
+ "learning_rate": 3.791164658634538e-05,
704
+ "loss": 0.2898,
705
+ "step": 1010
706
+ },
707
+ {
708
+ "epoch": 4.1,
709
+ "learning_rate": 3.630522088353414e-05,
710
+ "loss": 0.3803,
711
+ "step": 1020
712
+ },
713
+ {
714
+ "epoch": 4.14,
715
+ "learning_rate": 3.4698795180722896e-05,
716
+ "loss": 0.2827,
717
+ "step": 1030
718
+ },
719
+ {
720
+ "epoch": 4.18,
721
+ "learning_rate": 3.309236947791165e-05,
722
+ "loss": 0.3382,
723
+ "step": 1040
724
+ },
725
+ {
726
+ "epoch": 4.22,
727
+ "learning_rate": 3.14859437751004e-05,
728
+ "loss": 0.3808,
729
+ "step": 1050
730
+ },
731
+ {
732
+ "epoch": 4.26,
733
+ "learning_rate": 2.987951807228916e-05,
734
+ "loss": 0.2551,
735
+ "step": 1060
736
+ },
737
+ {
738
+ "epoch": 4.3,
739
+ "learning_rate": 2.827309236947791e-05,
740
+ "loss": 0.3011,
741
+ "step": 1070
742
+ },
743
+ {
744
+ "epoch": 4.34,
745
+ "learning_rate": 2.6666666666666667e-05,
746
+ "loss": 0.4091,
747
+ "step": 1080
748
+ },
749
+ {
750
+ "epoch": 4.38,
751
+ "learning_rate": 2.5060240963855423e-05,
752
+ "loss": 0.286,
753
+ "step": 1090
754
+ },
755
+ {
756
+ "epoch": 4.42,
757
+ "learning_rate": 2.345381526104418e-05,
758
+ "loss": 0.3804,
759
+ "step": 1100
760
+ },
761
+ {
762
+ "epoch": 4.42,
763
+ "eval_accuracy": 0.8675213675213675,
764
+ "eval_loss": 0.4324430227279663,
765
+ "eval_runtime": 12.1257,
766
+ "eval_samples_per_second": 57.894,
767
+ "eval_steps_per_second": 7.257,
768
+ "step": 1100
769
+ },
770
+ {
771
+ "epoch": 4.46,
772
+ "learning_rate": 2.1847389558232934e-05,
773
+ "loss": 0.2672,
774
+ "step": 1110
775
+ },
776
+ {
777
+ "epoch": 4.5,
778
+ "learning_rate": 2.0240963855421687e-05,
779
+ "loss": 0.3661,
780
+ "step": 1120
781
+ },
782
+ {
783
+ "epoch": 4.54,
784
+ "learning_rate": 1.863453815261044e-05,
785
+ "loss": 0.3126,
786
+ "step": 1130
787
+ },
788
+ {
789
+ "epoch": 4.58,
790
+ "learning_rate": 1.7028112449799198e-05,
791
+ "loss": 0.3455,
792
+ "step": 1140
793
+ },
794
+ {
795
+ "epoch": 4.62,
796
+ "learning_rate": 1.5421686746987955e-05,
797
+ "loss": 0.3604,
798
+ "step": 1150
799
+ },
800
+ {
801
+ "epoch": 4.66,
802
+ "learning_rate": 1.3815261044176708e-05,
803
+ "loss": 0.4628,
804
+ "step": 1160
805
+ },
806
+ {
807
+ "epoch": 4.7,
808
+ "learning_rate": 1.2208835341365463e-05,
809
+ "loss": 0.4074,
810
+ "step": 1170
811
+ },
812
+ {
813
+ "epoch": 4.74,
814
+ "learning_rate": 1.0602409638554217e-05,
815
+ "loss": 0.2512,
816
+ "step": 1180
817
+ },
818
+ {
819
+ "epoch": 4.78,
820
+ "learning_rate": 8.995983935742972e-06,
821
+ "loss": 0.2974,
822
+ "step": 1190
823
+ },
824
+ {
825
+ "epoch": 4.82,
826
+ "learning_rate": 7.389558232931727e-06,
827
+ "loss": 0.2887,
828
+ "step": 1200
829
+ },
830
+ {
831
+ "epoch": 4.82,
832
+ "eval_accuracy": 0.8803418803418803,
833
+ "eval_loss": 0.39916983246803284,
834
+ "eval_runtime": 12.2383,
835
+ "eval_samples_per_second": 57.361,
836
+ "eval_steps_per_second": 7.191,
837
+ "step": 1200
838
+ },
839
+ {
840
+ "epoch": 4.86,
841
+ "learning_rate": 5.783132530120483e-06,
842
+ "loss": 0.3073,
843
+ "step": 1210
844
+ },
845
+ {
846
+ "epoch": 4.9,
847
+ "learning_rate": 4.176706827309238e-06,
848
+ "loss": 0.2825,
849
+ "step": 1220
850
+ },
851
+ {
852
+ "epoch": 4.94,
853
+ "learning_rate": 2.570281124497992e-06,
854
+ "loss": 0.288,
855
+ "step": 1230
856
+ },
857
+ {
858
+ "epoch": 4.98,
859
+ "learning_rate": 9.638554216867472e-07,
860
+ "loss": 0.3198,
861
+ "step": 1240
862
+ },
863
+ {
864
+ "epoch": 5.0,
865
+ "step": 1245,
866
+ "total_flos": 1.539101261655982e+18,
867
+ "train_loss": 0.6643937945844658,
868
+ "train_runtime": 748.7697,
869
+ "train_samples_per_second": 26.524,
870
+ "train_steps_per_second": 1.663
871
+ }
872
+ ],
873
+ "logging_steps": 10,
874
+ "max_steps": 1245,
875
+ "num_train_epochs": 5,
876
+ "save_steps": 100,
877
+ "total_flos": 1.539101261655982e+18,
878
+ "trial_name": null,
879
+ "trial_params": null
880
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a86b97c3f3eb410a6524ee6c1322af224937e174f3d53cb28b8020e2c478c35
3
+ size 4536