Tianjiao-Yu committed
Commit 9e966de
Parent: 6d0026b

End of training

README.md ADDED
@@ -0,0 +1,88 @@
+ ---
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: videomae-large
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # videomae-large
+
+ This model is a fine-tuned version of [MCG-NJU/videomae-large](https://huggingface.co/MCG-NJU/videomae-large) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0993
+ - Accuracy: 0.9742
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 32
+ - eval_batch_size: 32
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - training_steps: 300
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | 2.3203        | 0.03  | 10   | 2.1994          | 0.1571   |
+ | 1.9795        | 1.03  | 20   | 1.7835          | 0.3429   |
+ | 1.0467        | 2.03  | 30   | 0.7311          | 0.6571   |
+ | 0.301         | 3.03  | 40   | 0.2195          | 0.9429   |
+ | 0.1061        | 4.03  | 50   | 0.1529          | 0.9143   |
+ | 0.0499        | 5.03  | 60   | 0.0826          | 0.9857   |
+ | 0.079         | 6.03  | 70   | 0.0534          | 0.9857   |
+ | 0.0487        | 7.03  | 80   | 0.0299          | 0.9857   |
+ | 0.0217        | 8.03  | 90   | 0.3283          | 0.9      |
+ | 0.0387        | 9.03  | 100  | 0.0268          | 0.9857   |
+ | 0.0252        | 10.03 | 110  | 0.0386          | 0.9857   |
+ | 0.0324        | 11.03 | 120  | 0.3067          | 0.9      |
+ | 0.0022        | 12.03 | 130  | 0.0131          | 1.0      |
+ | 0.0115        | 13.03 | 140  | 0.0889          | 0.9857   |
+ | 0.0225        | 14.03 | 150  | 0.0091          | 1.0      |
+ | 0.0012        | 15.03 | 160  | 0.0081          | 1.0      |
+ | 0.001         | 16.03 | 170  | 0.0103          | 1.0      |
+ | 0.0255        | 17.03 | 180  | 0.0113          | 1.0      |
+ | 0.0016        | 18.03 | 190  | 0.0252          | 0.9857   |
+ | 0.0039        | 19.03 | 200  | 0.0177          | 0.9857   |
+ | 0.0007        | 20.03 | 210  | 0.0017          | 1.0      |
+ | 0.0006        | 21.03 | 220  | 0.0013          | 1.0      |
+ | 0.0006        | 22.03 | 230  | 0.0012          | 1.0      |
+ | 0.0007        | 23.03 | 240  | 0.0011          | 1.0      |
+ | 0.0005        | 24.03 | 250  | 0.0011          | 1.0      |
+ | 0.0005        | 25.03 | 260  | 0.0011          | 1.0      |
+ | 0.0005        | 26.03 | 270  | 0.0011          | 1.0      |
+ | 0.0005        | 27.03 | 280  | 0.0011          | 1.0      |
+ | 0.0005        | 28.03 | 290  | 0.0011          | 1.0      |
+ | 0.0005        | 29.03 | 300  | 0.0011          | 1.0      |
+
+
+ ### Framework versions
+
+ - Transformers 4.37.2
+ - Pytorch 2.1.0+cu121
+ - Datasets 2.17.1
+ - Tokenizers 0.15.2
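
For reproducibility, the hyperparameters in the card map directly onto `transformers.TrainingArguments`. A minimal sketch of the likely setup; the output directory, evaluation/save strategy, and best-model settings are assumptions (consistent with the per-epoch evals and `best_model_checkpoint` in trainer_state.json below), everything else is taken from the card:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="videomae-large",       # assumption; the actual path is not in the diff
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    seed=42,
    # Adam with betas=(0.9, 0.999) and epsilon=1e-8 is the Trainer's default optimizer.
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    max_steps=300,                     # "training_steps: 300" in the card
    logging_steps=10,                  # matches log_history in trainer_state.json
    evaluation_strategy="epoch",       # assumption; the log shows one eval per 10-step epoch
    save_strategy="epoch",             # assumption
    load_best_model_at_end=True,       # assumption, implied by best_model_checkpoint
    metric_for_best_model="accuracy",  # assumption
)
```
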
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 29.03,
+     "eval_accuracy": 0.9741935483870968,
+     "eval_loss": 0.09928672015666962,
+     "eval_runtime": 13.0498,
+     "eval_samples_per_second": 11.878,
+     "eval_steps_per_second": 0.383
+ }
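
A quick consistency check on these metrics: runtime times throughput implies about 155 evaluation clips, and 151/155 reproduces the reported accuracy exactly (the sample count itself is not stated anywhere in the repo, so this is an inference):

```python
runtime = 13.0498                  # eval_runtime, seconds
throughput = 11.878                # eval_samples_per_second
n = round(runtime * throughput)    # -> 155 clips
print(n, 151 / n)                  # 155 0.9741935483870968 == eval_accuracy
```
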
config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "_name_or_path": "MCG-NJU/videomae-large",
+   "architectures": [
+     "VideoMAEForVideoClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "decoder_hidden_size": 512,
+   "decoder_intermediate_size": 2048,
+   "decoder_num_attention_heads": 8,
+   "decoder_num_hidden_layers": 12,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "ApplyEyeMakeup",
+     "1": "ApplyLipstick",
+     "2": "Archery",
+     "3": "BabyCrawling",
+     "4": "BalanceBeam",
+     "5": "BandMarching",
+     "6": "BaseballPitch",
+     "7": "Basketball",
+     "8": "BasketballDunk",
+     "9": "BenchPress"
+   },
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "ApplyEyeMakeup": 0,
+     "ApplyLipstick": 1,
+     "Archery": 2,
+     "BabyCrawling": 3,
+     "BalanceBeam": 4,
+     "BandMarching": 5,
+     "BaseballPitch": 6,
+     "Basketball": 7,
+     "BasketballDunk": 8,
+     "BenchPress": 9
+   },
+   "layer_norm_eps": 1e-12,
+   "model_type": "videomae",
+   "norm_pix_loss": true,
+   "num_attention_heads": 16,
+   "num_channels": 3,
+   "num_frames": 16,
+   "num_hidden_layers": 24,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.37.2",
+   "tubelet_size": 2,
+   "use_mean_pooling": false
+ }
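
The `id2label` map is what turns an argmax over the model's 10 logits into an action name. A minimal sketch using the file directly (standard library only; the predicted index is a made-up example):

```python
import json

with open("config.json") as f:
    config = json.load(f)

# JSON object keys are strings, so the class index must be converted
# to str before the lookup.
predicted_class_idx = 7                              # e.g. argmax over the logits
print(config["id2label"][str(predicted_class_idx)])  # -> "Basketball"
```

The ten classes are the first ten UCF101 action categories in alphabetical order, which suggests fine-tuning on a small UCF101 subset, though the card itself leaves the dataset unspecified.
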
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8f784d5e79a9b4421e39993c18671489940fd7d36ff370e75ffa6cc83706838
+ size 1215529056
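
The pointer's size is consistent with a float32 checkpoint of a ViT-Large-scale model: at 4 bytes per parameter, 1,215,529,056 bytes works out to roughly 304M parameters, in line with the VideoMAE-Large backbone plus the 10-way classification head (a consistency check, not a figure stated in the repo):

```python
size_bytes = 1_215_529_056                 # from the LFS pointer above
params = size_bytes / 4                    # torch_dtype is float32 -> 4 bytes each
print(f"~{params / 1e6:.0f}M parameters")  # ~304M, ViT-Large scale
```
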
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.485,
+     0.456,
+     0.406
+   ],
+   "image_processor_type": "VideoMAEImageProcessor",
+   "image_std": [
+     0.229,
+     0.224,
+     0.225
+   ],
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
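
Together, the processor and model configs describe a standard VideoMAE pipeline: sample 16 frames, resize so the shortest edge is 224, center-crop to 224x224, rescale by 1/255, normalize with the ImageNet mean/std above, then classify. A hedged inference sketch (the repo id is inferred from the commit header and may differ; random pixels stand in for a real decoded clip):

```python
import numpy as np
import torch
from transformers import VideoMAEImageProcessor, VideoMAEForVideoClassification

repo = "Tianjiao-Yu/videomae-large"  # assumed repo id

processor = VideoMAEImageProcessor.from_pretrained(repo)
model = VideoMAEForVideoClassification.from_pretrained(repo)
model.eval()

# 16 frames, as required by num_frames in config.json; the processor handles
# resizing, cropping, rescaling and normalization per preprocessor_config.json.
video = list(np.random.randint(0, 256, (16, 3, 360, 640), dtype=np.uint8))

inputs = processor(video, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 10)

print(model.config.id2label[logits.argmax(-1).item()])
```
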
runs/Feb19_23-28-00_plan.cs.vt.edu/events.out.tfevents.1708414348.plan.cs.vt.edu.3195129.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b2e6a8a1d0391c022c0ba07fafb94b5e5209744ab5c0d558abae99efa5c8e9b
+ size 9936
runs/Feb19_23-35-34_plan.cs.vt.edu/events.out.tfevents.1708414542.plan.cs.vt.edu.3195129.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c227ccee982c8b03bd31b77cf0151a0a3dcd48b45b0c89304a8b67ffae4305c
+ size 19659
runs/Feb19_23-35-34_plan.cs.vt.edu/events.out.tfevents.1708415980.plan.cs.vt.edu.3195129.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42f15f9888178ab8822ad8e242588c2af13dacff8af4ed9f299e1aab1cc1bd30
+ size 734
test_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 29.03,
+     "eval_accuracy": 0.9741935483870968,
+     "eval_loss": 0.09928672015666962,
+     "eval_runtime": 13.0498,
+     "eval_samples_per_second": 11.878,
+     "eval_steps_per_second": 0.383
+ }
trainer_state.json ADDED
@@ -0,0 +1,498 @@
+ {
+   "best_metric": 1.0,
+   "best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-130",
+   "epoch": 29.033333333333335,
+   "eval_steps": 500,
+   "global_step": 300,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.03,
+       "learning_rate": 1.6666666666666667e-05,
+       "loss": 2.3203,
+       "step": 10
+     },
+     {
+       "epoch": 0.03,
+       "eval_accuracy": 0.15714285714285714,
+       "eval_loss": 2.199398994445801,
+       "eval_runtime": 5.6646,
+       "eval_samples_per_second": 12.357,
+       "eval_steps_per_second": 0.53,
+       "step": 10
+     },
+     {
+       "epoch": 1.03,
+       "learning_rate": 3.3333333333333335e-05,
+       "loss": 1.9795,
+       "step": 20
+     },
+     {
+       "epoch": 1.03,
+       "eval_accuracy": 0.34285714285714286,
+       "eval_loss": 1.7835056781768799,
+       "eval_runtime": 6.2612,
+       "eval_samples_per_second": 11.18,
+       "eval_steps_per_second": 0.479,
+       "step": 20
+     },
+     {
+       "epoch": 2.03,
+       "learning_rate": 5e-05,
+       "loss": 1.0467,
+       "step": 30
+     },
+     {
+       "epoch": 2.03,
+       "eval_accuracy": 0.6571428571428571,
+       "eval_loss": 0.7310971617698669,
+       "eval_runtime": 6.125,
+       "eval_samples_per_second": 11.429,
+       "eval_steps_per_second": 0.49,
+       "step": 30
+     },
+     {
+       "epoch": 3.03,
+       "learning_rate": 4.814814814814815e-05,
+       "loss": 0.301,
+       "step": 40
+     },
+     {
+       "epoch": 3.03,
+       "eval_accuracy": 0.9428571428571428,
+       "eval_loss": 0.21951383352279663,
+       "eval_runtime": 5.975,
+       "eval_samples_per_second": 11.715,
+       "eval_steps_per_second": 0.502,
+       "step": 40
+     },
+     {
+       "epoch": 4.03,
+       "learning_rate": 4.62962962962963e-05,
+       "loss": 0.1061,
+       "step": 50
+     },
+     {
+       "epoch": 4.03,
+       "eval_accuracy": 0.9142857142857143,
+       "eval_loss": 0.15290319919586182,
+       "eval_runtime": 6.6932,
+       "eval_samples_per_second": 10.458,
+       "eval_steps_per_second": 0.448,
+       "step": 50
+     },
+     {
+       "epoch": 5.03,
+       "learning_rate": 4.4444444444444447e-05,
+       "loss": 0.0499,
+       "step": 60
+     },
+     {
+       "epoch": 5.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.08257948607206345,
+       "eval_runtime": 6.8638,
+       "eval_samples_per_second": 10.198,
+       "eval_steps_per_second": 0.437,
+       "step": 60
+     },
+     {
+       "epoch": 6.03,
+       "learning_rate": 4.259259259259259e-05,
+       "loss": 0.079,
+       "step": 70
+     },
+     {
+       "epoch": 6.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.05339507758617401,
+       "eval_runtime": 5.6428,
+       "eval_samples_per_second": 12.405,
+       "eval_steps_per_second": 0.532,
+       "step": 70
+     },
+     {
+       "epoch": 7.03,
+       "learning_rate": 4.074074074074074e-05,
+       "loss": 0.0487,
+       "step": 80
+     },
+     {
+       "epoch": 7.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.02986798621714115,
+       "eval_runtime": 5.2266,
+       "eval_samples_per_second": 13.393,
+       "eval_steps_per_second": 0.574,
+       "step": 80
+     },
+     {
+       "epoch": 8.03,
+       "learning_rate": 3.888888888888889e-05,
+       "loss": 0.0217,
+       "step": 90
+     },
+     {
+       "epoch": 8.03,
+       "eval_accuracy": 0.9,
+       "eval_loss": 0.32826170325279236,
+       "eval_runtime": 6.6113,
+       "eval_samples_per_second": 10.588,
+       "eval_steps_per_second": 0.454,
+       "step": 90
+     },
+     {
+       "epoch": 9.03,
+       "learning_rate": 3.7037037037037037e-05,
+       "loss": 0.0387,
+       "step": 100
+     },
+     {
+       "epoch": 9.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.026779260486364365,
+       "eval_runtime": 6.3515,
+       "eval_samples_per_second": 11.021,
+       "eval_steps_per_second": 0.472,
+       "step": 100
+     },
+     {
+       "epoch": 10.03,
+       "learning_rate": 3.518518518518519e-05,
+       "loss": 0.0252,
+       "step": 110
+     },
+     {
+       "epoch": 10.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.03859327733516693,
+       "eval_runtime": 6.9316,
+       "eval_samples_per_second": 10.099,
+       "eval_steps_per_second": 0.433,
+       "step": 110
+     },
+     {
+       "epoch": 11.03,
+       "learning_rate": 3.3333333333333335e-05,
+       "loss": 0.0324,
+       "step": 120
+     },
+     {
+       "epoch": 11.03,
+       "eval_accuracy": 0.9,
+       "eval_loss": 0.3067415654659271,
+       "eval_runtime": 5.7803,
+       "eval_samples_per_second": 12.11,
+       "eval_steps_per_second": 0.519,
+       "step": 120
+     },
+     {
+       "epoch": 12.03,
+       "learning_rate": 3.148148148148148e-05,
+       "loss": 0.0022,
+       "step": 130
+     },
+     {
+       "epoch": 12.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.013092391192913055,
+       "eval_runtime": 5.2039,
+       "eval_samples_per_second": 13.451,
+       "eval_steps_per_second": 0.576,
+       "step": 130
+     },
+     {
+       "epoch": 13.03,
+       "learning_rate": 2.962962962962963e-05,
+       "loss": 0.0115,
+       "step": 140
+     },
+     {
+       "epoch": 13.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.08892710506916046,
+       "eval_runtime": 4.8696,
+       "eval_samples_per_second": 14.375,
+       "eval_steps_per_second": 0.616,
+       "step": 140
+     },
+     {
+       "epoch": 14.03,
+       "learning_rate": 2.777777777777778e-05,
+       "loss": 0.0225,
+       "step": 150
+     },
+     {
+       "epoch": 14.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.009070915170013905,
+       "eval_runtime": 6.0016,
+       "eval_samples_per_second": 11.663,
+       "eval_steps_per_second": 0.5,
+       "step": 150
+     },
+     {
+       "epoch": 15.03,
+       "learning_rate": 2.5925925925925925e-05,
+       "loss": 0.0012,
+       "step": 160
+     },
+     {
+       "epoch": 15.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.008068457245826721,
+       "eval_runtime": 5.6404,
+       "eval_samples_per_second": 12.411,
+       "eval_steps_per_second": 0.532,
+       "step": 160
+     },
+     {
+       "epoch": 16.03,
+       "learning_rate": 2.4074074074074074e-05,
+       "loss": 0.001,
+       "step": 170
+     },
+     {
+       "epoch": 16.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.010284548625349998,
+       "eval_runtime": 5.7893,
+       "eval_samples_per_second": 12.091,
+       "eval_steps_per_second": 0.518,
+       "step": 170
+     },
+     {
+       "epoch": 17.03,
+       "learning_rate": 2.2222222222222223e-05,
+       "loss": 0.0255,
+       "step": 180
+     },
+     {
+       "epoch": 17.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.01131558045744896,
+       "eval_runtime": 6.4736,
+       "eval_samples_per_second": 10.813,
+       "eval_steps_per_second": 0.463,
+       "step": 180
+     },
+     {
+       "epoch": 18.03,
+       "learning_rate": 2.037037037037037e-05,
+       "loss": 0.0016,
+       "step": 190
+     },
+     {
+       "epoch": 18.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.025160381570458412,
+       "eval_runtime": 5.7118,
+       "eval_samples_per_second": 12.255,
+       "eval_steps_per_second": 0.525,
+       "step": 190
+     },
+     {
+       "epoch": 19.03,
+       "learning_rate": 1.8518518518518518e-05,
+       "loss": 0.0039,
+       "step": 200
+     },
+     {
+       "epoch": 19.03,
+       "eval_accuracy": 0.9857142857142858,
+       "eval_loss": 0.017666514962911606,
+       "eval_runtime": 5.7688,
+       "eval_samples_per_second": 12.134,
+       "eval_steps_per_second": 0.52,
+       "step": 200
+     },
+     {
+       "epoch": 20.03,
+       "learning_rate": 1.6666666666666667e-05,
+       "loss": 0.0007,
+       "step": 210
+     },
+     {
+       "epoch": 20.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0016855127178132534,
+       "eval_runtime": 5.9439,
+       "eval_samples_per_second": 11.777,
+       "eval_steps_per_second": 0.505,
+       "step": 210
+     },
+     {
+       "epoch": 21.03,
+       "learning_rate": 1.4814814814814815e-05,
+       "loss": 0.0006,
+       "step": 220
+     },
+     {
+       "epoch": 21.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.00132262974511832,
+       "eval_runtime": 6.2152,
+       "eval_samples_per_second": 11.263,
+       "eval_steps_per_second": 0.483,
+       "step": 220
+     },
+     {
+       "epoch": 22.03,
+       "learning_rate": 1.2962962962962962e-05,
+       "loss": 0.0006,
+       "step": 230
+     },
+     {
+       "epoch": 22.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0012219419004395604,
+       "eval_runtime": 5.788,
+       "eval_samples_per_second": 12.094,
+       "eval_steps_per_second": 0.518,
+       "step": 230
+     },
+     {
+       "epoch": 23.03,
+       "learning_rate": 1.1111111111111112e-05,
+       "loss": 0.0007,
+       "step": 240
+     },
+     {
+       "epoch": 23.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.001058029243722558,
+       "eval_runtime": 6.1445,
+       "eval_samples_per_second": 11.392,
+       "eval_steps_per_second": 0.488,
+       "step": 240
+     },
+     {
+       "epoch": 24.03,
+       "learning_rate": 9.259259259259259e-06,
+       "loss": 0.0005,
+       "step": 250
+     },
+     {
+       "epoch": 24.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010857696179300547,
+       "eval_runtime": 6.974,
+       "eval_samples_per_second": 10.037,
+       "eval_steps_per_second": 0.43,
+       "step": 250
+     },
+     {
+       "epoch": 25.03,
+       "learning_rate": 7.4074074074074075e-06,
+       "loss": 0.0005,
+       "step": 260
+     },
+     {
+       "epoch": 25.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010956026380881667,
+       "eval_runtime": 5.6397,
+       "eval_samples_per_second": 12.412,
+       "eval_steps_per_second": 0.532,
+       "step": 260
+     },
+     {
+       "epoch": 26.03,
+       "learning_rate": 5.555555555555556e-06,
+       "loss": 0.0005,
+       "step": 270
+     },
+     {
+       "epoch": 26.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010995334014296532,
+       "eval_runtime": 5.1379,
+       "eval_samples_per_second": 13.624,
+       "eval_steps_per_second": 0.584,
+       "step": 270
+     },
+     {
+       "epoch": 27.03,
+       "learning_rate": 3.7037037037037037e-06,
+       "loss": 0.0005,
+       "step": 280
+     },
+     {
+       "epoch": 27.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010933494195342064,
+       "eval_runtime": 5.5747,
+       "eval_samples_per_second": 12.557,
+       "eval_steps_per_second": 0.538,
+       "step": 280
+     },
+     {
+       "epoch": 28.03,
+       "learning_rate": 1.8518518518518519e-06,
+       "loss": 0.0005,
+       "step": 290
+     },
+     {
+       "epoch": 28.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010898270411416888,
+       "eval_runtime": 5.3602,
+       "eval_samples_per_second": 13.059,
+       "eval_steps_per_second": 0.56,
+       "step": 290
+     },
+     {
+       "epoch": 29.03,
+       "learning_rate": 0.0,
+       "loss": 0.0005,
+       "step": 300
+     },
+     {
+       "epoch": 29.03,
+       "eval_accuracy": 1.0,
+       "eval_loss": 0.0010924884118139744,
+       "eval_runtime": 6.1154,
+       "eval_samples_per_second": 11.447,
+       "eval_steps_per_second": 0.491,
+       "step": 300
+     },
+     {
+       "epoch": 29.03,
+       "step": 300,
+       "total_flos": 3.952030329844531e+19,
+       "train_loss": 0.2041329901261876,
+       "train_runtime": 1416.8695,
+       "train_samples_per_second": 6.776,
+       "train_steps_per_second": 0.212
+     },
+     {
+       "epoch": 29.03,
+       "eval_accuracy": 0.9741935483870968,
+       "eval_loss": 0.09911247342824936,
+       "eval_runtime": 16.8335,
+       "eval_samples_per_second": 9.208,
+       "eval_steps_per_second": 0.297,
+       "step": 300
+     },
+     {
+       "epoch": 29.03,
+       "eval_accuracy": 0.9741935483870968,
+       "eval_loss": 0.09928672015666962,
+       "eval_runtime": 13.0498,
+       "eval_samples_per_second": 11.878,
+       "eval_steps_per_second": 0.383,
+       "step": 300
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 300,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 9223372036854775807,
+   "save_steps": 500,
+   "total_flos": 3.952030329844531e+19,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
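
`log_history` interleaves training entries (keyed by `loss` and `learning_rate`) and evaluation entries (keyed by `eval_*`), so extracting the curves is a matter of filtering on keys. A minimal sketch, assuming trainer_state.json has been downloaded locally:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training log entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"], e.get("eval_accuracy"))
         for e in state["log_history"] if "eval_loss" in e]

for step, loss, acc in evals:
    print(f"step {step:3d}  eval_loss {loss:.4f}  eval_accuracy {acc:.4f}")
```
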
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3454bc7aa261c0482f31475bec6bc2ac80d03b78757742a3642beabe3f48f8f2
+ size 4728