andromeda01111 commited on
Commit
ea3c0b1
1 Parent(s): 2e9194b

Upload 9 files

Browse files
config.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "D:\\PycharmProjects\\AER_ENGLISH\\wav2vec2-large-xlsr-53-english",
3
+ "activation_dropout": 0.05,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForSpeechClassification"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 256,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "finetuning_task": "wav2vec2_clf",
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.05,
58
+ "hidden_size": 1024,
59
+ "id2label": {
60
+ "0": "angry",
61
+ "1": "happy",
62
+ "2": "neutral",
63
+ "3": "sad"
64
+ },
65
+ "initializer_range": 0.02,
66
+ "intermediate_size": 4096,
67
+ "label2id": {
68
+ "angry": 0,
69
+ "happy": 1,
70
+ "neutral": 2,
71
+ "sad": 3
72
+ },
73
+ "layer_norm_eps": 1e-05,
74
+ "layerdrop": 0.05,
75
+ "mask_channel_length": 10,
76
+ "mask_channel_min_space": 1,
77
+ "mask_channel_other": 0.0,
78
+ "mask_channel_prob": 0.0,
79
+ "mask_channel_selection": "static",
80
+ "mask_feature_length": 10,
81
+ "mask_feature_min_masks": 0,
82
+ "mask_feature_prob": 0.0,
83
+ "mask_time_length": 10,
84
+ "mask_time_min_masks": 2,
85
+ "mask_time_min_space": 1,
86
+ "mask_time_other": 0.0,
87
+ "mask_time_prob": 0.05,
88
+ "mask_time_selection": "static",
89
+ "model_type": "wav2vec2",
90
+ "num_adapter_layers": 3,
91
+ "num_attention_heads": 16,
92
+ "num_codevector_groups": 2,
93
+ "num_codevectors_per_group": 320,
94
+ "num_conv_pos_embedding_groups": 16,
95
+ "num_conv_pos_embeddings": 128,
96
+ "num_feat_extract_layers": 7,
97
+ "num_hidden_layers": 24,
98
+ "num_negatives": 100,
99
+ "output_hidden_size": 1024,
100
+ "pad_token_id": 0,
101
+ "pooling_mode": "mean",
102
+ "problem_type": "single_label_classification",
103
+ "proj_codevector_dim": 256,
104
+ "tdnn_dilation": [
105
+ 1,
106
+ 2,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "tdnn_dim": [
112
+ 512,
113
+ 512,
114
+ 512,
115
+ 512,
116
+ 1500
117
+ ],
118
+ "tdnn_kernel": [
119
+ 5,
120
+ 3,
121
+ 3,
122
+ 1,
123
+ 1
124
+ ],
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.29.2",
127
+ "use_weighted_layer_sum": false,
128
+ "vocab_size": 33,
129
+ "xvector_output_dim": 512
130
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:227ea10a0cc5559adf0b5919bd69c0b84f21c9f1f8c429ae6758fc96e53d1abb
3
+ size 2498491717
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adc7cf78c831f7d654a0cf296a355c46ab1e3a1571dc63523610d1c75c8ecae3
3
+ size 1266117365
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d101375d77124aeda5ffb16b4a5d6858b063ce818ad9fbdb5b5ce05ba49cfd2
3
+ size 14639
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e8d407729cf398de31e94e18fff7f357437646319b163e49b1d38c97af5554b
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbbe56b494f428613e2af18e08ece3e17de14a2fa2e37347180bc3dcecca109d
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,1351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.94413407821229,
5
+ "global_step": 890,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 9.887640449438202e-05,
13
+ "loss": 1.3963,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.11,
18
+ "eval_accuracy": 0.33006536960601807,
19
+ "eval_loss": 1.3515560626983643,
20
+ "eval_runtime": 318.9295,
21
+ "eval_samples_per_second": 0.959,
22
+ "eval_steps_per_second": 0.241,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.22,
27
+ "learning_rate": 9.775280898876405e-05,
28
+ "loss": 1.2981,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 0.22,
33
+ "eval_accuracy": 0.30392158031463623,
34
+ "eval_loss": 1.2864599227905273,
35
+ "eval_runtime": 533.6299,
36
+ "eval_samples_per_second": 0.573,
37
+ "eval_steps_per_second": 0.144,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.34,
42
+ "learning_rate": 9.662921348314608e-05,
43
+ "loss": 1.2614,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.34,
48
+ "eval_accuracy": 0.6535947918891907,
49
+ "eval_loss": 0.9824705719947815,
50
+ "eval_runtime": 348.7575,
51
+ "eval_samples_per_second": 0.877,
52
+ "eval_steps_per_second": 0.221,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 0.45,
57
+ "learning_rate": 9.550561797752809e-05,
58
+ "loss": 0.9419,
59
+ "step": 40
60
+ },
61
+ {
62
+ "epoch": 0.45,
63
+ "eval_accuracy": 0.49346405267715454,
64
+ "eval_loss": 1.0978724956512451,
65
+ "eval_runtime": 312.9714,
66
+ "eval_samples_per_second": 0.978,
67
+ "eval_steps_per_second": 0.246,
68
+ "step": 40
69
+ },
70
+ {
71
+ "epoch": 0.56,
72
+ "learning_rate": 9.438202247191012e-05,
73
+ "loss": 0.9813,
74
+ "step": 50
75
+ },
76
+ {
77
+ "epoch": 0.56,
78
+ "eval_accuracy": 0.5196078419685364,
79
+ "eval_loss": 0.9674614667892456,
80
+ "eval_runtime": 312.3793,
81
+ "eval_samples_per_second": 0.98,
82
+ "eval_steps_per_second": 0.246,
83
+ "step": 50
84
+ },
85
+ {
86
+ "epoch": 0.67,
87
+ "learning_rate": 9.325842696629214e-05,
88
+ "loss": 0.7973,
89
+ "step": 60
90
+ },
91
+ {
92
+ "epoch": 0.67,
93
+ "eval_accuracy": 0.5947712659835815,
94
+ "eval_loss": 1.0033761262893677,
95
+ "eval_runtime": 316.8558,
96
+ "eval_samples_per_second": 0.966,
97
+ "eval_steps_per_second": 0.243,
98
+ "step": 60
99
+ },
100
+ {
101
+ "epoch": 0.78,
102
+ "learning_rate": 9.213483146067416e-05,
103
+ "loss": 0.9575,
104
+ "step": 70
105
+ },
106
+ {
107
+ "epoch": 0.78,
108
+ "eval_accuracy": 0.5882353186607361,
109
+ "eval_loss": 0.8489904403686523,
110
+ "eval_runtime": 535.4904,
111
+ "eval_samples_per_second": 0.571,
112
+ "eval_steps_per_second": 0.144,
113
+ "step": 70
114
+ },
115
+ {
116
+ "epoch": 0.89,
117
+ "learning_rate": 9.112359550561799e-05,
118
+ "loss": 0.8752,
119
+ "step": 80
120
+ },
121
+ {
122
+ "epoch": 0.89,
123
+ "eval_accuracy": 0.6895424723625183,
124
+ "eval_loss": 0.7460987567901611,
125
+ "eval_runtime": 597.5484,
126
+ "eval_samples_per_second": 0.512,
127
+ "eval_steps_per_second": 0.129,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 1.01,
132
+ "learning_rate": 9e-05,
133
+ "loss": 0.719,
134
+ "step": 90
135
+ },
136
+ {
137
+ "epoch": 1.01,
138
+ "eval_accuracy": 0.7973856329917908,
139
+ "eval_loss": 0.5500715374946594,
140
+ "eval_runtime": 583.2832,
141
+ "eval_samples_per_second": 0.525,
142
+ "eval_steps_per_second": 0.132,
143
+ "step": 90
144
+ },
145
+ {
146
+ "epoch": 1.12,
147
+ "learning_rate": 8.887640449438202e-05,
148
+ "loss": 0.5311,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 1.12,
153
+ "eval_accuracy": 0.7973856329917908,
154
+ "eval_loss": 0.5389693379402161,
155
+ "eval_runtime": 590.4987,
156
+ "eval_samples_per_second": 0.518,
157
+ "eval_steps_per_second": 0.13,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 1.23,
162
+ "learning_rate": 8.775280898876405e-05,
163
+ "loss": 0.3922,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 1.23,
168
+ "eval_accuracy": 0.843137264251709,
169
+ "eval_loss": 0.42480573058128357,
170
+ "eval_runtime": 597.2704,
171
+ "eval_samples_per_second": 0.512,
172
+ "eval_steps_per_second": 0.129,
173
+ "step": 110
174
+ },
175
+ {
176
+ "epoch": 1.34,
177
+ "learning_rate": 8.662921348314608e-05,
178
+ "loss": 0.3043,
179
+ "step": 120
180
+ },
181
+ {
182
+ "epoch": 1.34,
183
+ "eval_accuracy": 0.8496732115745544,
184
+ "eval_loss": 0.5262107253074646,
185
+ "eval_runtime": 618.0607,
186
+ "eval_samples_per_second": 0.495,
187
+ "eval_steps_per_second": 0.125,
188
+ "step": 120
189
+ },
190
+ {
191
+ "epoch": 1.45,
192
+ "learning_rate": 8.550561797752809e-05,
193
+ "loss": 0.7841,
194
+ "step": 130
195
+ },
196
+ {
197
+ "epoch": 1.45,
198
+ "eval_accuracy": 0.8202614188194275,
199
+ "eval_loss": 0.618194580078125,
200
+ "eval_runtime": 480.4521,
201
+ "eval_samples_per_second": 0.637,
202
+ "eval_steps_per_second": 0.16,
203
+ "step": 130
204
+ },
205
+ {
206
+ "epoch": 1.56,
207
+ "learning_rate": 8.438202247191012e-05,
208
+ "loss": 0.4881,
209
+ "step": 140
210
+ },
211
+ {
212
+ "epoch": 1.56,
213
+ "eval_accuracy": 0.8333333134651184,
214
+ "eval_loss": 0.4706672728061676,
215
+ "eval_runtime": 546.9776,
216
+ "eval_samples_per_second": 0.559,
217
+ "eval_steps_per_second": 0.141,
218
+ "step": 140
219
+ },
220
+ {
221
+ "epoch": 1.68,
222
+ "learning_rate": 8.325842696629214e-05,
223
+ "loss": 0.39,
224
+ "step": 150
225
+ },
226
+ {
227
+ "epoch": 1.68,
228
+ "eval_accuracy": 0.8594771027565002,
229
+ "eval_loss": 0.4261144995689392,
230
+ "eval_runtime": 314.3222,
231
+ "eval_samples_per_second": 0.974,
232
+ "eval_steps_per_second": 0.245,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 1.79,
237
+ "learning_rate": 8.213483146067417e-05,
238
+ "loss": 0.4687,
239
+ "step": 160
240
+ },
241
+ {
242
+ "epoch": 1.79,
243
+ "eval_accuracy": 0.7745097875595093,
244
+ "eval_loss": 0.7588664293289185,
245
+ "eval_runtime": 316.9261,
246
+ "eval_samples_per_second": 0.966,
247
+ "eval_steps_per_second": 0.243,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 1.9,
252
+ "learning_rate": 8.101123595505618e-05,
253
+ "loss": 0.4289,
254
+ "step": 170
255
+ },
256
+ {
257
+ "epoch": 1.9,
258
+ "eval_accuracy": 0.843137264251709,
259
+ "eval_loss": 0.4307919144630432,
260
+ "eval_runtime": 299.2563,
261
+ "eval_samples_per_second": 1.023,
262
+ "eval_steps_per_second": 0.257,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 2.01,
267
+ "learning_rate": 7.988764044943821e-05,
268
+ "loss": 0.4127,
269
+ "step": 180
270
+ },
271
+ {
272
+ "epoch": 2.01,
273
+ "eval_accuracy": 0.9117646813392639,
274
+ "eval_loss": 0.30210039019584656,
275
+ "eval_runtime": 293.1021,
276
+ "eval_samples_per_second": 1.044,
277
+ "eval_steps_per_second": 0.263,
278
+ "step": 180
279
+ },
280
+ {
281
+ "epoch": 2.12,
282
+ "learning_rate": 7.876404494382022e-05,
283
+ "loss": 0.3205,
284
+ "step": 190
285
+ },
286
+ {
287
+ "epoch": 2.12,
288
+ "eval_accuracy": 0.9052287340164185,
289
+ "eval_loss": 0.3764496445655823,
290
+ "eval_runtime": 626.3698,
291
+ "eval_samples_per_second": 0.489,
292
+ "eval_steps_per_second": 0.123,
293
+ "step": 190
294
+ },
295
+ {
296
+ "epoch": 2.23,
297
+ "learning_rate": 7.764044943820225e-05,
298
+ "loss": 0.302,
299
+ "step": 200
300
+ },
301
+ {
302
+ "epoch": 2.23,
303
+ "eval_accuracy": 0.8169934749603271,
304
+ "eval_loss": 0.6414448618888855,
305
+ "eval_runtime": 406.1799,
306
+ "eval_samples_per_second": 0.753,
307
+ "eval_steps_per_second": 0.19,
308
+ "step": 200
309
+ },
310
+ {
311
+ "epoch": 2.35,
312
+ "learning_rate": 7.651685393258428e-05,
313
+ "loss": 0.2767,
314
+ "step": 210
315
+ },
316
+ {
317
+ "epoch": 2.35,
318
+ "eval_accuracy": 0.8856208920478821,
319
+ "eval_loss": 0.3875592350959778,
320
+ "eval_runtime": 358.8222,
321
+ "eval_samples_per_second": 0.853,
322
+ "eval_steps_per_second": 0.215,
323
+ "step": 210
324
+ },
325
+ {
326
+ "epoch": 2.46,
327
+ "learning_rate": 7.53932584269663e-05,
328
+ "loss": 0.4107,
329
+ "step": 220
330
+ },
331
+ {
332
+ "epoch": 2.46,
333
+ "eval_accuracy": 0.8104575276374817,
334
+ "eval_loss": 0.6241660118103027,
335
+ "eval_runtime": 301.8068,
336
+ "eval_samples_per_second": 1.014,
337
+ "eval_steps_per_second": 0.255,
338
+ "step": 220
339
+ },
340
+ {
341
+ "epoch": 2.57,
342
+ "learning_rate": 7.426966292134831e-05,
343
+ "loss": 0.4392,
344
+ "step": 230
345
+ },
346
+ {
347
+ "epoch": 2.57,
348
+ "eval_accuracy": 0.9183006286621094,
349
+ "eval_loss": 0.2545139491558075,
350
+ "eval_runtime": 315.8591,
351
+ "eval_samples_per_second": 0.969,
352
+ "eval_steps_per_second": 0.244,
353
+ "step": 230
354
+ },
355
+ {
356
+ "epoch": 2.68,
357
+ "learning_rate": 7.314606741573034e-05,
358
+ "loss": 0.2376,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 2.68,
363
+ "eval_accuracy": 0.8496732115745544,
364
+ "eval_loss": 0.49588432908058167,
365
+ "eval_runtime": 304.5879,
366
+ "eval_samples_per_second": 1.005,
367
+ "eval_steps_per_second": 0.253,
368
+ "step": 240
369
+ },
370
+ {
371
+ "epoch": 2.79,
372
+ "learning_rate": 7.202247191011237e-05,
373
+ "loss": 0.4715,
374
+ "step": 250
375
+ },
376
+ {
377
+ "epoch": 2.79,
378
+ "eval_accuracy": 0.8235294222831726,
379
+ "eval_loss": 0.6127722859382629,
380
+ "eval_runtime": 296.1686,
381
+ "eval_samples_per_second": 1.033,
382
+ "eval_steps_per_second": 0.26,
383
+ "step": 250
384
+ },
385
+ {
386
+ "epoch": 2.91,
387
+ "learning_rate": 7.089887640449438e-05,
388
+ "loss": 0.2753,
389
+ "step": 260
390
+ },
391
+ {
392
+ "epoch": 2.91,
393
+ "eval_accuracy": 0.898692786693573,
394
+ "eval_loss": 0.33089637756347656,
395
+ "eval_runtime": 318.4459,
396
+ "eval_samples_per_second": 0.961,
397
+ "eval_steps_per_second": 0.242,
398
+ "step": 260
399
+ },
400
+ {
401
+ "epoch": 3.02,
402
+ "learning_rate": 6.97752808988764e-05,
403
+ "loss": 0.2919,
404
+ "step": 270
405
+ },
406
+ {
407
+ "epoch": 3.02,
408
+ "eval_accuracy": 0.8921568393707275,
409
+ "eval_loss": 0.41311776638031006,
410
+ "eval_runtime": 285.5443,
411
+ "eval_samples_per_second": 1.072,
412
+ "eval_steps_per_second": 0.27,
413
+ "step": 270
414
+ },
415
+ {
416
+ "epoch": 3.13,
417
+ "learning_rate": 6.865168539325843e-05,
418
+ "loss": 0.2222,
419
+ "step": 280
420
+ },
421
+ {
422
+ "epoch": 3.13,
423
+ "eval_accuracy": 0.898692786693573,
424
+ "eval_loss": 0.3918479084968567,
425
+ "eval_runtime": 297.6229,
426
+ "eval_samples_per_second": 1.028,
427
+ "eval_steps_per_second": 0.259,
428
+ "step": 280
429
+ },
430
+ {
431
+ "epoch": 3.24,
432
+ "learning_rate": 6.752808988764046e-05,
433
+ "loss": 0.0371,
434
+ "step": 290
435
+ },
436
+ {
437
+ "epoch": 3.24,
438
+ "eval_accuracy": 0.9183006286621094,
439
+ "eval_loss": 0.28783220052719116,
440
+ "eval_runtime": 293.7099,
441
+ "eval_samples_per_second": 1.042,
442
+ "eval_steps_per_second": 0.262,
443
+ "step": 290
444
+ },
445
+ {
446
+ "epoch": 3.35,
447
+ "learning_rate": 6.640449438202247e-05,
448
+ "loss": 0.0172,
449
+ "step": 300
450
+ },
451
+ {
452
+ "epoch": 3.35,
453
+ "eval_accuracy": 0.9215686321258545,
454
+ "eval_loss": 0.3087099492549896,
455
+ "eval_runtime": 317.6569,
456
+ "eval_samples_per_second": 0.963,
457
+ "eval_steps_per_second": 0.242,
458
+ "step": 300
459
+ },
460
+ {
461
+ "epoch": 3.46,
462
+ "learning_rate": 6.52808988764045e-05,
463
+ "loss": 0.1953,
464
+ "step": 310
465
+ },
466
+ {
467
+ "epoch": 3.46,
468
+ "eval_accuracy": 0.9183006286621094,
469
+ "eval_loss": 0.29489144682884216,
470
+ "eval_runtime": 293.4224,
471
+ "eval_samples_per_second": 1.043,
472
+ "eval_steps_per_second": 0.262,
473
+ "step": 310
474
+ },
475
+ {
476
+ "epoch": 3.58,
477
+ "learning_rate": 6.415730337078652e-05,
478
+ "loss": 0.2093,
479
+ "step": 320
480
+ },
481
+ {
482
+ "epoch": 3.58,
483
+ "eval_accuracy": 0.8921568393707275,
484
+ "eval_loss": 0.38903045654296875,
485
+ "eval_runtime": 313.0985,
486
+ "eval_samples_per_second": 0.977,
487
+ "eval_steps_per_second": 0.246,
488
+ "step": 320
489
+ },
490
+ {
491
+ "epoch": 3.69,
492
+ "learning_rate": 6.303370786516854e-05,
493
+ "loss": 0.1393,
494
+ "step": 330
495
+ },
496
+ {
497
+ "epoch": 3.69,
498
+ "eval_accuracy": 0.898692786693573,
499
+ "eval_loss": 0.34058284759521484,
500
+ "eval_runtime": 316.0484,
501
+ "eval_samples_per_second": 0.968,
502
+ "eval_steps_per_second": 0.244,
503
+ "step": 330
504
+ },
505
+ {
506
+ "epoch": 3.8,
507
+ "learning_rate": 6.191011235955056e-05,
508
+ "loss": 0.0532,
509
+ "step": 340
510
+ },
511
+ {
512
+ "epoch": 3.8,
513
+ "eval_accuracy": 0.9313725233078003,
514
+ "eval_loss": 0.38309353590011597,
515
+ "eval_runtime": 315.0427,
516
+ "eval_samples_per_second": 0.971,
517
+ "eval_steps_per_second": 0.244,
518
+ "step": 340
519
+ },
520
+ {
521
+ "epoch": 3.91,
522
+ "learning_rate": 6.078651685393258e-05,
523
+ "loss": 0.2061,
524
+ "step": 350
525
+ },
526
+ {
527
+ "epoch": 3.91,
528
+ "eval_accuracy": 0.8954248428344727,
529
+ "eval_loss": 0.43487662076950073,
530
+ "eval_runtime": 308.5388,
531
+ "eval_samples_per_second": 0.992,
532
+ "eval_steps_per_second": 0.25,
533
+ "step": 350
534
+ },
535
+ {
536
+ "epoch": 4.02,
537
+ "learning_rate": 5.96629213483146e-05,
538
+ "loss": 0.1755,
539
+ "step": 360
540
+ },
541
+ {
542
+ "epoch": 4.02,
543
+ "eval_accuracy": 0.8954248428344727,
544
+ "eval_loss": 0.4112664461135864,
545
+ "eval_runtime": 314.6158,
546
+ "eval_samples_per_second": 0.973,
547
+ "eval_steps_per_second": 0.245,
548
+ "step": 360
549
+ },
550
+ {
551
+ "epoch": 4.13,
552
+ "learning_rate": 5.853932584269663e-05,
553
+ "loss": 0.0155,
554
+ "step": 370
555
+ },
556
+ {
557
+ "epoch": 4.13,
558
+ "eval_accuracy": 0.9084967374801636,
559
+ "eval_loss": 0.34788793325424194,
560
+ "eval_runtime": 317.1579,
561
+ "eval_samples_per_second": 0.965,
562
+ "eval_steps_per_second": 0.243,
563
+ "step": 370
564
+ },
565
+ {
566
+ "epoch": 4.25,
567
+ "learning_rate": 5.7415730337078654e-05,
568
+ "loss": 0.1389,
569
+ "step": 380
570
+ },
571
+ {
572
+ "epoch": 4.25,
573
+ "eval_accuracy": 0.9248365759849548,
574
+ "eval_loss": 0.28591012954711914,
575
+ "eval_runtime": 323.1301,
576
+ "eval_samples_per_second": 0.947,
577
+ "eval_steps_per_second": 0.238,
578
+ "step": 380
579
+ },
580
+ {
581
+ "epoch": 4.36,
582
+ "learning_rate": 5.6292134831460676e-05,
583
+ "loss": 0.1102,
584
+ "step": 390
585
+ },
586
+ {
587
+ "epoch": 4.36,
588
+ "eval_accuracy": 0.9183006286621094,
589
+ "eval_loss": 0.2804703712463379,
590
+ "eval_runtime": 308.9869,
591
+ "eval_samples_per_second": 0.99,
592
+ "eval_steps_per_second": 0.249,
593
+ "step": 390
594
+ },
595
+ {
596
+ "epoch": 4.47,
597
+ "learning_rate": 5.516853932584269e-05,
598
+ "loss": 0.0447,
599
+ "step": 400
600
+ },
601
+ {
602
+ "epoch": 4.47,
603
+ "eval_accuracy": 0.9281045794487,
604
+ "eval_loss": 0.28759482502937317,
605
+ "eval_runtime": 325.9228,
606
+ "eval_samples_per_second": 0.939,
607
+ "eval_steps_per_second": 0.236,
608
+ "step": 400
609
+ },
610
+ {
611
+ "epoch": 4.58,
612
+ "learning_rate": 5.4044943820224726e-05,
613
+ "loss": 0.3047,
614
+ "step": 410
615
+ },
616
+ {
617
+ "epoch": 4.58,
618
+ "eval_accuracy": 0.9281045794487,
619
+ "eval_loss": 0.28950873017311096,
620
+ "eval_runtime": 302.1849,
621
+ "eval_samples_per_second": 1.013,
622
+ "eval_steps_per_second": 0.255,
623
+ "step": 410
624
+ },
625
+ {
626
+ "epoch": 4.69,
627
+ "learning_rate": 5.292134831460674e-05,
628
+ "loss": 0.2309,
629
+ "step": 420
630
+ },
631
+ {
632
+ "epoch": 4.69,
633
+ "eval_accuracy": 0.9313725233078003,
634
+ "eval_loss": 0.20166385173797607,
635
+ "eval_runtime": 313.1316,
636
+ "eval_samples_per_second": 0.977,
637
+ "eval_steps_per_second": 0.246,
638
+ "step": 420
639
+ },
640
+ {
641
+ "epoch": 4.8,
642
+ "learning_rate": 5.179775280898876e-05,
643
+ "loss": 0.2097,
644
+ "step": 430
645
+ },
646
+ {
647
+ "epoch": 4.8,
648
+ "eval_accuracy": 0.8692810535430908,
649
+ "eval_loss": 0.4348565936088562,
650
+ "eval_runtime": 328.5983,
651
+ "eval_samples_per_second": 0.931,
652
+ "eval_steps_per_second": 0.234,
653
+ "step": 430
654
+ },
655
+ {
656
+ "epoch": 4.92,
657
+ "learning_rate": 5.0674157303370785e-05,
658
+ "loss": 0.0094,
659
+ "step": 440
660
+ },
661
+ {
662
+ "epoch": 4.92,
663
+ "eval_accuracy": 0.898692786693573,
664
+ "eval_loss": 0.3308834135532379,
665
+ "eval_runtime": 261.8274,
666
+ "eval_samples_per_second": 1.169,
667
+ "eval_steps_per_second": 0.294,
668
+ "step": 440
669
+ },
670
+ {
671
+ "epoch": 5.03,
672
+ "learning_rate": 4.955056179775281e-05,
673
+ "loss": 0.0068,
674
+ "step": 450
675
+ },
676
+ {
677
+ "epoch": 5.03,
678
+ "eval_accuracy": 0.9379084706306458,
679
+ "eval_loss": 0.22690723836421967,
680
+ "eval_runtime": 321.9057,
681
+ "eval_samples_per_second": 0.951,
682
+ "eval_steps_per_second": 0.239,
683
+ "step": 450
684
+ },
685
+ {
686
+ "epoch": 5.14,
687
+ "learning_rate": 4.8426966292134836e-05,
688
+ "loss": 0.0428,
689
+ "step": 460
690
+ },
691
+ {
692
+ "epoch": 5.14,
693
+ "eval_accuracy": 0.9313725233078003,
694
+ "eval_loss": 0.24997933208942413,
695
+ "eval_runtime": 306.0175,
696
+ "eval_samples_per_second": 1.0,
697
+ "eval_steps_per_second": 0.252,
698
+ "step": 460
699
+ },
700
+ {
701
+ "epoch": 5.25,
702
+ "learning_rate": 4.730337078651685e-05,
703
+ "loss": 0.0555,
704
+ "step": 470
705
+ },
706
+ {
707
+ "epoch": 5.25,
708
+ "eval_accuracy": 0.8888888955116272,
709
+ "eval_loss": 0.4563826024532318,
710
+ "eval_runtime": 304.3479,
711
+ "eval_samples_per_second": 1.005,
712
+ "eval_steps_per_second": 0.253,
713
+ "step": 470
714
+ },
715
+ {
716
+ "epoch": 5.36,
717
+ "learning_rate": 4.617977528089888e-05,
718
+ "loss": 0.0928,
719
+ "step": 480
720
+ },
721
+ {
722
+ "epoch": 5.36,
723
+ "eval_accuracy": 0.915032684803009,
724
+ "eval_loss": 0.3516130745410919,
725
+ "eval_runtime": 320.668,
726
+ "eval_samples_per_second": 0.954,
727
+ "eval_steps_per_second": 0.24,
728
+ "step": 480
729
+ },
730
+ {
731
+ "epoch": 5.47,
732
+ "learning_rate": 4.50561797752809e-05,
733
+ "loss": 0.1947,
734
+ "step": 490
735
+ },
736
+ {
737
+ "epoch": 5.47,
738
+ "eval_accuracy": 0.9379084706306458,
739
+ "eval_loss": 0.24630288779735565,
740
+ "eval_runtime": 305.3109,
741
+ "eval_samples_per_second": 1.002,
742
+ "eval_steps_per_second": 0.252,
743
+ "step": 490
744
+ },
745
+ {
746
+ "epoch": 5.59,
747
+ "learning_rate": 4.393258426966292e-05,
748
+ "loss": 0.0934,
749
+ "step": 500
750
+ },
751
+ {
752
+ "epoch": 5.59,
753
+ "eval_accuracy": 0.9477124214172363,
754
+ "eval_loss": 0.18016140162944794,
755
+ "eval_runtime": 308.018,
756
+ "eval_samples_per_second": 0.993,
757
+ "eval_steps_per_second": 0.25,
758
+ "step": 500
759
+ },
760
+ {
761
+ "epoch": 5.7,
762
+ "learning_rate": 4.2808988764044945e-05,
763
+ "loss": 0.0035,
764
+ "step": 510
765
+ },
766
+ {
767
+ "epoch": 5.7,
768
+ "eval_accuracy": 0.9411764740943909,
769
+ "eval_loss": 0.22933033108711243,
770
+ "eval_runtime": 316.2253,
771
+ "eval_samples_per_second": 0.968,
772
+ "eval_steps_per_second": 0.243,
773
+ "step": 510
774
+ },
775
+ {
776
+ "epoch": 5.81,
777
+ "learning_rate": 4.168539325842697e-05,
778
+ "loss": 0.0038,
779
+ "step": 520
780
+ },
781
+ {
782
+ "epoch": 5.81,
783
+ "eval_accuracy": 0.9215686321258545,
784
+ "eval_loss": 0.2865773141384125,
785
+ "eval_runtime": 306.9042,
786
+ "eval_samples_per_second": 0.997,
787
+ "eval_steps_per_second": 0.251,
788
+ "step": 520
789
+ },
790
+ {
791
+ "epoch": 5.92,
792
+ "learning_rate": 4.056179775280899e-05,
793
+ "loss": 0.0027,
794
+ "step": 530
795
+ },
796
+ {
797
+ "epoch": 5.92,
798
+ "eval_accuracy": 0.915032684803009,
799
+ "eval_loss": 0.3221026659011841,
800
+ "eval_runtime": 308.5611,
801
+ "eval_samples_per_second": 0.992,
802
+ "eval_steps_per_second": 0.25,
803
+ "step": 530
804
+ },
805
+ {
806
+ "epoch": 6.03,
807
+ "learning_rate": 3.943820224719101e-05,
808
+ "loss": 0.0586,
809
+ "step": 540
810
+ },
811
+ {
812
+ "epoch": 6.03,
813
+ "eval_accuracy": 0.9215686321258545,
814
+ "eval_loss": 0.2714509665966034,
815
+ "eval_runtime": 307.936,
816
+ "eval_samples_per_second": 0.994,
817
+ "eval_steps_per_second": 0.25,
818
+ "step": 540
819
+ },
820
+ {
821
+ "epoch": 6.15,
822
+ "learning_rate": 3.831460674157303e-05,
823
+ "loss": 0.003,
824
+ "step": 550
825
+ },
826
+ {
827
+ "epoch": 6.15,
828
+ "eval_accuracy": 0.9117646813392639,
829
+ "eval_loss": 0.2935124635696411,
830
+ "eval_runtime": 322.5288,
831
+ "eval_samples_per_second": 0.949,
832
+ "eval_steps_per_second": 0.239,
833
+ "step": 550
834
+ },
835
+ {
836
+ "epoch": 6.26,
837
+ "learning_rate": 3.719101123595506e-05,
838
+ "loss": 0.0748,
839
+ "step": 560
840
+ },
841
+ {
842
+ "epoch": 6.26,
843
+ "eval_accuracy": 0.9379084706306458,
844
+ "eval_loss": 0.2554876506328583,
845
+ "eval_runtime": 310.2055,
846
+ "eval_samples_per_second": 0.986,
847
+ "eval_steps_per_second": 0.248,
848
+ "step": 560
849
+ },
850
+ {
851
+ "epoch": 6.37,
852
+ "learning_rate": 3.6179775280898874e-05,
853
+ "loss": 0.0273,
854
+ "step": 570
855
+ },
856
+ {
857
+ "epoch": 6.37,
858
+ "eval_accuracy": 0.9477124214172363,
859
+ "eval_loss": 0.26076748967170715,
860
+ "eval_runtime": 309.3501,
861
+ "eval_samples_per_second": 0.989,
862
+ "eval_steps_per_second": 0.249,
863
+ "step": 570
864
+ },
865
+ {
866
+ "epoch": 6.48,
867
+ "learning_rate": 3.50561797752809e-05,
868
+ "loss": 0.0021,
869
+ "step": 580
870
+ },
871
+ {
872
+ "epoch": 6.48,
873
+ "eval_accuracy": 0.9542483687400818,
874
+ "eval_loss": 0.2612459659576416,
875
+ "eval_runtime": 317.8944,
876
+ "eval_samples_per_second": 0.963,
877
+ "eval_steps_per_second": 0.242,
878
+ "step": 580
879
+ },
880
+ {
881
+ "epoch": 6.59,
882
+ "learning_rate": 3.393258426966292e-05,
883
+ "loss": 0.0042,
884
+ "step": 590
885
+ },
886
+ {
887
+ "epoch": 6.59,
888
+ "eval_accuracy": 0.9575163125991821,
889
+ "eval_loss": 0.24474120140075684,
890
+ "eval_runtime": 308.9228,
891
+ "eval_samples_per_second": 0.991,
892
+ "eval_steps_per_second": 0.249,
893
+ "step": 590
894
+ },
895
+ {
896
+ "epoch": 6.7,
897
+ "learning_rate": 3.2808988764044946e-05,
898
+ "loss": 0.0274,
899
+ "step": 600
900
+ },
901
+ {
902
+ "epoch": 6.7,
903
+ "eval_accuracy": 0.9542483687400818,
904
+ "eval_loss": 0.23039507865905762,
905
+ "eval_runtime": 311.9509,
906
+ "eval_samples_per_second": 0.981,
907
+ "eval_steps_per_second": 0.247,
908
+ "step": 600
909
+ },
910
+ {
911
+ "epoch": 6.82,
912
+ "learning_rate": 3.168539325842697e-05,
913
+ "loss": 0.0037,
914
+ "step": 610
915
+ },
916
+ {
917
+ "epoch": 6.82,
918
+ "eval_accuracy": 0.9542483687400818,
919
+ "eval_loss": 0.22053539752960205,
920
+ "eval_runtime": 315.7913,
921
+ "eval_samples_per_second": 0.969,
922
+ "eval_steps_per_second": 0.244,
923
+ "step": 610
924
+ },
925
+ {
926
+ "epoch": 6.93,
927
+ "learning_rate": 3.056179775280899e-05,
928
+ "loss": 0.0059,
929
+ "step": 620
930
+ },
931
+ {
932
+ "epoch": 6.93,
933
+ "eval_accuracy": 0.9379084706306458,
934
+ "eval_loss": 0.26862725615501404,
935
+ "eval_runtime": 321.9798,
936
+ "eval_samples_per_second": 0.95,
937
+ "eval_steps_per_second": 0.239,
938
+ "step": 620
939
+ },
940
+ {
941
+ "epoch": 7.04,
942
+ "learning_rate": 2.9438202247191012e-05,
943
+ "loss": 0.002,
944
+ "step": 630
945
+ },
946
+ {
947
+ "epoch": 7.04,
948
+ "eval_accuracy": 0.9379084706306458,
949
+ "eval_loss": 0.29074591398239136,
950
+ "eval_runtime": 311.6792,
951
+ "eval_samples_per_second": 0.982,
952
+ "eval_steps_per_second": 0.247,
953
+ "step": 630
954
+ },
955
+ {
956
+ "epoch": 7.15,
957
+ "learning_rate": 2.8314606741573037e-05,
958
+ "loss": 0.0214,
959
+ "step": 640
960
+ },
961
+ {
962
+ "epoch": 7.15,
963
+ "eval_accuracy": 0.9509803652763367,
964
+ "eval_loss": 0.217881441116333,
965
+ "eval_runtime": 312.0489,
966
+ "eval_samples_per_second": 0.981,
967
+ "eval_steps_per_second": 0.247,
968
+ "step": 640
969
+ },
970
+ {
971
+ "epoch": 7.26,
972
+ "learning_rate": 2.7191011235955055e-05,
973
+ "loss": 0.0011,
974
+ "step": 650
975
+ },
976
+ {
977
+ "epoch": 7.26,
978
+ "eval_accuracy": 0.9444444179534912,
979
+ "eval_loss": 0.24239015579223633,
980
+ "eval_runtime": 320.4623,
981
+ "eval_samples_per_second": 0.955,
982
+ "eval_steps_per_second": 0.24,
983
+ "step": 650
984
+ },
985
+ {
986
+ "epoch": 7.37,
987
+ "learning_rate": 2.606741573033708e-05,
988
+ "loss": 0.1222,
989
+ "step": 660
990
+ },
991
+ {
992
+ "epoch": 7.37,
993
+ "eval_accuracy": 0.9607843160629272,
994
+ "eval_loss": 0.22233766317367554,
995
+ "eval_runtime": 316.2672,
996
+ "eval_samples_per_second": 0.968,
997
+ "eval_steps_per_second": 0.243,
998
+ "step": 660
999
+ },
1000
+ {
1001
+ "epoch": 7.49,
1002
+ "learning_rate": 2.4943820224719103e-05,
1003
+ "loss": 0.0308,
1004
+ "step": 670
1005
+ },
1006
+ {
1007
+ "epoch": 7.49,
1008
+ "eval_accuracy": 0.9542483687400818,
1009
+ "eval_loss": 0.23289808630943298,
1010
+ "eval_runtime": 314.263,
1011
+ "eval_samples_per_second": 0.974,
1012
+ "eval_steps_per_second": 0.245,
1013
+ "step": 670
1014
+ },
1015
+ {
1016
+ "epoch": 7.6,
1017
+ "learning_rate": 2.3820224719101125e-05,
1018
+ "loss": 0.0047,
1019
+ "step": 680
1020
+ },
1021
+ {
1022
+ "epoch": 7.6,
1023
+ "eval_accuracy": 0.9444444179534912,
1024
+ "eval_loss": 0.2540358901023865,
1025
+ "eval_runtime": 314.1415,
1026
+ "eval_samples_per_second": 0.974,
1027
+ "eval_steps_per_second": 0.245,
1028
+ "step": 680
1029
+ },
1030
+ {
1031
+ "epoch": 7.71,
1032
+ "learning_rate": 2.2696629213483146e-05,
1033
+ "loss": 0.0033,
1034
+ "step": 690
1035
+ },
1036
+ {
1037
+ "epoch": 7.71,
1038
+ "eval_accuracy": 0.9379084706306458,
1039
+ "eval_loss": 0.26486942172050476,
1040
+ "eval_runtime": 313.7641,
1041
+ "eval_samples_per_second": 0.975,
1042
+ "eval_steps_per_second": 0.245,
1043
+ "step": 690
1044
+ },
1045
+ {
1046
+ "epoch": 7.82,
1047
+ "learning_rate": 2.157303370786517e-05,
1048
+ "loss": 0.0799,
1049
+ "step": 700
1050
+ },
1051
+ {
1052
+ "epoch": 7.82,
1053
+ "eval_accuracy": 0.9346405267715454,
1054
+ "eval_loss": 0.2804279029369354,
1055
+ "eval_runtime": 305.6654,
1056
+ "eval_samples_per_second": 1.001,
1057
+ "eval_steps_per_second": 0.252,
1058
+ "step": 700
1059
+ },
1060
+ {
1061
+ "epoch": 7.93,
1062
+ "learning_rate": 2.0449438202247194e-05,
1063
+ "loss": 0.0223,
1064
+ "step": 710
1065
+ },
1066
+ {
1067
+ "epoch": 7.93,
1068
+ "eval_accuracy": 0.9346405267715454,
1069
+ "eval_loss": 0.2961590886116028,
1070
+ "eval_runtime": 33.833,
1071
+ "eval_samples_per_second": 9.044,
1072
+ "eval_steps_per_second": 2.276,
1073
+ "step": 710
1074
+ },
1075
+ {
1076
+ "epoch": 8.04,
1077
+ "learning_rate": 1.9325842696629215e-05,
1078
+ "loss": 0.0065,
1079
+ "step": 720
1080
+ },
1081
+ {
1082
+ "epoch": 8.04,
1083
+ "eval_accuracy": 0.9313725233078003,
1084
+ "eval_loss": 0.3002856373786926,
1085
+ "eval_runtime": 28.9416,
1086
+ "eval_samples_per_second": 10.573,
1087
+ "eval_steps_per_second": 2.661,
1088
+ "step": 720
1089
+ },
1090
+ {
1091
+ "epoch": 8.16,
1092
+ "learning_rate": 1.8202247191011237e-05,
1093
+ "loss": 0.0032,
1094
+ "step": 730
1095
+ },
1096
+ {
1097
+ "epoch": 8.16,
1098
+ "eval_accuracy": 0.9215686321258545,
1099
+ "eval_loss": 0.33132508397102356,
1100
+ "eval_runtime": 28.9077,
1101
+ "eval_samples_per_second": 10.585,
1102
+ "eval_steps_per_second": 2.664,
1103
+ "step": 730
1104
+ },
1105
+ {
1106
+ "epoch": 8.27,
1107
+ "learning_rate": 1.707865168539326e-05,
1108
+ "loss": 0.0012,
1109
+ "step": 740
1110
+ },
1111
+ {
1112
+ "epoch": 8.27,
1113
+ "eval_accuracy": 0.9117646813392639,
1114
+ "eval_loss": 0.3802509605884552,
1115
+ "eval_runtime": 28.4035,
1116
+ "eval_samples_per_second": 10.773,
1117
+ "eval_steps_per_second": 2.711,
1118
+ "step": 740
1119
+ },
1120
+ {
1121
+ "epoch": 8.38,
1122
+ "learning_rate": 1.595505617977528e-05,
1123
+ "loss": 0.0022,
1124
+ "step": 750
1125
+ },
1126
+ {
1127
+ "epoch": 8.38,
1128
+ "eval_accuracy": 0.9183006286621094,
1129
+ "eval_loss": 0.3360930383205414,
1130
+ "eval_runtime": 28.4227,
1131
+ "eval_samples_per_second": 10.766,
1132
+ "eval_steps_per_second": 2.709,
1133
+ "step": 750
1134
+ },
1135
+ {
1136
+ "epoch": 8.49,
1137
+ "learning_rate": 1.4831460674157305e-05,
1138
+ "loss": 0.0012,
1139
+ "step": 760
1140
+ },
1141
+ {
1142
+ "epoch": 8.49,
1143
+ "eval_accuracy": 0.9346405267715454,
1144
+ "eval_loss": 0.29600241780281067,
1145
+ "eval_runtime": 300.6077,
1146
+ "eval_samples_per_second": 1.018,
1147
+ "eval_steps_per_second": 0.256,
1148
+ "step": 760
1149
+ },
1150
+ {
1151
+ "epoch": 8.6,
1152
+ "learning_rate": 1.3707865168539327e-05,
1153
+ "loss": 0.0011,
1154
+ "step": 770
1155
+ },
1156
+ {
1157
+ "epoch": 8.6,
1158
+ "eval_accuracy": 0.9346405267715454,
1159
+ "eval_loss": 0.2943996489048004,
1160
+ "eval_runtime": 312.1252,
1161
+ "eval_samples_per_second": 0.98,
1162
+ "eval_steps_per_second": 0.247,
1163
+ "step": 770
1164
+ },
1165
+ {
1166
+ "epoch": 8.72,
1167
+ "learning_rate": 1.258426966292135e-05,
1168
+ "loss": 0.0039,
1169
+ "step": 780
1170
+ },
1171
+ {
1172
+ "epoch": 8.72,
1173
+ "eval_accuracy": 0.9346405267715454,
1174
+ "eval_loss": 0.29687556624412537,
1175
+ "eval_runtime": 310.6326,
1176
+ "eval_samples_per_second": 0.985,
1177
+ "eval_steps_per_second": 0.248,
1178
+ "step": 780
1179
+ },
1180
+ {
1181
+ "epoch": 8.83,
1182
+ "learning_rate": 1.146067415730337e-05,
1183
+ "loss": 0.0011,
1184
+ "step": 790
1185
+ },
1186
+ {
1187
+ "epoch": 8.83,
1188
+ "eval_accuracy": 0.9281045794487,
1189
+ "eval_loss": 0.32291698455810547,
1190
+ "eval_runtime": 312.3321,
1191
+ "eval_samples_per_second": 0.98,
1192
+ "eval_steps_per_second": 0.247,
1193
+ "step": 790
1194
+ },
1195
+ {
1196
+ "epoch": 8.94,
1197
+ "learning_rate": 1.0337078651685394e-05,
1198
+ "loss": 0.0011,
1199
+ "step": 800
1200
+ },
1201
+ {
1202
+ "epoch": 8.94,
1203
+ "eval_accuracy": 0.9281045794487,
1204
+ "eval_loss": 0.3310171067714691,
1205
+ "eval_runtime": 309.2689,
1206
+ "eval_samples_per_second": 0.989,
1207
+ "eval_steps_per_second": 0.249,
1208
+ "step": 800
1209
+ },
1210
+ {
1211
+ "epoch": 9.05,
1212
+ "learning_rate": 9.213483146067416e-06,
1213
+ "loss": 0.0023,
1214
+ "step": 810
1215
+ },
1216
+ {
1217
+ "epoch": 9.05,
1218
+ "eval_accuracy": 0.9346405267715454,
1219
+ "eval_loss": 0.3102934658527374,
1220
+ "eval_runtime": 308.188,
1221
+ "eval_samples_per_second": 0.993,
1222
+ "eval_steps_per_second": 0.25,
1223
+ "step": 810
1224
+ },
1225
+ {
1226
+ "epoch": 9.16,
1227
+ "learning_rate": 8.089887640449438e-06,
1228
+ "loss": 0.0011,
1229
+ "step": 820
1230
+ },
1231
+ {
1232
+ "epoch": 9.16,
1233
+ "eval_accuracy": 0.9444444179534912,
1234
+ "eval_loss": 0.29238083958625793,
1235
+ "eval_runtime": 323.8949,
1236
+ "eval_samples_per_second": 0.945,
1237
+ "eval_steps_per_second": 0.238,
1238
+ "step": 820
1239
+ },
1240
+ {
1241
+ "epoch": 9.27,
1242
+ "learning_rate": 6.96629213483146e-06,
1243
+ "loss": 0.0015,
1244
+ "step": 830
1245
+ },
1246
+ {
1247
+ "epoch": 9.27,
1248
+ "eval_accuracy": 0.9509803652763367,
1249
+ "eval_loss": 0.282598614692688,
1250
+ "eval_runtime": 298.2173,
1251
+ "eval_samples_per_second": 1.026,
1252
+ "eval_steps_per_second": 0.258,
1253
+ "step": 830
1254
+ },
1255
+ {
1256
+ "epoch": 9.39,
1257
+ "learning_rate": 5.842696629213484e-06,
1258
+ "loss": 0.0015,
1259
+ "step": 840
1260
+ },
1261
+ {
1262
+ "epoch": 9.39,
1263
+ "eval_accuracy": 0.9477124214172363,
1264
+ "eval_loss": 0.2785097658634186,
1265
+ "eval_runtime": 310.1298,
1266
+ "eval_samples_per_second": 0.987,
1267
+ "eval_steps_per_second": 0.248,
1268
+ "step": 840
1269
+ },
1270
+ {
1271
+ "epoch": 9.5,
1272
+ "learning_rate": 4.719101123595506e-06,
1273
+ "loss": 0.0011,
1274
+ "step": 850
1275
+ },
1276
+ {
1277
+ "epoch": 9.5,
1278
+ "eval_accuracy": 0.9477124214172363,
1279
+ "eval_loss": 0.2726196050643921,
1280
+ "eval_runtime": 315.4917,
1281
+ "eval_samples_per_second": 0.97,
1282
+ "eval_steps_per_second": 0.244,
1283
+ "step": 850
1284
+ },
1285
+ {
1286
+ "epoch": 9.61,
1287
+ "learning_rate": 3.5955056179775286e-06,
1288
+ "loss": 0.0297,
1289
+ "step": 860
1290
+ },
1291
+ {
1292
+ "epoch": 9.61,
1293
+ "eval_accuracy": 0.9509803652763367,
1294
+ "eval_loss": 0.26638907194137573,
1295
+ "eval_runtime": 299.8251,
1296
+ "eval_samples_per_second": 1.021,
1297
+ "eval_steps_per_second": 0.257,
1298
+ "step": 860
1299
+ },
1300
+ {
1301
+ "epoch": 9.72,
1302
+ "learning_rate": 2.4719101123595505e-06,
1303
+ "loss": 0.0045,
1304
+ "step": 870
1305
+ },
1306
+ {
1307
+ "epoch": 9.72,
1308
+ "eval_accuracy": 0.9509803652763367,
1309
+ "eval_loss": 0.26572802662849426,
1310
+ "eval_runtime": 300.0469,
1311
+ "eval_samples_per_second": 1.02,
1312
+ "eval_steps_per_second": 0.257,
1313
+ "step": 870
1314
+ },
1315
+ {
1316
+ "epoch": 9.83,
1317
+ "learning_rate": 1.3483146067415732e-06,
1318
+ "loss": 0.0016,
1319
+ "step": 880
1320
+ },
1321
+ {
1322
+ "epoch": 9.83,
1323
+ "eval_accuracy": 0.9477124214172363,
1324
+ "eval_loss": 0.2656039297580719,
1325
+ "eval_runtime": 305.8484,
1326
+ "eval_samples_per_second": 1.0,
1327
+ "eval_steps_per_second": 0.252,
1328
+ "step": 880
1329
+ },
1330
+ {
1331
+ "epoch": 9.94,
1332
+ "learning_rate": 2.2471910112359554e-07,
1333
+ "loss": 0.0008,
1334
+ "step": 890
1335
+ },
1336
+ {
1337
+ "epoch": 9.94,
1338
+ "eval_accuracy": 0.9477124214172363,
1339
+ "eval_loss": 0.26553988456726074,
1340
+ "eval_runtime": 335.2081,
1341
+ "eval_samples_per_second": 0.913,
1342
+ "eval_steps_per_second": 0.23,
1343
+ "step": 890
1344
+ }
1345
+ ],
1346
+ "max_steps": 890,
1347
+ "num_train_epochs": 10,
1348
+ "total_flos": 1.51047400011648e+18,
1349
+ "trial_name": null,
1350
+ "trial_params": null
1351
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f71df8ecc6bb75ee5b159921480032d2ca70c2621fe6853ca66f21cd55230014
3
+ size 3963