Xiyin02 commited on
Commit
93707de
1 Parent(s): 9f49481

Upload 8 files

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 96.20253164556962,
3
+ "eval_accuracy": 0.8746031746031746,
4
+ "eval_loss": 0.5267017483711243,
5
+ "eval_runtime": 10.7929,
6
+ "eval_samples_per_second": 116.743,
7
+ "eval_steps_per_second": 0.927,
8
+ "total_flos": 7.515490775048022e+19,
9
+ "train_loss": 0.33647052476280614,
10
+ "train_runtime": 20573.1873,
11
+ "train_samples_per_second": 48.996,
12
+ "train_steps_per_second": 0.092
13
+ }
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "sitting",
13
+ "1": "using_laptop",
14
+ "2": "hugging",
15
+ "3": "sleeping",
16
+ "4": "drinking",
17
+ "5": "clapping",
18
+ "6": "dancing",
19
+ "7": "cycling",
20
+ "8": "calling",
21
+ "9": "laughing",
22
+ "10": "eating",
23
+ "11": "fighting",
24
+ "12": "listening_to_music",
25
+ "13": "running",
26
+ "14": "texting"
27
+ },
28
+ "image_size": 224,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 3072,
31
+ "label2id": {
32
+ "calling": 8,
33
+ "clapping": 5,
34
+ "cycling": 7,
35
+ "dancing": 6,
36
+ "drinking": 4,
37
+ "eating": 10,
38
+ "fighting": 11,
39
+ "hugging": 2,
40
+ "laughing": 9,
41
+ "listening_to_music": 12,
42
+ "running": 13,
43
+ "sitting": 0,
44
+ "sleeping": 3,
45
+ "texting": 14,
46
+ "using_laptop": 1
47
+ },
48
+ "layer_norm_eps": 1e-12,
49
+ "model_type": "vit",
50
+ "num_attention_heads": 12,
51
+ "num_channels": 3,
52
+ "num_hidden_layers": 12,
53
+ "patch_size": 16,
54
+ "problem_type": "single_label_classification",
55
+ "qkv_bias": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.41.2"
58
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 96.20253164556962,
3
+ "eval_accuracy": 0.8746031746031746,
4
+ "eval_loss": 0.5267017483711243,
5
+ "eval_runtime": 10.7929,
6
+ "eval_samples_per_second": 116.743,
7
+ "eval_steps_per_second": 0.927
8
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff64402a55469a315d1e1c5a2136d2f36fa2972e9e72b454371736d9368e64da
3
+ size 343263964
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.5,
27
+ 0.5,
28
+ 0.5
29
+ ],
30
+ "resample": 2,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 96.20253164556962,
3
+ "total_flos": 7.515490775048022e+19,
4
+ "train_loss": 0.33647052476280614,
5
+ "train_runtime": 20573.1873,
6
+ "train_samples_per_second": 48.996,
7
+ "train_steps_per_second": 0.092
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8746031746031746,
3
+ "best_model_checkpoint": "CP2_HAR_vit-base-patch16-224/checkpoint-1382",
4
+ "epoch": 96.20253164556962,
5
+ "eval_steps": 500,
6
+ "global_step": 1900,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9620253164556962,
13
+ "grad_norm": 2.8217339515686035,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.8429,
16
+ "step": 19
17
+ },
18
+ {
19
+ "epoch": 0.9620253164556962,
20
+ "eval_accuracy": 0.16904761904761906,
21
+ "eval_loss": 2.6474363803863525,
22
+ "eval_runtime": 10.7675,
23
+ "eval_samples_per_second": 117.018,
24
+ "eval_steps_per_second": 0.929,
25
+ "step": 19
26
+ },
27
+ {
28
+ "epoch": 1.9746835443037973,
29
+ "grad_norm": 2.534130334854126,
30
+ "learning_rate": 1.0263157894736843e-05,
31
+ "loss": 2.5152,
32
+ "step": 39
33
+ },
34
+ {
35
+ "epoch": 1.9746835443037973,
36
+ "eval_accuracy": 0.31587301587301586,
37
+ "eval_loss": 2.342618227005005,
38
+ "eval_runtime": 10.7708,
39
+ "eval_samples_per_second": 116.983,
40
+ "eval_steps_per_second": 0.928,
41
+ "step": 39
42
+ },
43
+ {
44
+ "epoch": 2.9873417721518987,
45
+ "grad_norm": 2.3139591217041016,
46
+ "learning_rate": 1.5526315789473686e-05,
47
+ "loss": 2.1548,
48
+ "step": 59
49
+ },
50
+ {
51
+ "epoch": 2.9873417721518987,
52
+ "eval_accuracy": 0.5626984126984127,
53
+ "eval_loss": 1.8545457124710083,
54
+ "eval_runtime": 10.8474,
55
+ "eval_samples_per_second": 116.157,
56
+ "eval_steps_per_second": 0.922,
57
+ "step": 59
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "grad_norm": 1.9620369672775269,
62
+ "learning_rate": 2.078947368421053e-05,
63
+ "loss": 1.6569,
64
+ "step": 79
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.7261904761904762,
69
+ "eval_loss": 1.2644377946853638,
70
+ "eval_runtime": 10.7977,
71
+ "eval_samples_per_second": 116.692,
72
+ "eval_steps_per_second": 0.926,
73
+ "step": 79
74
+ },
75
+ {
76
+ "epoch": 4.962025316455696,
77
+ "grad_norm": 1.4392253160476685,
78
+ "learning_rate": 2.578947368421053e-05,
79
+ "loss": 1.2393,
80
+ "step": 98
81
+ },
82
+ {
83
+ "epoch": 4.962025316455696,
84
+ "eval_accuracy": 0.7714285714285715,
85
+ "eval_loss": 0.8716472387313843,
86
+ "eval_runtime": 10.7696,
87
+ "eval_samples_per_second": 116.996,
88
+ "eval_steps_per_second": 0.929,
89
+ "step": 98
90
+ },
91
+ {
92
+ "epoch": 5.974683544303797,
93
+ "grad_norm": 1.3771088123321533,
94
+ "learning_rate": 3.105263157894737e-05,
95
+ "loss": 0.8982,
96
+ "step": 118
97
+ },
98
+ {
99
+ "epoch": 5.974683544303797,
100
+ "eval_accuracy": 0.8150793650793651,
101
+ "eval_loss": 0.6652108430862427,
102
+ "eval_runtime": 10.7233,
103
+ "eval_samples_per_second": 117.501,
104
+ "eval_steps_per_second": 0.933,
105
+ "step": 118
106
+ },
107
+ {
108
+ "epoch": 6.987341772151899,
109
+ "grad_norm": 1.6404207944869995,
110
+ "learning_rate": 3.6315789473684214e-05,
111
+ "loss": 0.7694,
112
+ "step": 138
113
+ },
114
+ {
115
+ "epoch": 6.987341772151899,
116
+ "eval_accuracy": 0.830952380952381,
117
+ "eval_loss": 0.596939206123352,
118
+ "eval_runtime": 10.7687,
119
+ "eval_samples_per_second": 117.006,
120
+ "eval_steps_per_second": 0.929,
121
+ "step": 138
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "grad_norm": 1.4968894720077515,
126
+ "learning_rate": 4.157894736842106e-05,
127
+ "loss": 0.6819,
128
+ "step": 158
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.8396825396825397,
133
+ "eval_loss": 0.5484516620635986,
134
+ "eval_runtime": 10.8135,
135
+ "eval_samples_per_second": 116.521,
136
+ "eval_steps_per_second": 0.925,
137
+ "step": 158
138
+ },
139
+ {
140
+ "epoch": 8.962025316455696,
141
+ "grad_norm": 1.414362907409668,
142
+ "learning_rate": 4.657894736842106e-05,
143
+ "loss": 0.6628,
144
+ "step": 177
145
+ },
146
+ {
147
+ "epoch": 8.962025316455696,
148
+ "eval_accuracy": 0.8476190476190476,
149
+ "eval_loss": 0.5053515434265137,
150
+ "eval_runtime": 10.7521,
151
+ "eval_samples_per_second": 117.186,
152
+ "eval_steps_per_second": 0.93,
153
+ "step": 177
154
+ },
155
+ {
156
+ "epoch": 9.974683544303797,
157
+ "grad_norm": 1.5052249431610107,
158
+ "learning_rate": 4.97953216374269e-05,
159
+ "loss": 0.5759,
160
+ "step": 197
161
+ },
162
+ {
163
+ "epoch": 9.974683544303797,
164
+ "eval_accuracy": 0.8476190476190476,
165
+ "eval_loss": 0.5064724683761597,
166
+ "eval_runtime": 10.8275,
167
+ "eval_samples_per_second": 116.37,
168
+ "eval_steps_per_second": 0.924,
169
+ "step": 197
170
+ },
171
+ {
172
+ "epoch": 10.987341772151899,
173
+ "grad_norm": 1.4250831604003906,
174
+ "learning_rate": 4.921052631578947e-05,
175
+ "loss": 0.5385,
176
+ "step": 217
177
+ },
178
+ {
179
+ "epoch": 10.987341772151899,
180
+ "eval_accuracy": 0.8420634920634921,
181
+ "eval_loss": 0.4821438789367676,
182
+ "eval_runtime": 10.8099,
183
+ "eval_samples_per_second": 116.56,
184
+ "eval_steps_per_second": 0.925,
185
+ "step": 217
186
+ },
187
+ {
188
+ "epoch": 12.0,
189
+ "grad_norm": 1.3822650909423828,
190
+ "learning_rate": 4.8625730994152046e-05,
191
+ "loss": 0.5022,
192
+ "step": 237
193
+ },
194
+ {
195
+ "epoch": 12.0,
196
+ "eval_accuracy": 0.8507936507936508,
197
+ "eval_loss": 0.47235107421875,
198
+ "eval_runtime": 10.7944,
199
+ "eval_samples_per_second": 116.727,
200
+ "eval_steps_per_second": 0.926,
201
+ "step": 237
202
+ },
203
+ {
204
+ "epoch": 12.962025316455696,
205
+ "grad_norm": 1.543864369392395,
206
+ "learning_rate": 4.807017543859649e-05,
207
+ "loss": 0.4841,
208
+ "step": 256
209
+ },
210
+ {
211
+ "epoch": 12.962025316455696,
212
+ "eval_accuracy": 0.8587301587301587,
213
+ "eval_loss": 0.48088887333869934,
214
+ "eval_runtime": 10.7741,
215
+ "eval_samples_per_second": 116.947,
216
+ "eval_steps_per_second": 0.928,
217
+ "step": 256
218
+ },
219
+ {
220
+ "epoch": 13.974683544303797,
221
+ "grad_norm": 1.4722260236740112,
222
+ "learning_rate": 4.7485380116959065e-05,
223
+ "loss": 0.4543,
224
+ "step": 276
225
+ },
226
+ {
227
+ "epoch": 13.974683544303797,
228
+ "eval_accuracy": 0.861904761904762,
229
+ "eval_loss": 0.4476229250431061,
230
+ "eval_runtime": 10.73,
231
+ "eval_samples_per_second": 117.428,
232
+ "eval_steps_per_second": 0.932,
233
+ "step": 276
234
+ },
235
+ {
236
+ "epoch": 14.987341772151899,
237
+ "grad_norm": 1.5065507888793945,
238
+ "learning_rate": 4.690058479532164e-05,
239
+ "loss": 0.4356,
240
+ "step": 296
241
+ },
242
+ {
243
+ "epoch": 14.987341772151899,
244
+ "eval_accuracy": 0.8579365079365079,
245
+ "eval_loss": 0.47357481718063354,
246
+ "eval_runtime": 10.7482,
247
+ "eval_samples_per_second": 117.229,
248
+ "eval_steps_per_second": 0.93,
249
+ "step": 296
250
+ },
251
+ {
252
+ "epoch": 16.0,
253
+ "grad_norm": 1.6331088542938232,
254
+ "learning_rate": 4.6315789473684214e-05,
255
+ "loss": 0.4021,
256
+ "step": 316
257
+ },
258
+ {
259
+ "epoch": 16.0,
260
+ "eval_accuracy": 0.8587301587301587,
261
+ "eval_loss": 0.46398431062698364,
262
+ "eval_runtime": 10.7745,
263
+ "eval_samples_per_second": 116.943,
264
+ "eval_steps_per_second": 0.928,
265
+ "step": 316
266
+ },
267
+ {
268
+ "epoch": 16.962025316455698,
269
+ "grad_norm": 1.7429494857788086,
270
+ "learning_rate": 4.576023391812866e-05,
271
+ "loss": 0.4073,
272
+ "step": 335
273
+ },
274
+ {
275
+ "epoch": 16.962025316455698,
276
+ "eval_accuracy": 0.8579365079365079,
277
+ "eval_loss": 0.4629625976085663,
278
+ "eval_runtime": 10.7423,
279
+ "eval_samples_per_second": 117.293,
280
+ "eval_steps_per_second": 0.931,
281
+ "step": 335
282
+ },
283
+ {
284
+ "epoch": 17.974683544303797,
285
+ "grad_norm": 1.3264607191085815,
286
+ "learning_rate": 4.517543859649123e-05,
287
+ "loss": 0.3782,
288
+ "step": 355
289
+ },
290
+ {
291
+ "epoch": 17.974683544303797,
292
+ "eval_accuracy": 0.8595238095238096,
293
+ "eval_loss": 0.4655977785587311,
294
+ "eval_runtime": 10.8299,
295
+ "eval_samples_per_second": 116.344,
296
+ "eval_steps_per_second": 0.923,
297
+ "step": 355
298
+ },
299
+ {
300
+ "epoch": 18.9873417721519,
301
+ "grad_norm": 1.481920599937439,
302
+ "learning_rate": 4.4590643274853806e-05,
303
+ "loss": 0.3617,
304
+ "step": 375
305
+ },
306
+ {
307
+ "epoch": 18.9873417721519,
308
+ "eval_accuracy": 0.861904761904762,
309
+ "eval_loss": 0.4484713077545166,
310
+ "eval_runtime": 10.749,
311
+ "eval_samples_per_second": 117.221,
312
+ "eval_steps_per_second": 0.93,
313
+ "step": 375
314
+ },
315
+ {
316
+ "epoch": 20.0,
317
+ "grad_norm": 1.3743647336959839,
318
+ "learning_rate": 4.400584795321638e-05,
319
+ "loss": 0.3448,
320
+ "step": 395
321
+ },
322
+ {
323
+ "epoch": 20.0,
324
+ "eval_accuracy": 0.861904761904762,
325
+ "eval_loss": 0.4735279381275177,
326
+ "eval_runtime": 10.7186,
327
+ "eval_samples_per_second": 117.553,
328
+ "eval_steps_per_second": 0.933,
329
+ "step": 395
330
+ },
331
+ {
332
+ "epoch": 20.962025316455698,
333
+ "grad_norm": 1.3664813041687012,
334
+ "learning_rate": 4.345029239766082e-05,
335
+ "loss": 0.3549,
336
+ "step": 414
337
+ },
338
+ {
339
+ "epoch": 20.962025316455698,
340
+ "eval_accuracy": 0.8571428571428571,
341
+ "eval_loss": 0.4780659079551697,
342
+ "eval_runtime": 10.7555,
343
+ "eval_samples_per_second": 117.149,
344
+ "eval_steps_per_second": 0.93,
345
+ "step": 414
346
+ },
347
+ {
348
+ "epoch": 21.974683544303797,
349
+ "grad_norm": 1.4020764827728271,
350
+ "learning_rate": 4.286549707602339e-05,
351
+ "loss": 0.3195,
352
+ "step": 434
353
+ },
354
+ {
355
+ "epoch": 21.974683544303797,
356
+ "eval_accuracy": 0.861904761904762,
357
+ "eval_loss": 0.4818320572376251,
358
+ "eval_runtime": 10.6974,
359
+ "eval_samples_per_second": 117.785,
360
+ "eval_steps_per_second": 0.935,
361
+ "step": 434
362
+ },
363
+ {
364
+ "epoch": 22.9873417721519,
365
+ "grad_norm": 1.2878130674362183,
366
+ "learning_rate": 4.228070175438597e-05,
367
+ "loss": 0.3219,
368
+ "step": 454
369
+ },
370
+ {
371
+ "epoch": 22.9873417721519,
372
+ "eval_accuracy": 0.8650793650793651,
373
+ "eval_loss": 0.47401970624923706,
374
+ "eval_runtime": 10.7479,
375
+ "eval_samples_per_second": 117.232,
376
+ "eval_steps_per_second": 0.93,
377
+ "step": 454
378
+ },
379
+ {
380
+ "epoch": 24.0,
381
+ "grad_norm": 1.5816872119903564,
382
+ "learning_rate": 4.169590643274854e-05,
383
+ "loss": 0.2966,
384
+ "step": 474
385
+ },
386
+ {
387
+ "epoch": 24.0,
388
+ "eval_accuracy": 0.8642857142857143,
389
+ "eval_loss": 0.4857538044452667,
390
+ "eval_runtime": 10.884,
391
+ "eval_samples_per_second": 115.766,
392
+ "eval_steps_per_second": 0.919,
393
+ "step": 474
394
+ },
395
+ {
396
+ "epoch": 24.962025316455698,
397
+ "grad_norm": 1.4161866903305054,
398
+ "learning_rate": 4.1140350877192985e-05,
399
+ "loss": 0.322,
400
+ "step": 493
401
+ },
402
+ {
403
+ "epoch": 24.962025316455698,
404
+ "eval_accuracy": 0.8579365079365079,
405
+ "eval_loss": 0.4993390738964081,
406
+ "eval_runtime": 10.7563,
407
+ "eval_samples_per_second": 117.141,
408
+ "eval_steps_per_second": 0.93,
409
+ "step": 493
410
+ },
411
+ {
412
+ "epoch": 25.974683544303797,
413
+ "grad_norm": 8.147224426269531,
414
+ "learning_rate": 4.055555555555556e-05,
415
+ "loss": 0.2806,
416
+ "step": 513
417
+ },
418
+ {
419
+ "epoch": 25.974683544303797,
420
+ "eval_accuracy": 0.8650793650793651,
421
+ "eval_loss": 0.4862901568412781,
422
+ "eval_runtime": 10.7246,
423
+ "eval_samples_per_second": 117.487,
424
+ "eval_steps_per_second": 0.932,
425
+ "step": 513
426
+ },
427
+ {
428
+ "epoch": 26.9873417721519,
429
+ "grad_norm": 1.3954640626907349,
430
+ "learning_rate": 3.9970760233918134e-05,
431
+ "loss": 0.2696,
432
+ "step": 533
433
+ },
434
+ {
435
+ "epoch": 26.9873417721519,
436
+ "eval_accuracy": 0.8595238095238096,
437
+ "eval_loss": 0.5064178705215454,
438
+ "eval_runtime": 10.7667,
439
+ "eval_samples_per_second": 117.027,
440
+ "eval_steps_per_second": 0.929,
441
+ "step": 533
442
+ },
443
+ {
444
+ "epoch": 28.0,
445
+ "grad_norm": 1.5532612800598145,
446
+ "learning_rate": 3.93859649122807e-05,
447
+ "loss": 0.2709,
448
+ "step": 553
449
+ },
450
+ {
451
+ "epoch": 28.0,
452
+ "eval_accuracy": 0.8674603174603175,
453
+ "eval_loss": 0.4656953811645508,
454
+ "eval_runtime": 10.8334,
455
+ "eval_samples_per_second": 116.307,
456
+ "eval_steps_per_second": 0.923,
457
+ "step": 553
458
+ },
459
+ {
460
+ "epoch": 28.962025316455698,
461
+ "grad_norm": 1.5748584270477295,
462
+ "learning_rate": 3.883040935672515e-05,
463
+ "loss": 0.2702,
464
+ "step": 572
465
+ },
466
+ {
467
+ "epoch": 28.962025316455698,
468
+ "eval_accuracy": 0.8571428571428571,
469
+ "eval_loss": 0.4933919310569763,
470
+ "eval_runtime": 10.7751,
471
+ "eval_samples_per_second": 116.936,
472
+ "eval_steps_per_second": 0.928,
473
+ "step": 572
474
+ },
475
+ {
476
+ "epoch": 29.974683544303797,
477
+ "grad_norm": 1.3018436431884766,
478
+ "learning_rate": 3.824561403508773e-05,
479
+ "loss": 0.2628,
480
+ "step": 592
481
+ },
482
+ {
483
+ "epoch": 29.974683544303797,
484
+ "eval_accuracy": 0.8555555555555555,
485
+ "eval_loss": 0.4940374493598938,
486
+ "eval_runtime": 10.7573,
487
+ "eval_samples_per_second": 117.13,
488
+ "eval_steps_per_second": 0.93,
489
+ "step": 592
490
+ },
491
+ {
492
+ "epoch": 30.9873417721519,
493
+ "grad_norm": 1.811011791229248,
494
+ "learning_rate": 3.7660818713450294e-05,
495
+ "loss": 0.2543,
496
+ "step": 612
497
+ },
498
+ {
499
+ "epoch": 30.9873417721519,
500
+ "eval_accuracy": 0.8642857142857143,
501
+ "eval_loss": 0.48308396339416504,
502
+ "eval_runtime": 10.8262,
503
+ "eval_samples_per_second": 116.384,
504
+ "eval_steps_per_second": 0.924,
505
+ "step": 612
506
+ },
507
+ {
508
+ "epoch": 32.0,
509
+ "grad_norm": 1.4332562685012817,
510
+ "learning_rate": 3.707602339181287e-05,
511
+ "loss": 0.2427,
512
+ "step": 632
513
+ },
514
+ {
515
+ "epoch": 32.0,
516
+ "eval_accuracy": 0.861904761904762,
517
+ "eval_loss": 0.4981466233730316,
518
+ "eval_runtime": 10.8291,
519
+ "eval_samples_per_second": 116.353,
520
+ "eval_steps_per_second": 0.923,
521
+ "step": 632
522
+ },
523
+ {
524
+ "epoch": 32.962025316455694,
525
+ "grad_norm": 1.5480865240097046,
526
+ "learning_rate": 3.652046783625731e-05,
527
+ "loss": 0.2659,
528
+ "step": 651
529
+ },
530
+ {
531
+ "epoch": 32.962025316455694,
532
+ "eval_accuracy": 0.8642857142857143,
533
+ "eval_loss": 0.5094291567802429,
534
+ "eval_runtime": 10.7442,
535
+ "eval_samples_per_second": 117.272,
536
+ "eval_steps_per_second": 0.931,
537
+ "step": 651
538
+ },
539
+ {
540
+ "epoch": 33.9746835443038,
541
+ "grad_norm": 1.3587052822113037,
542
+ "learning_rate": 3.593567251461988e-05,
543
+ "loss": 0.2398,
544
+ "step": 671
545
+ },
546
+ {
547
+ "epoch": 33.9746835443038,
548
+ "eval_accuracy": 0.8658730158730159,
549
+ "eval_loss": 0.5013704895973206,
550
+ "eval_runtime": 10.759,
551
+ "eval_samples_per_second": 117.112,
552
+ "eval_steps_per_second": 0.929,
553
+ "step": 671
554
+ },
555
+ {
556
+ "epoch": 34.9873417721519,
557
+ "grad_norm": 1.3286776542663574,
558
+ "learning_rate": 3.5350877192982455e-05,
559
+ "loss": 0.227,
560
+ "step": 691
561
+ },
562
+ {
563
+ "epoch": 34.9873417721519,
564
+ "eval_accuracy": 0.8634920634920635,
565
+ "eval_loss": 0.5037477612495422,
566
+ "eval_runtime": 10.7589,
567
+ "eval_samples_per_second": 117.112,
568
+ "eval_steps_per_second": 0.929,
569
+ "step": 691
570
+ },
571
+ {
572
+ "epoch": 36.0,
573
+ "grad_norm": 1.6547776460647583,
574
+ "learning_rate": 3.476608187134503e-05,
575
+ "loss": 0.2308,
576
+ "step": 711
577
+ },
578
+ {
579
+ "epoch": 36.0,
580
+ "eval_accuracy": 0.8658730158730159,
581
+ "eval_loss": 0.5068167448043823,
582
+ "eval_runtime": 10.7754,
583
+ "eval_samples_per_second": 116.933,
584
+ "eval_steps_per_second": 0.928,
585
+ "step": 711
586
+ },
587
+ {
588
+ "epoch": 36.962025316455694,
589
+ "grad_norm": 1.3685321807861328,
590
+ "learning_rate": 3.421052631578947e-05,
591
+ "loss": 0.2326,
592
+ "step": 730
593
+ },
594
+ {
595
+ "epoch": 36.962025316455694,
596
+ "eval_accuracy": 0.8650793650793651,
597
+ "eval_loss": 0.4980192184448242,
598
+ "eval_runtime": 10.761,
599
+ "eval_samples_per_second": 117.09,
600
+ "eval_steps_per_second": 0.929,
601
+ "step": 730
602
+ },
603
+ {
604
+ "epoch": 37.9746835443038,
605
+ "grad_norm": 1.2418972253799438,
606
+ "learning_rate": 3.362573099415205e-05,
607
+ "loss": 0.2242,
608
+ "step": 750
609
+ },
610
+ {
611
+ "epoch": 37.9746835443038,
612
+ "eval_accuracy": 0.8587301587301587,
613
+ "eval_loss": 0.4938106834888458,
614
+ "eval_runtime": 11.0548,
615
+ "eval_samples_per_second": 113.978,
616
+ "eval_steps_per_second": 0.905,
617
+ "step": 750
618
+ },
619
+ {
620
+ "epoch": 38.9873417721519,
621
+ "grad_norm": 1.3450112342834473,
622
+ "learning_rate": 3.304093567251462e-05,
623
+ "loss": 0.2152,
624
+ "step": 770
625
+ },
626
+ {
627
+ "epoch": 38.9873417721519,
628
+ "eval_accuracy": 0.8626984126984127,
629
+ "eval_loss": 0.49911221861839294,
630
+ "eval_runtime": 10.8459,
631
+ "eval_samples_per_second": 116.173,
632
+ "eval_steps_per_second": 0.922,
633
+ "step": 770
634
+ },
635
+ {
636
+ "epoch": 40.0,
637
+ "grad_norm": 1.3505226373672485,
638
+ "learning_rate": 3.24561403508772e-05,
639
+ "loss": 0.2205,
640
+ "step": 790
641
+ },
642
+ {
643
+ "epoch": 40.0,
644
+ "eval_accuracy": 0.8571428571428571,
645
+ "eval_loss": 0.5294431447982788,
646
+ "eval_runtime": 11.024,
647
+ "eval_samples_per_second": 114.296,
648
+ "eval_steps_per_second": 0.907,
649
+ "step": 790
650
+ },
651
+ {
652
+ "epoch": 40.962025316455694,
653
+ "grad_norm": 1.1549227237701416,
654
+ "learning_rate": 3.1900584795321634e-05,
655
+ "loss": 0.2299,
656
+ "step": 809
657
+ },
658
+ {
659
+ "epoch": 40.962025316455694,
660
+ "eval_accuracy": 0.8650793650793651,
661
+ "eval_loss": 0.5079935789108276,
662
+ "eval_runtime": 10.8051,
663
+ "eval_samples_per_second": 116.612,
664
+ "eval_steps_per_second": 0.925,
665
+ "step": 809
666
+ },
667
+ {
668
+ "epoch": 41.9746835443038,
669
+ "grad_norm": 1.252756118774414,
670
+ "learning_rate": 3.131578947368421e-05,
671
+ "loss": 0.1978,
672
+ "step": 829
673
+ },
674
+ {
675
+ "epoch": 41.9746835443038,
676
+ "eval_accuracy": 0.861904761904762,
677
+ "eval_loss": 0.5043396949768066,
678
+ "eval_runtime": 11.0945,
679
+ "eval_samples_per_second": 113.57,
680
+ "eval_steps_per_second": 0.901,
681
+ "step": 829
682
+ },
683
+ {
684
+ "epoch": 42.9873417721519,
685
+ "grad_norm": 1.20892333984375,
686
+ "learning_rate": 3.073099415204678e-05,
687
+ "loss": 0.2081,
688
+ "step": 849
689
+ },
690
+ {
691
+ "epoch": 42.9873417721519,
692
+ "eval_accuracy": 0.8634920634920635,
693
+ "eval_loss": 0.5008840560913086,
694
+ "eval_runtime": 10.8249,
695
+ "eval_samples_per_second": 116.398,
696
+ "eval_steps_per_second": 0.924,
697
+ "step": 849
698
+ },
699
+ {
700
+ "epoch": 44.0,
701
+ "grad_norm": 0.9471483826637268,
702
+ "learning_rate": 3.0146198830409357e-05,
703
+ "loss": 0.1893,
704
+ "step": 869
705
+ },
706
+ {
707
+ "epoch": 44.0,
708
+ "eval_accuracy": 0.8571428571428571,
709
+ "eval_loss": 0.5212369561195374,
710
+ "eval_runtime": 10.7573,
711
+ "eval_samples_per_second": 117.13,
712
+ "eval_steps_per_second": 0.93,
713
+ "step": 869
714
+ },
715
+ {
716
+ "epoch": 44.962025316455694,
717
+ "grad_norm": 1.4700783491134644,
718
+ "learning_rate": 2.95906432748538e-05,
719
+ "loss": 0.1988,
720
+ "step": 888
721
+ },
722
+ {
723
+ "epoch": 44.962025316455694,
724
+ "eval_accuracy": 0.8626984126984127,
725
+ "eval_loss": 0.4991550147533417,
726
+ "eval_runtime": 10.8133,
727
+ "eval_samples_per_second": 116.523,
728
+ "eval_steps_per_second": 0.925,
729
+ "step": 888
730
+ },
731
+ {
732
+ "epoch": 45.9746835443038,
733
+ "grad_norm": 1.0916502475738525,
734
+ "learning_rate": 2.9005847953216375e-05,
735
+ "loss": 0.1911,
736
+ "step": 908
737
+ },
738
+ {
739
+ "epoch": 45.9746835443038,
740
+ "eval_accuracy": 0.8674603174603175,
741
+ "eval_loss": 0.5237799882888794,
742
+ "eval_runtime": 10.7538,
743
+ "eval_samples_per_second": 117.168,
744
+ "eval_steps_per_second": 0.93,
745
+ "step": 908
746
+ },
747
+ {
748
+ "epoch": 46.9873417721519,
749
+ "grad_norm": 1.2590000629425049,
750
+ "learning_rate": 2.842105263157895e-05,
751
+ "loss": 0.1877,
752
+ "step": 928
753
+ },
754
+ {
755
+ "epoch": 46.9873417721519,
756
+ "eval_accuracy": 0.8674603174603175,
757
+ "eval_loss": 0.5184463262557983,
758
+ "eval_runtime": 10.8005,
759
+ "eval_samples_per_second": 116.662,
760
+ "eval_steps_per_second": 0.926,
761
+ "step": 928
762
+ },
763
+ {
764
+ "epoch": 48.0,
765
+ "grad_norm": 1.417845606803894,
766
+ "learning_rate": 2.783625730994152e-05,
767
+ "loss": 0.1957,
768
+ "step": 948
769
+ },
770
+ {
771
+ "epoch": 48.0,
772
+ "eval_accuracy": 0.8571428571428571,
773
+ "eval_loss": 0.5155279040336609,
774
+ "eval_runtime": 10.7518,
775
+ "eval_samples_per_second": 117.19,
776
+ "eval_steps_per_second": 0.93,
777
+ "step": 948
778
+ },
779
+ {
780
+ "epoch": 48.962025316455694,
781
+ "grad_norm": 1.304579496383667,
782
+ "learning_rate": 2.7280701754385968e-05,
783
+ "loss": 0.185,
784
+ "step": 967
785
+ },
786
+ {
787
+ "epoch": 48.962025316455694,
788
+ "eval_accuracy": 0.8674603174603175,
789
+ "eval_loss": 0.5028470158576965,
790
+ "eval_runtime": 10.7385,
791
+ "eval_samples_per_second": 117.335,
792
+ "eval_steps_per_second": 0.931,
793
+ "step": 967
794
+ },
795
+ {
796
+ "epoch": 49.9746835443038,
797
+ "grad_norm": 1.1883121728897095,
798
+ "learning_rate": 2.669590643274854e-05,
799
+ "loss": 0.1821,
800
+ "step": 987
801
+ },
802
+ {
803
+ "epoch": 49.9746835443038,
804
+ "eval_accuracy": 0.8626984126984127,
805
+ "eval_loss": 0.5118417739868164,
806
+ "eval_runtime": 10.7974,
807
+ "eval_samples_per_second": 116.694,
808
+ "eval_steps_per_second": 0.926,
809
+ "step": 987
810
+ },
811
+ {
812
+ "epoch": 50.9873417721519,
813
+ "grad_norm": 0.9844208359718323,
814
+ "learning_rate": 2.6111111111111114e-05,
815
+ "loss": 0.1843,
816
+ "step": 1007
817
+ },
818
+ {
819
+ "epoch": 50.9873417721519,
820
+ "eval_accuracy": 0.8650793650793651,
821
+ "eval_loss": 0.5006617903709412,
822
+ "eval_runtime": 10.7816,
823
+ "eval_samples_per_second": 116.866,
824
+ "eval_steps_per_second": 0.928,
825
+ "step": 1007
826
+ },
827
+ {
828
+ "epoch": 52.0,
829
+ "grad_norm": 1.392893671989441,
830
+ "learning_rate": 2.5526315789473688e-05,
831
+ "loss": 0.1711,
832
+ "step": 1027
833
+ },
834
+ {
835
+ "epoch": 52.0,
836
+ "eval_accuracy": 0.8571428571428571,
837
+ "eval_loss": 0.511702299118042,
838
+ "eval_runtime": 10.8792,
839
+ "eval_samples_per_second": 115.817,
840
+ "eval_steps_per_second": 0.919,
841
+ "step": 1027
842
+ },
843
+ {
844
+ "epoch": 52.962025316455694,
845
+ "grad_norm": 1.6035434007644653,
846
+ "learning_rate": 2.4970760233918132e-05,
847
+ "loss": 0.1903,
848
+ "step": 1046
849
+ },
850
+ {
851
+ "epoch": 52.962025316455694,
852
+ "eval_accuracy": 0.8587301587301587,
853
+ "eval_loss": 0.507360577583313,
854
+ "eval_runtime": 10.9028,
855
+ "eval_samples_per_second": 115.567,
856
+ "eval_steps_per_second": 0.917,
857
+ "step": 1046
858
+ },
859
+ {
860
+ "epoch": 53.9746835443038,
861
+ "grad_norm": 1.0503844022750854,
862
+ "learning_rate": 2.4385964912280703e-05,
863
+ "loss": 0.1713,
864
+ "step": 1066
865
+ },
866
+ {
867
+ "epoch": 53.9746835443038,
868
+ "eval_accuracy": 0.8658730158730159,
869
+ "eval_loss": 0.5167327523231506,
870
+ "eval_runtime": 10.8055,
871
+ "eval_samples_per_second": 116.608,
872
+ "eval_steps_per_second": 0.925,
873
+ "step": 1066
874
+ },
875
+ {
876
+ "epoch": 54.9873417721519,
877
+ "grad_norm": 1.0421777963638306,
878
+ "learning_rate": 2.3801169590643278e-05,
879
+ "loss": 0.1677,
880
+ "step": 1086
881
+ },
882
+ {
883
+ "epoch": 54.9873417721519,
884
+ "eval_accuracy": 0.8666666666666667,
885
+ "eval_loss": 0.5178954601287842,
886
+ "eval_runtime": 10.802,
887
+ "eval_samples_per_second": 116.645,
888
+ "eval_steps_per_second": 0.926,
889
+ "step": 1086
890
+ },
891
+ {
892
+ "epoch": 56.0,
893
+ "grad_norm": 1.283031940460205,
894
+ "learning_rate": 2.321637426900585e-05,
895
+ "loss": 0.16,
896
+ "step": 1106
897
+ },
898
+ {
899
+ "epoch": 56.0,
900
+ "eval_accuracy": 0.8650793650793651,
901
+ "eval_loss": 0.5145161747932434,
902
+ "eval_runtime": 10.7346,
903
+ "eval_samples_per_second": 117.377,
904
+ "eval_steps_per_second": 0.932,
905
+ "step": 1106
906
+ },
907
+ {
908
+ "epoch": 56.962025316455694,
909
+ "grad_norm": 2.3518636226654053,
910
+ "learning_rate": 2.2660818713450292e-05,
911
+ "loss": 0.1818,
912
+ "step": 1125
913
+ },
914
+ {
915
+ "epoch": 56.962025316455694,
916
+ "eval_accuracy": 0.8650793650793651,
917
+ "eval_loss": 0.5295758247375488,
918
+ "eval_runtime": 10.7992,
919
+ "eval_samples_per_second": 116.676,
920
+ "eval_steps_per_second": 0.926,
921
+ "step": 1125
922
+ },
923
+ {
924
+ "epoch": 57.9746835443038,
925
+ "grad_norm": 1.4039283990859985,
926
+ "learning_rate": 2.2076023391812867e-05,
927
+ "loss": 0.1725,
928
+ "step": 1145
929
+ },
930
+ {
931
+ "epoch": 57.9746835443038,
932
+ "eval_accuracy": 0.8642857142857143,
933
+ "eval_loss": 0.531140923500061,
934
+ "eval_runtime": 10.7624,
935
+ "eval_samples_per_second": 117.075,
936
+ "eval_steps_per_second": 0.929,
937
+ "step": 1145
938
+ },
939
+ {
940
+ "epoch": 58.9873417721519,
941
+ "grad_norm": 1.1663118600845337,
942
+ "learning_rate": 2.149122807017544e-05,
943
+ "loss": 0.1642,
944
+ "step": 1165
945
+ },
946
+ {
947
+ "epoch": 58.9873417721519,
948
+ "eval_accuracy": 0.8626984126984127,
949
+ "eval_loss": 0.5316585302352905,
950
+ "eval_runtime": 10.7932,
951
+ "eval_samples_per_second": 116.74,
952
+ "eval_steps_per_second": 0.927,
953
+ "step": 1165
954
+ },
955
+ {
956
+ "epoch": 60.0,
957
+ "grad_norm": 1.2489556074142456,
958
+ "learning_rate": 2.0906432748538013e-05,
959
+ "loss": 0.1626,
960
+ "step": 1185
961
+ },
962
+ {
963
+ "epoch": 60.0,
964
+ "eval_accuracy": 0.861904761904762,
965
+ "eval_loss": 0.5282865166664124,
966
+ "eval_runtime": 10.7244,
967
+ "eval_samples_per_second": 117.49,
968
+ "eval_steps_per_second": 0.932,
969
+ "step": 1185
970
+ },
971
+ {
972
+ "epoch": 60.962025316455694,
973
+ "grad_norm": 1.4613455533981323,
974
+ "learning_rate": 2.0350877192982456e-05,
975
+ "loss": 0.1621,
976
+ "step": 1204
977
+ },
978
+ {
979
+ "epoch": 60.962025316455694,
980
+ "eval_accuracy": 0.8603174603174604,
981
+ "eval_loss": 0.5266717076301575,
982
+ "eval_runtime": 10.8467,
983
+ "eval_samples_per_second": 116.164,
984
+ "eval_steps_per_second": 0.922,
985
+ "step": 1204
986
+ },
987
+ {
988
+ "epoch": 61.9746835443038,
989
+ "grad_norm": 1.4865529537200928,
990
+ "learning_rate": 1.976608187134503e-05,
991
+ "loss": 0.1503,
992
+ "step": 1224
993
+ },
994
+ {
995
+ "epoch": 61.9746835443038,
996
+ "eval_accuracy": 0.861904761904762,
997
+ "eval_loss": 0.5389307141304016,
998
+ "eval_runtime": 10.7859,
999
+ "eval_samples_per_second": 116.819,
1000
+ "eval_steps_per_second": 0.927,
1001
+ "step": 1224
1002
+ },
1003
+ {
1004
+ "epoch": 62.9873417721519,
1005
+ "grad_norm": 1.0126421451568604,
1006
+ "learning_rate": 1.9181286549707602e-05,
1007
+ "loss": 0.162,
1008
+ "step": 1244
1009
+ },
1010
+ {
1011
+ "epoch": 62.9873417721519,
1012
+ "eval_accuracy": 0.8626984126984127,
1013
+ "eval_loss": 0.540341317653656,
1014
+ "eval_runtime": 10.7503,
1015
+ "eval_samples_per_second": 117.206,
1016
+ "eval_steps_per_second": 0.93,
1017
+ "step": 1244
1018
+ },
1019
+ {
1020
+ "epoch": 64.0,
1021
+ "grad_norm": 1.3681743144989014,
1022
+ "learning_rate": 1.8596491228070176e-05,
1023
+ "loss": 0.154,
1024
+ "step": 1264
1025
+ },
1026
+ {
1027
+ "epoch": 64.0,
1028
+ "eval_accuracy": 0.8650793650793651,
1029
+ "eval_loss": 0.5240360498428345,
1030
+ "eval_runtime": 10.6902,
1031
+ "eval_samples_per_second": 117.865,
1032
+ "eval_steps_per_second": 0.935,
1033
+ "step": 1264
1034
+ },
1035
+ {
1036
+ "epoch": 64.9620253164557,
1037
+ "grad_norm": 1.1314650774002075,
1038
+ "learning_rate": 1.804093567251462e-05,
1039
+ "loss": 0.1525,
1040
+ "step": 1283
1041
+ },
1042
+ {
1043
+ "epoch": 64.9620253164557,
1044
+ "eval_accuracy": 0.8650793650793651,
1045
+ "eval_loss": 0.5337327718734741,
1046
+ "eval_runtime": 10.5876,
1047
+ "eval_samples_per_second": 119.007,
1048
+ "eval_steps_per_second": 0.945,
1049
+ "step": 1283
1050
+ },
1051
+ {
1052
+ "epoch": 65.9746835443038,
1053
+ "grad_norm": 1.1210103034973145,
1054
+ "learning_rate": 1.745614035087719e-05,
1055
+ "loss": 0.1529,
1056
+ "step": 1303
1057
+ },
1058
+ {
1059
+ "epoch": 65.9746835443038,
1060
+ "eval_accuracy": 0.8642857142857143,
1061
+ "eval_loss": 0.5457757115364075,
1062
+ "eval_runtime": 10.8103,
1063
+ "eval_samples_per_second": 116.555,
1064
+ "eval_steps_per_second": 0.925,
1065
+ "step": 1303
1066
+ },
1067
+ {
1068
+ "epoch": 66.9873417721519,
1069
+ "grad_norm": 1.2637122869491577,
1070
+ "learning_rate": 1.6871345029239766e-05,
1071
+ "loss": 0.1548,
1072
+ "step": 1323
1073
+ },
1074
+ {
1075
+ "epoch": 66.9873417721519,
1076
+ "eval_accuracy": 0.8650793650793651,
1077
+ "eval_loss": 0.5383771061897278,
1078
+ "eval_runtime": 10.877,
1079
+ "eval_samples_per_second": 115.84,
1080
+ "eval_steps_per_second": 0.919,
1081
+ "step": 1323
1082
+ },
1083
+ {
1084
+ "epoch": 68.0,
1085
+ "grad_norm": 1.1812046766281128,
1086
+ "learning_rate": 1.628654970760234e-05,
1087
+ "loss": 0.1556,
1088
+ "step": 1343
1089
+ },
1090
+ {
1091
+ "epoch": 68.0,
1092
+ "eval_accuracy": 0.8626984126984127,
1093
+ "eval_loss": 0.5395429134368896,
1094
+ "eval_runtime": 10.829,
1095
+ "eval_samples_per_second": 116.354,
1096
+ "eval_steps_per_second": 0.923,
1097
+ "step": 1343
1098
+ },
1099
+ {
1100
+ "epoch": 68.9620253164557,
1101
+ "grad_norm": 1.21077299118042,
1102
+ "learning_rate": 1.5730994152046784e-05,
1103
+ "loss": 0.1629,
1104
+ "step": 1362
1105
+ },
1106
+ {
1107
+ "epoch": 68.9620253164557,
1108
+ "eval_accuracy": 0.8634920634920635,
1109
+ "eval_loss": 0.5454122424125671,
1110
+ "eval_runtime": 10.6544,
1111
+ "eval_samples_per_second": 118.261,
1112
+ "eval_steps_per_second": 0.939,
1113
+ "step": 1362
1114
+ },
1115
+ {
1116
+ "epoch": 69.9746835443038,
1117
+ "grad_norm": 1.3317054510116577,
1118
+ "learning_rate": 1.5146198830409358e-05,
1119
+ "loss": 0.1387,
1120
+ "step": 1382
1121
+ },
1122
+ {
1123
+ "epoch": 69.9746835443038,
1124
+ "eval_accuracy": 0.8746031746031746,
1125
+ "eval_loss": 0.5267017483711243,
1126
+ "eval_runtime": 10.8484,
1127
+ "eval_samples_per_second": 116.147,
1128
+ "eval_steps_per_second": 0.922,
1129
+ "step": 1382
1130
+ },
1131
+ {
1132
+ "epoch": 70.9873417721519,
1133
+ "grad_norm": 1.2003090381622314,
1134
+ "learning_rate": 1.4561403508771931e-05,
1135
+ "loss": 0.1495,
1136
+ "step": 1402
1137
+ },
1138
+ {
1139
+ "epoch": 70.9873417721519,
1140
+ "eval_accuracy": 0.8650793650793651,
1141
+ "eval_loss": 0.5427414774894714,
1142
+ "eval_runtime": 10.7136,
1143
+ "eval_samples_per_second": 117.608,
1144
+ "eval_steps_per_second": 0.933,
1145
+ "step": 1402
1146
+ },
1147
+ {
1148
+ "epoch": 72.0,
1149
+ "grad_norm": 1.2609037160873413,
1150
+ "learning_rate": 1.3976608187134504e-05,
1151
+ "loss": 0.1465,
1152
+ "step": 1422
1153
+ },
1154
+ {
1155
+ "epoch": 72.0,
1156
+ "eval_accuracy": 0.8690476190476191,
1157
+ "eval_loss": 0.559054434299469,
1158
+ "eval_runtime": 10.7661,
1159
+ "eval_samples_per_second": 117.034,
1160
+ "eval_steps_per_second": 0.929,
1161
+ "step": 1422
1162
+ },
1163
+ {
1164
+ "epoch": 72.9620253164557,
1165
+ "grad_norm": 1.456437587738037,
1166
+ "learning_rate": 1.3421052631578948e-05,
1167
+ "loss": 0.1478,
1168
+ "step": 1441
1169
+ },
1170
+ {
1171
+ "epoch": 72.9620253164557,
1172
+ "eval_accuracy": 0.8722222222222222,
1173
+ "eval_loss": 0.532349705696106,
1174
+ "eval_runtime": 10.7627,
1175
+ "eval_samples_per_second": 117.071,
1176
+ "eval_steps_per_second": 0.929,
1177
+ "step": 1441
1178
+ },
1179
+ {
1180
+ "epoch": 73.9746835443038,
1181
+ "grad_norm": 1.404703140258789,
1182
+ "learning_rate": 1.283625730994152e-05,
1183
+ "loss": 0.1447,
1184
+ "step": 1461
1185
+ },
1186
+ {
1187
+ "epoch": 73.9746835443038,
1188
+ "eval_accuracy": 0.8690476190476191,
1189
+ "eval_loss": 0.5560940504074097,
1190
+ "eval_runtime": 10.7443,
1191
+ "eval_samples_per_second": 117.271,
1192
+ "eval_steps_per_second": 0.931,
1193
+ "step": 1461
1194
+ },
1195
+ {
1196
+ "epoch": 74.9873417721519,
1197
+ "grad_norm": 1.3342186212539673,
1198
+ "learning_rate": 1.2251461988304095e-05,
1199
+ "loss": 0.1435,
1200
+ "step": 1481
1201
+ },
1202
+ {
1203
+ "epoch": 74.9873417721519,
1204
+ "eval_accuracy": 0.8658730158730159,
1205
+ "eval_loss": 0.5346001982688904,
1206
+ "eval_runtime": 10.8529,
1207
+ "eval_samples_per_second": 116.098,
1208
+ "eval_steps_per_second": 0.921,
1209
+ "step": 1481
1210
+ },
1211
+ {
1212
+ "epoch": 76.0,
1213
+ "grad_norm": 1.2481029033660889,
1214
+ "learning_rate": 1.1666666666666668e-05,
1215
+ "loss": 0.1459,
1216
+ "step": 1501
1217
+ },
1218
+ {
1219
+ "epoch": 76.0,
1220
+ "eval_accuracy": 0.8658730158730159,
1221
+ "eval_loss": 0.5466868281364441,
1222
+ "eval_runtime": 11.0795,
1223
+ "eval_samples_per_second": 113.723,
1224
+ "eval_steps_per_second": 0.903,
1225
+ "step": 1501
1226
+ },
1227
+ {
1228
+ "epoch": 76.9620253164557,
1229
+ "grad_norm": 1.2153362035751343,
1230
+ "learning_rate": 1.1111111111111112e-05,
1231
+ "loss": 0.1474,
1232
+ "step": 1520
1233
+ },
1234
+ {
1235
+ "epoch": 76.9620253164557,
1236
+ "eval_accuracy": 0.8690476190476191,
1237
+ "eval_loss": 0.5463184118270874,
1238
+ "eval_runtime": 10.7937,
1239
+ "eval_samples_per_second": 116.735,
1240
+ "eval_steps_per_second": 0.926,
1241
+ "step": 1520
1242
+ },
1243
+ {
1244
+ "epoch": 77.9746835443038,
1245
+ "grad_norm": 1.2351834774017334,
1246
+ "learning_rate": 1.0526315789473684e-05,
1247
+ "loss": 0.1352,
1248
+ "step": 1540
1249
+ },
1250
+ {
1251
+ "epoch": 77.9746835443038,
1252
+ "eval_accuracy": 0.8650793650793651,
1253
+ "eval_loss": 0.5412562489509583,
1254
+ "eval_runtime": 11.1033,
1255
+ "eval_samples_per_second": 113.48,
1256
+ "eval_steps_per_second": 0.901,
1257
+ "step": 1540
1258
+ },
1259
+ {
1260
+ "epoch": 78.9873417721519,
1261
+ "grad_norm": 1.3961732387542725,
1262
+ "learning_rate": 9.941520467836257e-06,
1263
+ "loss": 0.1337,
1264
+ "step": 1560
1265
+ },
1266
+ {
1267
+ "epoch": 78.9873417721519,
1268
+ "eval_accuracy": 0.8650793650793651,
1269
+ "eval_loss": 0.5488775372505188,
1270
+ "eval_runtime": 10.7671,
1271
+ "eval_samples_per_second": 117.023,
1272
+ "eval_steps_per_second": 0.929,
1273
+ "step": 1560
1274
+ },
1275
+ {
1276
+ "epoch": 80.0,
1277
+ "grad_norm": 1.8050953149795532,
1278
+ "learning_rate": 9.35672514619883e-06,
1279
+ "loss": 0.1374,
1280
+ "step": 1580
1281
+ },
1282
+ {
1283
+ "epoch": 80.0,
1284
+ "eval_accuracy": 0.8587301587301587,
1285
+ "eval_loss": 0.5454345941543579,
1286
+ "eval_runtime": 10.8299,
1287
+ "eval_samples_per_second": 116.345,
1288
+ "eval_steps_per_second": 0.923,
1289
+ "step": 1580
1290
+ },
1291
+ {
1292
+ "epoch": 80.9620253164557,
1293
+ "grad_norm": 1.2362314462661743,
1294
+ "learning_rate": 8.801169590643275e-06,
1295
+ "loss": 0.1383,
1296
+ "step": 1599
1297
+ },
1298
+ {
1299
+ "epoch": 80.9620253164557,
1300
+ "eval_accuracy": 0.8626984126984127,
1301
+ "eval_loss": 0.5432500243186951,
1302
+ "eval_runtime": 10.8767,
1303
+ "eval_samples_per_second": 115.844,
1304
+ "eval_steps_per_second": 0.919,
1305
+ "step": 1599
1306
+ },
1307
+ {
1308
+ "epoch": 81.9746835443038,
1309
+ "grad_norm": 1.1372051239013672,
1310
+ "learning_rate": 8.216374269005848e-06,
1311
+ "loss": 0.1408,
1312
+ "step": 1619
1313
+ },
1314
+ {
1315
+ "epoch": 81.9746835443038,
1316
+ "eval_accuracy": 0.8682539682539683,
1317
+ "eval_loss": 0.5383033156394958,
1318
+ "eval_runtime": 10.9264,
1319
+ "eval_samples_per_second": 115.317,
1320
+ "eval_steps_per_second": 0.915,
1321
+ "step": 1619
1322
+ },
1323
+ {
1324
+ "epoch": 82.9873417721519,
1325
+ "grad_norm": 1.6927990913391113,
1326
+ "learning_rate": 7.631578947368421e-06,
1327
+ "loss": 0.134,
1328
+ "step": 1639
1329
+ },
1330
+ {
1331
+ "epoch": 82.9873417721519,
1332
+ "eval_accuracy": 0.8642857142857143,
1333
+ "eval_loss": 0.5522441267967224,
1334
+ "eval_runtime": 10.7932,
1335
+ "eval_samples_per_second": 116.74,
1336
+ "eval_steps_per_second": 0.927,
1337
+ "step": 1639
1338
+ },
1339
+ {
1340
+ "epoch": 84.0,
1341
+ "grad_norm": 1.1734745502471924,
1342
+ "learning_rate": 7.046783625730995e-06,
1343
+ "loss": 0.1353,
1344
+ "step": 1659
1345
+ },
1346
+ {
1347
+ "epoch": 84.0,
1348
+ "eval_accuracy": 0.8579365079365079,
1349
+ "eval_loss": 0.5485585331916809,
1350
+ "eval_runtime": 10.8452,
1351
+ "eval_samples_per_second": 116.181,
1352
+ "eval_steps_per_second": 0.922,
1353
+ "step": 1659
1354
+ },
1355
+ {
1356
+ "epoch": 84.9620253164557,
1357
+ "grad_norm": 1.3662621974945068,
1358
+ "learning_rate": 6.4912280701754385e-06,
1359
+ "loss": 0.1435,
1360
+ "step": 1678
1361
+ },
1362
+ {
1363
+ "epoch": 84.9620253164557,
1364
+ "eval_accuracy": 0.8595238095238096,
1365
+ "eval_loss": 0.5582545399665833,
1366
+ "eval_runtime": 10.7527,
1367
+ "eval_samples_per_second": 117.18,
1368
+ "eval_steps_per_second": 0.93,
1369
+ "step": 1678
1370
+ },
1371
+ {
1372
+ "epoch": 85.9746835443038,
1373
+ "grad_norm": 1.3297693729400635,
1374
+ "learning_rate": 5.906432748538012e-06,
1375
+ "loss": 0.1324,
1376
+ "step": 1698
1377
+ },
1378
+ {
1379
+ "epoch": 85.9746835443038,
1380
+ "eval_accuracy": 0.861904761904762,
1381
+ "eval_loss": 0.5551320910453796,
1382
+ "eval_runtime": 10.8253,
1383
+ "eval_samples_per_second": 116.394,
1384
+ "eval_steps_per_second": 0.924,
1385
+ "step": 1698
1386
+ },
1387
+ {
1388
+ "epoch": 86.9873417721519,
1389
+ "grad_norm": 1.2304210662841797,
1390
+ "learning_rate": 5.321637426900585e-06,
1391
+ "loss": 0.1306,
1392
+ "step": 1718
1393
+ },
1394
+ {
1395
+ "epoch": 86.9873417721519,
1396
+ "eval_accuracy": 0.8611111111111112,
1397
+ "eval_loss": 0.553473949432373,
1398
+ "eval_runtime": 10.7756,
1399
+ "eval_samples_per_second": 116.931,
1400
+ "eval_steps_per_second": 0.928,
1401
+ "step": 1718
1402
+ },
1403
+ {
1404
+ "epoch": 88.0,
1405
+ "grad_norm": 1.323527216911316,
1406
+ "learning_rate": 4.736842105263159e-06,
1407
+ "loss": 0.1348,
1408
+ "step": 1738
1409
+ },
1410
+ {
1411
+ "epoch": 88.0,
1412
+ "eval_accuracy": 0.8666666666666667,
1413
+ "eval_loss": 0.5498299598693848,
1414
+ "eval_runtime": 10.7878,
1415
+ "eval_samples_per_second": 116.799,
1416
+ "eval_steps_per_second": 0.927,
1417
+ "step": 1738
1418
+ },
1419
+ {
1420
+ "epoch": 88.9620253164557,
1421
+ "grad_norm": 1.0867611169815063,
1422
+ "learning_rate": 4.181286549707602e-06,
1423
+ "loss": 0.1334,
1424
+ "step": 1757
1425
+ },
1426
+ {
1427
+ "epoch": 88.9620253164557,
1428
+ "eval_accuracy": 0.8658730158730159,
1429
+ "eval_loss": 0.5582374930381775,
1430
+ "eval_runtime": 10.7756,
1431
+ "eval_samples_per_second": 116.931,
1432
+ "eval_steps_per_second": 0.928,
1433
+ "step": 1757
1434
+ },
1435
+ {
1436
+ "epoch": 89.9746835443038,
1437
+ "grad_norm": 1.0990999937057495,
1438
+ "learning_rate": 3.5964912280701756e-06,
1439
+ "loss": 0.1343,
1440
+ "step": 1777
1441
+ },
1442
+ {
1443
+ "epoch": 89.9746835443038,
1444
+ "eval_accuracy": 0.8658730158730159,
1445
+ "eval_loss": 0.5526331067085266,
1446
+ "eval_runtime": 10.8124,
1447
+ "eval_samples_per_second": 116.533,
1448
+ "eval_steps_per_second": 0.925,
1449
+ "step": 1777
1450
+ },
1451
+ {
1452
+ "epoch": 90.9873417721519,
1453
+ "grad_norm": 1.3471728563308716,
1454
+ "learning_rate": 3.011695906432749e-06,
1455
+ "loss": 0.1275,
1456
+ "step": 1797
1457
+ },
1458
+ {
1459
+ "epoch": 90.9873417721519,
1460
+ "eval_accuracy": 0.8650793650793651,
1461
+ "eval_loss": 0.5543471574783325,
1462
+ "eval_runtime": 10.7534,
1463
+ "eval_samples_per_second": 117.172,
1464
+ "eval_steps_per_second": 0.93,
1465
+ "step": 1797
1466
+ },
1467
+ {
1468
+ "epoch": 92.0,
1469
+ "grad_norm": 1.3125709295272827,
1470
+ "learning_rate": 2.426900584795322e-06,
1471
+ "loss": 0.1285,
1472
+ "step": 1817
1473
+ },
1474
+ {
1475
+ "epoch": 92.0,
1476
+ "eval_accuracy": 0.8674603174603175,
1477
+ "eval_loss": 0.551249086856842,
1478
+ "eval_runtime": 10.7174,
1479
+ "eval_samples_per_second": 117.566,
1480
+ "eval_steps_per_second": 0.933,
1481
+ "step": 1817
1482
+ },
1483
+ {
1484
+ "epoch": 92.9620253164557,
1485
+ "grad_norm": 1.069954752922058,
1486
+ "learning_rate": 1.8713450292397662e-06,
1487
+ "loss": 0.127,
1488
+ "step": 1836
1489
+ },
1490
+ {
1491
+ "epoch": 92.9620253164557,
1492
+ "eval_accuracy": 0.8634920634920635,
1493
+ "eval_loss": 0.5507932305335999,
1494
+ "eval_runtime": 10.7352,
1495
+ "eval_samples_per_second": 117.371,
1496
+ "eval_steps_per_second": 0.932,
1497
+ "step": 1836
1498
+ },
1499
+ {
1500
+ "epoch": 93.9746835443038,
1501
+ "grad_norm": 1.1107105016708374,
1502
+ "learning_rate": 1.2865497076023394e-06,
1503
+ "loss": 0.1258,
1504
+ "step": 1856
1505
+ },
1506
+ {
1507
+ "epoch": 93.9746835443038,
1508
+ "eval_accuracy": 0.8642857142857143,
1509
+ "eval_loss": 0.5506840944290161,
1510
+ "eval_runtime": 10.5904,
1511
+ "eval_samples_per_second": 118.975,
1512
+ "eval_steps_per_second": 0.944,
1513
+ "step": 1856
1514
+ },
1515
+ {
1516
+ "epoch": 94.9873417721519,
1517
+ "grad_norm": 1.1726576089859009,
1518
+ "learning_rate": 7.017543859649123e-07,
1519
+ "loss": 0.1119,
1520
+ "step": 1876
1521
+ },
1522
+ {
1523
+ "epoch": 94.9873417721519,
1524
+ "eval_accuracy": 0.8666666666666667,
1525
+ "eval_loss": 0.5506576299667358,
1526
+ "eval_runtime": 10.8474,
1527
+ "eval_samples_per_second": 116.157,
1528
+ "eval_steps_per_second": 0.922,
1529
+ "step": 1876
1530
+ },
1531
+ {
1532
+ "epoch": 96.0,
1533
+ "grad_norm": 1.4846915006637573,
1534
+ "learning_rate": 1.1695906432748539e-07,
1535
+ "loss": 0.1322,
1536
+ "step": 1896
1537
+ },
1538
+ {
1539
+ "epoch": 96.0,
1540
+ "eval_accuracy": 0.8658730158730159,
1541
+ "eval_loss": 0.5504564046859741,
1542
+ "eval_runtime": 11.0992,
1543
+ "eval_samples_per_second": 113.522,
1544
+ "eval_steps_per_second": 0.901,
1545
+ "step": 1896
1546
+ },
1547
+ {
1548
+ "epoch": 96.20253164556962,
1549
+ "grad_norm": 1.0216985940933228,
1550
+ "learning_rate": 0.0,
1551
+ "loss": 0.1315,
1552
+ "step": 1900
1553
+ },
1554
+ {
1555
+ "epoch": 96.20253164556962,
1556
+ "eval_accuracy": 0.8658730158730159,
1557
+ "eval_loss": 0.5504307150840759,
1558
+ "eval_runtime": 10.802,
1559
+ "eval_samples_per_second": 116.645,
1560
+ "eval_steps_per_second": 0.926,
1561
+ "step": 1900
1562
+ },
1563
+ {
1564
+ "epoch": 96.20253164556962,
1565
+ "step": 1900,
1566
+ "total_flos": 7.515490775048022e+19,
1567
+ "train_loss": 0.33647052476280614,
1568
+ "train_runtime": 20573.1873,
1569
+ "train_samples_per_second": 48.996,
1570
+ "train_steps_per_second": 0.092
1571
+ }
1572
+ ],
1573
+ "logging_steps": 500,
1574
+ "max_steps": 1900,
1575
+ "num_input_tokens_seen": 0,
1576
+ "num_train_epochs": 100,
1577
+ "save_steps": 500,
1578
+ "stateful_callbacks": {
1579
+ "TrainerControl": {
1580
+ "args": {
1581
+ "should_epoch_stop": false,
1582
+ "should_evaluate": false,
1583
+ "should_log": false,
1584
+ "should_save": true,
1585
+ "should_training_stop": true
1586
+ },
1587
+ "attributes": {}
1588
+ }
1589
+ },
1590
+ "total_flos": 7.515490775048022e+19,
1591
+ "train_batch_size": 128,
1592
+ "trial_name": null,
1593
+ "trial_params": null
1594
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1c0f33a3024e6ec45ce8978209c580f91e2084ba0bf40c70af9b63aea9815a
3
+ size 5112