HYPJUDY committed on
Commit
47b8d3d
1 Parent(s): 41e3fb7
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_accuracy": 0.8317638952687184,
4
+ "eval_f1": 0.9058561897702001,
5
+ "eval_loss": 1.1073323488235474,
6
+ "eval_precision": 0.8954567659990229,
7
+ "eval_recall": 0.9165,
8
+ "eval_runtime": 6.9815,
9
+ "eval_samples": 54,
10
+ "eval_samples_per_second": 7.735,
11
+ "eval_steps_per_second": 0.143,
12
+ "train_loss": 0.12190062952041626,
13
+ "train_runtime": 303.1848,
14
+ "train_samples": 150,
15
+ "train_samples_per_second": 52.773,
16
+ "train_steps_per_second": 3.298
17
+ }
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/layoutlmv3-base",
3
+ "architectures": [
4
+ "LayoutLMv3ForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "coordinate_size": 128,
10
+ "device": "cuda",
11
+ "eos_token_id": 2,
12
+ "finetuning_task": "ner",
13
+ "has_relative_attention_bias": true,
14
+ "has_spatial_attention_bias": true,
15
+ "hidden_act": "gelu",
16
+ "hidden_dropout_prob": 0.1,
17
+ "hidden_size": 768,
18
+ "id2label": {
19
+ "0": "LABEL_0",
20
+ "1": "LABEL_1",
21
+ "2": "LABEL_2",
22
+ "3": "LABEL_3",
23
+ "4": "LABEL_4",
24
+ "5": "LABEL_5",
25
+ "6": "LABEL_6"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "input_size": 224,
29
+ "intermediate_size": 3072,
30
+ "label2id": {
31
+ "LABEL_0": 0,
32
+ "LABEL_1": 1,
33
+ "LABEL_2": 2,
34
+ "LABEL_3": 3,
35
+ "LABEL_4": 4,
36
+ "LABEL_5": 5,
37
+ "LABEL_6": 6
38
+ },
39
+ "layer_norm_eps": 1e-05,
40
+ "max_2d_position_embeddings": 1024,
41
+ "max_position_embeddings": 514,
42
+ "max_rel_2d_pos": 256,
43
+ "max_rel_pos": 128,
44
+ "model_type": "layoutlmv3",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 12,
47
+ "pad_token_id": 1,
48
+ "rel_2d_pos_bins": 64,
49
+ "rel_pos_bins": 32,
50
+ "second_input_size": 112,
51
+ "shape_size": 128,
52
+ "torch_dtype": "float32",
53
+ "transformers_version": "4.12.5",
54
+ "type_vocab_size": 1,
55
+ "visual_embed": true,
56
+ "vocab_size": 50265
57
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.8317638952687184,
3
+ "eval_f1": 0.9058561897702001,
4
+ "eval_loss": 1.1073323488235474,
5
+ "eval_precision": 0.8954567659990229,
6
+ "eval_recall": 0.9165,
7
+ "eval_runtime": 6.9815,
8
+ "eval_samples": 54,
9
+ "eval_samples_per_second": 7.735,
10
+ "eval_steps_per_second": 0.143
11
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/Apr17_09-22-17_deeplearning28/1650187356.1828399/events.out.tfevents.1650187356.deeplearning28.62374.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e56b786e1c8f0a414418427c0bd4dac933d88d70b6c4e2bb105b6ba57d177344
3
+ size 4668
runs/Apr17_09-22-17_deeplearning28/events.out.tfevents.1650187356.deeplearning28.62374.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a51a059f1e8d92b176e851ce3f7d93912de23d218b5408971a1e950f01569225
3
+ size 28437
runs/Apr17_09-22-17_deeplearning28/events.out.tfevents.1650187663.deeplearning28.62374.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d6bf2f4828829ac579df22b615df28002226f081334f02e419947b166f48da
3
+ size 512
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/localdata/users/yupanhuang/models/layoutlmv3/pts/layoutlmv3-base", "tokenizer_class": "LayoutLMv3Tokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "train_loss": 0.12190062952041626,
4
+ "train_runtime": 303.1848,
5
+ "train_samples": 150,
6
+ "train_samples_per_second": 52.773,
7
+ "train_steps_per_second": 3.298
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 2.0,
12
+ "eval_accuracy": 0.6288470372071658,
13
+ "eval_f1": 0.43084004602991943,
14
+ "eval_loss": 1.2868107557296753,
15
+ "eval_precision": 0.3991471215351812,
16
+ "eval_recall": 0.468,
17
+ "eval_runtime": 1.4977,
18
+ "eval_samples_per_second": 36.054,
19
+ "eval_steps_per_second": 0.668,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 4.0,
24
+ "eval_accuracy": 0.7672255397335783,
25
+ "eval_f1": 0.7671957671957673,
26
+ "eval_loss": 0.8454405665397644,
27
+ "eval_precision": 0.7391102873030584,
28
+ "eval_recall": 0.7975,
29
+ "eval_runtime": 2.3492,
30
+ "eval_samples_per_second": 22.986,
31
+ "eval_steps_per_second": 0.426,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 6.0,
36
+ "eval_accuracy": 0.7731970601745521,
37
+ "eval_f1": 0.8189655172413792,
38
+ "eval_loss": 0.6648961305618286,
39
+ "eval_precision": 0.7858455882352942,
40
+ "eval_recall": 0.855,
41
+ "eval_runtime": 2.4309,
42
+ "eval_samples_per_second": 22.214,
43
+ "eval_steps_per_second": 0.411,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 8.0,
48
+ "eval_accuracy": 0.8240698208543867,
49
+ "eval_f1": 0.8516630250060694,
50
+ "eval_loss": 0.6003230810165405,
51
+ "eval_precision": 0.827748938178386,
52
+ "eval_recall": 0.877,
53
+ "eval_runtime": 1.946,
54
+ "eval_samples_per_second": 27.749,
55
+ "eval_steps_per_second": 0.514,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 10.0,
60
+ "eval_accuracy": 0.8000689021589343,
61
+ "eval_f1": 0.8560497369679579,
62
+ "eval_loss": 0.5940394401550293,
63
+ "eval_precision": 0.8203483043079743,
64
+ "eval_recall": 0.895,
65
+ "eval_runtime": 1.5738,
66
+ "eval_samples_per_second": 34.312,
67
+ "eval_steps_per_second": 0.635,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 12.0,
72
+ "eval_accuracy": 0.8124712907671107,
73
+ "eval_f1": 0.8579682233991335,
74
+ "eval_loss": 0.6028273701667786,
75
+ "eval_precision": 0.8272980501392758,
76
+ "eval_recall": 0.891,
77
+ "eval_runtime": 2.3361,
78
+ "eval_samples_per_second": 23.116,
79
+ "eval_steps_per_second": 0.428,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 14.0,
84
+ "eval_accuracy": 0.8300413412953606,
85
+ "eval_f1": 0.8839373163565133,
86
+ "eval_loss": 0.5925479531288147,
87
+ "eval_precision": 0.8661228406909789,
88
+ "eval_recall": 0.9025,
89
+ "eval_runtime": 1.549,
90
+ "eval_samples_per_second": 34.86,
91
+ "eval_steps_per_second": 0.646,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 16.0,
96
+ "eval_accuracy": 0.8147680293982544,
97
+ "eval_f1": 0.864694471387003,
98
+ "eval_loss": 0.6319019198417664,
99
+ "eval_precision": 0.8394538606403014,
100
+ "eval_recall": 0.8915,
101
+ "eval_runtime": 1.7343,
102
+ "eval_samples_per_second": 31.136,
103
+ "eval_steps_per_second": 0.577,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 18.0,
108
+ "eval_accuracy": 0.8295819935691319,
109
+ "eval_f1": 0.8697776691913022,
110
+ "eval_loss": 0.660470187664032,
111
+ "eval_precision": 0.8504538939321548,
112
+ "eval_recall": 0.89,
113
+ "eval_runtime": 1.7198,
114
+ "eval_samples_per_second": 31.398,
115
+ "eval_steps_per_second": 0.581,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 20.0,
120
+ "eval_accuracy": 0.8275149288011024,
121
+ "eval_f1": 0.8871046228710462,
122
+ "eval_loss": 0.6778170466423035,
123
+ "eval_precision": 0.8639810426540284,
124
+ "eval_recall": 0.9115,
125
+ "eval_runtime": 1.5366,
126
+ "eval_samples_per_second": 35.142,
127
+ "eval_steps_per_second": 0.651,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 22.0,
132
+ "eval_accuracy": 0.815686724850712,
133
+ "eval_f1": 0.8839615668883962,
134
+ "eval_loss": 0.7490019798278809,
135
+ "eval_precision": 0.8712967459932006,
136
+ "eval_recall": 0.897,
137
+ "eval_runtime": 2.3087,
138
+ "eval_samples_per_second": 23.39,
139
+ "eval_steps_per_second": 0.433,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 24.0,
144
+ "eval_accuracy": 0.8222324299494718,
145
+ "eval_f1": 0.8948667324777887,
146
+ "eval_loss": 0.7746959924697876,
147
+ "eval_precision": 0.8835282651072125,
148
+ "eval_recall": 0.9065,
149
+ "eval_runtime": 1.9661,
150
+ "eval_samples_per_second": 27.465,
151
+ "eval_steps_per_second": 0.509,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 26.0,
156
+ "eval_accuracy": 0.8058107487367937,
157
+ "eval_f1": 0.8912883435582821,
158
+ "eval_loss": 0.8429032564163208,
159
+ "eval_precision": 0.8751807228915662,
160
+ "eval_recall": 0.908,
161
+ "eval_runtime": 1.9643,
162
+ "eval_samples_per_second": 27.491,
163
+ "eval_steps_per_second": 0.509,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 28.0,
168
+ "eval_accuracy": 0.8129306384933395,
169
+ "eval_f1": 0.8899308983218163,
170
+ "eval_loss": 0.8374262452125549,
171
+ "eval_precision": 0.8786549707602339,
172
+ "eval_recall": 0.9015,
173
+ "eval_runtime": 1.7097,
174
+ "eval_samples_per_second": 31.584,
175
+ "eval_steps_per_second": 0.585,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 30.0,
180
+ "eval_accuracy": 0.8112080845199816,
181
+ "eval_f1": 0.8747830399206545,
182
+ "eval_loss": 0.9091736078262329,
183
+ "eval_precision": 0.867683226758485,
184
+ "eval_recall": 0.882,
185
+ "eval_runtime": 2.3751,
186
+ "eval_samples_per_second": 22.736,
187
+ "eval_steps_per_second": 0.421,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 32.0,
192
+ "eval_accuracy": 0.8288929719797887,
193
+ "eval_f1": 0.8963474827245804,
194
+ "eval_loss": 0.8785933256149292,
195
+ "eval_precision": 0.884990253411306,
196
+ "eval_recall": 0.908,
197
+ "eval_runtime": 2.0542,
198
+ "eval_samples_per_second": 26.288,
199
+ "eval_steps_per_second": 0.487,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 34.0,
204
+ "eval_accuracy": 0.8307303628847037,
205
+ "eval_f1": 0.8931750741839762,
206
+ "eval_loss": 0.91584312915802,
207
+ "eval_precision": 0.8835616438356164,
208
+ "eval_recall": 0.903,
209
+ "eval_runtime": 2.4582,
210
+ "eval_samples_per_second": 21.967,
211
+ "eval_steps_per_second": 0.407,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 36.0,
216
+ "eval_accuracy": 0.8259072117593018,
217
+ "eval_f1": 0.8948534843634572,
218
+ "eval_loss": 0.9160082340240479,
219
+ "eval_precision": 0.8816108685104318,
220
+ "eval_recall": 0.9085,
221
+ "eval_runtime": 2.3705,
222
+ "eval_samples_per_second": 22.78,
223
+ "eval_steps_per_second": 0.422,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 38.0,
228
+ "eval_accuracy": 0.8171796049609554,
229
+ "eval_f1": 0.8942850134903116,
230
+ "eval_loss": 0.9378513693809509,
231
+ "eval_precision": 0.8777082330284064,
232
+ "eval_recall": 0.9115,
233
+ "eval_runtime": 2.2021,
234
+ "eval_samples_per_second": 24.522,
235
+ "eval_steps_per_second": 0.454,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 40.0,
240
+ "eval_accuracy": 0.8079926504363804,
241
+ "eval_f1": 0.8961134197017844,
242
+ "eval_loss": 0.9751215577125549,
243
+ "eval_precision": 0.8766140602582496,
244
+ "eval_recall": 0.9165,
245
+ "eval_runtime": 1.9472,
246
+ "eval_samples_per_second": 27.733,
247
+ "eval_steps_per_second": 0.514,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 42.0,
252
+ "eval_accuracy": 0.8140790078089113,
253
+ "eval_f1": 0.8976067110782137,
254
+ "eval_loss": 0.922682523727417,
255
+ "eval_precision": 0.8860204578665368,
256
+ "eval_recall": 0.9095,
257
+ "eval_runtime": 1.9496,
258
+ "eval_samples_per_second": 27.699,
259
+ "eval_steps_per_second": 0.513,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 44.0,
264
+ "eval_accuracy": 0.8154570509875976,
265
+ "eval_f1": 0.8868017795353437,
266
+ "eval_loss": 1.0090957880020142,
267
+ "eval_precision": 0.8768328445747801,
268
+ "eval_recall": 0.897,
269
+ "eval_runtime": 1.9314,
270
+ "eval_samples_per_second": 27.96,
271
+ "eval_steps_per_second": 0.518,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 46.0,
276
+ "eval_accuracy": 0.8182131373449701,
277
+ "eval_f1": 0.8956933034602937,
278
+ "eval_loss": 0.9963611960411072,
279
+ "eval_precision": 0.8919186911254338,
280
+ "eval_recall": 0.8995,
281
+ "eval_runtime": 2.4248,
282
+ "eval_samples_per_second": 22.27,
283
+ "eval_steps_per_second": 0.412,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 48.0,
288
+ "eval_accuracy": 0.8203950390445567,
289
+ "eval_f1": 0.896329365079365,
290
+ "eval_loss": 1.0006074905395508,
291
+ "eval_precision": 0.8892716535433071,
292
+ "eval_recall": 0.9035,
293
+ "eval_runtime": 2.4269,
294
+ "eval_samples_per_second": 22.25,
295
+ "eval_steps_per_second": 0.412,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 50.0,
300
+ "learning_rate": 5e-06,
301
+ "loss": 0.235,
302
+ "step": 500
303
+ },
304
+ {
305
+ "epoch": 50.0,
306
+ "eval_accuracy": 0.8178686265502986,
307
+ "eval_f1": 0.8960199004975123,
308
+ "eval_loss": 1.0283308029174805,
309
+ "eval_precision": 0.8915841584158416,
310
+ "eval_recall": 0.9005,
311
+ "eval_runtime": 2.2977,
312
+ "eval_samples_per_second": 23.502,
313
+ "eval_steps_per_second": 0.435,
314
+ "step": 500
315
+ },
316
+ {
317
+ "epoch": 52.0,
318
+ "eval_accuracy": 0.827859439595774,
319
+ "eval_f1": 0.8951014349332014,
320
+ "eval_loss": 0.9926251769065857,
321
+ "eval_precision": 0.885896180215475,
322
+ "eval_recall": 0.9045,
323
+ "eval_runtime": 1.6933,
324
+ "eval_samples_per_second": 31.891,
325
+ "eval_steps_per_second": 0.591,
326
+ "step": 520
327
+ },
328
+ {
329
+ "epoch": 54.0,
330
+ "eval_accuracy": 0.8228066146072577,
331
+ "eval_f1": 0.8948148148148148,
332
+ "eval_loss": 1.0112966299057007,
333
+ "eval_precision": 0.8839024390243903,
334
+ "eval_recall": 0.906,
335
+ "eval_runtime": 2.4054,
336
+ "eval_samples_per_second": 22.449,
337
+ "eval_steps_per_second": 0.416,
338
+ "step": 540
339
+ },
340
+ {
341
+ "epoch": 56.0,
342
+ "eval_accuracy": 0.8290078089113458,
343
+ "eval_f1": 0.8993819530284302,
344
+ "eval_loss": 1.004191279411316,
345
+ "eval_precision": 0.8894865525672372,
346
+ "eval_recall": 0.9095,
347
+ "eval_runtime": 1.6522,
348
+ "eval_samples_per_second": 32.684,
349
+ "eval_steps_per_second": 0.605,
350
+ "step": 560
351
+ },
352
+ {
353
+ "epoch": 58.0,
354
+ "eval_accuracy": 0.8225769407441433,
355
+ "eval_f1": 0.8922392486406326,
356
+ "eval_loss": 1.0357481241226196,
357
+ "eval_precision": 0.8822091886608016,
358
+ "eval_recall": 0.9025,
359
+ "eval_runtime": 2.1102,
360
+ "eval_samples_per_second": 25.59,
361
+ "eval_steps_per_second": 0.474,
362
+ "step": 580
363
+ },
364
+ {
365
+ "epoch": 60.0,
366
+ "eval_accuracy": 0.8178686265502986,
367
+ "eval_f1": 0.901213171577123,
368
+ "eval_loss": 1.0394996404647827,
369
+ "eval_precision": 0.8925944090240314,
370
+ "eval_recall": 0.91,
371
+ "eval_runtime": 2.4208,
372
+ "eval_samples_per_second": 22.307,
373
+ "eval_steps_per_second": 0.413,
374
+ "step": 600
375
+ },
376
+ {
377
+ "epoch": 62.0,
378
+ "eval_accuracy": 0.8201653651814423,
379
+ "eval_f1": 0.8971962616822431,
380
+ "eval_loss": 1.004025936126709,
381
+ "eval_precision": 0.882865440464666,
382
+ "eval_recall": 0.912,
383
+ "eval_runtime": 2.4098,
384
+ "eval_samples_per_second": 22.408,
385
+ "eval_steps_per_second": 0.415,
386
+ "step": 620
387
+ },
388
+ {
389
+ "epoch": 64.0,
390
+ "eval_accuracy": 0.8195911805236564,
391
+ "eval_f1": 0.8941929133858268,
392
+ "eval_loss": 1.029054880142212,
393
+ "eval_precision": 0.8803294573643411,
394
+ "eval_recall": 0.9085,
395
+ "eval_runtime": 2.4017,
396
+ "eval_samples_per_second": 22.484,
397
+ "eval_steps_per_second": 0.416,
398
+ "step": 640
399
+ },
400
+ {
401
+ "epoch": 66.0,
402
+ "eval_accuracy": 0.8279742765273312,
403
+ "eval_f1": 0.8993055555555556,
404
+ "eval_loss": 1.075648546218872,
405
+ "eval_precision": 0.8922244094488189,
406
+ "eval_recall": 0.9065,
407
+ "eval_runtime": 2.4111,
408
+ "eval_samples_per_second": 22.396,
409
+ "eval_steps_per_second": 0.415,
410
+ "step": 660
411
+ },
412
+ {
413
+ "epoch": 68.0,
414
+ "eval_accuracy": 0.8234956361966008,
415
+ "eval_f1": 0.8966716343765524,
416
+ "eval_loss": 1.1056932210922241,
417
+ "eval_precision": 0.8909180651530109,
418
+ "eval_recall": 0.9025,
419
+ "eval_runtime": 2.4045,
420
+ "eval_samples_per_second": 22.458,
421
+ "eval_steps_per_second": 0.416,
422
+ "step": 680
423
+ },
424
+ {
425
+ "epoch": 70.0,
426
+ "eval_accuracy": 0.8268259072117593,
427
+ "eval_f1": 0.9007407407407408,
428
+ "eval_loss": 1.1429905891418457,
429
+ "eval_precision": 0.8897560975609756,
430
+ "eval_recall": 0.912,
431
+ "eval_runtime": 2.3943,
432
+ "eval_samples_per_second": 22.553,
433
+ "eval_steps_per_second": 0.418,
434
+ "step": 700
435
+ },
436
+ {
437
+ "epoch": 72.0,
438
+ "eval_accuracy": 0.8221175930179145,
439
+ "eval_f1": 0.9012012748222604,
440
+ "eval_loss": 1.0474272966384888,
441
+ "eval_precision": 0.8840788840788841,
442
+ "eval_recall": 0.919,
443
+ "eval_runtime": 2.6325,
444
+ "eval_samples_per_second": 20.512,
445
+ "eval_steps_per_second": 0.38,
446
+ "step": 720
447
+ },
448
+ {
449
+ "epoch": 74.0,
450
+ "eval_accuracy": 0.8191318327974276,
451
+ "eval_f1": 0.9079694053787319,
452
+ "eval_loss": 1.1182180643081665,
453
+ "eval_precision": 0.8962493911349245,
454
+ "eval_recall": 0.92,
455
+ "eval_runtime": 2.4698,
456
+ "eval_samples_per_second": 21.864,
457
+ "eval_steps_per_second": 0.405,
458
+ "step": 740
459
+ },
460
+ {
461
+ "epoch": 76.0,
462
+ "eval_accuracy": 0.8267110702802021,
463
+ "eval_f1": 0.9056324110671936,
464
+ "eval_loss": 1.1421239376068115,
465
+ "eval_precision": 0.89501953125,
466
+ "eval_recall": 0.9165,
467
+ "eval_runtime": 2.0412,
468
+ "eval_samples_per_second": 26.455,
469
+ "eval_steps_per_second": 0.49,
470
+ "step": 760
471
+ },
472
+ {
473
+ "epoch": 78.0,
474
+ "eval_accuracy": 0.825447864033073,
475
+ "eval_f1": 0.9112103174603173,
476
+ "eval_loss": 1.1723241806030273,
477
+ "eval_precision": 0.9040354330708661,
478
+ "eval_recall": 0.9185,
479
+ "eval_runtime": 2.0338,
480
+ "eval_samples_per_second": 26.551,
481
+ "eval_steps_per_second": 0.492,
482
+ "step": 780
483
+ },
484
+ {
485
+ "epoch": 80.0,
486
+ "eval_accuracy": 0.8314193844740468,
487
+ "eval_f1": 0.9091358024691357,
488
+ "eval_loss": 1.0977429151535034,
489
+ "eval_precision": 0.8980487804878049,
490
+ "eval_recall": 0.9205,
491
+ "eval_runtime": 2.708,
492
+ "eval_samples_per_second": 19.941,
493
+ "eval_steps_per_second": 0.369,
494
+ "step": 800
495
+ },
496
+ {
497
+ "epoch": 82.0,
498
+ "eval_accuracy": 0.826596233348645,
499
+ "eval_f1": 0.9020771513353116,
500
+ "eval_loss": 1.1165635585784912,
501
+ "eval_precision": 0.8923679060665362,
502
+ "eval_recall": 0.912,
503
+ "eval_runtime": 2.3964,
504
+ "eval_samples_per_second": 22.533,
505
+ "eval_steps_per_second": 0.417,
506
+ "step": 820
507
+ },
508
+ {
509
+ "epoch": 84.0,
510
+ "eval_accuracy": 0.83210840606339,
511
+ "eval_f1": 0.9058561897702001,
512
+ "eval_loss": 1.1296281814575195,
513
+ "eval_precision": 0.8954567659990229,
514
+ "eval_recall": 0.9165,
515
+ "eval_runtime": 1.5143,
516
+ "eval_samples_per_second": 35.659,
517
+ "eval_steps_per_second": 0.66,
518
+ "step": 840
519
+ },
520
+ {
521
+ "epoch": 86.0,
522
+ "eval_accuracy": 0.8313045475424896,
523
+ "eval_f1": 0.9070687098368759,
524
+ "eval_loss": 1.122943639755249,
525
+ "eval_precision": 0.896871945259042,
526
+ "eval_recall": 0.9175,
527
+ "eval_runtime": 2.2961,
528
+ "eval_samples_per_second": 23.518,
529
+ "eval_steps_per_second": 0.436,
530
+ "step": 860
531
+ },
532
+ {
533
+ "epoch": 88.0,
534
+ "eval_accuracy": 0.8325677537896188,
535
+ "eval_f1": 0.9080573405832921,
536
+ "eval_loss": 1.1123418807983398,
537
+ "eval_precision": 0.8978494623655914,
538
+ "eval_recall": 0.9185,
539
+ "eval_runtime": 2.1658,
540
+ "eval_samples_per_second": 24.933,
541
+ "eval_steps_per_second": 0.462,
542
+ "step": 880
543
+ },
544
+ {
545
+ "epoch": 90.0,
546
+ "eval_accuracy": 0.8325677537896188,
547
+ "eval_f1": 0.9037843185753152,
548
+ "eval_loss": 1.1032230854034424,
549
+ "eval_precision": 0.8942731277533039,
550
+ "eval_recall": 0.9135,
551
+ "eval_runtime": 1.6625,
552
+ "eval_samples_per_second": 32.481,
553
+ "eval_steps_per_second": 0.601,
554
+ "step": 900
555
+ },
556
+ {
557
+ "epoch": 92.0,
558
+ "eval_accuracy": 0.83578318787322,
559
+ "eval_f1": 0.9086407526615499,
560
+ "eval_loss": 1.0933949947357178,
561
+ "eval_precision": 0.8999509563511525,
562
+ "eval_recall": 0.9175,
563
+ "eval_runtime": 2.2261,
564
+ "eval_samples_per_second": 24.258,
565
+ "eval_steps_per_second": 0.449,
566
+ "step": 920
567
+ },
568
+ {
569
+ "epoch": 94.0,
570
+ "eval_accuracy": 0.8360128617363344,
571
+ "eval_f1": 0.9090909090909091,
572
+ "eval_loss": 1.0976922512054443,
573
+ "eval_precision": 0.9008345606283751,
574
+ "eval_recall": 0.9175,
575
+ "eval_runtime": 2.4337,
576
+ "eval_samples_per_second": 22.189,
577
+ "eval_steps_per_second": 0.411,
578
+ "step": 940
579
+ },
580
+ {
581
+ "epoch": 96.0,
582
+ "eval_accuracy": 0.833945796968305,
583
+ "eval_f1": 0.9066205533596838,
584
+ "eval_loss": 1.1002885103225708,
585
+ "eval_precision": 0.89599609375,
586
+ "eval_recall": 0.9175,
587
+ "eval_runtime": 2.3183,
588
+ "eval_samples_per_second": 23.293,
589
+ "eval_steps_per_second": 0.431,
590
+ "step": 960
591
+ },
592
+ {
593
+ "epoch": 98.0,
594
+ "eval_accuracy": 0.8329122645842904,
595
+ "eval_f1": 0.9057164068299927,
596
+ "eval_loss": 1.1073625087738037,
597
+ "eval_precision": 0.8966193042626164,
598
+ "eval_recall": 0.915,
599
+ "eval_runtime": 2.4241,
600
+ "eval_samples_per_second": 22.276,
601
+ "eval_steps_per_second": 0.413,
602
+ "step": 980
603
+ },
604
+ {
605
+ "epoch": 100.0,
606
+ "learning_rate": 0.0,
607
+ "loss": 0.0088,
608
+ "step": 1000
609
+ },
610
+ {
611
+ "epoch": 100.0,
612
+ "eval_accuracy": 0.8317638952687184,
613
+ "eval_f1": 0.9058561897702001,
614
+ "eval_loss": 1.1073323488235474,
615
+ "eval_precision": 0.8954567659990229,
616
+ "eval_recall": 0.9165,
617
+ "eval_runtime": 1.5045,
618
+ "eval_samples_per_second": 35.892,
619
+ "eval_steps_per_second": 0.665,
620
+ "step": 1000
621
+ },
622
+ {
623
+ "epoch": 100.0,
624
+ "step": 1000,
625
+ "total_flos": 4006801297113088.0,
626
+ "train_loss": 0.12190062952041626,
627
+ "train_runtime": 303.1848,
628
+ "train_samples_per_second": 52.773,
629
+ "train_steps_per_second": 3.298
630
+ }
631
+ ],
632
+ "max_steps": 1000,
633
+ "num_train_epochs": 100,
634
+ "total_flos": 4006801297113088.0,
635
+ "trial_name": null,
636
+ "trial_params": null
637
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d2fd64da493d1cc0e186398f838f436ad4f7e1fcff300da892a6ec00409833a
3
+ size 2927
vocab.json ADDED
The diff for this file is too large to render. See raw diff