Theoreticallyhugo commited on
Commit
53e766f
·
verified ·
1 Parent(s): 1447e80

Training in progress, epoch 12, checkpoint

Browse files
checkpoint-492/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-base-4096",
3
+ "architectures": [
4
+ "LongformerForTokenClassification"
5
+ ],
6
+ "attention_mode": "longformer",
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "attention_window": [
9
+ 700,
10
+ 700,
11
+ 700,
12
+ 700,
13
+ 700,
14
+ 700,
15
+ 700,
16
+ 700,
17
+ 700,
18
+ 700,
19
+ 700,
20
+ 700
21
+ ],
22
+ "bos_token_id": 0,
23
+ "eos_token_id": 2,
24
+ "gradient_checkpointing": false,
25
+ "hidden_act": "gelu",
26
+ "hidden_dropout_prob": 0.1,
27
+ "hidden_size": 768,
28
+ "id2label": {
29
+ "0": "O",
30
+ "1": "X_placeholder_X",
31
+ "2": "MajorClaim",
32
+ "3": "Claim",
33
+ "4": "Premise"
34
+ },
35
+ "ignore_attention_mask": false,
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": 3072,
38
+ "label2id": {
39
+ "Claim": 3,
40
+ "MajorClaim": 2,
41
+ "O": 0,
42
+ "Premise": 4,
43
+ "X_placeholder_X": 1
44
+ },
45
+ "layer_norm_eps": 1e-05,
46
+ "max_position_embeddings": 4098,
47
+ "model_type": "longformer",
48
+ "num_attention_heads": 12,
49
+ "num_hidden_layers": 12,
50
+ "onnx_export": false,
51
+ "pad_token_id": 1,
52
+ "sep_token_id": 2,
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.37.2",
55
+ "type_vocab_size": 1,
56
+ "vocab_size": 50265
57
+ }
checkpoint-492/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-492/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e0f3665e357e9850bfc04afc3d109586a5d95f0f76c4fe44df576d2f7da2f4
3
+ size 592324828
checkpoint-492/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecb58cde0bfaec47430ba557738e4a5126a730fb6abce4c1cb3e59c810cb209
3
+ size 1014657786
checkpoint-492/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c189466643703424c77db24cb1c70bb1b76b65752ddeeaa3d7bb424b6ea3dcb
3
+ size 14244
checkpoint-492/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e40ce765a2db50b61afa20f73544a6db3531a7c20a21e8ebd94c0459daeb642
3
+ size 1064
checkpoint-492/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
checkpoint-492/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-492/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 4096,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "LongformerTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
checkpoint-492/trainer_state.json ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "eval_steps": 500,
6
+ "global_step": 492,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_Claim": {
14
+ "f1-score": 0.3472626289341444,
15
+ "precision": 0.3978787878787879,
16
+ "recall": 0.3080713280150164,
17
+ "support": 4262.0
18
+ },
19
+ "eval_MajorClaim": {
20
+ "f1-score": 0.5334239746204397,
21
+ "precision": 0.5235765124555161,
22
+ "recall": 0.54364896073903,
23
+ "support": 2165.0
24
+ },
25
+ "eval_O": {
26
+ "f1-score": 0.8390533194223273,
27
+ "precision": 0.9157377442167086,
28
+ "recall": 0.7742197000405351,
29
+ "support": 9868.0
30
+ },
31
+ "eval_Premise": {
32
+ "f1-score": 0.8562600940945159,
33
+ "precision": 0.7896134170821731,
34
+ "recall": 0.9351944167497508,
35
+ "support": 13039.0
36
+ },
37
+ "eval_accuracy": 0.7610281584509443,
38
+ "eval_loss": 0.6209574341773987,
39
+ "eval_macro avg": {
40
+ "f1-score": 0.6440000042678569,
41
+ "precision": 0.6567016154082965,
42
+ "recall": 0.6402836013860831,
43
+ "support": 29334.0
44
+ },
45
+ "eval_runtime": 1.3904,
46
+ "eval_samples_per_second": 57.539,
47
+ "eval_steps_per_second": 7.192,
48
+ "eval_weighted avg": {
49
+ "f1-score": 0.7526914076678427,
50
+ "precision": 0.7554909643645776,
51
+ "recall": 0.7610281584509443,
52
+ "support": 29334.0
53
+ },
54
+ "step": 41
55
+ },
56
+ {
57
+ "epoch": 2.0,
58
+ "eval_Claim": {
59
+ "f1-score": 0.4196301564722618,
60
+ "precision": 0.5328757225433526,
61
+ "recall": 0.34608165180666356,
62
+ "support": 4262.0
63
+ },
64
+ "eval_MajorClaim": {
65
+ "f1-score": 0.6500777604976672,
66
+ "precision": 0.6262842465753424,
67
+ "recall": 0.6757505773672056,
68
+ "support": 2165.0
69
+ },
70
+ "eval_O": {
71
+ "f1-score": 0.8802794869120867,
72
+ "precision": 0.9066595059076262,
73
+ "recall": 0.8553911633563032,
74
+ "support": 9868.0
75
+ },
76
+ "eval_Premise": {
77
+ "f1-score": 0.8765692621338388,
78
+ "precision": 0.821313672922252,
79
+ "recall": 0.9397959966255081,
80
+ "support": 13039.0
81
+ },
82
+ "eval_accuracy": 0.8056521442694484,
83
+ "eval_loss": 0.5057439804077148,
84
+ "eval_macro avg": {
85
+ "f1-score": 0.7066391665039636,
86
+ "precision": 0.7217832869871433,
87
+ "recall": 0.7042548472889201,
88
+ "support": 29334.0
89
+ },
90
+ "eval_runtime": 1.3936,
91
+ "eval_samples_per_second": 57.406,
92
+ "eval_steps_per_second": 7.176,
93
+ "eval_weighted avg": {
94
+ "f1-score": 0.7947114837449316,
95
+ "precision": 0.7937221895699558,
96
+ "recall": 0.8056521442694484,
97
+ "support": 29334.0
98
+ },
99
+ "step": 82
100
+ },
101
+ {
102
+ "epoch": 3.0,
103
+ "eval_Claim": {
104
+ "f1-score": 0.5597184377838329,
105
+ "precision": 0.542234931808183,
106
+ "recall": 0.5783669638667293,
107
+ "support": 4262.0
108
+ },
109
+ "eval_MajorClaim": {
110
+ "f1-score": 0.7123406094661768,
111
+ "precision": 0.669374492282697,
112
+ "recall": 0.7612009237875289,
113
+ "support": 2165.0
114
+ },
115
+ "eval_O": {
116
+ "f1-score": 0.896587330270019,
117
+ "precision": 0.9139037996000421,
118
+ "recall": 0.8799148763680583,
119
+ "support": 9868.0
120
+ },
121
+ "eval_Premise": {
122
+ "f1-score": 0.879910300030931,
123
+ "precision": 0.8872514619883041,
124
+ "recall": 0.8726896234373802,
125
+ "support": 13039.0
126
+ },
127
+ "eval_accuracy": 0.8241289970682485,
128
+ "eval_loss": 0.47074389457702637,
129
+ "eval_macro avg": {
130
+ "f1-score": 0.76213916938774,
131
+ "precision": 0.7531911714198065,
132
+ "recall": 0.7730430968649242,
133
+ "support": 29334.0
134
+ },
135
+ "eval_runtime": 1.3982,
136
+ "eval_samples_per_second": 57.218,
137
+ "eval_steps_per_second": 7.152,
138
+ "eval_weighted avg": {
139
+ "f1-score": 0.8266316076408545,
140
+ "precision": 0.8300087121591747,
141
+ "recall": 0.8241289970682485,
142
+ "support": 29334.0
143
+ },
144
+ "step": 123
145
+ },
146
+ {
147
+ "epoch": 4.0,
148
+ "eval_Claim": {
149
+ "f1-score": 0.5578872907333177,
150
+ "precision": 0.5606635071090047,
151
+ "recall": 0.5551384326607227,
152
+ "support": 4262.0
153
+ },
154
+ "eval_MajorClaim": {
155
+ "f1-score": 0.7178253548231899,
156
+ "precision": 0.748995983935743,
157
+ "recall": 0.6891454965357968,
158
+ "support": 2165.0
159
+ },
160
+ "eval_O": {
161
+ "f1-score": 0.8866524874202418,
162
+ "precision": 0.9082793070464449,
163
+ "recall": 0.8660316173490069,
164
+ "support": 9868.0
165
+ },
166
+ "eval_Premise": {
167
+ "f1-score": 0.8822517942583731,
168
+ "precision": 0.8605702617953767,
169
+ "recall": 0.9050540685635402,
170
+ "support": 13039.0
171
+ },
172
+ "eval_accuracy": 0.8251517010977023,
173
+ "eval_loss": 0.49949103593826294,
174
+ "eval_macro avg": {
175
+ "f1-score": 0.7611542318087806,
176
+ "precision": 0.7696272649716422,
177
+ "recall": 0.7538424037772666,
178
+ "support": 29334.0
179
+ },
180
+ "eval_runtime": 1.3933,
181
+ "eval_samples_per_second": 57.419,
182
+ "eval_steps_per_second": 7.177,
183
+ "eval_weighted avg": {
184
+ "f1-score": 0.8244690603905188,
185
+ "precision": 0.8248108003682995,
186
+ "recall": 0.8251517010977023,
187
+ "support": 29334.0
188
+ },
189
+ "step": 164
190
+ },
191
+ {
192
+ "epoch": 5.0,
193
+ "eval_Claim": {
194
+ "f1-score": 0.5622098421541318,
195
+ "precision": 0.5562700964630225,
196
+ "recall": 0.5682778038479587,
197
+ "support": 4262.0
198
+ },
199
+ "eval_MajorClaim": {
200
+ "f1-score": 0.7078507078507079,
201
+ "precision": 0.7994186046511628,
202
+ "recall": 0.6351039260969977,
203
+ "support": 2165.0
204
+ },
205
+ "eval_O": {
206
+ "f1-score": 0.8867608581894296,
207
+ "precision": 0.9167929019692708,
208
+ "recall": 0.858633968382651,
209
+ "support": 9868.0
210
+ },
211
+ "eval_Premise": {
212
+ "f1-score": 0.8842074139778985,
213
+ "precision": 0.8533314310172635,
214
+ "recall": 0.9174016412301557,
215
+ "support": 13039.0
216
+ },
217
+ "eval_accuracy": 0.8260721347242108,
218
+ "eval_loss": 0.5355645418167114,
219
+ "eval_macro avg": {
220
+ "f1-score": 0.760257205543042,
221
+ "precision": 0.7814532585251799,
222
+ "recall": 0.7448543348894408,
223
+ "support": 29334.0
224
+ },
225
+ "eval_runtime": 1.3973,
226
+ "eval_samples_per_second": 57.254,
227
+ "eval_steps_per_second": 7.157,
228
+ "eval_weighted avg": {
229
+ "f1-score": 0.8252666444817892,
230
+ "precision": 0.827540237126271,
231
+ "recall": 0.8260721347242108,
232
+ "support": 29334.0
233
+ },
234
+ "step": 205
235
+ },
236
+ {
237
+ "epoch": 6.0,
238
+ "eval_Claim": {
239
+ "f1-score": 0.5778948628906718,
240
+ "precision": 0.5901198337001712,
241
+ "recall": 0.5661661191928672,
242
+ "support": 4262.0
243
+ },
244
+ "eval_MajorClaim": {
245
+ "f1-score": 0.7630429786256032,
246
+ "precision": 0.7593778591033852,
247
+ "recall": 0.766743648960739,
248
+ "support": 2165.0
249
+ },
250
+ "eval_O": {
251
+ "f1-score": 0.8932349450436038,
252
+ "precision": 0.909998948585848,
253
+ "recall": 0.877077421970004,
254
+ "support": 9868.0
255
+ },
256
+ "eval_Premise": {
257
+ "f1-score": 0.8871252867942979,
258
+ "precision": 0.8704605845881311,
259
+ "recall": 0.9044405245801058,
260
+ "support": 13039.0
261
+ },
262
+ "eval_accuracy": 0.8359241835412832,
263
+ "eval_loss": 0.5402312278747559,
264
+ "eval_macro avg": {
265
+ "f1-score": 0.7803245183385442,
266
+ "precision": 0.7824893064943839,
267
+ "recall": 0.778606928675929,
268
+ "support": 29334.0
269
+ },
270
+ "eval_runtime": 1.3946,
271
+ "eval_samples_per_second": 57.364,
272
+ "eval_steps_per_second": 7.17,
273
+ "eval_weighted avg": {
274
+ "f1-score": 0.8350939185438606,
275
+ "precision": 0.8348315600763192,
276
+ "recall": 0.8359241835412832,
277
+ "support": 29334.0
278
+ },
279
+ "step": 246
280
+ },
281
+ {
282
+ "epoch": 7.0,
283
+ "eval_Claim": {
284
+ "f1-score": 0.5928237129485181,
285
+ "precision": 0.5645161290322581,
286
+ "recall": 0.6241201313937119,
287
+ "support": 4262.0
288
+ },
289
+ "eval_MajorClaim": {
290
+ "f1-score": 0.7545109211775878,
291
+ "precision": 0.776257938446507,
292
+ "recall": 0.7339491916859122,
293
+ "support": 2165.0
294
+ },
295
+ "eval_O": {
296
+ "f1-score": 0.8982721603108067,
297
+ "precision": 0.9063338147307612,
298
+ "recall": 0.8903526550466153,
299
+ "support": 9868.0
300
+ },
301
+ "eval_Premise": {
302
+ "f1-score": 0.8843364197530864,
303
+ "precision": 0.8897601117925626,
304
+ "recall": 0.8789784492675818,
305
+ "support": 13039.0
306
+ },
307
+ "eval_accuracy": 0.835071930183405,
308
+ "eval_loss": 0.5522010922431946,
309
+ "eval_macro avg": {
310
+ "f1-score": 0.7824858035474997,
311
+ "precision": 0.7842169985005223,
312
+ "recall": 0.7818501068484554,
313
+ "support": 29334.0
314
+ },
315
+ "eval_runtime": 1.3973,
316
+ "eval_samples_per_second": 57.253,
317
+ "eval_steps_per_second": 7.157,
318
+ "eval_weighted avg": {
319
+ "f1-score": 0.8370881251804593,
320
+ "precision": 0.8397030872059231,
321
+ "recall": 0.835071930183405,
322
+ "support": 29334.0
323
+ },
324
+ "step": 287
325
+ },
326
+ {
327
+ "epoch": 8.0,
328
+ "eval_Claim": {
329
+ "f1-score": 0.5708034520481342,
330
+ "precision": 0.5921815889029004,
331
+ "recall": 0.5509150633505396,
332
+ "support": 4262.0
333
+ },
334
+ "eval_MajorClaim": {
335
+ "f1-score": 0.7608799617407939,
336
+ "precision": 0.7887952404561229,
337
+ "recall": 0.7348729792147806,
338
+ "support": 2165.0
339
+ },
340
+ "eval_O": {
341
+ "f1-score": 0.9017624521072796,
342
+ "precision": 0.909240754094983,
343
+ "recall": 0.8944061613295501,
344
+ "support": 9868.0
345
+ },
346
+ "eval_Premise": {
347
+ "f1-score": 0.888622395442962,
348
+ "precision": 0.868889703187981,
349
+ "recall": 0.909272183449651,
350
+ "support": 13039.0
351
+ },
352
+ "eval_accuracy": 0.8393331969727961,
353
+ "eval_loss": 0.5863537788391113,
354
+ "eval_macro avg": {
355
+ "f1-score": 0.7805170653347924,
356
+ "precision": 0.7897768216604968,
357
+ "recall": 0.7723665968361304,
358
+ "support": 29334.0
359
+ },
360
+ "eval_runtime": 1.4013,
361
+ "eval_samples_per_second": 57.089,
362
+ "eval_steps_per_second": 7.136,
363
+ "eval_weighted avg": {
364
+ "f1-score": 0.8374380828176649,
365
+ "precision": 0.8363489544136171,
366
+ "recall": 0.8393331969727961,
367
+ "support": 29334.0
368
+ },
369
+ "step": 328
370
+ },
371
+ {
372
+ "epoch": 9.0,
373
+ "eval_Claim": {
374
+ "f1-score": 0.5834502103786816,
375
+ "precision": 0.5400439384861194,
376
+ "recall": 0.6344439230408259,
377
+ "support": 4262.0
378
+ },
379
+ "eval_MajorClaim": {
380
+ "f1-score": 0.739652870493992,
381
+ "precision": 0.7136109918419923,
382
+ "recall": 0.7676674364896073,
383
+ "support": 2165.0
384
+ },
385
+ "eval_O": {
386
+ "f1-score": 0.8923944839114083,
387
+ "precision": 0.9208710651142734,
388
+ "recall": 0.8656262667207134,
389
+ "support": 9868.0
390
+ },
391
+ "eval_Premise": {
392
+ "f1-score": 0.8790807810255813,
393
+ "precision": 0.8900330136770948,
394
+ "recall": 0.86839481555334,
395
+ "support": 13039.0
396
+ },
397
+ "eval_accuracy": 0.8260380445898957,
398
+ "eval_loss": 0.6257872581481934,
399
+ "eval_macro avg": {
400
+ "f1-score": 0.7736445864524157,
401
+ "precision": 0.76613975227987,
402
+ "recall": 0.7840331104511216,
403
+ "support": 29334.0
404
+ },
405
+ "eval_runtime": 1.3929,
406
+ "eval_samples_per_second": 57.435,
407
+ "eval_steps_per_second": 7.179,
408
+ "eval_weighted avg": {
409
+ "f1-score": 0.8303162314135053,
410
+ "precision": 0.8365354605252965,
411
+ "recall": 0.8260380445898957,
412
+ "support": 29334.0
413
+ },
414
+ "step": 369
415
+ },
416
+ {
417
+ "epoch": 10.0,
418
+ "eval_Claim": {
419
+ "f1-score": 0.5916413728694839,
420
+ "precision": 0.5887546468401487,
421
+ "recall": 0.5945565462224308,
422
+ "support": 4262.0
423
+ },
424
+ "eval_MajorClaim": {
425
+ "f1-score": 0.747756258856873,
426
+ "precision": 0.765103914934751,
427
+ "recall": 0.7311778290993072,
428
+ "support": 2165.0
429
+ },
430
+ "eval_O": {
431
+ "f1-score": 0.8968070337806571,
432
+ "precision": 0.9102390147166266,
433
+ "recall": 0.8837657073368463,
434
+ "support": 9868.0
435
+ },
436
+ "eval_Premise": {
437
+ "f1-score": 0.8894356334456263,
438
+ "precision": 0.878101644245142,
439
+ "recall": 0.9010660326712171,
440
+ "support": 13039.0
441
+ },
442
+ "eval_accuracy": 0.8381741324060816,
443
+ "eval_loss": 0.643328845500946,
444
+ "eval_macro avg": {
445
+ "f1-score": 0.78141007473816,
446
+ "precision": 0.7855498051841671,
447
+ "recall": 0.7776415288324503,
448
+ "support": 29334.0
449
+ },
450
+ "eval_runtime": 1.3951,
451
+ "eval_samples_per_second": 57.345,
452
+ "eval_steps_per_second": 7.168,
453
+ "eval_weighted avg": {
454
+ "f1-score": 0.8381915478775454,
455
+ "precision": 0.8385330407446147,
456
+ "recall": 0.8381741324060816,
457
+ "support": 29334.0
458
+ },
459
+ "step": 410
460
+ },
461
+ {
462
+ "epoch": 11.0,
463
+ "eval_Claim": {
464
+ "f1-score": 0.5791188895594448,
465
+ "precision": 0.5963211533681332,
466
+ "recall": 0.5628812763960582,
467
+ "support": 4262.0
468
+ },
469
+ "eval_MajorClaim": {
470
+ "f1-score": 0.7536862460720328,
471
+ "precision": 0.7905679513184585,
472
+ "recall": 0.7200923787528868,
473
+ "support": 2165.0
474
+ },
475
+ "eval_O": {
476
+ "f1-score": 0.896530612244898,
477
+ "precision": 0.9027949034114262,
478
+ "recall": 0.8903526550466153,
479
+ "support": 9868.0
480
+ },
481
+ "eval_Premise": {
482
+ "f1-score": 0.888538617428507,
483
+ "precision": 0.8699933857573308,
484
+ "recall": 0.9078917094869239,
485
+ "support": 13039.0
486
+ },
487
+ "eval_accuracy": 0.838003681734506,
488
+ "eval_loss": 0.691639244556427,
489
+ "eval_macro avg": {
490
+ "f1-score": 0.7794685913262207,
491
+ "precision": 0.7899193484638372,
492
+ "recall": 0.770304504920621,
493
+ "support": 29334.0
494
+ },
495
+ "eval_runtime": 1.3951,
496
+ "eval_samples_per_second": 57.345,
497
+ "eval_steps_per_second": 7.168,
498
+ "eval_weighted avg": {
499
+ "f1-score": 0.836318079509486,
500
+ "precision": 0.8354034306270279,
501
+ "recall": 0.838003681734506,
502
+ "support": 29334.0
503
+ },
504
+ "step": 451
505
+ },
506
+ {
507
+ "epoch": 12.0,
508
+ "eval_Claim": {
509
+ "f1-score": 0.5808454740864581,
510
+ "precision": 0.5914396887159533,
511
+ "recall": 0.5706241201313937,
512
+ "support": 4262.0
513
+ },
514
+ "eval_MajorClaim": {
515
+ "f1-score": 0.7569141193595342,
516
+ "precision": 0.797138477261114,
517
+ "recall": 0.7205542725173211,
518
+ "support": 2165.0
519
+ },
520
+ "eval_O": {
521
+ "f1-score": 0.8973055414336554,
522
+ "precision": 0.9003264639869415,
523
+ "recall": 0.8943048236724767,
524
+ "support": 9868.0
525
+ },
526
+ "eval_Premise": {
527
+ "f1-score": 0.8841596860614294,
528
+ "precision": 0.870236945703038,
529
+ "recall": 0.8985351637395506,
530
+ "support": 13039.0
531
+ },
532
+ "eval_accuracy": 0.8363332651530647,
533
+ "eval_loss": 0.6996743679046631,
534
+ "eval_macro avg": {
535
+ "f1-score": 0.7798062052352693,
536
+ "precision": 0.7897853939167616,
537
+ "recall": 0.7710045950151856,
538
+ "support": 29334.0
539
+ },
540
+ "eval_runtime": 1.3919,
541
+ "eval_samples_per_second": 57.477,
542
+ "eval_steps_per_second": 7.185,
543
+ "eval_weighted avg": {
544
+ "f1-score": 0.8351214191174802,
545
+ "precision": 0.8344570068256206,
546
+ "recall": 0.8363332651530647,
547
+ "support": 29334.0
548
+ },
549
+ "step": 492
550
+ }
551
+ ],
552
+ "logging_steps": 500,
553
+ "max_steps": 656,
554
+ "num_input_tokens_seen": 0,
555
+ "num_train_epochs": 16,
556
+ "save_steps": 500,
557
+ "total_flos": 1725464792721600.0,
558
+ "train_batch_size": 8,
559
+ "trial_name": null,
560
+ "trial_params": null
561
+ }
checkpoint-492/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7094583e4fe5a2c55a5632947c89b5062ef3102a7d8117131675dc677677192b
3
+ size 4664
checkpoint-492/vocab.json ADDED
The diff for this file is too large to render. See raw diff