Chantland commited on
Commit
48e9b11
1 Parent(s): 01da8c7

Upload 11 files

Browse files
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "HierarchicalMultiLabelClassifier"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "EVENT",
13
+ "1": "EVENT_Illness",
14
+ "2": "EVENT_Accident",
15
+ "3": "EVENT_Other",
16
+ "4": "CAUSE",
17
+ "5": "CAUSE_Just_Happens",
18
+ "6": "CAUSE_Material_Physical",
19
+ "7": "CAUSE_Spirits_Gods",
20
+ "8": "CAUSE_Witchcraft_Sorcery",
21
+ "9": "CAUSE_Rule_Violation_Taboo",
22
+ "10": "CAUSE_Other",
23
+ "11": "ACTION",
24
+ "12": "ACTION_Physical_Material",
25
+ "13": "ACTION_Technical_Specialist",
26
+ "14": "ACTION_Divination",
27
+ "15": "ACTION_Shaman_Medium_Healer",
28
+ "16": "ACTION_Priest_High_Religion",
29
+ "17": "ACTION_Other"
30
+ },
31
+ "initializer_range": 0.02,
32
+ "label2id": {
33
+ "ACTION": 11,
34
+ "ACTION_Divination": 14,
35
+ "ACTION_Other": 17,
36
+ "ACTION_Physical_Material": 12,
37
+ "ACTION_Priest_High_Religion": 16,
38
+ "ACTION_Shaman_Medium_Healer": 15,
39
+ "ACTION_Technical_Specialist": 13,
40
+ "CAUSE": 4,
41
+ "CAUSE_Just_Happens": 5,
42
+ "CAUSE_Material_Physical": 6,
43
+ "CAUSE_Other": 10,
44
+ "CAUSE_Rule_Violation_Taboo": 9,
45
+ "CAUSE_Spirits_Gods": 7,
46
+ "CAUSE_Witchcraft_Sorcery": 8,
47
+ "EVENT": 0,
48
+ "EVENT_Accident": 2,
49
+ "EVENT_Illness": 1,
50
+ "EVENT_Other": 3
51
+ },
52
+ "max_position_embeddings": 512,
53
+ "model_type": "distilbert",
54
+ "n_heads": 12,
55
+ "n_layers": 6,
56
+ "pad_token_id": 0,
57
+ "qa_dropout": 0.1,
58
+ "seq_classif_dropout": 0.2,
59
+ "sinusoidal_pos_embds": false,
60
+ "tie_weights_": true,
61
+ "torch_dtype": "float32",
62
+ "transformers_version": "4.42.4",
63
+ "vocab_size": 30522
64
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2418e84da1103ed5fa5ec08c1b4f3cf110d222e7f74f1bbf1fba59d6021a664
3
+ size 265938696
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd5d633e79e518d0459d8bee314a49fd9b2566d41b2e4b3e4482fee0a7d5973e
3
+ size 531945530
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5a8f2106c34eeb573aad7cb7f7a0eb352cc85dc08adef1137eba3bdb370c4a
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b3b185fea1205fbfcb56f550d303bc0b6cb7d325de973a2d3dc5d3e34c96cf
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
trainer_state.json ADDED
@@ -0,0 +1,1211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6518611919550361,
3
+ "best_model_checkpoint": "drive/MyDrive/NLP_HRAF//Models/HRAF_MultiLabel_Hierarchical/Model_1_BaseTest/Hierarchy_test_fold_1/checkpoint-10790",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 10790,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12048192771084337,
13
+ "grad_norm": 4.097501277923584,
14
+ "learning_rate": 1.9839357429718877e-05,
15
+ "loss": 2.1939,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.24096385542168675,
20
+ "grad_norm": 3.734555721282959,
21
+ "learning_rate": 1.967871485943775e-05,
22
+ "loss": 1.7255,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.3614457831325301,
27
+ "grad_norm": 3.1774344444274902,
28
+ "learning_rate": 1.951807228915663e-05,
29
+ "loss": 1.5907,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.4819277108433735,
34
+ "grad_norm": 5.335158824920654,
35
+ "learning_rate": 1.9357429718875505e-05,
36
+ "loss": 1.5148,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.6024096385542169,
41
+ "grad_norm": 10.060626983642578,
42
+ "learning_rate": 1.9196787148594377e-05,
43
+ "loss": 1.4449,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.7228915662650602,
48
+ "grad_norm": 6.7067084312438965,
49
+ "learning_rate": 1.9036144578313255e-05,
50
+ "loss": 1.3943,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8433734939759037,
55
+ "grad_norm": 5.713496685028076,
56
+ "learning_rate": 1.887550200803213e-05,
57
+ "loss": 1.3414,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.963855421686747,
62
+ "grad_norm": 5.983292102813721,
63
+ "learning_rate": 1.8714859437751005e-05,
64
+ "loss": 1.3267,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "eval_action_metrics": {
70
+ "accuracy": 0.568414707655214,
71
+ "f1": 0.2052586938083121,
72
+ "loss": 0.28379881381988525,
73
+ "roc_auc": 0.557318161535029
74
+ },
75
+ "eval_cause_metrics": {
76
+ "accuracy": 0.5563592525617842,
77
+ "f1": 0.12061206120612061,
78
+ "loss": 0.4551348090171814,
79
+ "roc_auc": 0.5310209707194632
80
+ },
81
+ "eval_event_metrics": {
82
+ "accuracy": 0.6865581675708258,
83
+ "f1": 0.6954503249767873,
84
+ "loss": 0.4480176568031311,
85
+ "roc_auc": 0.7856260330258503
86
+ },
87
+ "eval_f1": 0.4580407659608961,
88
+ "eval_loss": 1.2922335863113403,
89
+ "eval_main_metrics": {
90
+ "accuracy": 0.5792646172393008,
91
+ "f1": 0.8108419838523645,
92
+ "loss": 1.066821813583374,
93
+ "roc_auc": 0.8017363757036454
94
+ },
95
+ "eval_runtime": 19.3038,
96
+ "eval_samples_per_second": 85.942,
97
+ "eval_steps_per_second": 10.775,
98
+ "step": 830
99
+ },
100
+ {
101
+ "epoch": 1.0843373493975903,
102
+ "grad_norm": 2.3373818397521973,
103
+ "learning_rate": 1.855421686746988e-05,
104
+ "loss": 1.2549,
105
+ "step": 900
106
+ },
107
+ {
108
+ "epoch": 1.2048192771084336,
109
+ "grad_norm": 4.526793956756592,
110
+ "learning_rate": 1.8393574297188755e-05,
111
+ "loss": 1.184,
112
+ "step": 1000
113
+ },
114
+ {
115
+ "epoch": 1.3253012048192772,
116
+ "grad_norm": 4.537077903747559,
117
+ "learning_rate": 1.8232931726907634e-05,
118
+ "loss": 1.16,
119
+ "step": 1100
120
+ },
121
+ {
122
+ "epoch": 1.4457831325301205,
123
+ "grad_norm": 9.092710494995117,
124
+ "learning_rate": 1.807228915662651e-05,
125
+ "loss": 1.1414,
126
+ "step": 1200
127
+ },
128
+ {
129
+ "epoch": 1.5662650602409638,
130
+ "grad_norm": 11.58354377746582,
131
+ "learning_rate": 1.7911646586345384e-05,
132
+ "loss": 1.1569,
133
+ "step": 1300
134
+ },
135
+ {
136
+ "epoch": 1.6867469879518073,
137
+ "grad_norm": 8.659878730773926,
138
+ "learning_rate": 1.775100401606426e-05,
139
+ "loss": 1.0936,
140
+ "step": 1400
141
+ },
142
+ {
143
+ "epoch": 1.8072289156626506,
144
+ "grad_norm": 4.855623245239258,
145
+ "learning_rate": 1.7590361445783134e-05,
146
+ "loss": 1.1401,
147
+ "step": 1500
148
+ },
149
+ {
150
+ "epoch": 1.927710843373494,
151
+ "grad_norm": 5.588471412658691,
152
+ "learning_rate": 1.742971887550201e-05,
153
+ "loss": 1.0588,
154
+ "step": 1600
155
+ },
156
+ {
157
+ "epoch": 2.0,
158
+ "eval_action_metrics": {
159
+ "accuracy": 0.6118143459915611,
160
+ "f1": 0.37830319888734354,
161
+ "loss": 0.3113616406917572,
162
+ "roc_auc": 0.6275566914121131
163
+ },
164
+ "eval_cause_metrics": {
165
+ "accuracy": 0.5750452079566004,
166
+ "f1": 0.20998278829604128,
167
+ "loss": 0.46545448899269104,
168
+ "roc_auc": 0.5587718371637969
169
+ },
170
+ "eval_event_metrics": {
171
+ "accuracy": 0.6913803496081977,
172
+ "f1": 0.7058823529411764,
173
+ "loss": 0.5396388173103333,
174
+ "roc_auc": 0.7942354115176712
175
+ },
176
+ "eval_f1": 0.5340634299499163,
177
+ "eval_loss": 1.181040644645691,
178
+ "eval_main_metrics": {
179
+ "accuracy": 0.6377335744424352,
180
+ "f1": 0.8420853796751038,
181
+ "loss": 1.2089903354644775,
182
+ "roc_auc": 0.8308065869363559
183
+ },
184
+ "eval_runtime": 19.4619,
185
+ "eval_samples_per_second": 85.243,
186
+ "eval_steps_per_second": 10.688,
187
+ "step": 1660
188
+ },
189
+ {
190
+ "epoch": 2.0481927710843375,
191
+ "grad_norm": 4.3426384925842285,
192
+ "learning_rate": 1.7269076305220884e-05,
193
+ "loss": 1.0881,
194
+ "step": 1700
195
+ },
196
+ {
197
+ "epoch": 2.1686746987951806,
198
+ "grad_norm": 15.190299987792969,
199
+ "learning_rate": 1.710843373493976e-05,
200
+ "loss": 0.9603,
201
+ "step": 1800
202
+ },
203
+ {
204
+ "epoch": 2.289156626506024,
205
+ "grad_norm": 10.794776916503906,
206
+ "learning_rate": 1.6947791164658637e-05,
207
+ "loss": 0.9664,
208
+ "step": 1900
209
+ },
210
+ {
211
+ "epoch": 2.4096385542168672,
212
+ "grad_norm": 10.562026977539062,
213
+ "learning_rate": 1.6787148594377512e-05,
214
+ "loss": 0.9303,
215
+ "step": 2000
216
+ },
217
+ {
218
+ "epoch": 2.5301204819277108,
219
+ "grad_norm": 4.81545877456665,
220
+ "learning_rate": 1.6626506024096387e-05,
221
+ "loss": 0.9407,
222
+ "step": 2100
223
+ },
224
+ {
225
+ "epoch": 2.6506024096385543,
226
+ "grad_norm": 4.499901294708252,
227
+ "learning_rate": 1.6465863453815262e-05,
228
+ "loss": 0.9614,
229
+ "step": 2200
230
+ },
231
+ {
232
+ "epoch": 2.7710843373493974,
233
+ "grad_norm": 4.810569763183594,
234
+ "learning_rate": 1.6305220883534137e-05,
235
+ "loss": 0.9482,
236
+ "step": 2300
237
+ },
238
+ {
239
+ "epoch": 2.891566265060241,
240
+ "grad_norm": 8.373404502868652,
241
+ "learning_rate": 1.6144578313253015e-05,
242
+ "loss": 0.8748,
243
+ "step": 2400
244
+ },
245
+ {
246
+ "epoch": 3.0,
247
+ "eval_action_metrics": {
248
+ "accuracy": 0.621458710066305,
249
+ "f1": 0.4234990316333118,
250
+ "loss": 0.31134167313575745,
251
+ "roc_auc": 0.6527716928319338
252
+ },
253
+ "eval_cause_metrics": {
254
+ "accuracy": 0.5822784810126582,
255
+ "f1": 0.2585924713584288,
256
+ "loss": 0.489061564207077,
257
+ "roc_auc": 0.5754498216809775
258
+ },
259
+ "eval_event_metrics": {
260
+ "accuracy": 0.6937914406268837,
261
+ "f1": 0.7111111111111111,
262
+ "loss": 0.6078017354011536,
263
+ "roc_auc": 0.798155101571195
264
+ },
265
+ "eval_f1": 0.5597986447252539,
266
+ "eval_loss": 1.1940369606018066,
267
+ "eval_main_metrics": {
268
+ "accuracy": 0.6497890295358649,
269
+ "f1": 0.8459919647981634,
270
+ "loss": 1.356093406677246,
271
+ "roc_auc": 0.8375618095347757
272
+ },
273
+ "eval_runtime": 19.4292,
274
+ "eval_samples_per_second": 85.387,
275
+ "eval_steps_per_second": 10.706,
276
+ "step": 2490
277
+ },
278
+ {
279
+ "epoch": 3.0120481927710845,
280
+ "grad_norm": 7.537188529968262,
281
+ "learning_rate": 1.5983935742971887e-05,
282
+ "loss": 0.9201,
283
+ "step": 2500
284
+ },
285
+ {
286
+ "epoch": 3.1325301204819276,
287
+ "grad_norm": 6.753769397735596,
288
+ "learning_rate": 1.5823293172690762e-05,
289
+ "loss": 0.7751,
290
+ "step": 2600
291
+ },
292
+ {
293
+ "epoch": 3.253012048192771,
294
+ "grad_norm": 6.366585731506348,
295
+ "learning_rate": 1.566265060240964e-05,
296
+ "loss": 0.7857,
297
+ "step": 2700
298
+ },
299
+ {
300
+ "epoch": 3.3734939759036147,
301
+ "grad_norm": 8.141886711120605,
302
+ "learning_rate": 1.5502008032128516e-05,
303
+ "loss": 0.7844,
304
+ "step": 2800
305
+ },
306
+ {
307
+ "epoch": 3.4939759036144578,
308
+ "grad_norm": 12.003459930419922,
309
+ "learning_rate": 1.534136546184739e-05,
310
+ "loss": 0.7853,
311
+ "step": 2900
312
+ },
313
+ {
314
+ "epoch": 3.6144578313253013,
315
+ "grad_norm": 4.899393558502197,
316
+ "learning_rate": 1.5180722891566266e-05,
317
+ "loss": 0.7652,
318
+ "step": 3000
319
+ },
320
+ {
321
+ "epoch": 3.734939759036145,
322
+ "grad_norm": 7.926997661590576,
323
+ "learning_rate": 1.5020080321285142e-05,
324
+ "loss": 0.7908,
325
+ "step": 3100
326
+ },
327
+ {
328
+ "epoch": 3.855421686746988,
329
+ "grad_norm": 3.9363718032836914,
330
+ "learning_rate": 1.4859437751004017e-05,
331
+ "loss": 0.8142,
332
+ "step": 3200
333
+ },
334
+ {
335
+ "epoch": 3.9759036144578315,
336
+ "grad_norm": 4.785311222076416,
337
+ "learning_rate": 1.4698795180722894e-05,
338
+ "loss": 0.75,
339
+ "step": 3300
340
+ },
341
+ {
342
+ "epoch": 4.0,
343
+ "eval_action_metrics": {
344
+ "accuracy": 0.6262808921036769,
345
+ "f1": 0.47516457211250746,
346
+ "loss": 0.3067108690738678,
347
+ "roc_auc": 0.6846639081578841
348
+ },
349
+ "eval_cause_metrics": {
350
+ "accuracy": 0.6100060277275468,
351
+ "f1": 0.38606271777003487,
352
+ "loss": 0.5579449534416199,
353
+ "roc_auc": 0.6297609166955901
354
+ },
355
+ "eval_event_metrics": {
356
+ "accuracy": 0.6955997588908981,
357
+ "f1": 0.7214808437365476,
358
+ "loss": 0.6555235981941223,
359
+ "roc_auc": 0.8122881121542476
360
+ },
361
+ "eval_f1": 0.6080738826170755,
362
+ "eval_loss": 1.2421071529388428,
363
+ "eval_main_metrics": {
364
+ "accuracy": 0.6594333936106088,
365
+ "f1": 0.8495873968492123,
366
+ "loss": 1.4700279235839844,
367
+ "roc_auc": 0.8373546457094286
368
+ },
369
+ "eval_runtime": 19.3706,
370
+ "eval_samples_per_second": 85.645,
371
+ "eval_steps_per_second": 10.738,
372
+ "step": 3320
373
+ },
374
+ {
375
+ "epoch": 4.096385542168675,
376
+ "grad_norm": 2.819312334060669,
377
+ "learning_rate": 1.4538152610441769e-05,
378
+ "loss": 0.6905,
379
+ "step": 3400
380
+ },
381
+ {
382
+ "epoch": 4.216867469879518,
383
+ "grad_norm": 15.132315635681152,
384
+ "learning_rate": 1.4377510040160642e-05,
385
+ "loss": 0.66,
386
+ "step": 3500
387
+ },
388
+ {
389
+ "epoch": 4.337349397590361,
390
+ "grad_norm": 13.252199172973633,
391
+ "learning_rate": 1.4216867469879519e-05,
392
+ "loss": 0.6854,
393
+ "step": 3600
394
+ },
395
+ {
396
+ "epoch": 4.457831325301205,
397
+ "grad_norm": 3.5887160301208496,
398
+ "learning_rate": 1.4056224899598394e-05,
399
+ "loss": 0.6423,
400
+ "step": 3700
401
+ },
402
+ {
403
+ "epoch": 4.578313253012048,
404
+ "grad_norm": 3.583491563796997,
405
+ "learning_rate": 1.3895582329317269e-05,
406
+ "loss": 0.6372,
407
+ "step": 3800
408
+ },
409
+ {
410
+ "epoch": 4.698795180722891,
411
+ "grad_norm": 9.358288764953613,
412
+ "learning_rate": 1.3734939759036146e-05,
413
+ "loss": 0.6457,
414
+ "step": 3900
415
+ },
416
+ {
417
+ "epoch": 4.8192771084337345,
418
+ "grad_norm": 6.61803674697876,
419
+ "learning_rate": 1.357429718875502e-05,
420
+ "loss": 0.6402,
421
+ "step": 4000
422
+ },
423
+ {
424
+ "epoch": 4.9397590361445785,
425
+ "grad_norm": 12.123319625854492,
426
+ "learning_rate": 1.3413654618473897e-05,
427
+ "loss": 0.6447,
428
+ "step": 4100
429
+ },
430
+ {
431
+ "epoch": 5.0,
432
+ "eval_action_metrics": {
433
+ "accuracy": 0.6274864376130199,
434
+ "f1": 0.47335025380710655,
435
+ "loss": 0.4026564955711365,
436
+ "roc_auc": 0.6765029816234635
437
+ },
438
+ "eval_cause_metrics": {
439
+ "accuracy": 0.6244725738396625,
440
+ "f1": 0.42625169147496617,
441
+ "loss": 0.5328236222267151,
442
+ "roc_auc": 0.6485007620686012
443
+ },
444
+ "eval_event_metrics": {
445
+ "accuracy": 0.6949969861362266,
446
+ "f1": 0.7206896551724137,
447
+ "loss": 0.7433874011039734,
448
+ "roc_auc": 0.8115834972530086
449
+ },
450
+ "eval_f1": 0.6173571829119144,
451
+ "eval_loss": 1.272179365158081,
452
+ "eval_main_metrics": {
453
+ "accuracy": 0.650994575045208,
454
+ "f1": 0.8491371311931712,
455
+ "loss": 1.7594246864318848,
456
+ "roc_auc": 0.8347010566163695
457
+ },
458
+ "eval_runtime": 19.3728,
459
+ "eval_samples_per_second": 85.635,
460
+ "eval_steps_per_second": 10.737,
461
+ "step": 4150
462
+ },
463
+ {
464
+ "epoch": 5.0602409638554215,
465
+ "grad_norm": 4.934209823608398,
466
+ "learning_rate": 1.3253012048192772e-05,
467
+ "loss": 0.5871,
468
+ "step": 4200
469
+ },
470
+ {
471
+ "epoch": 5.180722891566265,
472
+ "grad_norm": 3.2406387329101562,
473
+ "learning_rate": 1.309236947791165e-05,
474
+ "loss": 0.5427,
475
+ "step": 4300
476
+ },
477
+ {
478
+ "epoch": 5.301204819277109,
479
+ "grad_norm": 5.232382774353027,
480
+ "learning_rate": 1.2931726907630524e-05,
481
+ "loss": 0.5588,
482
+ "step": 4400
483
+ },
484
+ {
485
+ "epoch": 5.421686746987952,
486
+ "grad_norm": 14.17167854309082,
487
+ "learning_rate": 1.2771084337349398e-05,
488
+ "loss": 0.5816,
489
+ "step": 4500
490
+ },
491
+ {
492
+ "epoch": 5.542168674698795,
493
+ "grad_norm": 15.979377746582031,
494
+ "learning_rate": 1.2610441767068273e-05,
495
+ "loss": 0.5946,
496
+ "step": 4600
497
+ },
498
+ {
499
+ "epoch": 5.662650602409639,
500
+ "grad_norm": 7.11401891708374,
501
+ "learning_rate": 1.244979919678715e-05,
502
+ "loss": 0.5395,
503
+ "step": 4700
504
+ },
505
+ {
506
+ "epoch": 5.783132530120482,
507
+ "grad_norm": 2.767531156539917,
508
+ "learning_rate": 1.2289156626506024e-05,
509
+ "loss": 0.5364,
510
+ "step": 4800
511
+ },
512
+ {
513
+ "epoch": 5.903614457831325,
514
+ "grad_norm": 4.462075233459473,
515
+ "learning_rate": 1.2128514056224901e-05,
516
+ "loss": 0.5586,
517
+ "step": 4900
518
+ },
519
+ {
520
+ "epoch": 6.0,
521
+ "eval_action_metrics": {
522
+ "accuracy": 0.6359252561784208,
523
+ "f1": 0.4963144963144963,
524
+ "loss": 0.3993087410926819,
525
+ "roc_auc": 0.6909881952050626
526
+ },
527
+ "eval_cause_metrics": {
528
+ "accuracy": 0.6172393007836046,
529
+ "f1": 0.4432786885245902,
530
+ "loss": 0.5477549433708191,
531
+ "roc_auc": 0.6586722400792753
532
+ },
533
+ "eval_event_metrics": {
534
+ "accuracy": 0.6895720313441832,
535
+ "f1": 0.7254658385093167,
536
+ "loss": 0.775439441204071,
537
+ "roc_auc": 0.8210486138922756
538
+ },
539
+ "eval_f1": 0.628022957090326,
540
+ "eval_loss": 1.3330581188201904,
541
+ "eval_main_metrics": {
542
+ "accuracy": 0.6377335744424352,
543
+ "f1": 0.8470328050129008,
544
+ "loss": 1.7559667825698853,
545
+ "roc_auc": 0.8309945868949555
546
+ },
547
+ "eval_runtime": 19.4268,
548
+ "eval_samples_per_second": 85.398,
549
+ "eval_steps_per_second": 10.707,
550
+ "step": 4980
551
+ },
552
+ {
553
+ "epoch": 6.024096385542169,
554
+ "grad_norm": 4.994393825531006,
555
+ "learning_rate": 1.1967871485943776e-05,
556
+ "loss": 0.5326,
557
+ "step": 5000
558
+ },
559
+ {
560
+ "epoch": 6.144578313253012,
561
+ "grad_norm": 2.5982210636138916,
562
+ "learning_rate": 1.1807228915662651e-05,
563
+ "loss": 0.4935,
564
+ "step": 5100
565
+ },
566
+ {
567
+ "epoch": 6.265060240963855,
568
+ "grad_norm": 9.731000900268555,
569
+ "learning_rate": 1.1646586345381528e-05,
570
+ "loss": 0.4444,
571
+ "step": 5200
572
+ },
573
+ {
574
+ "epoch": 6.385542168674699,
575
+ "grad_norm": 3.476001501083374,
576
+ "learning_rate": 1.1485943775100403e-05,
577
+ "loss": 0.4772,
578
+ "step": 5300
579
+ },
580
+ {
581
+ "epoch": 6.506024096385542,
582
+ "grad_norm": 9.325688362121582,
583
+ "learning_rate": 1.132530120481928e-05,
584
+ "loss": 0.4758,
585
+ "step": 5400
586
+ },
587
+ {
588
+ "epoch": 6.626506024096385,
589
+ "grad_norm": 10.23697566986084,
590
+ "learning_rate": 1.1164658634538153e-05,
591
+ "loss": 0.467,
592
+ "step": 5500
593
+ },
594
+ {
595
+ "epoch": 6.746987951807229,
596
+ "grad_norm": 6.919719219207764,
597
+ "learning_rate": 1.1004016064257028e-05,
598
+ "loss": 0.4596,
599
+ "step": 5600
600
+ },
601
+ {
602
+ "epoch": 6.867469879518072,
603
+ "grad_norm": 3.161492347717285,
604
+ "learning_rate": 1.0843373493975904e-05,
605
+ "loss": 0.4901,
606
+ "step": 5700
607
+ },
608
+ {
609
+ "epoch": 6.9879518072289155,
610
+ "grad_norm": 5.633391857147217,
611
+ "learning_rate": 1.068273092369478e-05,
612
+ "loss": 0.5053,
613
+ "step": 5800
614
+ },
615
+ {
616
+ "epoch": 7.0,
617
+ "eval_action_metrics": {
618
+ "accuracy": 0.6347197106690777,
619
+ "f1": 0.4936014625228519,
620
+ "loss": 0.44190824031829834,
621
+ "roc_auc": 0.6908239016672751
622
+ },
623
+ "eval_cause_metrics": {
624
+ "accuracy": 0.6208559373116335,
625
+ "f1": 0.4570337364735837,
626
+ "loss": 0.5815909504890442,
627
+ "roc_auc": 0.6677868823597467
628
+ },
629
+ "eval_event_metrics": {
630
+ "accuracy": 0.6714888487040386,
631
+ "f1": 0.7207713941341904,
632
+ "loss": 0.7780100703239441,
633
+ "roc_auc": 0.8228283501014495
634
+ },
635
+ "eval_f1": 0.6299279587105135,
636
+ "eval_loss": 1.403409481048584,
637
+ "eval_main_metrics": {
638
+ "accuracy": 0.6443640747438216,
639
+ "f1": 0.8483052417114281,
640
+ "loss": 2.0327484607696533,
641
+ "roc_auc": 0.8334165115229015
642
+ },
643
+ "eval_runtime": 19.4509,
644
+ "eval_samples_per_second": 85.292,
645
+ "eval_steps_per_second": 10.694,
646
+ "step": 5810
647
+ },
648
+ {
649
+ "epoch": 7.108433734939759,
650
+ "grad_norm": 2.695713996887207,
651
+ "learning_rate": 1.0522088353413654e-05,
652
+ "loss": 0.4098,
653
+ "step": 5900
654
+ },
655
+ {
656
+ "epoch": 7.228915662650603,
657
+ "grad_norm": 3.300992727279663,
658
+ "learning_rate": 1.0361445783132531e-05,
659
+ "loss": 0.4406,
660
+ "step": 6000
661
+ },
662
+ {
663
+ "epoch": 7.349397590361446,
664
+ "grad_norm": 9.012105941772461,
665
+ "learning_rate": 1.0200803212851406e-05,
666
+ "loss": 0.3848,
667
+ "step": 6100
668
+ },
669
+ {
670
+ "epoch": 7.469879518072289,
671
+ "grad_norm": 1.8880501985549927,
672
+ "learning_rate": 1.0040160642570283e-05,
673
+ "loss": 0.4086,
674
+ "step": 6200
675
+ },
676
+ {
677
+ "epoch": 7.590361445783133,
678
+ "grad_norm": 6.892731666564941,
679
+ "learning_rate": 9.879518072289156e-06,
680
+ "loss": 0.4284,
681
+ "step": 6300
682
+ },
683
+ {
684
+ "epoch": 7.710843373493976,
685
+ "grad_norm": 7.901873588562012,
686
+ "learning_rate": 9.718875502008033e-06,
687
+ "loss": 0.45,
688
+ "step": 6400
689
+ },
690
+ {
691
+ "epoch": 7.831325301204819,
692
+ "grad_norm": 4.076874256134033,
693
+ "learning_rate": 9.558232931726908e-06,
694
+ "loss": 0.4319,
695
+ "step": 6500
696
+ },
697
+ {
698
+ "epoch": 7.951807228915663,
699
+ "grad_norm": 4.073435306549072,
700
+ "learning_rate": 9.397590361445785e-06,
701
+ "loss": 0.42,
702
+ "step": 6600
703
+ },
704
+ {
705
+ "epoch": 8.0,
706
+ "eval_action_metrics": {
707
+ "accuracy": 0.6395418927064497,
708
+ "f1": 0.5042735042735043,
709
+ "loss": 0.45456641912460327,
710
+ "roc_auc": 0.6954778710802808
711
+ },
712
+ "eval_cause_metrics": {
713
+ "accuracy": 0.6298975286317059,
714
+ "f1": 0.4623179227359088,
715
+ "loss": 0.5961005687713623,
716
+ "roc_auc": 0.6706782159043465
717
+ },
718
+ "eval_event_metrics": {
719
+ "accuracy": 0.6853526220614828,
720
+ "f1": 0.7223140495867769,
721
+ "loss": 0.8032154440879822,
722
+ "roc_auc": 0.8192863575314914
723
+ },
724
+ "eval_f1": 0.6344324161815224,
725
+ "eval_loss": 1.4469618797302246,
726
+ "eval_main_metrics": {
727
+ "accuracy": 0.646172393007836,
728
+ "f1": 0.8488241881298993,
729
+ "loss": 2.039750337600708,
730
+ "roc_auc": 0.835544105032868
731
+ },
732
+ "eval_runtime": 19.422,
733
+ "eval_samples_per_second": 85.419,
734
+ "eval_steps_per_second": 10.71,
735
+ "step": 6640
736
+ },
737
+ {
738
+ "epoch": 8.072289156626505,
739
+ "grad_norm": 2.453636407852173,
740
+ "learning_rate": 9.23694779116466e-06,
741
+ "loss": 0.3554,
742
+ "step": 6700
743
+ },
744
+ {
745
+ "epoch": 8.19277108433735,
746
+ "grad_norm": 3.2704885005950928,
747
+ "learning_rate": 9.076305220883535e-06,
748
+ "loss": 0.3916,
749
+ "step": 6800
750
+ },
751
+ {
752
+ "epoch": 8.313253012048193,
753
+ "grad_norm": 5.025237083435059,
754
+ "learning_rate": 8.91566265060241e-06,
755
+ "loss": 0.3619,
756
+ "step": 6900
757
+ },
758
+ {
759
+ "epoch": 8.433734939759036,
760
+ "grad_norm": 7.339418888092041,
761
+ "learning_rate": 8.755020080321286e-06,
762
+ "loss": 0.345,
763
+ "step": 7000
764
+ },
765
+ {
766
+ "epoch": 8.55421686746988,
767
+ "grad_norm": 11.077627182006836,
768
+ "learning_rate": 8.594377510040161e-06,
769
+ "loss": 0.3704,
770
+ "step": 7100
771
+ },
772
+ {
773
+ "epoch": 8.674698795180722,
774
+ "grad_norm": 4.094336986541748,
775
+ "learning_rate": 8.433734939759038e-06,
776
+ "loss": 0.4081,
777
+ "step": 7200
778
+ },
779
+ {
780
+ "epoch": 8.795180722891565,
781
+ "grad_norm": 2.9691665172576904,
782
+ "learning_rate": 8.273092369477911e-06,
783
+ "loss": 0.3692,
784
+ "step": 7300
785
+ },
786
+ {
787
+ "epoch": 8.91566265060241,
788
+ "grad_norm": 2.072866916656494,
789
+ "learning_rate": 8.112449799196788e-06,
790
+ "loss": 0.3552,
791
+ "step": 7400
792
+ },
793
+ {
794
+ "epoch": 9.0,
795
+ "eval_action_metrics": {
796
+ "accuracy": 0.6305003013863774,
797
+ "f1": 0.509478672985782,
798
+ "loss": 0.4554847180843353,
799
+ "roc_auc": 0.7022230335483347
800
+ },
801
+ "eval_cause_metrics": {
802
+ "accuracy": 0.6106088004822182,
803
+ "f1": 0.43440594059405946,
804
+ "loss": 0.6525366902351379,
805
+ "roc_auc": 0.6608236377080599
806
+ },
807
+ "eval_event_metrics": {
808
+ "accuracy": 0.6799276672694394,
809
+ "f1": 0.724025974025974,
810
+ "loss": 0.9302711486816406,
811
+ "roc_auc": 0.8233804035411074
812
+ },
813
+ "eval_f1": 0.6287379744042451,
814
+ "eval_loss": 1.5192142724990845,
815
+ "eval_main_metrics": {
816
+ "accuracy": 0.6449668474984931,
817
+ "f1": 0.8470413100111649,
818
+ "loss": 2.102555513381958,
819
+ "roc_auc": 0.8330059034412807
820
+ },
821
+ "eval_runtime": 19.4322,
822
+ "eval_samples_per_second": 85.374,
823
+ "eval_steps_per_second": 10.704,
824
+ "step": 7470
825
+ },
826
+ {
827
+ "epoch": 9.036144578313253,
828
+ "grad_norm": 6.187836170196533,
829
+ "learning_rate": 7.951807228915663e-06,
830
+ "loss": 0.3559,
831
+ "step": 7500
832
+ },
833
+ {
834
+ "epoch": 9.156626506024097,
835
+ "grad_norm": 9.382346153259277,
836
+ "learning_rate": 7.79116465863454e-06,
837
+ "loss": 0.3518,
838
+ "step": 7600
839
+ },
840
+ {
841
+ "epoch": 9.27710843373494,
842
+ "grad_norm": 3.6472980976104736,
843
+ "learning_rate": 7.630522088353415e-06,
844
+ "loss": 0.3199,
845
+ "step": 7700
846
+ },
847
+ {
848
+ "epoch": 9.397590361445783,
849
+ "grad_norm": 6.128402233123779,
850
+ "learning_rate": 7.469879518072289e-06,
851
+ "loss": 0.3212,
852
+ "step": 7800
853
+ },
854
+ {
855
+ "epoch": 9.518072289156626,
856
+ "grad_norm": 2.361755132675171,
857
+ "learning_rate": 7.309236947791165e-06,
858
+ "loss": 0.3245,
859
+ "step": 7900
860
+ },
861
+ {
862
+ "epoch": 9.638554216867469,
863
+ "grad_norm": 4.379687786102295,
864
+ "learning_rate": 7.148594377510041e-06,
865
+ "loss": 0.3294,
866
+ "step": 8000
867
+ },
868
+ {
869
+ "epoch": 9.759036144578314,
870
+ "grad_norm": 6.3661370277404785,
871
+ "learning_rate": 6.987951807228917e-06,
872
+ "loss": 0.3325,
873
+ "step": 8100
874
+ },
875
+ {
876
+ "epoch": 9.879518072289157,
877
+ "grad_norm": 2.1560158729553223,
878
+ "learning_rate": 6.8273092369477925e-06,
879
+ "loss": 0.3192,
880
+ "step": 8200
881
+ },
882
+ {
883
+ "epoch": 10.0,
884
+ "grad_norm": 4.878142356872559,
885
+ "learning_rate": 6.666666666666667e-06,
886
+ "loss": 0.3227,
887
+ "step": 8300
888
+ },
889
+ {
890
+ "epoch": 10.0,
891
+ "eval_action_metrics": {
892
+ "accuracy": 0.6317058468957203,
893
+ "f1": 0.5257076834199884,
894
+ "loss": 0.3950282335281372,
895
+ "roc_auc": 0.7138452801103403
896
+ },
897
+ "eval_cause_metrics": {
898
+ "accuracy": 0.6190476190476191,
899
+ "f1": 0.48321342925659466,
900
+ "loss": 0.6035521626472473,
901
+ "roc_auc": 0.6868496637340858
902
+ },
903
+ "eval_event_metrics": {
904
+ "accuracy": 0.6949969861362266,
905
+ "f1": 0.73347022587269,
906
+ "loss": 1.0751080513000488,
907
+ "roc_auc": 0.8277649690562431
908
+ },
909
+ "eval_f1": 0.6476335229111712,
910
+ "eval_loss": 1.5520377159118652,
911
+ "eval_main_metrics": {
912
+ "accuracy": 0.6353224834237492,
913
+ "f1": 0.8481427530954114,
914
+ "loss": 2.005683422088623,
915
+ "roc_auc": 0.8296800426683087
916
+ },
917
+ "eval_runtime": 19.4155,
918
+ "eval_samples_per_second": 85.447,
919
+ "eval_steps_per_second": 10.713,
920
+ "step": 8300
921
+ },
922
+ {
923
+ "epoch": 10.120481927710843,
924
+ "grad_norm": 4.528349876403809,
925
+ "learning_rate": 6.5060240963855425e-06,
926
+ "loss": 0.3223,
927
+ "step": 8400
928
+ },
929
+ {
930
+ "epoch": 10.240963855421686,
931
+ "grad_norm": 6.405284881591797,
932
+ "learning_rate": 6.345381526104418e-06,
933
+ "loss": 0.2805,
934
+ "step": 8500
935
+ },
936
+ {
937
+ "epoch": 10.36144578313253,
938
+ "grad_norm": 4.246714115142822,
939
+ "learning_rate": 6.184738955823294e-06,
940
+ "loss": 0.2912,
941
+ "step": 8600
942
+ },
943
+ {
944
+ "epoch": 10.481927710843374,
945
+ "grad_norm": 2.8840444087982178,
946
+ "learning_rate": 6.02409638554217e-06,
947
+ "loss": 0.303,
948
+ "step": 8700
949
+ },
950
+ {
951
+ "epoch": 10.602409638554217,
952
+ "grad_norm": 6.155231952667236,
953
+ "learning_rate": 5.863453815261044e-06,
954
+ "loss": 0.289,
955
+ "step": 8800
956
+ },
957
+ {
958
+ "epoch": 10.72289156626506,
959
+ "grad_norm": 5.590630531311035,
960
+ "learning_rate": 5.70281124497992e-06,
961
+ "loss": 0.2866,
962
+ "step": 8900
963
+ },
964
+ {
965
+ "epoch": 10.843373493975903,
966
+ "grad_norm": 2.1477112770080566,
967
+ "learning_rate": 5.542168674698796e-06,
968
+ "loss": 0.2908,
969
+ "step": 9000
970
+ },
971
+ {
972
+ "epoch": 10.963855421686747,
973
+ "grad_norm": 3.363445520401001,
974
+ "learning_rate": 5.381526104417672e-06,
975
+ "loss": 0.3135,
976
+ "step": 9100
977
+ },
978
+ {
979
+ "epoch": 11.0,
980
+ "eval_action_metrics": {
981
+ "accuracy": 0.6365280289330922,
982
+ "f1": 0.5067319461444308,
983
+ "loss": 0.4702241122722626,
984
+ "roc_auc": 0.6962618149365137
985
+ },
986
+ "eval_cause_metrics": {
987
+ "accuracy": 0.621458710066305,
988
+ "f1": 0.4755501222493888,
989
+ "loss": 0.6331600546836853,
990
+ "roc_auc": 0.680847681852707
991
+ },
992
+ "eval_event_metrics": {
993
+ "accuracy": 0.6943942133815552,
994
+ "f1": 0.73487616727568,
995
+ "loss": 0.8929408192634583,
996
+ "roc_auc": 0.8306706066392446
997
+ },
998
+ "eval_f1": 0.6414051572871415,
999
+ "eval_loss": 1.5805182456970215,
1000
+ "eval_main_metrics": {
1001
+ "accuracy": 0.6401446654611211,
1002
+ "f1": 0.8484623934790664,
1003
+ "loss": 2.2667479515075684,
1004
+ "roc_auc": 0.8336254542725883
1005
+ },
1006
+ "eval_runtime": 19.3897,
1007
+ "eval_samples_per_second": 85.561,
1008
+ "eval_steps_per_second": 10.727,
1009
+ "step": 9130
1010
+ },
1011
+ {
1012
+ "epoch": 11.08433734939759,
1013
+ "grad_norm": 3.677710771560669,
1014
+ "learning_rate": 5.220883534136547e-06,
1015
+ "loss": 0.2626,
1016
+ "step": 9200
1017
+ },
1018
+ {
1019
+ "epoch": 11.204819277108435,
1020
+ "grad_norm": 3.71104097366333,
1021
+ "learning_rate": 5.060240963855422e-06,
1022
+ "loss": 0.2811,
1023
+ "step": 9300
1024
+ },
1025
+ {
1026
+ "epoch": 11.325301204819278,
1027
+ "grad_norm": 4.658101558685303,
1028
+ "learning_rate": 4.899598393574298e-06,
1029
+ "loss": 0.2556,
1030
+ "step": 9400
1031
+ },
1032
+ {
1033
+ "epoch": 11.44578313253012,
1034
+ "grad_norm": 2.3544187545776367,
1035
+ "learning_rate": 4.7389558232931736e-06,
1036
+ "loss": 0.2675,
1037
+ "step": 9500
1038
+ },
1039
+ {
1040
+ "epoch": 11.566265060240964,
1041
+ "grad_norm": 2.9858105182647705,
1042
+ "learning_rate": 4.578313253012049e-06,
1043
+ "loss": 0.2601,
1044
+ "step": 9600
1045
+ },
1046
+ {
1047
+ "epoch": 11.686746987951807,
1048
+ "grad_norm": 5.517003059387207,
1049
+ "learning_rate": 4.4176706827309244e-06,
1050
+ "loss": 0.2706,
1051
+ "step": 9700
1052
+ },
1053
+ {
1054
+ "epoch": 11.80722891566265,
1055
+ "grad_norm": 5.479954242706299,
1056
+ "learning_rate": 4.2570281124497995e-06,
1057
+ "loss": 0.2678,
1058
+ "step": 9800
1059
+ },
1060
+ {
1061
+ "epoch": 11.927710843373493,
1062
+ "grad_norm": 3.5991973876953125,
1063
+ "learning_rate": 4.096385542168675e-06,
1064
+ "loss": 0.2555,
1065
+ "step": 9900
1066
+ },
1067
+ {
1068
+ "epoch": 12.0,
1069
+ "eval_action_metrics": {
1070
+ "accuracy": 0.6329113924050633,
1071
+ "f1": 0.5242030696576152,
1072
+ "loss": 0.41705745458602905,
1073
+ "roc_auc": 0.7097399699809339
1074
+ },
1075
+ "eval_cause_metrics": {
1076
+ "accuracy": 0.6262808921036769,
1077
+ "f1": 0.5017221584385763,
1078
+ "loss": 0.6140196919441223,
1079
+ "roc_auc": 0.7016332915830403
1080
+ },
1081
+ "eval_event_metrics": {
1082
+ "accuracy": 0.6871609403254972,
1083
+ "f1": 0.7303643724696356,
1084
+ "loss": 1.0149368047714233,
1085
+ "roc_auc": 0.8280933246892902
1086
+ },
1087
+ "eval_f1": 0.6511378774267244,
1088
+ "eval_loss": 1.6208207607269287,
1089
+ "eval_main_metrics": {
1090
+ "accuracy": 0.6395418927064497,
1091
+ "f1": 0.8482619091410705,
1092
+ "loss": 2.2304847240448,
1093
+ "roc_auc": 0.8319157462619947
1094
+ },
1095
+ "eval_runtime": 19.4554,
1096
+ "eval_samples_per_second": 85.272,
1097
+ "eval_steps_per_second": 10.691,
1098
+ "step": 9960
1099
+ },
1100
+ {
1101
+ "epoch": 12.048192771084338,
1102
+ "grad_norm": 4.944530010223389,
1103
+ "learning_rate": 3.93574297188755e-06,
1104
+ "loss": 0.2796,
1105
+ "step": 10000
1106
+ },
1107
+ {
1108
+ "epoch": 12.168674698795181,
1109
+ "grad_norm": 4.537522792816162,
1110
+ "learning_rate": 3.7751004016064258e-06,
1111
+ "loss": 0.2397,
1112
+ "step": 10100
1113
+ },
1114
+ {
1115
+ "epoch": 12.289156626506024,
1116
+ "grad_norm": 5.054214954376221,
1117
+ "learning_rate": 3.6144578313253016e-06,
1118
+ "loss": 0.2311,
1119
+ "step": 10200
1120
+ },
1121
+ {
1122
+ "epoch": 12.409638554216867,
1123
+ "grad_norm": 1.6431657075881958,
1124
+ "learning_rate": 3.453815261044177e-06,
1125
+ "loss": 0.2515,
1126
+ "step": 10300
1127
+ },
1128
+ {
1129
+ "epoch": 12.53012048192771,
1130
+ "grad_norm": 2.898925304412842,
1131
+ "learning_rate": 3.2931726907630525e-06,
1132
+ "loss": 0.2424,
1133
+ "step": 10400
1134
+ },
1135
+ {
1136
+ "epoch": 12.650602409638553,
1137
+ "grad_norm": 3.5978894233703613,
1138
+ "learning_rate": 3.132530120481928e-06,
1139
+ "loss": 0.2573,
1140
+ "step": 10500
1141
+ },
1142
+ {
1143
+ "epoch": 12.771084337349398,
1144
+ "grad_norm": 3.019956588745117,
1145
+ "learning_rate": 2.9718875502008034e-06,
1146
+ "loss": 0.2469,
1147
+ "step": 10600
1148
+ },
1149
+ {
1150
+ "epoch": 12.891566265060241,
1151
+ "grad_norm": 4.129029273986816,
1152
+ "learning_rate": 2.811244979919679e-06,
1153
+ "loss": 0.2377,
1154
+ "step": 10700
1155
+ },
1156
+ {
1157
+ "epoch": 13.0,
1158
+ "eval_action_metrics": {
1159
+ "accuracy": 0.6298975286317059,
1160
+ "f1": 0.5311438278595696,
1161
+ "loss": 0.40619078278541565,
1162
+ "roc_auc": 0.7197446351060808
1163
+ },
1164
+ "eval_cause_metrics": {
1165
+ "accuracy": 0.6220614828209765,
1166
+ "f1": 0.5011520737327189,
1167
+ "loss": 0.6362375617027283,
1168
+ "roc_auc": 0.7008556294988456
1169
+ },
1170
+ "eval_event_metrics": {
1171
+ "accuracy": 0.6841470765521398,
1172
+ "f1": 0.729192042224929,
1173
+ "loss": 1.0542970895767212,
1174
+ "roc_auc": 0.8268163000692555
1175
+ },
1176
+ "eval_f1": 0.6518611919550361,
1177
+ "eval_loss": 1.6575490236282349,
1178
+ "eval_main_metrics": {
1179
+ "accuracy": 0.6305003013863774,
1180
+ "f1": 0.8459568240029272,
1181
+ "loss": 2.175412654876709,
1182
+ "roc_auc": 0.8282710537312776
1183
+ },
1184
+ "eval_runtime": 19.3864,
1185
+ "eval_samples_per_second": 85.575,
1186
+ "eval_steps_per_second": 10.729,
1187
+ "step": 10790
1188
+ }
1189
+ ],
1190
+ "logging_steps": 100,
1191
+ "max_steps": 12450,
1192
+ "num_input_tokens_seen": 0,
1193
+ "num_train_epochs": 15,
1194
+ "save_steps": 500,
1195
+ "stateful_callbacks": {
1196
+ "TrainerControl": {
1197
+ "args": {
1198
+ "should_epoch_stop": false,
1199
+ "should_evaluate": false,
1200
+ "should_log": false,
1201
+ "should_save": true,
1202
+ "should_training_stop": false
1203
+ },
1204
+ "attributes": {}
1205
+ }
1206
+ },
1207
+ "total_flos": 7890557846663520.0,
1208
+ "train_batch_size": 8,
1209
+ "trial_name": null,
1210
+ "trial_params": null
1211
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e281bffd89136ee0dd827c8128960558474cc42ab286246ed0831eb92fb72fdd
3
+ size 5368
vocab.txt ADDED
The diff for this file is too large to render. See raw diff