Kithogue commited on
Commit
a9103ae
1 Parent(s): 2fd9761

Upload model files

Browse files
config.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "acceptagreementcontractceasefire",
15
+ "1": "accidentcrash",
16
+ "2": "agreements",
17
+ "3": "airstrikemissilestrike",
18
+ "4": "arrestjaildetain",
19
+ "5": "artifact",
20
+ "6": "artifactexistence",
21
+ "7": "attack",
22
+ "8": "biologicalchemicalpoisonattack",
23
+ "9": "bombing",
24
+ "10": "borrowlend",
25
+ "11": "bringcarryunload",
26
+ "12": "broadcast",
27
+ "13": "build",
28
+ "14": "castvote",
29
+ "15": "chargeindict",
30
+ "16": "collaborate",
31
+ "17": "commandorder",
32
+ "18": "commitmentpromiseexpressintent",
33
+ "19": "conflict",
34
+ "20": "contact",
35
+ "21": "convict",
36
+ "22": "correspondence",
37
+ "23": "createintellectualproperty",
38
+ "24": "createmanufacture",
39
+ "25": "damage",
40
+ "26": "damagedestroy",
41
+ "27": "deathcausedbyviolentevents",
42
+ "28": "demonstrate",
43
+ "29": "destroy",
44
+ "30": "die",
45
+ "31": "disaster",
46
+ "32": "discussion",
47
+ "33": "disperseseparate",
48
+ "34": "elect",
49
+ "35": "embargosanction",
50
+ "36": "endposition",
51
+ "37": "evacuationrescue",
52
+ "38": "execute",
53
+ "39": "extradite",
54
+ "40": "fall",
55
+ "41": "firearmattack",
56
+ "42": "fireexplosion",
57
+ "43": "firinglayoff",
58
+ "44": "formation",
59
+ "45": "funeralvigil",
60
+ "46": "giftgrantprovideaid",
61
+ "47": "government",
62
+ "48": "grantentry",
63
+ "49": "grantentryasylum",
64
+ "50": "hanging",
65
+ "51": "hide",
66
+ "52": "hiring",
67
+ "53": "illnessdegradationhungerthirst",
68
+ "54": "illnessdegradationphysical",
69
+ "55": "initiatejudicialprocess",
70
+ "56": "injure",
71
+ "57": "injurycausedbyviolentevents",
72
+ "58": "inspection",
73
+ "59": "inspectpeopleorganization",
74
+ "60": "invade",
75
+ "61": "investigate",
76
+ "62": "investigatecrime",
77
+ "63": "judicialconsequences",
78
+ "64": "justice",
79
+ "65": "legislate",
80
+ "66": "life",
81
+ "67": "manufacture",
82
+ "68": "marchprotestpoliticalgathering",
83
+ "69": "mediastatement",
84
+ "70": "meet",
85
+ "71": "mergegpe",
86
+ "72": "monitorelection",
87
+ "73": "movement",
88
+ "74": "negotiate",
89
+ "75": "nonviolentdeath",
90
+ "76": "nonviolentthrowlaunch",
91
+ "77": "payforservice",
92
+ "78": "personnel",
93
+ "79": "physicalinvestigateinspect",
94
+ "80": "prevarication",
95
+ "81": "prevententry",
96
+ "82": "preventexit",
97
+ "83": "publicstatementinperson",
98
+ "84": "purchase",
99
+ "85": "quitretire",
100
+ "86": "receiveimport",
101
+ "87": "rejectnullifyagreementcontractceasefire",
102
+ "88": "requestadvise",
103
+ "89": "retreat",
104
+ "90": "selfdirectedbattle",
105
+ "91": "selfmotion",
106
+ "92": "sendsupplyexport",
107
+ "93": "sensoryobserve",
108
+ "94": "setfire",
109
+ "95": "smuggleextract",
110
+ "96": "spy",
111
+ "97": "stabbing",
112
+ "98": "startgpe",
113
+ "99": "startposition",
114
+ "100": "stealrobhijack",
115
+ "101": "strangling",
116
+ "102": "surrender",
117
+ "103": "threatencoerce",
118
+ "104": "transaction",
119
+ "105": "transfercontrol",
120
+ "106": "transfermoney",
121
+ "107": "transferownership",
122
+ "108": "transportartifact",
123
+ "109": "transportperson",
124
+ "110": "trialhearing",
125
+ "111": "violateagreement",
126
+ "112": "violationspreventvote",
127
+ "113": "vote",
128
+ "114": "winelection",
129
+ "115": "yield"
130
+ },
131
+ "initializer_range": 0.02,
132
+ "intermediate_size": 3072,
133
+ "label2id": {
134
+ "acceptagreementcontractceasefire": 0,
135
+ "accidentcrash": 1,
136
+ "agreements": 2,
137
+ "airstrikemissilestrike": 3,
138
+ "arrestjaildetain": 4,
139
+ "artifact": 5,
140
+ "artifactexistence": 6,
141
+ "attack": 7,
142
+ "biologicalchemicalpoisonattack": 8,
143
+ "bombing": 9,
144
+ "borrowlend": 10,
145
+ "bringcarryunload": 11,
146
+ "broadcast": 12,
147
+ "build": 13,
148
+ "castvote": 14,
149
+ "chargeindict": 15,
150
+ "collaborate": 16,
151
+ "commandorder": 17,
152
+ "commitmentpromiseexpressintent": 18,
153
+ "conflict": 19,
154
+ "contact": 20,
155
+ "convict": 21,
156
+ "correspondence": 22,
157
+ "createintellectualproperty": 23,
158
+ "createmanufacture": 24,
159
+ "damage": 25,
160
+ "damagedestroy": 26,
161
+ "deathcausedbyviolentevents": 27,
162
+ "demonstrate": 28,
163
+ "destroy": 29,
164
+ "die": 30,
165
+ "disaster": 31,
166
+ "discussion": 32,
167
+ "disperseseparate": 33,
168
+ "elect": 34,
169
+ "embargosanction": 35,
170
+ "endposition": 36,
171
+ "evacuationrescue": 37,
172
+ "execute": 38,
173
+ "extradite": 39,
174
+ "fall": 40,
175
+ "firearmattack": 41,
176
+ "fireexplosion": 42,
177
+ "firinglayoff": 43,
178
+ "formation": 44,
179
+ "funeralvigil": 45,
180
+ "giftgrantprovideaid": 46,
181
+ "government": 47,
182
+ "grantentry": 48,
183
+ "grantentryasylum": 49,
184
+ "hanging": 50,
185
+ "hide": 51,
186
+ "hiring": 52,
187
+ "illnessdegradationhungerthirst": 53,
188
+ "illnessdegradationphysical": 54,
189
+ "initiatejudicialprocess": 55,
190
+ "injure": 56,
191
+ "injurycausedbyviolentevents": 57,
192
+ "inspection": 58,
193
+ "inspectpeopleorganization": 59,
194
+ "invade": 60,
195
+ "investigate": 61,
196
+ "investigatecrime": 62,
197
+ "judicialconsequences": 63,
198
+ "justice": 64,
199
+ "legislate": 65,
200
+ "life": 66,
201
+ "manufacture": 67,
202
+ "marchprotestpoliticalgathering": 68,
203
+ "mediastatement": 69,
204
+ "meet": 70,
205
+ "mergegpe": 71,
206
+ "monitorelection": 72,
207
+ "movement": 73,
208
+ "negotiate": 74,
209
+ "nonviolentdeath": 75,
210
+ "nonviolentthrowlaunch": 76,
211
+ "payforservice": 77,
212
+ "personnel": 78,
213
+ "physicalinvestigateinspect": 79,
214
+ "prevarication": 80,
215
+ "prevententry": 81,
216
+ "preventexit": 82,
217
+ "publicstatementinperson": 83,
218
+ "purchase": 84,
219
+ "quitretire": 85,
220
+ "receiveimport": 86,
221
+ "rejectnullifyagreementcontractceasefire": 87,
222
+ "requestadvise": 88,
223
+ "retreat": 89,
224
+ "selfdirectedbattle": 90,
225
+ "selfmotion": 91,
226
+ "sendsupplyexport": 92,
227
+ "sensoryobserve": 93,
228
+ "setfire": 94,
229
+ "smuggleextract": 95,
230
+ "spy": 96,
231
+ "stabbing": 97,
232
+ "startgpe": 98,
233
+ "startposition": 99,
234
+ "stealrobhijack": 100,
235
+ "strangling": 101,
236
+ "surrender": 102,
237
+ "threatencoerce": 103,
238
+ "transaction": 104,
239
+ "transfercontrol": 105,
240
+ "transfermoney": 106,
241
+ "transferownership": 107,
242
+ "transportartifact": 108,
243
+ "transportperson": 109,
244
+ "trialhearing": 110,
245
+ "violateagreement": 111,
246
+ "violationspreventvote": 112,
247
+ "vote": 113,
248
+ "winelection": 114,
249
+ "yield": 115
250
+ },
251
+ "layer_norm_eps": 1e-05,
252
+ "max_position_embeddings": 514,
253
+ "model_type": "roberta",
254
+ "num_attention_heads": 12,
255
+ "num_hidden_layers": 12,
256
+ "pad_token_id": 1,
257
+ "position_embedding_type": "absolute",
258
+ "problem_type": "multi_label_classification",
259
+ "torch_dtype": "float32",
260
+ "transformers_version": "4.30.0",
261
+ "type_vocab_size": 1,
262
+ "use_cache": true,
263
+ "vocab_size": 50265
264
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f581d2baf77a71ebf12a5010656f02dd5bb17fde4a1df174299a1519171a6782
3
+ size 685989061
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b23d8d05114cd040f4b2e354f6cf088e22869aa882b6ec70485d648ed9f4a7bd
3
+ size 499012725
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5222e75d734bccdd7c107bea8376baa60ac7774c6f42faf740863489eab72287
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73a459b3df7488e4cc14fe1f2f5d67538056e0c66f5fa32c217682302d83949
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "mask_token": "<mask>",
9
+ "model_max_length": 512,
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "tokenizer_class": "RobertaTokenizer",
13
+ "trim_offsets": true,
14
+ "unk_token": "<unk>"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8082437275985663,
3
+ "best_model_checkpoint": "roberta_finetuned_hier/checkpoint-9170",
4
+ "epoch": 10.0,
5
+ "global_step": 9170,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.55,
12
+ "learning_rate": 1.8909487459105783e-05,
13
+ "loss": 0.1685,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.0,
19
+ "eval_f1": 0.0,
20
+ "eval_loss": 0.0928020253777504,
21
+ "eval_roc_auc": 0.5,
22
+ "eval_runtime": 17.532,
23
+ "eval_samples_per_second": 52.704,
24
+ "eval_steps_per_second": 6.616,
25
+ "step": 917
26
+ },
27
+ {
28
+ "epoch": 1.09,
29
+ "learning_rate": 1.781897491821156e-05,
30
+ "loss": 0.094,
31
+ "step": 1000
32
+ },
33
+ {
34
+ "epoch": 1.64,
35
+ "learning_rate": 1.6728462377317342e-05,
36
+ "loss": 0.093,
37
+ "step": 1500
38
+ },
39
+ {
40
+ "epoch": 2.0,
41
+ "eval_accuracy": 0.0021645021645021645,
42
+ "eval_f1": 0.2291442892946652,
43
+ "eval_loss": 0.07939616590738297,
44
+ "eval_roc_auc": 0.5650002587190003,
45
+ "eval_runtime": 17.4376,
46
+ "eval_samples_per_second": 52.989,
47
+ "eval_steps_per_second": 6.652,
48
+ "step": 1834
49
+ },
50
+ {
51
+ "epoch": 2.18,
52
+ "learning_rate": 1.563794983642312e-05,
53
+ "loss": 0.0845,
54
+ "step": 2000
55
+ },
56
+ {
57
+ "epoch": 2.73,
58
+ "learning_rate": 1.45474372955289e-05,
59
+ "loss": 0.0637,
60
+ "step": 2500
61
+ },
62
+ {
63
+ "epoch": 3.0,
64
+ "eval_accuracy": 0.10064935064935066,
65
+ "eval_f1": 0.6067974578612877,
66
+ "eval_loss": 0.048662275075912476,
67
+ "eval_roc_auc": 0.7229654750415772,
68
+ "eval_runtime": 17.4089,
69
+ "eval_samples_per_second": 53.076,
70
+ "eval_steps_per_second": 6.663,
71
+ "step": 2751
72
+ },
73
+ {
74
+ "epoch": 3.27,
75
+ "learning_rate": 1.3456924754634679e-05,
76
+ "loss": 0.0511,
77
+ "step": 3000
78
+ },
79
+ {
80
+ "epoch": 3.82,
81
+ "learning_rate": 1.236641221374046e-05,
82
+ "loss": 0.0429,
83
+ "step": 3500
84
+ },
85
+ {
86
+ "epoch": 4.0,
87
+ "eval_accuracy": 0.16017316017316016,
88
+ "eval_f1": 0.7002300025555839,
89
+ "eval_loss": 0.03621600940823555,
90
+ "eval_roc_auc": 0.7781674734201419,
91
+ "eval_runtime": 17.2108,
92
+ "eval_samples_per_second": 53.687,
93
+ "eval_steps_per_second": 6.74,
94
+ "step": 3668
95
+ },
96
+ {
97
+ "epoch": 4.36,
98
+ "learning_rate": 1.1275899672846238e-05,
99
+ "loss": 0.0374,
100
+ "step": 4000
101
+ },
102
+ {
103
+ "epoch": 4.91,
104
+ "learning_rate": 1.0185387131952018e-05,
105
+ "loss": 0.0334,
106
+ "step": 4500
107
+ },
108
+ {
109
+ "epoch": 5.0,
110
+ "eval_accuracy": 0.2196969696969697,
111
+ "eval_f1": 0.7534612581977169,
112
+ "eval_loss": 0.029505521059036255,
113
+ "eval_roc_auc": 0.8148612406998286,
114
+ "eval_runtime": 17.208,
115
+ "eval_samples_per_second": 53.696,
116
+ "eval_steps_per_second": 6.741,
117
+ "step": 4585
118
+ },
119
+ {
120
+ "epoch": 5.45,
121
+ "learning_rate": 9.094874591057799e-06,
122
+ "loss": 0.0305,
123
+ "step": 5000
124
+ },
125
+ {
126
+ "epoch": 6.0,
127
+ "learning_rate": 8.004362050163578e-06,
128
+ "loss": 0.0282,
129
+ "step": 5500
130
+ },
131
+ {
132
+ "epoch": 6.0,
133
+ "eval_accuracy": 0.23376623376623376,
134
+ "eval_f1": 0.7756258856872934,
135
+ "eval_loss": 0.02626137062907219,
136
+ "eval_roc_auc": 0.8332405627512013,
137
+ "eval_runtime": 17.2218,
138
+ "eval_samples_per_second": 53.653,
139
+ "eval_steps_per_second": 6.736,
140
+ "step": 5502
141
+ },
142
+ {
143
+ "epoch": 6.54,
144
+ "learning_rate": 6.913849509269357e-06,
145
+ "loss": 0.0267,
146
+ "step": 6000
147
+ },
148
+ {
149
+ "epoch": 7.0,
150
+ "eval_accuracy": 0.2694805194805195,
151
+ "eval_f1": 0.7864801864801864,
152
+ "eval_loss": 0.023826098069548607,
153
+ "eval_roc_auc": 0.8423381052507725,
154
+ "eval_runtime": 17.4138,
155
+ "eval_samples_per_second": 53.061,
156
+ "eval_steps_per_second": 6.661,
157
+ "step": 6419
158
+ },
159
+ {
160
+ "epoch": 7.09,
161
+ "learning_rate": 5.823336968375137e-06,
162
+ "loss": 0.0252,
163
+ "step": 6500
164
+ },
165
+ {
166
+ "epoch": 7.63,
167
+ "learning_rate": 4.732824427480917e-06,
168
+ "loss": 0.0241,
169
+ "step": 7000
170
+ },
171
+ {
172
+ "epoch": 8.0,
173
+ "eval_accuracy": 0.2922077922077922,
174
+ "eval_f1": 0.7973497829563628,
175
+ "eval_loss": 0.02243439108133316,
176
+ "eval_roc_auc": 0.8539930593410131,
177
+ "eval_runtime": 17.2304,
178
+ "eval_samples_per_second": 53.626,
179
+ "eval_steps_per_second": 6.732,
180
+ "step": 7336
181
+ },
182
+ {
183
+ "epoch": 8.18,
184
+ "learning_rate": 3.6423118865866965e-06,
185
+ "loss": 0.0235,
186
+ "step": 7500
187
+ },
188
+ {
189
+ "epoch": 8.72,
190
+ "learning_rate": 2.5517993456924756e-06,
191
+ "loss": 0.0224,
192
+ "step": 8000
193
+ },
194
+ {
195
+ "epoch": 9.0,
196
+ "eval_accuracy": 0.28679653679653677,
197
+ "eval_f1": 0.7981900452488687,
198
+ "eval_loss": 0.021712390705943108,
199
+ "eval_roc_auc": 0.8577418326100689,
200
+ "eval_runtime": 18.0929,
201
+ "eval_samples_per_second": 51.07,
202
+ "eval_steps_per_second": 6.411,
203
+ "step": 8253
204
+ },
205
+ {
206
+ "epoch": 9.27,
207
+ "learning_rate": 1.4612868047982554e-06,
208
+ "loss": 0.0223,
209
+ "step": 8500
210
+ },
211
+ {
212
+ "epoch": 9.81,
213
+ "learning_rate": 3.7077426390403497e-07,
214
+ "loss": 0.0218,
215
+ "step": 9000
216
+ },
217
+ {
218
+ "epoch": 10.0,
219
+ "eval_accuracy": 0.3106060606060606,
220
+ "eval_f1": 0.8082437275985663,
221
+ "eval_loss": 0.021442167460918427,
222
+ "eval_roc_auc": 0.8658561225293185,
223
+ "eval_runtime": 17.28,
224
+ "eval_samples_per_second": 53.472,
225
+ "eval_steps_per_second": 6.713,
226
+ "step": 9170
227
+ }
228
+ ],
229
+ "max_steps": 9170,
230
+ "num_train_epochs": 10,
231
+ "total_flos": 1.930314697371648e+16,
232
+ "trial_name": null,
233
+ "trial_params": null
234
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b067917048e0f73b7e123db97e5b3b3f3cf9ce46b7963373d25007ee56bc1a94
3
+ size 3899
vocab.json ADDED
The diff for this file is too large to render. See raw diff