syke9p3 committed on
Commit
24a35d9
1 Parent(s): 9e17387

Upload POS Model

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "[PMC]": 52101,
3
+ "[PMP]": 52099,
4
+ "[PMS]": 52100
5
+ }
config.json ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gklmip/bert-tagalog-base-uncased",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "[PAD]",
15
+ "1": "NNC",
16
+ "2": "NNP",
17
+ "3": "NNPA",
18
+ "4": "NNCA",
19
+ "5": "PR",
20
+ "6": "PRS",
21
+ "7": "PRP",
22
+ "8": "PRSP",
23
+ "9": "PRO",
24
+ "10": "PRQ",
25
+ "11": "PRQP",
26
+ "12": "PRL",
27
+ "13": "PRC",
28
+ "14": "PRF",
29
+ "15": "PRI",
30
+ "16": "DT",
31
+ "17": "DTC",
32
+ "18": "DTP",
33
+ "19": "DTPP",
34
+ "20": "LM",
35
+ "21": "CC",
36
+ "22": "CCT",
37
+ "23": "CCR",
38
+ "24": "CCB",
39
+ "25": "CCA",
40
+ "26": "PM",
41
+ "27": "PMP",
42
+ "28": "PME",
43
+ "29": "PMQ",
44
+ "30": "PMC",
45
+ "31": "PMSC",
46
+ "32": "PMS",
47
+ "33": "VB",
48
+ "34": "VBW",
49
+ "35": "VBS",
50
+ "36": "VBN",
51
+ "37": "VBTS",
52
+ "38": "VBTR",
53
+ "39": "VBTF",
54
+ "40": "VBTP",
55
+ "41": "VBAF",
56
+ "42": "VBOF",
57
+ "43": "VBOB",
58
+ "44": "VBOL",
59
+ "45": "VBOI",
60
+ "46": "VBRF",
61
+ "47": "JJ",
62
+ "48": "JJD",
63
+ "49": "JJC",
64
+ "50": "JJCC",
65
+ "51": "JJCS",
66
+ "52": "JJCN",
67
+ "53": "JJCF",
68
+ "54": "JJCB",
69
+ "55": "JJT",
70
+ "56": "RB",
71
+ "57": "RBD",
72
+ "58": "RBN",
73
+ "59": "RBK",
74
+ "60": "RBP",
75
+ "61": "RBB",
76
+ "62": "RBR",
77
+ "63": "RBQ",
78
+ "64": "RBT",
79
+ "65": "RBF",
80
+ "66": "RBW",
81
+ "67": "RBM",
82
+ "68": "RBL",
83
+ "69": "RBI",
84
+ "70": "RBS",
85
+ "71": "RBJ",
86
+ "72": "RBY",
87
+ "73": "RBLI",
88
+ "74": "TS",
89
+ "75": "FW",
90
+ "76": "CD",
91
+ "77": "CCB_CCP",
92
+ "78": "CCR_CCA",
93
+ "79": "CCR_CCB",
94
+ "80": "CCR_CCP",
95
+ "81": "CCR_LM",
96
+ "82": "CCT_CCA",
97
+ "83": "CCT_CCP",
98
+ "84": "CCT_LM",
99
+ "85": "CCU_DTP",
100
+ "86": "CDB_CCA",
101
+ "87": "CDB_CCP",
102
+ "88": "CDB_LM",
103
+ "89": "CDB_NNC",
104
+ "90": "CDB_NNC_CCP",
105
+ "91": "JJCC_CCP",
106
+ "92": "JJCC_JJD",
107
+ "93": "JJCN_CCP",
108
+ "94": "JJCN_LM",
109
+ "95": "JJCS_CCB",
110
+ "96": "JJCS_CCP",
111
+ "97": "JJCS_JJC",
112
+ "98": "JJCS_JJC_CCP",
113
+ "99": "JJCS_JJD",
114
+ "100": "[UNK]",
115
+ "101": "[CLS]",
116
+ "102": "[SEP]",
117
+ "103": "JJCS_JJN",
118
+ "104": "JJCS_JJN_CCP",
119
+ "105": "JJCS_RBF",
120
+ "106": "JJCS_VBAF",
121
+ "107": "JJCS_VBAF_CCP",
122
+ "108": "JJCS_VBN_CCP",
123
+ "109": "JJCS_VBOF",
124
+ "110": "JJCS_VBOF_CCP",
125
+ "111": "JJCS_VBN",
126
+ "112": "RBQ_CCP",
127
+ "113": "JJC_CCB",
128
+ "114": "JJC_CCP",
129
+ "115": "JJC_PRL",
130
+ "116": "JJD_CCA",
131
+ "117": "JJD_CCB",
132
+ "118": "JJD_CCP",
133
+ "119": "JJD_CCT",
134
+ "120": "JJD_NNC",
135
+ "121": "JJD_NNP",
136
+ "122": "JJN_CCA",
137
+ "123": "JJN_CCB",
138
+ "124": "JJN_CCP",
139
+ "125": "JJN_NNC",
140
+ "126": "JJN_NNC_CCP",
141
+ "127": "JJD_NNC_CCP",
142
+ "128": "NNC_CCA",
143
+ "129": "NNC_CCB",
144
+ "130": "NNC_CCP",
145
+ "131": "NNC_NNC_CCP",
146
+ "132": "NN",
147
+ "133": "JJN",
148
+ "134": "NNP_CCA",
149
+ "135": "NNP_CCP",
150
+ "136": "NNP_NNP",
151
+ "137": "PRC_CCB",
152
+ "138": "PRC_CCP",
153
+ "139": "PRF_CCP",
154
+ "140": "PRQ_CCP",
155
+ "141": "PRQ_LM",
156
+ "142": "PRS_CCB",
157
+ "143": "PRS_CCP",
158
+ "144": "PRSP_CCP",
159
+ "145": "PRSP_CCP_NNP",
160
+ "146": "PRL_CCP",
161
+ "147": "PRL_LM",
162
+ "148": "PRO_CCB",
163
+ "149": "PRO_CCP",
164
+ "150": "VBS_CCP",
165
+ "151": "VBTR_CCP",
166
+ "152": "VBTS_CCA",
167
+ "153": "VBTS_CCP",
168
+ "154": "VBTS_JJD",
169
+ "155": "VBTS_LM",
170
+ "156": "VBAF_CCP",
171
+ "157": "VBOB_CCP",
172
+ "158": "VBOF_CCP",
173
+ "159": "VBOF_CCP_NNP",
174
+ "160": "VBRF_CCP",
175
+ "161": "CCP",
176
+ "162": "CDB",
177
+ "163": "RBW_CCP",
178
+ "164": "RBD_CCP",
179
+ "165": "DTCP",
180
+ "166": "VBH",
181
+ "167": "VBTS_VBOF",
182
+ "168": "PRI_CCP",
183
+ "169": "VBTR_VBAF_CCP",
184
+ "170": "DQL",
185
+ "171": "DQR",
186
+ "172": "RBT_CCP",
187
+ "173": "VBW_CCP",
188
+ "174": "RBI_CCP",
189
+ "175": "VBN_CCP",
190
+ "176": "VBTR_VBAF",
191
+ "177": "VBTF_CCP",
192
+ "178": "JJCS_JJD_NNC",
193
+ "179": "CCU",
194
+ "180": "RBL_CCP",
195
+ "181": "VBTR_VBRF_CCP",
196
+ "182": "PRP_CCP",
197
+ "183": "VBTR_VBRF",
198
+ "184": "VBH_CCP",
199
+ "185": "VBTS_VBAF",
200
+ "186": "VBTF_VBOF",
201
+ "187": "VBTR_VBOF",
202
+ "188": "VBTF_VBAF",
203
+ "189": "JJCS_JJD_CCB",
204
+ "190": "JJCS_JJD_CCP",
205
+ "191": "RBM_CCP",
206
+ "192": "NNCS",
207
+ "193": "PRI_CCB",
208
+ "194": "NNA",
209
+ "195": "VBTR_VBOB",
210
+ "196": "DC",
211
+ "197": "JJD_CP",
212
+ "198": "NC",
213
+ "199": "NC_CCP",
214
+ "200": "VBO",
215
+ "201": "JJD_CC",
216
+ "202": "VBF",
217
+ "203": "CP",
218
+ "204": "NP",
219
+ "205": "N",
220
+ "206": "F",
221
+ "207": "CT",
222
+ "208": "MS",
223
+ "209": "BTF",
224
+ "210": "CA",
225
+ "211": "VBOF_RBR",
226
+ "212": "DP"
227
+ },
228
+ "initializer_range": 0.02,
229
+ "intermediate_size": 3072,
230
+ "label2id": {
231
+ "BTF": 209,
232
+ "CA": 210,
233
+ "CC": 21,
234
+ "CCA": 25,
235
+ "CCB": 24,
236
+ "CCB_CCP": 77,
237
+ "CCP": 161,
238
+ "CCR": 23,
239
+ "CCR_CCA": 78,
240
+ "CCR_CCB": 79,
241
+ "CCR_CCP": 80,
242
+ "CCR_LM": 81,
243
+ "CCT": 22,
244
+ "CCT_CCA": 82,
245
+ "CCT_CCP": 83,
246
+ "CCT_LM": 84,
247
+ "CCU": 179,
248
+ "CCU_DTP": 85,
249
+ "CD": 76,
250
+ "CDB": 162,
251
+ "CDB_CCA": 86,
252
+ "CDB_CCP": 87,
253
+ "CDB_LM": 88,
254
+ "CDB_NNC": 89,
255
+ "CDB_NNC_CCP": 90,
256
+ "CP": 203,
257
+ "CT": 207,
258
+ "DC": 196,
259
+ "DP": 212,
260
+ "DQL": 170,
261
+ "DQR": 171,
262
+ "DT": 16,
263
+ "DTC": 17,
264
+ "DTCP": 165,
265
+ "DTP": 18,
266
+ "DTPP": 19,
267
+ "F": 206,
268
+ "FW": 75,
269
+ "JJ": 47,
270
+ "JJC": 49,
271
+ "JJCB": 54,
272
+ "JJCC": 50,
273
+ "JJCC_CCP": 91,
274
+ "JJCC_JJD": 92,
275
+ "JJCF": 53,
276
+ "JJCN": 52,
277
+ "JJCN_CCP": 93,
278
+ "JJCN_LM": 94,
279
+ "JJCS": 51,
280
+ "JJCS_CCB": 95,
281
+ "JJCS_CCP": 96,
282
+ "JJCS_JJC": 97,
283
+ "JJCS_JJC_CCP": 98,
284
+ "JJCS_JJD": 99,
285
+ "JJCS_JJD_CCB": 189,
286
+ "JJCS_JJD_CCP": 190,
287
+ "JJCS_JJD_NNC": 178,
288
+ "JJCS_JJN": 103,
289
+ "JJCS_JJN_CCP": 104,
290
+ "JJCS_RBF": 105,
291
+ "JJCS_VBAF": 106,
292
+ "JJCS_VBAF_CCP": 107,
293
+ "JJCS_VBN": 111,
294
+ "JJCS_VBN_CCP": 108,
295
+ "JJCS_VBOF": 109,
296
+ "JJCS_VBOF_CCP": 110,
297
+ "JJC_CCB": 113,
298
+ "JJC_CCP": 114,
299
+ "JJC_PRL": 115,
300
+ "JJD": 48,
301
+ "JJD_CC": 201,
302
+ "JJD_CCA": 116,
303
+ "JJD_CCB": 117,
304
+ "JJD_CCP": 118,
305
+ "JJD_CCT": 119,
306
+ "JJD_CP": 197,
307
+ "JJD_NNC": 120,
308
+ "JJD_NNC_CCP": 127,
309
+ "JJD_NNP": 121,
310
+ "JJN": 133,
311
+ "JJN_CCA": 122,
312
+ "JJN_CCB": 123,
313
+ "JJN_CCP": 124,
314
+ "JJN_NNC": 125,
315
+ "JJN_NNC_CCP": 126,
316
+ "JJT": 55,
317
+ "LM": 20,
318
+ "MS": 208,
319
+ "N": 205,
320
+ "NC": 198,
321
+ "NC_CCP": 199,
322
+ "NN": 132,
323
+ "NNA": 194,
324
+ "NNC": 1,
325
+ "NNCA": 4,
326
+ "NNCS": 192,
327
+ "NNC_CCA": 128,
328
+ "NNC_CCB": 129,
329
+ "NNC_CCP": 130,
330
+ "NNC_NNC_CCP": 131,
331
+ "NNP": 2,
332
+ "NNPA": 3,
333
+ "NNP_CCA": 134,
334
+ "NNP_CCP": 135,
335
+ "NNP_NNP": 136,
336
+ "NP": 204,
337
+ "PM": 26,
338
+ "PMC": 30,
339
+ "PME": 28,
340
+ "PMP": 27,
341
+ "PMQ": 29,
342
+ "PMS": 32,
343
+ "PMSC": 31,
344
+ "PR": 5,
345
+ "PRC": 13,
346
+ "PRC_CCB": 137,
347
+ "PRC_CCP": 138,
348
+ "PRF": 14,
349
+ "PRF_CCP": 139,
350
+ "PRI": 15,
351
+ "PRI_CCB": 193,
352
+ "PRI_CCP": 168,
353
+ "PRL": 12,
354
+ "PRL_CCP": 146,
355
+ "PRL_LM": 147,
356
+ "PRO": 9,
357
+ "PRO_CCB": 148,
358
+ "PRO_CCP": 149,
359
+ "PRP": 7,
360
+ "PRP_CCP": 182,
361
+ "PRQ": 10,
362
+ "PRQP": 11,
363
+ "PRQ_CCP": 140,
364
+ "PRQ_LM": 141,
365
+ "PRS": 6,
366
+ "PRSP": 8,
367
+ "PRSP_CCP": 144,
368
+ "PRSP_CCP_NNP": 145,
369
+ "PRS_CCB": 142,
370
+ "PRS_CCP": 143,
371
+ "RB": 56,
372
+ "RBB": 61,
373
+ "RBD": 57,
374
+ "RBD_CCP": 164,
375
+ "RBF": 65,
376
+ "RBI": 69,
377
+ "RBI_CCP": 174,
378
+ "RBJ": 71,
379
+ "RBK": 59,
380
+ "RBL": 68,
381
+ "RBLI": 73,
382
+ "RBL_CCP": 180,
383
+ "RBM": 67,
384
+ "RBM_CCP": 191,
385
+ "RBN": 58,
386
+ "RBP": 60,
387
+ "RBQ": 63,
388
+ "RBQ_CCP": 112,
389
+ "RBR": 62,
390
+ "RBS": 70,
391
+ "RBT": 64,
392
+ "RBT_CCP": 172,
393
+ "RBW": 66,
394
+ "RBW_CCP": 163,
395
+ "RBY": 72,
396
+ "TS": 74,
397
+ "VB": 33,
398
+ "VBAF": 41,
399
+ "VBAF_CCP": 156,
400
+ "VBF": 202,
401
+ "VBH": 166,
402
+ "VBH_CCP": 184,
403
+ "VBN": 36,
404
+ "VBN_CCP": 175,
405
+ "VBO": 200,
406
+ "VBOB": 43,
407
+ "VBOB_CCP": 157,
408
+ "VBOF": 42,
409
+ "VBOF_CCP": 158,
410
+ "VBOF_CCP_NNP": 159,
411
+ "VBOF_RBR": 211,
412
+ "VBOI": 45,
413
+ "VBOL": 44,
414
+ "VBRF": 46,
415
+ "VBRF_CCP": 160,
416
+ "VBS": 35,
417
+ "VBS_CCP": 150,
418
+ "VBTF": 39,
419
+ "VBTF_CCP": 177,
420
+ "VBTF_VBAF": 188,
421
+ "VBTF_VBOF": 186,
422
+ "VBTP": 40,
423
+ "VBTR": 38,
424
+ "VBTR_CCP": 151,
425
+ "VBTR_VBAF": 176,
426
+ "VBTR_VBAF_CCP": 169,
427
+ "VBTR_VBOB": 195,
428
+ "VBTR_VBOF": 187,
429
+ "VBTR_VBRF": 183,
430
+ "VBTR_VBRF_CCP": 181,
431
+ "VBTS": 37,
432
+ "VBTS_CCA": 152,
433
+ "VBTS_CCP": 153,
434
+ "VBTS_JJD": 154,
435
+ "VBTS_LM": 155,
436
+ "VBTS_VBAF": 185,
437
+ "VBTS_VBOF": 167,
438
+ "VBW": 34,
439
+ "VBW_CCP": 173,
440
+ "[CLS]": 101,
441
+ "[PAD]": 0,
442
+ "[SEP]": 102,
443
+ "[UNK]": 100
444
+ },
445
+ "layer_norm_eps": 1e-12,
446
+ "max_position_embeddings": 512,
447
+ "model_type": "bert",
448
+ "num_attention_heads": 12,
449
+ "num_hidden_layers": 12,
450
+ "pad_token_id": 0,
451
+ "pooler_fc_size": 768,
452
+ "pooler_num_attention_heads": 12,
453
+ "pooler_num_fc_layers": 3,
454
+ "pooler_size_per_head": 128,
455
+ "pooler_type": "first_token_transform",
456
+ "position_embedding_type": "absolute",
457
+ "torch_dtype": "float32",
458
+ "transformers_version": "4.31.0",
459
+ "type_vocab_size": 2,
460
+ "use_cache": true,
461
+ "vocab_size": 52102
462
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1317c5f4d533a524cfc5e3c8ca29663fcbe73362c8ee1bde8dae55d24698899b
3
+ size 502580457
special_tokens_map.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[PMP]",
4
+ "[PMS]",
5
+ "[PMC]"
6
+ ],
7
+ "cls_token": "[CLS]",
8
+ "mask_token": "[MASK]",
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "unk_token": "[UNK]"
12
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e672459f13fa4cde87b344d03ac907a366e4c9cc2774c1c21018f29b8eac669
3
+ size 3963
vocab.txt ADDED
The diff for this file is too large to render. See raw diff