KoichiYasuoka committed on
Commit
4758dea
1 Parent(s): 1b632ad

re-initialize

Browse files
Files changed (38) hide show
  1. suparkanbun/models/guwenbert-base.danku/config.json +45 -0
  2. suparkanbun/models/guwenbert-base.danku/filesize.txt +1 -0
  3. suparkanbun/models/guwenbert-base.danku/pytorch_model.bin +3 -0
  4. suparkanbun/models/guwenbert-base.danku/special_tokens_map.json +1 -0
  5. suparkanbun/models/guwenbert-base.danku/tokenizer_config.json +1 -0
  6. suparkanbun/models/guwenbert-base.danku/vocab.txt +0 -0
  7. suparkanbun/models/guwenbert-large.danku/config.json +45 -0
  8. suparkanbun/models/guwenbert-large.danku/filesize.txt +1 -0
  9. suparkanbun/models/guwenbert-large.danku/pytorch_model.bin +3 -0
  10. suparkanbun/models/guwenbert-large.danku/special_tokens_map.json +1 -0
  11. suparkanbun/models/guwenbert-large.danku/tokenizer_config.json +1 -0
  12. suparkanbun/models/guwenbert-large.danku/vocab.txt +0 -0
  13. suparkanbun/models/guwenbert-large.pos/config.json +291 -0
  14. suparkanbun/models/guwenbert-large.pos/filesize.txt +2 -0
  15. suparkanbun/models/guwenbert-large.pos/guwenbert-large.supar +3 -0
  16. suparkanbun/models/guwenbert-large.pos/pytorch_model.bin +3 -0
  17. suparkanbun/models/guwenbert-large.pos/special_tokens_map.json +1 -0
  18. suparkanbun/models/guwenbert-large.pos/tokenizer_config.json +1 -0
  19. suparkanbun/models/guwenbert-large.pos/vocab.txt +0 -0
  20. suparkanbun/models/sikubert.danku/config.json +48 -0
  21. suparkanbun/models/sikubert.danku/filesize.txt +1 -0
  22. suparkanbun/models/sikubert.danku/pytorch_model.bin +3 -0
  23. suparkanbun/models/sikubert.danku/special_tokens_map.json +1 -0
  24. suparkanbun/models/sikubert.danku/tokenizer_config.json +1 -0
  25. suparkanbun/models/sikubert.danku/vocab.txt +0 -0
  26. suparkanbun/models/sikubert.pos/config.json +294 -0
  27. suparkanbun/models/sikubert.pos/filesize.txt +2 -0
  28. suparkanbun/models/sikubert.pos/pytorch_model.bin +3 -0
  29. suparkanbun/models/sikubert.pos/sikubert.supar +3 -0
  30. suparkanbun/models/sikubert.pos/special_tokens_map.json +1 -0
  31. suparkanbun/models/sikubert.pos/tokenizer_config.json +1 -0
  32. suparkanbun/models/sikubert.pos/vocab.txt +0 -0
  33. suparkanbun/models/sikuroberta.danku/config.json +48 -0
  34. suparkanbun/models/sikuroberta.danku/filesize.txt +1 -0
  35. suparkanbun/models/sikuroberta.danku/pytorch_model.bin +3 -0
  36. suparkanbun/models/sikuroberta.danku/special_tokens_map.json +1 -0
  37. suparkanbun/models/sikuroberta.danku/tokenizer_config.json +1 -0
  38. suparkanbun/models/sikuroberta.danku/vocab.txt +0 -0
suparkanbun/models/guwenbert-base.danku/config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ethanyt/guwenbert-base",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ner",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "LABEL_0",
16
+ "1": "LABEL_1",
17
+ "2": "LABEL_2",
18
+ "3": "LABEL_3",
19
+ "4": "LABEL_4",
20
+ "5": "LABEL_5"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "max_position_embeddings": 514,
34
+ "model_type": "roberta",
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 12,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "tokenizer_class": "BertTokenizer",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.9.2",
42
+ "type_vocab_size": 1,
43
+ "use_cache": true,
44
+ "vocab_size": 23292
45
+ }
suparkanbun/models/guwenbert-base.danku/filesize.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pytorch_model.bin 413465365
suparkanbun/models/guwenbert-base.danku/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39f108469923aff969a33f3c8d0d786d4104e87bc32692ba7d5ea951c388a8a7
3
+ size 413465365
suparkanbun/models/guwenbert-base.danku/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/guwenbert-base.danku/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "ethanyt/guwenbert-base", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/guwenbert-base.danku/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
suparkanbun/models/guwenbert-large.danku/config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ethanyt/guwenbert-large",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ner",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "LABEL_0",
16
+ "1": "LABEL_1",
17
+ "2": "LABEL_2",
18
+ "3": "LABEL_3",
19
+ "4": "LABEL_4",
20
+ "5": "LABEL_5"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 4096,
24
+ "label2id": {
25
+ "LABEL_0": 0,
26
+ "LABEL_1": 1,
27
+ "LABEL_2": 2,
28
+ "LABEL_3": 3,
29
+ "LABEL_4": 4,
30
+ "LABEL_5": 5
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "max_position_embeddings": 514,
34
+ "model_type": "roberta",
35
+ "num_attention_heads": 16,
36
+ "num_hidden_layers": 24,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "tokenizer_class": "BertTokenizer",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.9.2",
42
+ "type_vocab_size": 1,
43
+ "use_cache": true,
44
+ "vocab_size": 23292
45
+ }
suparkanbun/models/guwenbert-large.danku/filesize.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pytorch_model.bin 1306952528
suparkanbun/models/guwenbert-large.danku/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1850e2c3ea723802113e7e9d3dc5793aa5048c3104ec18e21602d02cd3b552
3
+ size 1306952528
suparkanbun/models/guwenbert-large.danku/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/guwenbert-large.danku/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "ethanyt/guwenbert-large", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/guwenbert-large.danku/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
suparkanbun/models/guwenbert-large.pos/config.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ethanyt/guwenbert-large",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ner",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "n,代名詞,人称,他,PRON,Person=1|PronType=Prs",
16
+ "1": "n,代名詞,人称,他,PRON,Person=2|PronType=Prs",
17
+ "2": "n,代名詞,人称,他,PRON,Person=3|PronType=Prs",
18
+ "3": "n,代名詞,人称,他,PRON,PronType=Prs",
19
+ "4": "n,代名詞,人称,他,PRON,PronType=Prs|Reflex=Yes",
20
+ "5": "n,代名詞,人称,止格,PRON,Person=1|PronType=Prs",
21
+ "6": "n,代名詞,人称,止格,PRON,Person=2|PronType=Prs",
22
+ "7": "n,代名詞,人称,止格,PRON,Person=3|PronType=Prs",
23
+ "8": "n,代名詞,人称,止格,PRON,PronType=Prs",
24
+ "9": "n,代名詞,人称,起格,PRON,Person=1|PronType=Prs",
25
+ "10": "n,代名詞,人称,起格,PRON,Person=2|PronType=Prs",
26
+ "11": "n,代名詞,人称,起格,PRON,Person=3|PronType=Prs",
27
+ "12": "n,代名詞,人称,起格,PRON,PronType=Prs",
28
+ "13": "n,代名詞,指示,*,PRON,PronType=Dem",
29
+ "14": "n,代名詞,疑問,*,PRON,PronType=Int",
30
+ "15": "n,名詞,不可譲,属性,NOUN,_",
31
+ "16": "n,名詞,不可譲,疾病,NOUN,_",
32
+ "17": "n,名詞,不可譲,身体,NOUN,_",
33
+ "18": "n,名詞,主体,動物,NOUN,_",
34
+ "19": "n,名詞,主体,国名,PROPN,Case=Loc|NameType=Nat",
35
+ "20": "n,名詞,主体,書物,NOUN,_",
36
+ "21": "n,名詞,主体,機関,NOUN,_",
37
+ "22": "n,名詞,主体,集団,NOUN,_",
38
+ "23": "n,名詞,人,その他の人名,PROPN,NameType=Prs",
39
+ "24": "n,名詞,人,人,NOUN,_",
40
+ "25": "n,名詞,人,名,PROPN,NameType=Giv",
41
+ "26": "n,名詞,人,姓氏,PROPN,NameType=Sur",
42
+ "27": "n,名詞,人,役割,NOUN,_",
43
+ "28": "n,名詞,人,複合的人名,PROPN,NameType=Prs",
44
+ "29": "n,名詞,人,関係,NOUN,_",
45
+ "30": "n,名詞,制度,儀礼,NOUN,_",
46
+ "31": "n,名詞,制度,場,NOUN,Case=Loc",
47
+ "32": "n,名詞,可搬,乗り物,NOUN,_",
48
+ "33": "n,名詞,可搬,伝達,NOUN,_",
49
+ "34": "n,名詞,可搬,成果物,NOUN,_",
50
+ "35": "n,名詞,可搬,糧食,NOUN,_",
51
+ "36": "n,名詞,可搬,道具,NOUN,_",
52
+ "37": "n,名詞,固定物,地名,PROPN,Case=Loc|NameType=Geo",
53
+ "38": "n,名詞,固定物,地形,NOUN,Case=Loc",
54
+ "39": "n,名詞,固定物,建造物,NOUN,Case=Loc",
55
+ "40": "n,名詞,固定物,樹木,NOUN,_",
56
+ "41": "n,名詞,固定物,関係,NOUN,Case=Loc",
57
+ "42": "n,名詞,外観,人,NOUN,_",
58
+ "43": "n,名詞,天象,天文,NOUN,_",
59
+ "44": "n,名詞,天象,怪異,NOUN,_",
60
+ "45": "n,名詞,天象,気象,NOUN,_",
61
+ "46": "n,名詞,度量衡,*,NOUN,NounType=Clf",
62
+ "47": "n,名詞,思考,*,NOUN,_",
63
+ "48": "n,名詞,描写,形質,NOUN,_",
64
+ "49": "n,名詞,描写,態度,NOUN,_",
65
+ "50": "n,名詞,数量,*,NOUN,_",
66
+ "51": "n,名詞,時,*,NOUN,Case=Tem",
67
+ "52": "n,名詞,行為,*,NOUN,_",
68
+ "53": "n,数詞,干支,*,NUM,NumType=Ord",
69
+ "54": "n,数詞,数,*,NUM,_",
70
+ "55": "n,数詞,数字,*,NUM,_",
71
+ "56": "p,助詞,句末,*,PART,_",
72
+ "57": "p,助詞,句頭,*,PART,_",
73
+ "58": "p,助詞,接続,並列,CCONJ,_",
74
+ "59": "p,助詞,接続,体言化,PART,_",
75
+ "60": "p,助詞,接続,属格,SCONJ,_",
76
+ "61": "p,助詞,提示,*,PART,_",
77
+ "62": "p,感嘆詞,*,*,INTJ,_",
78
+ "63": "p,接尾辞,*,*,PART,_",
79
+ "64": "s,文字,*,*,SYM,_",
80
+ "65": "s,記号,一般,*,SYM,_",
81
+ "66": "s,記号,句点,*,PUNCT,_",
82
+ "67": "s,記号,読点,*,PUNCT,_",
83
+ "68": "v,前置詞,基盤,*,ADP,_",
84
+ "69": "v,前置詞,源泉,*,ADP,_",
85
+ "70": "v,前置詞,経由,*,ADP,_",
86
+ "71": "v,前置詞,関係,*,ADP,_",
87
+ "72": "v,副詞,判断,推定,ADV,_",
88
+ "73": "v,副詞,判断,確定,ADV,_",
89
+ "74": "v,副詞,判断,逆接,ADV,_",
90
+ "75": "v,副詞,否定,体言否定,ADV,Polarity=Neg",
91
+ "76": "v,副詞,否定,有界,ADV,Polarity=Neg",
92
+ "77": "v,副詞,否定,無界,ADV,Polarity=Neg",
93
+ "78": "v,副詞,否定,禁止,ADV,Polarity=Neg",
94
+ "79": "v,副詞,描写,*,ADV,_",
95
+ "80": "v,副詞,時相,変化,ADV,AdvType=Tim",
96
+ "81": "v,副詞,時相,完了,ADV,AdvType=Tim|Aspect=Perf",
97
+ "82": "v,副詞,時相,将来,ADV,AdvType=Tim|Tense=Fut",
98
+ "83": "v,副詞,時相,恒常,ADV,AdvType=Tim",
99
+ "84": "v,副詞,時相,現在,ADV,AdvType=Tim|Tense=Pres",
100
+ "85": "v,副詞,時相,終局,ADV,AdvType=Tim",
101
+ "86": "v,副詞,時相,継起,ADV,AdvType=Tim",
102
+ "87": "v,副詞,時相,緊接,ADV,AdvType=Tim",
103
+ "88": "v,副詞,時相,過去,ADV,AdvType=Tim|Tense=Past",
104
+ "89": "v,副詞,疑問,原因,ADV,AdvType=Cau",
105
+ "90": "v,副詞,疑問,反語,ADV,_",
106
+ "91": "v,副詞,疑問,所在,ADV,_",
107
+ "92": "v,副詞,程度,やや高度,ADV,AdvType=Deg|Degree=Cmp",
108
+ "93": "v,副詞,程度,極度,ADV,AdvType=Deg|Degree=Sup",
109
+ "94": "v,副詞,程度,軽度,ADV,AdvType=Deg|Degree=Pos",
110
+ "95": "v,副詞,範囲,共同,ADV,_",
111
+ "96": "v,副詞,範囲,総括,ADV,_",
112
+ "97": "v,副詞,範囲,限定,ADV,_",
113
+ "98": "v,副詞,頻度,偶発,ADV,_",
114
+ "99": "v,副詞,頻度,重複,ADV,_",
115
+ "100": "v,副詞,頻度,頻繁,ADV,_",
116
+ "101": "v,助動詞,受動,*,AUX,Voice=Pass",
117
+ "102": "v,助動詞,可能,*,AUX,Mood=Pot",
118
+ "103": "v,助動詞,必要,*,AUX,Mood=Nec",
119
+ "104": "v,助動詞,願望,*,AUX,Mood=Des",
120
+ "105": "v,動詞,変化,制度,VERB,_",
121
+ "106": "v,動詞,変化,性質,VERB,_",
122
+ "107": "v,動詞,変化,生物,VERB,_",
123
+ "108": "v,動詞,存在,存在,VERB,Polarity=Neg",
124
+ "109": "v,動詞,存在,存在,VERB,VerbType=Cop",
125
+ "110": "v,動詞,存在,存在,VERB,_",
126
+ "111": "v,動詞,描写,境遇,VERB,Degree=Pos",
127
+ "112": "v,動詞,描写,形質,VERB,Degree=Pos",
128
+ "113": "v,動詞,描写,態度,VERB,Degree=Pos",
129
+ "114": "v,動詞,描写,量,VERB,Degree=Pos",
130
+ "115": "v,動詞,行為,交流,VERB,_",
131
+ "116": "v,動詞,行為,伝達,VERB,_",
132
+ "117": "v,動詞,行為,使役,VERB,_",
133
+ "118": "v,動詞,行為,儀礼,VERB,_",
134
+ "119": "v,動詞,行為,分類,VERB,Degree=Equ",
135
+ "120": "v,動詞,行為,動作,VERB,_",
136
+ "121": "v,動詞,行為,姿勢,VERB,_",
137
+ "122": "v,動詞,行為,役割,VERB,_",
138
+ "123": "v,動詞,行為,得失,VERB,_",
139
+ "124": "v,動詞,行為,態度,VERB,_",
140
+ "125": "v,動詞,行為,生産,VERB,_",
141
+ "126": "v,動詞,行為,移動,VERB,_",
142
+ "127": "v,動詞,行為,設置,VERB,_",
143
+ "128": "v,動詞,行為,飲食,VERB,_"
144
+ },
145
+ "initializer_range": 0.02,
146
+ "intermediate_size": 4096,
147
+ "label2id": {
148
+ "n,代名詞,人称,他,PRON,Person=1|PronType=Prs": 0,
149
+ "n,代名詞,人称,他,PRON,Person=2|PronType=Prs": 1,
150
+ "n,代名詞,人称,他,PRON,Person=3|PronType=Prs": 2,
151
+ "n,代名詞,人称,他,PRON,PronType=Prs": 3,
152
+ "n,代名詞,人称,他,PRON,PronType=Prs|Reflex=Yes": 4,
153
+ "n,代名詞,人称,止格,PRON,Person=1|PronType=Prs": 5,
154
+ "n,代名詞,人称,止格,PRON,Person=2|PronType=Prs": 6,
155
+ "n,代名詞,人称,止格,PRON,Person=3|PronType=Prs": 7,
156
+ "n,代名詞,人称,止格,PRON,PronType=Prs": 8,
157
+ "n,代名詞,人称,起格,PRON,Person=1|PronType=Prs": 9,
158
+ "n,代名詞,人称,起格,PRON,Person=2|PronType=Prs": 10,
159
+ "n,代名詞,人称,起格,PRON,Person=3|PronType=Prs": 11,
160
+ "n,代名詞,人称,起格,PRON,PronType=Prs": 12,
161
+ "n,代名詞,指示,*,PRON,PronType=Dem": 13,
162
+ "n,代名詞,疑問,*,PRON,PronType=Int": 14,
163
+ "n,名詞,不可譲,属性,NOUN,_": 15,
164
+ "n,名詞,不可譲,疾病,NOUN,_": 16,
165
+ "n,名詞,不可譲,身体,NOUN,_": 17,
166
+ "n,名詞,主体,動物,NOUN,_": 18,
167
+ "n,名詞,主体,国名,PROPN,Case=Loc|NameType=Nat": 19,
168
+ "n,名詞,主体,書物,NOUN,_": 20,
169
+ "n,名詞,主体,機関,NOUN,_": 21,
170
+ "n,名詞,主体,集団,NOUN,_": 22,
171
+ "n,名詞,人,その他の人名,PROPN,NameType=Prs": 23,
172
+ "n,名詞,人,人,NOUN,_": 24,
173
+ "n,名詞,人,名,PROPN,NameType=Giv": 25,
174
+ "n,名詞,人,姓氏,PROPN,NameType=Sur": 26,
175
+ "n,名詞,人,役割,NOUN,_": 27,
176
+ "n,名詞,人,複合的人名,PROPN,NameType=Prs": 28,
177
+ "n,名詞,人,関係,NOUN,_": 29,
178
+ "n,名詞,制度,儀礼,NOUN,_": 30,
179
+ "n,名詞,制度,場,NOUN,Case=Loc": 31,
180
+ "n,名詞,可搬,乗り物,NOUN,_": 32,
181
+ "n,名詞,可搬,伝達,NOUN,_": 33,
182
+ "n,名詞,可搬,成果物,NOUN,_": 34,
183
+ "n,名詞,可搬,糧食,NOUN,_": 35,
184
+ "n,名詞,可搬,道具,NOUN,_": 36,
185
+ "n,名詞,固定物,地名,PROPN,Case=Loc|NameType=Geo": 37,
186
+ "n,名詞,固定物,地形,NOUN,Case=Loc": 38,
187
+ "n,名詞,固定物,建造物,NOUN,Case=Loc": 39,
188
+ "n,名詞,固定物,樹木,NOUN,_": 40,
189
+ "n,名詞,固定物,関係,NOUN,Case=Loc": 41,
190
+ "n,名詞,外観,人,NOUN,_": 42,
191
+ "n,名詞,天象,天文,NOUN,_": 43,
192
+ "n,名詞,天象,怪異,NOUN,_": 44,
193
+ "n,名詞,天象,気象,NOUN,_": 45,
194
+ "n,名詞,度量衡,*,NOUN,NounType=Clf": 46,
195
+ "n,名詞,思考,*,NOUN,_": 47,
196
+ "n,名詞,描写,形質,NOUN,_": 48,
197
+ "n,名詞,描写,態度,NOUN,_": 49,
198
+ "n,名詞,数量,*,NOUN,_": 50,
199
+ "n,名詞,時,*,NOUN,Case=Tem": 51,
200
+ "n,名詞,行為,*,NOUN,_": 52,
201
+ "n,数詞,干支,*,NUM,NumType=Ord": 53,
202
+ "n,数詞,数,*,NUM,_": 54,
203
+ "n,数詞,数字,*,NUM,_": 55,
204
+ "p,助詞,句末,*,PART,_": 56,
205
+ "p,助詞,句頭,*,PART,_": 57,
206
+ "p,助詞,接続,並列,CCONJ,_": 58,
207
+ "p,助詞,接続,体言化,PART,_": 59,
208
+ "p,助詞,接続,属格,SCONJ,_": 60,
209
+ "p,助詞,提示,*,PART,_": 61,
210
+ "p,感嘆詞,*,*,INTJ,_": 62,
211
+ "p,接尾辞,*,*,PART,_": 63,
212
+ "s,文字,*,*,SYM,_": 64,
213
+ "s,記号,一般,*,SYM,_": 65,
214
+ "s,記号,句点,*,PUNCT,_": 66,
215
+ "s,記号,読点,*,PUNCT,_": 67,
216
+ "v,前置詞,基盤,*,ADP,_": 68,
217
+ "v,前置詞,源泉,*,ADP,_": 69,
218
+ "v,前置詞,経由,*,ADP,_": 70,
219
+ "v,前置詞,関係,*,ADP,_": 71,
220
+ "v,副詞,判断,推定,ADV,_": 72,
221
+ "v,副詞,判断,確定,ADV,_": 73,
222
+ "v,副詞,判断,逆接,ADV,_": 74,
223
+ "v,副詞,否定,体言否定,ADV,Polarity=Neg": 75,
224
+ "v,副詞,否定,有界,ADV,Polarity=Neg": 76,
225
+ "v,副詞,否定,無界,ADV,Polarity=Neg": 77,
226
+ "v,副詞,否定,禁止,ADV,Polarity=Neg": 78,
227
+ "v,副詞,描写,*,ADV,_": 79,
228
+ "v,副詞,時相,変化,ADV,AdvType=Tim": 80,
229
+ "v,副詞,時相,完了,ADV,AdvType=Tim|Aspect=Perf": 81,
230
+ "v,副詞,時相,将来,ADV,AdvType=Tim|Tense=Fut": 82,
231
+ "v,副詞,時相,恒常,ADV,AdvType=Tim": 83,
232
+ "v,副詞,時相,現在,ADV,AdvType=Tim|Tense=Pres": 84,
233
+ "v,副詞,時相,終局,ADV,AdvType=Tim": 85,
234
+ "v,副詞,時相,継起,ADV,AdvType=Tim": 86,
235
+ "v,副詞,時相,緊接,ADV,AdvType=Tim": 87,
236
+ "v,副詞,時相,過去,ADV,AdvType=Tim|Tense=Past": 88,
237
+ "v,副詞,疑問,原因,ADV,AdvType=Cau": 89,
238
+ "v,副詞,疑問,反語,ADV,_": 90,
239
+ "v,副詞,疑問,所在,ADV,_": 91,
240
+ "v,副詞,程度,やや高度,ADV,AdvType=Deg|Degree=Cmp": 92,
241
+ "v,副詞,程度,極度,ADV,AdvType=Deg|Degree=Sup": 93,
242
+ "v,副詞,程度,軽度,ADV,AdvType=Deg|Degree=Pos": 94,
243
+ "v,副詞,範囲,共同,ADV,_": 95,
244
+ "v,副詞,範囲,総括,ADV,_": 96,
245
+ "v,副詞,範囲,限定,ADV,_": 97,
246
+ "v,副詞,頻度,偶発,ADV,_": 98,
247
+ "v,副詞,頻度,重複,ADV,_": 99,
248
+ "v,副詞,頻度,頻繁,ADV,_": 100,
249
+ "v,助動詞,受動,*,AUX,Voice=Pass": 101,
250
+ "v,助動詞,可能,*,AUX,Mood=Pot": 102,
251
+ "v,助動詞,必要,*,AUX,Mood=Nec": 103,
252
+ "v,助動詞,願望,*,AUX,Mood=Des": 104,
253
+ "v,動詞,変化,制度,VERB,_": 105,
254
+ "v,動詞,変化,性質,VERB,_": 106,
255
+ "v,動詞,変化,生物,VERB,_": 107,
256
+ "v,動詞,存在,存在,VERB,Polarity=Neg": 108,
257
+ "v,動詞,存在,存在,VERB,VerbType=Cop": 109,
258
+ "v,動詞,存在,存在,VERB,_": 110,
259
+ "v,動詞,描写,境遇,VERB,Degree=Pos": 111,
260
+ "v,動詞,描写,形質,VERB,Degree=Pos": 112,
261
+ "v,動詞,描写,態度,VERB,Degree=Pos": 113,
262
+ "v,動詞,描写,量,VERB,Degree=Pos": 114,
263
+ "v,動詞,行為,交流,VERB,_": 115,
264
+ "v,動詞,行為,伝達,VERB,_": 116,
265
+ "v,動詞,行為,使役,VERB,_": 117,
266
+ "v,動詞,行為,儀礼,VERB,_": 118,
267
+ "v,動詞,行為,分類,VERB,Degree=Equ": 119,
268
+ "v,動詞,行為,動作,VERB,_": 120,
269
+ "v,動詞,行為,姿勢,VERB,_": 121,
270
+ "v,動詞,行為,役割,VERB,_": 122,
271
+ "v,動詞,行為,得失,VERB,_": 123,
272
+ "v,動詞,行為,態度,VERB,_": 124,
273
+ "v,動詞,行為,生産,VERB,_": 125,
274
+ "v,動詞,行為,移動,VERB,_": 126,
275
+ "v,動詞,行為,設置,VERB,_": 127,
276
+ "v,動詞,行為,飲食,VERB,_": 128
277
+ },
278
+ "layer_norm_eps": 1e-05,
279
+ "max_position_embeddings": 514,
280
+ "model_type": "roberta",
281
+ "num_attention_heads": 16,
282
+ "num_hidden_layers": 24,
283
+ "pad_token_id": 1,
284
+ "position_embedding_type": "absolute",
285
+ "tokenizer_class": "BertTokenizer",
286
+ "torch_dtype": "float32",
287
+ "transformers_version": "4.9.2",
288
+ "type_vocab_size": 1,
289
+ "use_cache": true,
290
+ "vocab_size": 23292
291
+ }
suparkanbun/models/guwenbert-large.pos/filesize.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pytorch_model.bin 1307456849
2
+ guwenbert-large.supar 1358476727
suparkanbun/models/guwenbert-large.pos/guwenbert-large.supar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f0040a8bf03e6e15bb0854ea911d5e4f0533d5f057fd0cb511e41a89872ee29
3
+ size 1358476727
suparkanbun/models/guwenbert-large.pos/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc56f044840d62fd4f332d76d81275f04ecd66509f1699eadc5ce9353a0b2385
3
+ size 1307456849
suparkanbun/models/guwenbert-large.pos/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/guwenbert-large.pos/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "ethanyt/guwenbert-large", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/guwenbert-large.pos/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
suparkanbun/models/sikubert.danku/config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SIKU-BERT/sikubert",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "directionality": "bidi",
8
+ "finetuning_task": "ner",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "LABEL_0": 0,
25
+ "LABEL_1": 1,
26
+ "LABEL_2": 2,
27
+ "LABEL_3": 3,
28
+ "LABEL_4": 4,
29
+ "LABEL_5": 5
30
+ },
31
+ "layer_norm_eps": 1e-12,
32
+ "max_position_embeddings": 512,
33
+ "model_type": "bert",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.9.2",
45
+ "type_vocab_size": 2,
46
+ "use_cache": true,
47
+ "vocab_size": 29791
48
+ }
suparkanbun/models/sikubert.danku/filesize.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pytorch_model.bin 433425937
suparkanbun/models/sikubert.danku/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b655295a6358965d08f69fa19968d7922f4aff9dafcad10521a8c8dbdbd202b0
3
+ size 433425937
suparkanbun/models/sikubert.danku/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/sikubert.danku/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "SIKU-BERT/sikubert", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/sikubert.danku/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
suparkanbun/models/sikubert.pos/config.json ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SIKU-BERT/sikubert",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "directionality": "bidi",
8
+ "finetuning_task": "ner",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "n,代名詞,人称,他,PRON,Person=1|PronType=Prs",
15
+ "1": "n,代名詞,人称,他,PRON,Person=2|PronType=Prs",
16
+ "2": "n,代名詞,人称,他,PRON,Person=3|PronType=Prs",
17
+ "3": "n,代名詞,人称,他,PRON,PronType=Prs",
18
+ "4": "n,代名詞,人称,他,PRON,PronType=Prs|Reflex=Yes",
19
+ "5": "n,代名詞,人称,止格,PRON,Person=1|PronType=Prs",
20
+ "6": "n,代名詞,人称,止格,PRON,Person=2|PronType=Prs",
21
+ "7": "n,代名詞,人称,止格,PRON,Person=3|PronType=Prs",
22
+ "8": "n,代名詞,人称,止格,PRON,PronType=Prs",
23
+ "9": "n,代名詞,人称,起格,PRON,Person=1|PronType=Prs",
24
+ "10": "n,代名詞,人称,起格,PRON,Person=2|PronType=Prs",
25
+ "11": "n,代名詞,人称,起格,PRON,Person=3|PronType=Prs",
26
+ "12": "n,代名詞,人称,起格,PRON,PronType=Prs",
27
+ "13": "n,代名詞,指示,*,PRON,PronType=Dem",
28
+ "14": "n,代名詞,疑問,*,PRON,PronType=Int",
29
+ "15": "n,名詞,不可譲,属性,NOUN,_",
30
+ "16": "n,名詞,不可譲,疾病,NOUN,_",
31
+ "17": "n,名詞,不可譲,身体,NOUN,_",
32
+ "18": "n,名詞,主体,動物,NOUN,_",
33
+ "19": "n,名詞,主体,国名,PROPN,Case=Loc|NameType=Nat",
34
+ "20": "n,名詞,主体,書物,NOUN,_",
35
+ "21": "n,名詞,主体,機関,NOUN,_",
36
+ "22": "n,名詞,主体,集団,NOUN,_",
37
+ "23": "n,名詞,人,その他の人名,PROPN,NameType=Prs",
38
+ "24": "n,名詞,人,人,NOUN,_",
39
+ "25": "n,名詞,人,名,PROPN,NameType=Giv",
40
+ "26": "n,名詞,人,姓氏,PROPN,NameType=Sur",
41
+ "27": "n,名詞,人,役割,NOUN,_",
42
+ "28": "n,名詞,人,複合的人名,PROPN,NameType=Prs",
43
+ "29": "n,名詞,人,関係,NOUN,_",
44
+ "30": "n,名詞,制度,儀礼,NOUN,_",
45
+ "31": "n,名詞,制度,場,NOUN,Case=Loc",
46
+ "32": "n,名詞,可搬,乗り物,NOUN,_",
47
+ "33": "n,名詞,可搬,伝達,NOUN,_",
48
+ "34": "n,名詞,可搬,成果物,NOUN,_",
49
+ "35": "n,名詞,可搬,糧食,NOUN,_",
50
+ "36": "n,名詞,可搬,道具,NOUN,_",
51
+ "37": "n,名詞,固定物,地名,PROPN,Case=Loc|NameType=Geo",
52
+ "38": "n,名詞,固定物,地形,NOUN,Case=Loc",
53
+ "39": "n,名詞,固定物,建造物,NOUN,Case=Loc",
54
+ "40": "n,名詞,固定物,樹木,NOUN,_",
55
+ "41": "n,名詞,固定物,関係,NOUN,Case=Loc",
56
+ "42": "n,名詞,外観,人,NOUN,_",
57
+ "43": "n,名詞,天象,天文,NOUN,_",
58
+ "44": "n,名詞,天象,怪異,NOUN,_",
59
+ "45": "n,名詞,天象,気象,NOUN,_",
60
+ "46": "n,名詞,度量衡,*,NOUN,NounType=Clf",
61
+ "47": "n,名詞,思考,*,NOUN,_",
62
+ "48": "n,名詞,描写,形質,NOUN,_",
63
+ "49": "n,名詞,描写,態度,NOUN,_",
64
+ "50": "n,名詞,数量,*,NOUN,_",
65
+ "51": "n,名詞,時,*,NOUN,Case=Tem",
66
+ "52": "n,名詞,行為,*,NOUN,_",
67
+ "53": "n,数詞,干支,*,NUM,NumType=Ord",
68
+ "54": "n,数詞,数,*,NUM,_",
69
+ "55": "n,数詞,数字,*,NUM,_",
70
+ "56": "p,助詞,句末,*,PART,_",
71
+ "57": "p,助詞,句頭,*,PART,_",
72
+ "58": "p,助詞,接続,並列,CCONJ,_",
73
+ "59": "p,助詞,接続,体言化,PART,_",
74
+ "60": "p,助詞,接続,属格,SCONJ,_",
75
+ "61": "p,助詞,提示,*,PART,_",
76
+ "62": "p,感嘆詞,*,*,INTJ,_",
77
+ "63": "p,接尾辞,*,*,PART,_",
78
+ "64": "s,文字,*,*,SYM,_",
79
+ "65": "s,記号,一般,*,SYM,_",
80
+ "66": "s,記号,句点,*,PUNCT,_",
81
+ "67": "s,記号,読点,*,PUNCT,_",
82
+ "68": "v,前置詞,基盤,*,ADP,_",
83
+ "69": "v,前置詞,源泉,*,ADP,_",
84
+ "70": "v,前置詞,経由,*,ADP,_",
85
+ "71": "v,前置詞,関係,*,ADP,_",
86
+ "72": "v,副詞,判断,推定,ADV,_",
87
+ "73": "v,副詞,判断,確定,ADV,_",
88
+ "74": "v,副詞,判断,逆接,ADV,_",
89
+ "75": "v,副詞,否定,体言否定,ADV,Polarity=Neg",
90
+ "76": "v,副詞,否定,有界,ADV,Polarity=Neg",
91
+ "77": "v,副詞,否定,無界,ADV,Polarity=Neg",
92
+ "78": "v,副詞,否定,禁止,ADV,Polarity=Neg",
93
+ "79": "v,副詞,描写,*,ADV,_",
94
+ "80": "v,副詞,時相,変化,ADV,AdvType=Tim",
95
+ "81": "v,副詞,時相,完了,ADV,AdvType=Tim|Aspect=Perf",
96
+ "82": "v,副詞,時相,将来,ADV,AdvType=Tim|Tense=Fut",
97
+ "83": "v,副詞,時相,恒常,ADV,AdvType=Tim",
98
+ "84": "v,副詞,時相,現在,ADV,AdvType=Tim|Tense=Pres",
99
+ "85": "v,副詞,時相,終局,ADV,AdvType=Tim",
100
+ "86": "v,副詞,時相,継起,ADV,AdvType=Tim",
101
+ "87": "v,副詞,時相,緊接,ADV,AdvType=Tim",
102
+ "88": "v,副詞,時相,過去,ADV,AdvType=Tim|Tense=Past",
103
+ "89": "v,副詞,疑問,原因,ADV,AdvType=Cau",
104
+ "90": "v,副詞,疑問,反語,ADV,_",
105
+ "91": "v,副詞,疑問,所在,ADV,_",
106
+ "92": "v,副詞,程度,やや高度,ADV,AdvType=Deg|Degree=Cmp",
107
+ "93": "v,副詞,程度,極度,ADV,AdvType=Deg|Degree=Sup",
108
+ "94": "v,副詞,程度,軽度,ADV,AdvType=Deg|Degree=Pos",
109
+ "95": "v,副詞,範囲,共同,ADV,_",
110
+ "96": "v,副詞,範囲,総括,ADV,_",
111
+ "97": "v,副詞,範囲,限定,ADV,_",
112
+ "98": "v,副詞,頻度,偶発,ADV,_",
113
+ "99": "v,副詞,頻度,重複,ADV,_",
114
+ "100": "v,副詞,頻度,頻繁,ADV,_",
115
+ "101": "v,助動詞,受動,*,AUX,Voice=Pass",
116
+ "102": "v,助動詞,可能,*,AUX,Mood=Pot",
117
+ "103": "v,助動詞,必要,*,AUX,Mood=Nec",
118
+ "104": "v,助動詞,願望,*,AUX,Mood=Des",
119
+ "105": "v,動詞,変化,制度,VERB,_",
120
+ "106": "v,動詞,変化,性質,VERB,_",
121
+ "107": "v,動詞,変化,生物,VERB,_",
122
+ "108": "v,動詞,存在,存在,VERB,Polarity=Neg",
123
+ "109": "v,動詞,存在,存在,VERB,VerbType=Cop",
124
+ "110": "v,動詞,存在,存在,VERB,_",
125
+ "111": "v,動詞,描写,境遇,VERB,Degree=Pos",
126
+ "112": "v,動詞,描写,形質,VERB,Degree=Pos",
127
+ "113": "v,動詞,描写,態度,VERB,Degree=Pos",
128
+ "114": "v,動詞,描写,量,VERB,Degree=Pos",
129
+ "115": "v,動詞,行為,交流,VERB,_",
130
+ "116": "v,動詞,行為,伝達,VERB,_",
131
+ "117": "v,動詞,行為,使役,VERB,_",
132
+ "118": "v,動詞,行為,儀礼,VERB,_",
133
+ "119": "v,動詞,行為,分類,VERB,Degree=Equ",
134
+ "120": "v,動詞,行為,動作,VERB,_",
135
+ "121": "v,動詞,行為,姿勢,VERB,_",
136
+ "122": "v,動詞,行為,役割,VERB,_",
137
+ "123": "v,動詞,行為,得失,VERB,_",
138
+ "124": "v,動詞,行為,態度,VERB,_",
139
+ "125": "v,動詞,行為,生産,VERB,_",
140
+ "126": "v,動詞,行為,移動,VERB,_",
141
+ "127": "v,動詞,行為,設置,VERB,_",
142
+ "128": "v,動詞,行為,飲食,VERB,_"
143
+ },
144
+ "initializer_range": 0.02,
145
+ "intermediate_size": 3072,
146
+ "label2id": {
147
+ "n,代名詞,人称,他,PRON,Person=1|PronType=Prs": 0,
148
+ "n,代名詞,人称,他,PRON,Person=2|PronType=Prs": 1,
149
+ "n,代名詞,人称,他,PRON,Person=3|PronType=Prs": 2,
150
+ "n,代名詞,人称,他,PRON,PronType=Prs": 3,
151
+ "n,代名詞,人称,他,PRON,PronType=Prs|Reflex=Yes": 4,
152
+ "n,代名詞,人称,止格,PRON,Person=1|PronType=Prs": 5,
153
+ "n,代名詞,人称,止格,PRON,Person=2|PronType=Prs": 6,
154
+ "n,代名詞,人称,止格,PRON,Person=3|PronType=Prs": 7,
155
+ "n,代名詞,人称,止格,PRON,PronType=Prs": 8,
156
+ "n,代名詞,人称,起格,PRON,Person=1|PronType=Prs": 9,
157
+ "n,代名詞,人称,起格,PRON,Person=2|PronType=Prs": 10,
158
+ "n,代名詞,人称,起格,PRON,Person=3|PronType=Prs": 11,
159
+ "n,代名詞,人称,起格,PRON,PronType=Prs": 12,
160
+ "n,代名詞,指示,*,PRON,PronType=Dem": 13,
161
+ "n,代名詞,疑問,*,PRON,PronType=Int": 14,
162
+ "n,名詞,不可譲,属性,NOUN,_": 15,
163
+ "n,名詞,不可譲,疾病,NOUN,_": 16,
164
+ "n,名詞,不可譲,身体,NOUN,_": 17,
165
+ "n,名詞,主体,動物,NOUN,_": 18,
166
+ "n,名詞,主体,国名,PROPN,Case=Loc|NameType=Nat": 19,
167
+ "n,名詞,主体,書物,NOUN,_": 20,
168
+ "n,名詞,主体,機関,NOUN,_": 21,
169
+ "n,名詞,主体,集団,NOUN,_": 22,
170
+ "n,名詞,人,その他の人名,PROPN,NameType=Prs": 23,
171
+ "n,名詞,人,人,NOUN,_": 24,
172
+ "n,名詞,人,名,PROPN,NameType=Giv": 25,
173
+ "n,名詞,人,姓氏,PROPN,NameType=Sur": 26,
174
+ "n,名詞,人,役割,NOUN,_": 27,
175
+ "n,名詞,人,複合的人名,PROPN,NameType=Prs": 28,
176
+ "n,名詞,人,関係,NOUN,_": 29,
177
+ "n,名詞,制度,儀礼,NOUN,_": 30,
178
+ "n,名詞,制度,場,NOUN,Case=Loc": 31,
179
+ "n,名詞,可搬,乗り物,NOUN,_": 32,
180
+ "n,名詞,可搬,伝達,NOUN,_": 33,
181
+ "n,名詞,可搬,成果物,NOUN,_": 34,
182
+ "n,名詞,可搬,糧食,NOUN,_": 35,
183
+ "n,名詞,可搬,道具,NOUN,_": 36,
184
+ "n,名詞,固定物,地名,PROPN,Case=Loc|NameType=Geo": 37,
185
+ "n,名詞,固定物,地形,NOUN,Case=Loc": 38,
186
+ "n,名詞,固定物,建造物,NOUN,Case=Loc": 39,
187
+ "n,名詞,固定物,樹木,NOUN,_": 40,
188
+ "n,名詞,固定物,関係,NOUN,Case=Loc": 41,
189
+ "n,名詞,外観,人,NOUN,_": 42,
190
+ "n,名詞,天象,天文,NOUN,_": 43,
191
+ "n,名詞,天象,怪異,NOUN,_": 44,
192
+ "n,名詞,天象,気象,NOUN,_": 45,
193
+ "n,名詞,度量衡,*,NOUN,NounType=Clf": 46,
194
+ "n,名詞,思考,*,NOUN,_": 47,
195
+ "n,名詞,描写,形質,NOUN,_": 48,
196
+ "n,名詞,描写,態度,NOUN,_": 49,
197
+ "n,名詞,数量,*,NOUN,_": 50,
198
+ "n,名詞,時,*,NOUN,Case=Tem": 51,
199
+ "n,名詞,行為,*,NOUN,_": 52,
200
+ "n,数詞,干支,*,NUM,NumType=Ord": 53,
201
+ "n,数詞,数,*,NUM,_": 54,
202
+ "n,数詞,数字,*,NUM,_": 55,
203
+ "p,助詞,句末,*,PART,_": 56,
204
+ "p,助詞,句頭,*,PART,_": 57,
205
+ "p,助詞,接続,並列,CCONJ,_": 58,
206
+ "p,助詞,接続,体言化,PART,_": 59,
207
+ "p,助詞,接続,属格,SCONJ,_": 60,
208
+ "p,助詞,提示,*,PART,_": 61,
209
+ "p,感嘆詞,*,*,INTJ,_": 62,
210
+ "p,接尾辞,*,*,PART,_": 63,
211
+ "s,文字,*,*,SYM,_": 64,
212
+ "s,記号,一般,*,SYM,_": 65,
213
+ "s,記号,句点,*,PUNCT,_": 66,
214
+ "s,記号,読点,*,PUNCT,_": 67,
215
+ "v,前置詞,基盤,*,ADP,_": 68,
216
+ "v,前置詞,源泉,*,ADP,_": 69,
217
+ "v,前置詞,経由,*,ADP,_": 70,
218
+ "v,前置詞,関係,*,ADP,_": 71,
219
+ "v,副詞,判断,推定,ADV,_": 72,
220
+ "v,副詞,判断,確定,ADV,_": 73,
221
+ "v,副詞,判断,逆接,ADV,_": 74,
222
+ "v,副詞,否定,体言否定,ADV,Polarity=Neg": 75,
223
+ "v,副詞,否定,有界,ADV,Polarity=Neg": 76,
224
+ "v,副詞,否定,無界,ADV,Polarity=Neg": 77,
225
+ "v,副詞,否定,禁止,ADV,Polarity=Neg": 78,
226
+ "v,副詞,描写,*,ADV,_": 79,
227
+ "v,副詞,時相,変化,ADV,AdvType=Tim": 80,
228
+ "v,副詞,時相,完了,ADV,AdvType=Tim|Aspect=Perf": 81,
229
+ "v,副詞,時相,将来,ADV,AdvType=Tim|Tense=Fut": 82,
230
+ "v,副詞,時相,恒常,ADV,AdvType=Tim": 83,
231
+ "v,副詞,時相,現在,ADV,AdvType=Tim|Tense=Pres": 84,
232
+ "v,副詞,時相,終局,ADV,AdvType=Tim": 85,
233
+ "v,副詞,時相,継起,ADV,AdvType=Tim": 86,
234
+ "v,副詞,時相,緊接,ADV,AdvType=Tim": 87,
235
+ "v,副詞,時相,過去,ADV,AdvType=Tim|Tense=Past": 88,
236
+ "v,副詞,疑問,原因,ADV,AdvType=Cau": 89,
237
+ "v,副詞,疑問,反語,ADV,_": 90,
238
+ "v,副詞,疑問,所在,ADV,_": 91,
239
+ "v,副詞,程度,やや高度,ADV,AdvType=Deg|Degree=Cmp": 92,
240
+ "v,副詞,程度,極度,ADV,AdvType=Deg|Degree=Sup": 93,
241
+ "v,副詞,程度,軽度,ADV,AdvType=Deg|Degree=Pos": 94,
242
+ "v,副詞,範囲,共同,ADV,_": 95,
243
+ "v,副詞,範囲,総括,ADV,_": 96,
244
+ "v,副詞,範囲,限定,ADV,_": 97,
245
+ "v,副詞,頻度,偶発,ADV,_": 98,
246
+ "v,副詞,頻度,重複,ADV,_": 99,
247
+ "v,副詞,頻度,頻繁,ADV,_": 100,
248
+ "v,助動詞,受動,*,AUX,Voice=Pass": 101,
249
+ "v,助動詞,可能,*,AUX,Mood=Pot": 102,
250
+ "v,助動詞,必要,*,AUX,Mood=Nec": 103,
251
+ "v,助動詞,願望,*,AUX,Mood=Des": 104,
252
+ "v,動詞,変化,制度,VERB,_": 105,
253
+ "v,動詞,変化,性質,VERB,_": 106,
254
+ "v,動詞,変化,生物,VERB,_": 107,
255
+ "v,動詞,存在,存在,VERB,Polarity=Neg": 108,
256
+ "v,動詞,存在,存在,VERB,VerbType=Cop": 109,
257
+ "v,動詞,存在,存在,VERB,_": 110,
258
+ "v,動詞,描写,境遇,VERB,Degree=Pos": 111,
259
+ "v,動詞,描写,形質,VERB,Degree=Pos": 112,
260
+ "v,動詞,描写,態度,VERB,Degree=Pos": 113,
261
+ "v,動詞,描写,量,VERB,Degree=Pos": 114,
262
+ "v,動詞,行為,交流,VERB,_": 115,
263
+ "v,動詞,行為,伝達,VERB,_": 116,
264
+ "v,動詞,行為,使役,VERB,_": 117,
265
+ "v,動詞,行為,儀礼,VERB,_": 118,
266
+ "v,動詞,行為,分類,VERB,Degree=Equ": 119,
267
+ "v,動詞,行為,動作,VERB,_": 120,
268
+ "v,動詞,行為,姿勢,VERB,_": 121,
269
+ "v,動詞,行為,役割,VERB,_": 122,
270
+ "v,動詞,行為,得失,VERB,_": 123,
271
+ "v,動詞,行為,態度,VERB,_": 124,
272
+ "v,動詞,行為,生産,VERB,_": 125,
273
+ "v,動詞,行為,移動,VERB,_": 126,
274
+ "v,動詞,行為,設置,VERB,_": 127,
275
+ "v,動詞,行為,飲食,VERB,_": 128
276
+ },
277
+ "layer_norm_eps": 1e-12,
278
+ "max_position_embeddings": 512,
279
+ "model_type": "bert",
280
+ "num_attention_heads": 12,
281
+ "num_hidden_layers": 12,
282
+ "pad_token_id": 0,
283
+ "pooler_fc_size": 768,
284
+ "pooler_num_attention_heads": 12,
285
+ "pooler_num_fc_layers": 3,
286
+ "pooler_size_per_head": 128,
287
+ "pooler_type": "first_token_transform",
288
+ "position_embedding_type": "absolute",
289
+ "torch_dtype": "float32",
290
+ "transformers_version": "4.9.2",
291
+ "type_vocab_size": 2,
292
+ "use_cache": true,
293
+ "vocab_size": 29791
294
+ }
suparkanbun/models/sikubert.pos/filesize.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pytorch_model.bin 433804370
2
+ sikubert.supar 483258728
suparkanbun/models/sikubert.pos/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a0cde43bb172db79af9912f6f509db1c7f939a6a1bea4de2a1b3948a08d9583
3
+ size 433804370
suparkanbun/models/sikubert.pos/sikubert.supar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656918b314211befbfa8286ad69bb2617bd1af9447ff6178f0921e73730278ef
3
+ size 483258728
suparkanbun/models/sikubert.pos/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/sikubert.pos/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "SIKU-BERT/sikubert", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/sikubert.pos/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
suparkanbun/models/sikuroberta.danku/config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SIKU-BERT/sikuroberta",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "directionality": "bidi",
8
+ "finetuning_task": "ner",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "LABEL_0": 0,
25
+ "LABEL_1": 1,
26
+ "LABEL_2": 2,
27
+ "LABEL_3": 3,
28
+ "LABEL_4": 4,
29
+ "LABEL_5": 5
30
+ },
31
+ "layer_norm_eps": 1e-12,
32
+ "max_position_embeddings": 512,
33
+ "model_type": "bert",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.9.2",
45
+ "type_vocab_size": 2,
46
+ "use_cache": true,
47
+ "vocab_size": 29791
48
+ }
suparkanbun/models/sikuroberta.danku/filesize.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pytorch_model.bin 433425937
suparkanbun/models/sikuroberta.danku/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38fcddebe6c1d03849a74ca2bcd97c6e33a7b09322ffa88b0ebaf49cf33cf5e6
3
+ size 433425937
suparkanbun/models/sikuroberta.danku/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
suparkanbun/models/sikuroberta.danku/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "SIKU-BERT/sikuroberta", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
suparkanbun/models/sikuroberta.danku/vocab.txt ADDED
The diff for this file is too large to render. See raw diff