KoichiYasuoka committed on
Commit
7c180bc
·
1 Parent(s): ee99341

model improved

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. config.json +68 -71
  3. pytorch_model.bin +2 -2
  4. tokenizer.json +0 -0
README.md CHANGED
@@ -11,13 +11,14 @@ pipeline_tag: "token-classification"
11
  widget:
12
  - text: "itak=as awa pon rupne aynu ene itaki"
13
  - text: "イタカㇱ アワ ポン ルㇷ゚ネ アイヌ エネ イタキ"
 
14
  ---
15
 
16
  # deberta-base-ainu-ud-goeswith
17
 
18
  ## Model Description
19
 
20
- This is a DeBERTa(V2) model pre-trained on Ainu texts (both カタカナ and romanized) for POS-tagging and dependency-parsing (using `goeswith` for subwords), derived from [deberta-base-ainu-upos](https://huggingface.co/KoichiYasuoka/deberta-base-ainu-upos).
21
 
22
  ## How to Use
23
 
 
11
  widget:
12
  - text: "itak=as awa pon rupne aynu ene itaki"
13
  - text: "イタカㇱ アワ ポン ルㇷ゚ネ アイヌ エネ イタキ"
14
+ - text: "итак ас ава пон рупне айну ене итакі"
15
  ---
16
 
17
  # deberta-base-ainu-ud-goeswith
18
 
19
  ## Model Description
20
 
21
+ This is a DeBERTa(V2) model pre-trained on Ainu texts (in カタカナ, Roman, and Кириллица) for POS-tagging and dependency-parsing (using `goeswith` for subwords), derived from [deberta-base-ainu-upos](https://huggingface.co/KoichiYasuoka/deberta-base-ainu-upos).
22
 
23
  ## How to Use
24
 
config.json CHANGED
@@ -4,11 +4,6 @@
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
7
- "custom_pipelines": {
8
- "universal-dependencies": {
9
- "impl": "ud.UniversalDependenciesPipeline"
10
- }
11
- },
12
  "eos_token_id": 2,
13
  "hidden_act": "gelu",
14
  "hidden_dropout_prob": 0.1,
@@ -114,39 +109,40 @@
114
  "97": "PROPN|\u56fa\u6709\u540d\u8a5e|nsubj",
115
  "98": "PROPN|\u56fa\u6709\u540d\u8a5e|obl",
116
  "99": "PROPN|\u56fa\u6709\u540d\u8a5e|root",
117
- "100": "PUNCT|\u8a18\u53f7|obj",
118
- "101": "PUNCT|\u8a18\u53f7|punct",
119
- "102": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|advmod",
120
- "103": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|case",
121
- "104": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|mark",
122
- "105": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|parataxis",
123
- "106": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|root",
124
- "107": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|advmod",
125
- "108": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|case",
126
- "109": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc",
127
- "110": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark",
128
- "111": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root",
129
- "112": "SCONJ|\u63a5\u7d9a\u8a5e|case",
130
- "113": "SCONJ|\u63a5\u7d9a\u8a5e|mark",
131
- "114": "VERB|\u4ed6\u52d5\u8a5e|acl",
132
- "115": "VERB|\u4ed6\u52d5\u8a5e|advcl",
133
- "116": "VERB|\u4ed6\u52d5\u8a5e|amod",
134
- "117": "VERB|\u4ed6\u52d5\u8a5e|ccomp",
135
- "118": "VERB|\u4ed6\u52d5\u8a5e|conj",
136
- "119": "VERB|\u4ed6\u52d5\u8a5e|parataxis",
137
- "120": "VERB|\u4ed6\u52d5\u8a5e|root",
138
- "121": "VERB|\u5b8c\u5168\u52d5\u8a5e|acl",
139
- "122": "VERB|\u5b8c\u5168\u52d5\u8a5e|advcl",
140
- "123": "VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis",
141
- "124": "VERB|\u5b8c\u5168\u52d5\u8a5e|root",
142
- "125": "VERB|\u81ea\u52d5\u8a5e|acl",
143
- "126": "VERB|\u81ea\u52d5\u8a5e|advcl",
144
- "127": "VERB|\u81ea\u52d5\u8a5e|amod",
145
- "128": "VERB|\u81ea\u52d5\u8a5e|ccomp",
146
- "129": "VERB|\u81ea\u52d5\u8a5e|conj",
147
- "130": "VERB|\u81ea\u52d5\u8a5e|parataxis",
148
- "131": "VERB|\u81ea\u52d5\u8a5e|root",
149
- "132": "X|_|goeswith"
 
150
  },
151
  "initializer_range": 0.02,
152
  "intermediate_size": 3072,
@@ -251,39 +247,40 @@
251
  "PROPN|\u56fa\u6709\u540d\u8a5e|nsubj": 97,
252
  "PROPN|\u56fa\u6709\u540d\u8a5e|obl": 98,
253
  "PROPN|\u56fa\u6709\u540d\u8a5e|root": 99,
254
- "PUNCT|\u8a18\u53f7|obj": 100,
255
- "PUNCT|\u8a18\u53f7|punct": 101,
256
- "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|advmod": 102,
257
- "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|case": 103,
258
- "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|mark": 104,
259
- "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|parataxis": 105,
260
- "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|root": 106,
261
- "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|advmod": 107,
262
- "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|case": 108,
263
- "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc": 109,
264
- "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark": 110,
265
- "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root": 111,
266
- "SCONJ|\u63a5\u7d9a\u8a5e|case": 112,
267
- "SCONJ|\u63a5\u7d9a\u8a5e|mark": 113,
268
- "VERB|\u4ed6\u52d5\u8a5e|acl": 114,
269
- "VERB|\u4ed6\u52d5\u8a5e|advcl": 115,
270
- "VERB|\u4ed6\u52d5\u8a5e|amod": 116,
271
- "VERB|\u4ed6\u52d5\u8a5e|ccomp": 117,
272
- "VERB|\u4ed6\u52d5\u8a5e|conj": 118,
273
- "VERB|\u4ed6\u52d5\u8a5e|parataxis": 119,
274
- "VERB|\u4ed6\u52d5\u8a5e|root": 120,
275
- "VERB|\u5b8c\u5168\u52d5\u8a5e|acl": 121,
276
- "VERB|\u5b8c\u5168\u52d5\u8a5e|advcl": 122,
277
- "VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis": 123,
278
- "VERB|\u5b8c\u5168\u52d5\u8a5e|root": 124,
279
- "VERB|\u81ea\u52d5\u8a5e|acl": 125,
280
- "VERB|\u81ea\u52d5\u8a5e|advcl": 126,
281
- "VERB|\u81ea\u52d5\u8a5e|amod": 127,
282
- "VERB|\u81ea\u52d5\u8a5e|ccomp": 128,
283
- "VERB|\u81ea\u52d5\u8a5e|conj": 129,
284
- "VERB|\u81ea\u52d5\u8a5e|parataxis": 130,
285
- "VERB|\u81ea\u52d5\u8a5e|root": 131,
286
- "X|_|goeswith": 132
 
287
  },
288
  "layer_norm_eps": 1e-07,
289
  "max_position_embeddings": 512,
 
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
 
 
 
 
 
7
  "eos_token_id": 2,
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
 
109
  "97": "PROPN|\u56fa\u6709\u540d\u8a5e|nsubj",
110
  "98": "PROPN|\u56fa\u6709\u540d\u8a5e|obl",
111
  "99": "PROPN|\u56fa\u6709\u540d\u8a5e|root",
112
+ "100": "PUNCT|\u8a18\u53f7|nmod",
113
+ "101": "PUNCT|\u8a18\u53f7|obj",
114
+ "102": "PUNCT|\u8a18\u53f7|punct",
115
+ "103": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|advmod",
116
+ "104": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|case",
117
+ "105": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|mark",
118
+ "106": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|parataxis",
119
+ "107": "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|root",
120
+ "108": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|advmod",
121
+ "109": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|case",
122
+ "110": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc",
123
+ "111": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark",
124
+ "112": "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root",
125
+ "113": "SCONJ|\u63a5\u7d9a\u8a5e|case",
126
+ "114": "SCONJ|\u63a5\u7d9a\u8a5e|mark",
127
+ "115": "VERB|\u4ed6\u52d5\u8a5e|acl",
128
+ "116": "VERB|\u4ed6\u52d5\u8a5e|advcl",
129
+ "117": "VERB|\u4ed6\u52d5\u8a5e|amod",
130
+ "118": "VERB|\u4ed6\u52d5\u8a5e|ccomp",
131
+ "119": "VERB|\u4ed6\u52d5\u8a5e|conj",
132
+ "120": "VERB|\u4ed6\u52d5\u8a5e|parataxis",
133
+ "121": "VERB|\u4ed6\u52d5\u8a5e|root",
134
+ "122": "VERB|\u5b8c\u5168\u52d5\u8a5e|acl",
135
+ "123": "VERB|\u5b8c\u5168\u52d5\u8a5e|advcl",
136
+ "124": "VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis",
137
+ "125": "VERB|\u5b8c\u5168\u52d5\u8a5e|root",
138
+ "126": "VERB|\u81ea\u52d5\u8a5e|acl",
139
+ "127": "VERB|\u81ea\u52d5\u8a5e|advcl",
140
+ "128": "VERB|\u81ea\u52d5\u8a5e|amod",
141
+ "129": "VERB|\u81ea\u52d5\u8a5e|ccomp",
142
+ "130": "VERB|\u81ea\u52d5\u8a5e|conj",
143
+ "131": "VERB|\u81ea\u52d5\u8a5e|parataxis",
144
+ "132": "VERB|\u81ea\u52d5\u8a5e|root",
145
+ "133": "X|_|goeswith"
146
  },
147
  "initializer_range": 0.02,
148
  "intermediate_size": 3072,
 
247
  "PROPN|\u56fa\u6709\u540d\u8a5e|nsubj": 97,
248
  "PROPN|\u56fa\u6709\u540d\u8a5e|obl": 98,
249
  "PROPN|\u56fa\u6709\u540d\u8a5e|root": 99,
250
+ "PUNCT|\u8a18\u53f7|nmod": 100,
251
+ "PUNCT|\u8a18\u53f7|obj": 101,
252
+ "PUNCT|\u8a18\u53f7|punct": 102,
253
+ "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|advmod": 103,
254
+ "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|case": 104,
255
+ "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|mark": 105,
256
+ "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|parataxis": 106,
257
+ "SCONJ|\u5f8c\u7f6e\u526f\u8a5e|root": 107,
258
+ "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|advmod": 108,
259
+ "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|case": 109,
260
+ "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|cc": 110,
261
+ "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|mark": 111,
262
+ "SCONJ|\u63a5\u7d9a\u52a9\u8a5e|root": 112,
263
+ "SCONJ|\u63a5\u7d9a\u8a5e|case": 113,
264
+ "SCONJ|\u63a5\u7d9a\u8a5e|mark": 114,
265
+ "VERB|\u4ed6\u52d5\u8a5e|acl": 115,
266
+ "VERB|\u4ed6\u52d5\u8a5e|advcl": 116,
267
+ "VERB|\u4ed6\u52d5\u8a5e|amod": 117,
268
+ "VERB|\u4ed6\u52d5\u8a5e|ccomp": 118,
269
+ "VERB|\u4ed6\u52d5\u8a5e|conj": 119,
270
+ "VERB|\u4ed6\u52d5\u8a5e|parataxis": 120,
271
+ "VERB|\u4ed6\u52d5\u8a5e|root": 121,
272
+ "VERB|\u5b8c\u5168\u52d5\u8a5e|acl": 122,
273
+ "VERB|\u5b8c\u5168\u52d5\u8a5e|advcl": 123,
274
+ "VERB|\u5b8c\u5168\u52d5\u8a5e|parataxis": 124,
275
+ "VERB|\u5b8c\u5168\u52d5\u8a5e|root": 125,
276
+ "VERB|\u81ea\u52d5\u8a5e|acl": 126,
277
+ "VERB|\u81ea\u52d5\u8a5e|advcl": 127,
278
+ "VERB|\u81ea\u52d5\u8a5e|amod": 128,
279
+ "VERB|\u81ea\u52d5\u8a5e|ccomp": 129,
280
+ "VERB|\u81ea\u52d5\u8a5e|conj": 130,
281
+ "VERB|\u81ea\u52d5\u8a5e|parataxis": 131,
282
+ "VERB|\u81ea\u52d5\u8a5e|root": 132,
283
+ "X|_|goeswith": 133
284
  },
285
  "layer_norm_eps": 1e-07,
286
  "max_position_embeddings": 512,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54bdaa89235af378027f95f8042cad457abc25d946efc636ee064e3ef7ca69ca
3
- size 419434771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3685debfcb2b8c74abcf2c05dfebdb9a12428212ec98a7adf9841d12d79096ff
3
+ size 419437843
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff