KoichiYasuoka
committed on
Commit
•
1cf9d5f
1
Parent(s):
fa4a248
model improved
Browse files
- config.json +3 -25
- pytorch_model.bin +2 -2
- special_tokens_map.json +51 -1
- supar.model +2 -2
- tokenizer.json +2 -1
- tokenizer_config.json +61 -1
config.json
CHANGED
@@ -261,8 +261,7 @@
|
|
261 |
"251": "VERB+PART",
|
262 |
"252": "VERB+PUNCT",
|
263 |
"253": "VERB+VERB",
|
264 |
-
"254": "X"
|
265 |
-
"255": "X+X"
|
266 |
},
|
267 |
"initializer_range": 0.02,
|
268 |
"intermediate_size": 3072,
|
@@ -521,8 +520,7 @@
|
|
521 |
"VERB+PART": 251,
|
522 |
"VERB+PUNCT": 252,
|
523 |
"VERB+VERB": 253,
|
524 |
-
"X": 254
|
525 |
-
"X+X": 255
|
526 |
},
|
527 |
"layer_norm_eps": 1e-07,
|
528 |
"max_position_embeddings": 512,
|
@@ -1384,10 +1382,6 @@
|
|
1384 |
"\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
|
1385 |
"\u0e42\u0e14\u0e22",
|
1386 |
"\u0e40\u0e2a\u0e23\u0e34\u0e21"
|
1387 |
-
],
|
1388 |
-
"\u0e42\u0e14\u0e22\u0e40\u0e40\u0e1a\u0e48\u0e07": [
|
1389 |
-
"\u0e42\u0e14\u0e22",
|
1390 |
-
"\u0e40\u0e40\u0e1a\u0e48\u0e07"
|
1391 |
]
|
1392 |
},
|
1393 |
"DET+DET": {
|
@@ -4847,10 +4841,6 @@
|
|
4847 |
"\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
|
4848 |
"\u0e1e.\u0e28."
|
4849 |
],
|
4850 |
-
"\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08\u0e30": [
|
4851 |
-
"\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08",
|
4852 |
-
"\u0e30"
|
4853 |
-
],
|
4854 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
|
4855 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
|
4856 |
"\u0e01"
|
@@ -6407,10 +6397,6 @@
|
|
6407 |
"\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
|
6408 |
"\u0e40\u0e0a\u0e37\u0e49\u0e2d"
|
6409 |
],
|
6410 |
-
"\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e17\u0e32\u0e07": [
|
6411 |
-
"\u0e1b\u0e23\u0e30\u0e08",
|
6412 |
-
"\u0e33\u0e17\u0e32\u0e07"
|
6413 |
-
],
|
6414 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
|
6415 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
|
6416 |
"\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
|
@@ -6903,10 +6889,6 @@
|
|
6903 |
"\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
|
6904 |
"\u0e40\u0e09\u0e1e\u0e32\u0e30"
|
6905 |
],
|
6906 |
-
"\u0e23\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a": [
|
6907 |
-
"\u0e23",
|
6908 |
-
"\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a"
|
6909 |
-
],
|
6910 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
|
6911 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
|
6912 |
"\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
|
@@ -7113,17 +7095,13 @@
|
|
7113 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
|
7114 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
|
7115 |
"\u0e04\u0e23\u0e38\u0e48\u0e19"
|
7116 |
-
],
|
7117 |
-
"\u0e42\u0e1b\u0e23": [
|
7118 |
-
"\u0e42",
|
7119 |
-
"\u0e1b\u0e23"
|
7120 |
]
|
7121 |
}
|
7122 |
}
|
7123 |
},
|
7124 |
"tokenizer_class": "DebertaV2TokenizerFast",
|
7125 |
"torch_dtype": "float32",
|
7126 |
-
"transformers_version": "4.
|
7127 |
"type_vocab_size": 0,
|
7128 |
"vocab_size": 3000
|
7129 |
}
|
|
|
261 |
"251": "VERB+PART",
|
262 |
"252": "VERB+PUNCT",
|
263 |
"253": "VERB+VERB",
|
264 |
+
"254": "X"
|
|
|
265 |
},
|
266 |
"initializer_range": 0.02,
|
267 |
"intermediate_size": 3072,
|
|
|
520 |
"VERB+PART": 251,
|
521 |
"VERB+PUNCT": 252,
|
522 |
"VERB+VERB": 253,
|
523 |
+
"X": 254
|
|
|
524 |
},
|
525 |
"layer_norm_eps": 1e-07,
|
526 |
"max_position_embeddings": 512,
|
|
|
1382 |
"\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
|
1383 |
"\u0e42\u0e14\u0e22",
|
1384 |
"\u0e40\u0e2a\u0e23\u0e34\u0e21"
|
|
|
|
|
|
|
|
|
1385 |
]
|
1386 |
},
|
1387 |
"DET+DET": {
|
|
|
4841 |
"\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
|
4842 |
"\u0e1e.\u0e28."
|
4843 |
],
|
|
|
|
|
|
|
|
|
4844 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
|
4845 |
"\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
|
4846 |
"\u0e01"
|
|
|
6397 |
"\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
|
6398 |
"\u0e40\u0e0a\u0e37\u0e49\u0e2d"
|
6399 |
],
|
|
|
|
|
|
|
|
|
6400 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
|
6401 |
"\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
|
6402 |
"\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
|
|
|
6889 |
"\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
|
6890 |
"\u0e40\u0e09\u0e1e\u0e32\u0e30"
|
6891 |
],
|
|
|
|
|
|
|
|
|
6892 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
|
6893 |
"\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
|
6894 |
"\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
|
|
|
7095 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
|
7096 |
"\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
|
7097 |
"\u0e04\u0e23\u0e38\u0e48\u0e19"
|
|
|
|
|
|
|
|
|
7098 |
]
|
7099 |
}
|
7100 |
}
|
7101 |
},
|
7102 |
"tokenizer_class": "DebertaV2TokenizerFast",
|
7103 |
"torch_dtype": "float32",
|
7104 |
+
"transformers_version": "4.40.1",
|
7105 |
"type_vocab_size": 0,
|
7106 |
"vocab_size": 3000
|
7107 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6affcb1836a4840990d2062d60cc3a128e3227d36e1246ad54ab40fc5f0940cf
|
3 |
+
size 351866018
|
special_tokens_map.json
CHANGED
@@ -1 +1,51 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "[CLS]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "[SEP]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "[MASK]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "[PAD]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "[SEP]",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "[UNK]",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
supar.model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2776e81c062b841d36b9d422a1ddfd32c79f82659afeed9912f7864ba4cc0c4
|
3 |
+
size 397575682
|
tokenizer.json
CHANGED
@@ -12156,6 +12156,7 @@
|
|
12156 |
"ü",
|
12157 |
-16.64917507077761
|
12158 |
]
|
12159 |
-
]
|
|
|
12160 |
}
|
12161 |
}
|
|
|
12156 |
"ü",
|
12157 |
-16.64917507077761
|
12158 |
]
|
12159 |
+
],
|
12160 |
+
"byte_fallback": false
|
12161 |
}
|
12162 |
}
|
tokenizer_config.json
CHANGED
@@ -1 +1,61 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[CLS]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[PAD]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[SEP]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[UNK]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"bos_token": "[CLS]",
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "[CLS]",
|
47 |
+
"do_lower_case": false,
|
48 |
+
"eos_token": "[SEP]",
|
49 |
+
"keep_accents": true,
|
50 |
+
"mask_token": "[MASK]",
|
51 |
+
"max_length": 510,
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"sep_token": "[SEP]",
|
55 |
+
"split_by_punct": true,
|
56 |
+
"stride": 0,
|
57 |
+
"tokenizer_class": "DebertaV2TokenizerFast",
|
58 |
+
"truncation_side": "right",
|
59 |
+
"truncation_strategy": "longest_first",
|
60 |
+
"unk_token": "[UNK]"
|
61 |
+
}
|