KoichiYasuoka committed on
Commit
1cf9d5f
1 Parent(s): fa4a248

model improved

Browse files
config.json CHANGED
@@ -261,8 +261,7 @@
261
  "251": "VERB+PART",
262
  "252": "VERB+PUNCT",
263
  "253": "VERB+VERB",
264
- "254": "X",
265
- "255": "X+X"
266
  },
267
  "initializer_range": 0.02,
268
  "intermediate_size": 3072,
@@ -521,8 +520,7 @@
521
  "VERB+PART": 251,
522
  "VERB+PUNCT": 252,
523
  "VERB+VERB": 253,
524
- "X": 254,
525
- "X+X": 255
526
  },
527
  "layer_norm_eps": 1e-07,
528
  "max_position_embeddings": 512,
@@ -1384,10 +1382,6 @@
1384
  "\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
1385
  "\u0e42\u0e14\u0e22",
1386
  "\u0e40\u0e2a\u0e23\u0e34\u0e21"
1387
- ],
1388
- "\u0e42\u0e14\u0e22\u0e40\u0e40\u0e1a\u0e48\u0e07": [
1389
- "\u0e42\u0e14\u0e22",
1390
- "\u0e40\u0e40\u0e1a\u0e48\u0e07"
1391
  ]
1392
  },
1393
  "DET+DET": {
@@ -4847,10 +4841,6 @@
4847
  "\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
4848
  "\u0e1e.\u0e28."
4849
  ],
4850
- "\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08\u0e30": [
4851
- "\u0e2a\u0e38\u0e23\u0e2a\u0e31\u0e08\u0e08",
4852
- "\u0e30"
4853
- ],
4854
  "\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
4855
  "\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
4856
  "\u0e01"
@@ -6407,10 +6397,6 @@
6407
  "\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
6408
  "\u0e40\u0e0a\u0e37\u0e49\u0e2d"
6409
  ],
6410
- "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e17\u0e32\u0e07": [
6411
- "\u0e1b\u0e23\u0e30\u0e08",
6412
- "\u0e33\u0e17\u0e32\u0e07"
6413
- ],
6414
  "\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
6415
  "\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
6416
  "\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
@@ -6903,10 +6889,6 @@
6903
  "\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
6904
  "\u0e40\u0e09\u0e1e\u0e32\u0e30"
6905
  ],
6906
- "\u0e23\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a": [
6907
- "\u0e23",
6908
- "\u0e31\u0e1a\u0e1c\u0e34\u0e14\u0e0a\u0e2d\u0e1a"
6909
- ],
6910
  "\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
6911
  "\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
6912
  "\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
@@ -7113,17 +7095,13 @@
7113
  "\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
7114
  "\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
7115
  "\u0e04\u0e23\u0e38\u0e48\u0e19"
7116
- ],
7117
- "\u0e42\u0e1b\u0e23": [
7118
- "\u0e42",
7119
- "\u0e1b\u0e23"
7120
  ]
7121
  }
7122
  }
7123
  },
7124
  "tokenizer_class": "DebertaV2TokenizerFast",
7125
  "torch_dtype": "float32",
7126
- "transformers_version": "4.19.2",
7127
  "type_vocab_size": 0,
7128
  "vocab_size": 3000
7129
  }
 
261
  "251": "VERB+PART",
262
  "252": "VERB+PUNCT",
263
  "253": "VERB+VERB",
264
+ "254": "X"
 
265
  },
266
  "initializer_range": 0.02,
267
  "intermediate_size": 3072,
 
520
  "VERB+PART": 251,
521
  "VERB+PUNCT": 252,
522
  "VERB+VERB": 253,
523
+ "X": 254
 
524
  },
525
  "layer_norm_eps": 1e-07,
526
  "max_position_embeddings": 512,
 
1382
  "\u0e42\u0e14\u0e22\u0e40\u0e2a\u0e23\u0e34\u0e21": [
1383
  "\u0e42\u0e14\u0e22",
1384
  "\u0e40\u0e2a\u0e23\u0e34\u0e21"
 
 
 
 
1385
  ]
1386
  },
1387
  "DET+DET": {
 
4841
  "\u0e27\u0e38\u0e12\u0e34\u0e2a\u0e20\u0e32",
4842
  "\u0e1e.\u0e28."
4843
  ],
 
 
 
 
4844
  "\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36\u0e01": [
4845
  "\u0e2d.\u0e2d\u0e48\u0e32\u0e27\u0e25\u0e36",
4846
  "\u0e01"
 
6397
  "\u0e1a\u0e23\u0e34\u0e01\u0e32\u0e23",
6398
  "\u0e40\u0e0a\u0e37\u0e49\u0e2d"
6399
  ],
 
 
 
 
6400
  "\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c": [
6401
  "\u0e1b\u0e23\u0e30\u0e0a\u0e38\u0e21",
6402
  "\u0e19\u0e23\u0e32\u0e17\u0e31\u0e28\u0e19\u0e4c"
 
6889
  "\u0e23\u0e31\u0e01\u0e29\u0e32\u0e01\u0e32\u0e23",
6890
  "\u0e40\u0e09\u0e1e\u0e32\u0e30"
6891
  ],
 
 
 
 
6892
  "\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07": [
6893
  "\u0e23\u0e39\u0e49\u0e08\u0e31\u0e01",
6894
  "\u0e23\u0e30\u0e21\u0e31\u0e14\u0e23\u0e30\u0e27\u0e31\u0e07"
 
7095
  "\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a\u0e04\u0e23\u0e38\u0e48\u0e19": [
7096
  "\u0e42\u0e19\u0e49\u0e15\u0e1a\u0e38\u0e4a",
7097
  "\u0e04\u0e23\u0e38\u0e48\u0e19"
 
 
 
 
7098
  ]
7099
  }
7100
  }
7101
  },
7102
  "tokenizer_class": "DebertaV2TokenizerFast",
7103
  "torch_dtype": "float32",
7104
+ "transformers_version": "4.40.1",
7105
  "type_vocab_size": 0,
7106
  "vocab_size": 3000
7107
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b4df0a015f13d3d01f865929497bed7bc19ebceb23cba209343750ccda3dafb
3
- size 351871347
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6affcb1836a4840990d2062d60cc3a128e3227d36e1246ad54ab40fc5f0940cf
3
+ size 351866018
special_tokens_map.json CHANGED
@@ -1 +1,51 @@
1
- {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[SEP]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[PAD]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4bba8e755ff6cfa5ac3dcfd17e1d95b7c9228186a1141ff7c8afc592f5d73a9
3
- size 397593643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2776e81c062b841d36b9d422a1ddfd32c79f82659afeed9912f7864ba4cc0c4
3
+ size 397575682
tokenizer.json CHANGED
@@ -12156,6 +12156,7 @@
12156
  "ü",
12157
  -16.64917507077761
12158
  ]
12159
- ]
 
12160
  }
12161
  }
 
12156
  "ü",
12157
  -16.64917507077761
12158
  ]
12159
+ ],
12160
+ "byte_fallback": false
12161
  }
12162
  }
tokenizer_config.json CHANGED
@@ -1 +1,61 @@
1
- {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": true, "keep_accents": true, "model_max_length": 512, "tokenizer_class": "DebertaV2TokenizerFast"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[CLS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "keep_accents": true,
50
+ "mask_token": "[MASK]",
51
+ "max_length": 510,
52
+ "model_max_length": 512,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "split_by_punct": true,
56
+ "stride": 0,
57
+ "tokenizer_class": "DebertaV2TokenizerFast",
58
+ "truncation_side": "right",
59
+ "truncation_strategy": "longest_first",
60
+ "unk_token": "[UNK]"
61
+ }