ctaguchi committed on
Commit
3120d30
1 Parent(s): 2ed5167

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "ã": 6398,
3
  "ãː": 6400,
4
  "ãˤ": 6408,
@@ -50,7 +52,6 @@
50
  "ɒ̃ː": 6422,
51
  "ɒ̃ˤ": 6418,
52
  "ɒ̰̃": 6463,
53
- "ɒ̤ˠ": 6368,
54
  "ɔ̃": 6390,
55
  "ɔ̃ː": 6392,
56
  "ɔ̃ˤ": 6414,
@@ -98,7 +99,6 @@
98
  "ɶ̃ː": 6403,
99
  "ɶ̃ˤ": 6420,
100
  "ɶ̰̃": 6462,
101
- "ɶ̤ˠ": 6369,
102
  "ɺ̩": 6479,
103
  "ɽ̩": 6472,
104
  "ɾ̩": 6474,
 
1
  {
2
+ "[PAD]": 6369,
3
+ "[UNK]": 6368,
4
  "ã": 6398,
5
  "ãː": 6400,
6
  "ãˤ": 6408,
 
52
  "ɒ̃ː": 6422,
53
  "ɒ̃ˤ": 6418,
54
  "ɒ̰̃": 6463,
 
55
  "ɔ̃": 6390,
56
  "ɔ̃ː": 6392,
57
  "ɔ̃ˤ": 6414,
 
99
  "ɶ̃ː": 6403,
100
  "ɶ̃ˤ": 6420,
101
  "ɶ̰̃": 6462,
 
102
  "ɺ̩": 6479,
103
  "ɽ̩": 6472,
104
  "ɾ̩": 6474,
special_tokens_map.json CHANGED
@@ -1,18 +1,6 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": {
5
- "content": "[PAD]",
6
- "lstrip": true,
7
- "normalized": false,
8
- "rstrip": true,
9
- "single_word": false
10
- },
11
- "unk_token": {
12
- "content": "[UNK]",
13
- "lstrip": true,
14
- "normalized": false,
15
- "rstrip": true,
16
- "single_word": false
17
- }
18
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
tokenizer_config.json CHANGED
@@ -49121,7 +49121,7 @@
49121
  "special": false
49122
  },
49123
  "6368": {
49124
- "content": "ɒ̤ˠ",
49125
  "lstrip": true,
49126
  "normalized": false,
49127
  "rstrip": true,
@@ -49129,7 +49129,7 @@
49129
  "special": false
49130
  },
49131
  "6369": {
49132
- "content": "ɶ̤ˠ",
49133
  "lstrip": true,
49134
  "normalized": false,
49135
  "rstrip": true,
 
49121
  "special": false
49122
  },
49123
  "6368": {
49124
+ "content": "[UNK]",
49125
  "lstrip": true,
49126
  "normalized": false,
49127
  "rstrip": true,
 
49129
  "special": false
49130
  },
49131
  "6369": {
49132
+ "content": "[PAD]",
49133
  "lstrip": true,
49134
  "normalized": false,
49135
  "rstrip": true,
vocab.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "[PAD]": 6368,
3
- "[UNK]": 6367,
4
  "a": 5711,
5
  "aˀ": 6159,
6
  "aː": 5729,
@@ -2587,7 +2587,7 @@
2587
  "z̺": 2666,
2588
  "z̻": 2692,
2589
  "zᶣ": 2928,
2590
- "|": 6369,
2591
  "æ": 5712,
2592
  "æˀ": 6160,
2593
  "æː": 5730,
 
1
  {
2
+ "[PAD]": 6369,
3
+ "[UNK]": 6368,
4
  "a": 5711,
5
  "aˀ": 6159,
6
  "aː": 5729,
 
2587
  "z̺": 2666,
2588
  "z̻": 2692,
2589
  "zᶣ": 2928,
2590
+ "|": 6367,
2591
  "æ": 5712,
2592
  "æˀ": 6160,
2593
  "æː": 5730,