ctaguchi commited on
Commit
2ed5167
1 Parent(s): ca63912

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "[PAD]": 6368,
3
- "[UNK]": 6367,
4
  "ã": 6398,
5
  "ãː": 6400,
6
  "ãˤ": 6408,
@@ -52,6 +50,7 @@
52
  "ɒ̃ː": 6422,
53
  "ɒ̃ˤ": 6418,
54
  "ɒ̰̃": 6463,
 
55
  "ɔ̃": 6390,
56
  "ɔ̃ː": 6392,
57
  "ɔ̃ˤ": 6414,
@@ -117,6 +116,7 @@
117
  "ʏ̃ː": 6428,
118
  "ʏ̃ˤ": 6444,
119
  "ʏ̰̃": 6466,
 
120
  "ʙ̩": 6476,
121
  "ʟ̩": 6481
122
  }
 
1
  {
 
 
2
  "ã": 6398,
3
  "ãː": 6400,
4
  "ãˤ": 6408,
 
50
  "ɒ̃ː": 6422,
51
  "ɒ̃ˤ": 6418,
52
  "ɒ̰̃": 6463,
53
+ "ɒ̤ˠ": 6368,
54
  "ɔ̃": 6390,
55
  "ɔ̃ː": 6392,
56
  "ɔ̃ˤ": 6414,
 
116
  "ʏ̃ː": 6428,
117
  "ʏ̃ˤ": 6444,
118
  "ʏ̰̃": 6466,
119
+ "ʏ̤ˤ": 6367,
120
  "ʙ̩": 6476,
121
  "ʟ̩": 6481
122
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,18 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": {
5
+ "content": "[PAD]",
6
+ "lstrip": true,
7
+ "normalized": false,
8
+ "rstrip": true,
9
+ "single_word": false
10
+ },
11
+ "unk_token": {
12
+ "content": "[UNK]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false
17
+ }
18
  }
tokenizer_config.json CHANGED
@@ -49113,7 +49113,7 @@
49113
  "special": false
49114
  },
49115
  "6367": {
49116
- "content": "[UNK]",
49117
  "lstrip": true,
49118
  "normalized": false,
49119
  "rstrip": true,
@@ -49121,7 +49121,7 @@
49121
  "special": false
49122
  },
49123
  "6368": {
49124
- "content": "[PAD]",
49125
  "lstrip": true,
49126
  "normalized": false,
49127
  "rstrip": true,
 
49113
  "special": false
49114
  },
49115
  "6367": {
49116
+ "content": "ʏ̤ˤ",
49117
  "lstrip": true,
49118
  "normalized": false,
49119
  "rstrip": true,
 
49121
  "special": false
49122
  },
49123
  "6368": {
49124
+ "content": "ɒ̤ˠ",
49125
  "lstrip": true,
49126
  "normalized": false,
49127
  "rstrip": true,