Plachta commited on
Commit
52e32c0
1 Parent(s): 39af5eb

updated requirements

Browse files
utils/__init__.py CHANGED
@@ -1,5 +1,11 @@
1
  import torch
2
  import torch.nn as nn
 
 
 
 
 
 
3
 
4
 
5
  class Transpose(nn.Identity):
 
1
  import torch
2
  import torch.nn as nn
3
+ # from icefall.utils import make_pad_mask
4
+
5
+ from .symbol_table import SymbolTable
6
+
7
+ # make_pad_mask = make_pad_mask
8
+ SymbolTable = SymbolTable
9
 
10
 
11
  class Transpose(nn.Identity):
utils/g2p/__init__.py CHANGED
@@ -14,14 +14,15 @@ class PhonemeBpeTokenizer:
14
 
15
  def tokenize(self, text):
16
  # 1. convert text to phoneme
17
- phonemes = _clean_text(text, ['cje_cleaners'])
18
  # 2. replace blank space " " with "_"
19
  phonemes = phonemes.replace(" ", "_")
20
  # 3. tokenize phonemes
21
  phoneme_tokens = self.tokenizer.encode(phonemes).ids
 
22
  if not len(phoneme_tokens):
23
- phoneme_tokens = self.tokenizer.encode(text).ids
24
- return phoneme_tokens
25
 
26
  def text_to_sequence(text, cleaner_names):
27
  '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
@@ -67,5 +68,5 @@ def _clean_text(text, cleaner_names):
67
  cleaner = getattr(cleaners, name)
68
  if not cleaner:
69
  raise Exception('Unknown cleaner: %s' % name)
70
- text = cleaner(text)
71
- return text
 
14
 
15
  def tokenize(self, text):
16
  # 1. convert text to phoneme
17
+ phonemes, langs = _clean_text(text, ['cje_cleaners'])
18
  # 2. replace blank space " " with "_"
19
  phonemes = phonemes.replace(" ", "_")
20
  # 3. tokenize phonemes
21
  phoneme_tokens = self.tokenizer.encode(phonemes).ids
22
+ assert(len(phoneme_tokens) == len(langs))
23
  if not len(phoneme_tokens):
24
+ raise ValueError("Empty text is given")
25
+ return phoneme_tokens, langs
26
 
27
  def text_to_sequence(text, cleaner_names):
28
  '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
 
68
  cleaner = getattr(cleaners, name)
69
  if not cleaner:
70
  raise Exception('Unknown cleaner: %s' % name)
71
+ text, langs = cleaner(text)
72
+ return text, langs
utils/g2p/bpe_1024.json ADDED
@@ -0,0 +1,2049 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[CLS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Whitespace"
55
+ },
56
+ "post_processor": null,
57
+ "decoder": null,
58
+ "model": {
59
+ "type": "BPE",
60
+ "dropout": null,
61
+ "unk_token": "[UNK]",
62
+ "continuing_subword_prefix": null,
63
+ "end_of_word_suffix": null,
64
+ "fuse_unk": false,
65
+ "byte_fallback": false,
66
+ "vocab": {
67
+ "[UNK]": 0,
68
+ "[CLS]": 1,
69
+ "[SEP]": 2,
70
+ "[PAD]": 3,
71
+ "[MASK]": 4,
72
+ "!": 5,
73
+ "#": 6,
74
+ "*": 7,
75
+ ",": 8,
76
+ "-": 9,
77
+ ".": 10,
78
+ "=": 11,
79
+ "?": 12,
80
+ "N": 13,
81
+ "Q": 14,
82
+ "^": 15,
83
+ "_": 16,
84
+ "`": 17,
85
+ "a": 18,
86
+ "b": 19,
87
+ "d": 20,
88
+ "e": 21,
89
+ "f": 22,
90
+ "g": 23,
91
+ "h": 24,
92
+ "i": 25,
93
+ "j": 26,
94
+ "k": 27,
95
+ "l": 28,
96
+ "m": 29,
97
+ "n": 30,
98
+ "o": 31,
99
+ "p": 32,
100
+ "s": 33,
101
+ "t": 34,
102
+ "u": 35,
103
+ "v": 36,
104
+ "w": 37,
105
+ "x": 38,
106
+ "y": 39,
107
+ "z": 40,
108
+ "~": 41,
109
+ "æ": 42,
110
+ "ç": 43,
111
+ "ð": 44,
112
+ "ŋ": 45,
113
+ "ɑ": 46,
114
+ "ɔ": 47,
115
+ "ə": 48,
116
+ "ɛ": 49,
117
+ "ɥ": 50,
118
+ "ɪ": 51,
119
+ "ɫ": 52,
120
+ "ɯ": 53,
121
+ "ɸ": 54,
122
+ "ɹ": 55,
123
+ "ɾ": 56,
124
+ "ʃ": 57,
125
+ "ʊ": 58,
126
+ "ʑ": 59,
127
+ "ʒ": 60,
128
+ "ʰ": 61,
129
+ "ˈ": 62,
130
+ "ˌ": 63,
131
+ "θ": 64,
132
+ "…": 65,
133
+ "⁼": 66,
134
+ "↑": 67,
135
+ "→": 68,
136
+ "↓": 69,
137
+ "_t": 70,
138
+ "↓↑": 71,
139
+ "_ˈ": 72,
140
+ "ən": 73,
141
+ "_s": 74,
142
+ "aɪ": 75,
143
+ "əɹ": 76,
144
+ "eɪ": 77,
145
+ "oʊ": 78,
146
+ "_k": 79,
147
+ "ʃi": 80,
148
+ "_w": 81,
149
+ "_ð": 82,
150
+ "ts": 83,
151
+ "tʃ": 84,
152
+ "_ts": 85,
153
+ "_h": 86,
154
+ "_ə": 87,
155
+ "_m": 88,
156
+ "an": 89,
157
+ "_n": 90,
158
+ "_ðə": 91,
159
+ "ɛn": 92,
160
+ "ɑʊ": 93,
161
+ "ɑŋ": 94,
162
+ "`⁼": 95,
163
+ "_p": 96,
164
+ "_i": 97,
165
+ "_ɪ": 98,
166
+ "_tʃ": 99,
167
+ "_l": 100,
168
+ "jɛn": 101,
169
+ "_d": 102,
170
+ "_f": 103,
171
+ "_j": 104,
172
+ "wo": 105,
173
+ "_b": 106,
174
+ "ta": 107,
175
+ "`↓": 108,
176
+ "te": 109,
177
+ "ənd": 110,
178
+ "_ʃi": 111,
179
+ "wa": 112,
180
+ "ka": 113,
181
+ "ɪŋ": 114,
182
+ "in": 115,
183
+ "st": 116,
184
+ "li": 117,
185
+ "ʊŋ": 118,
186
+ "_tɪ": 119,
187
+ "to": 120,
188
+ "weɪ": 121,
189
+ "_ənd": 122,
190
+ "ʰi": 123,
191
+ "_əv": 124,
192
+ "əŋ": 125,
193
+ "no": 126,
194
+ "_x": 127,
195
+ "ɾɯ": 128,
196
+ "na": 129,
197
+ "_a": 130,
198
+ "_ɹ": 131,
199
+ "ɪn": 132,
200
+ "ga": 133,
201
+ "de": 134,
202
+ "joʊ": 135,
203
+ "æn": 136,
204
+ "kɯ": 137,
205
+ "ɾe": 138,
206
+ "ma": 139,
207
+ "_ðə_ˈ": 140,
208
+ "ɾa": 141,
209
+ "ɛɹ": 142,
210
+ "mo": 143,
211
+ "ɔɹ": 144,
212
+ "əɫ": 145,
213
+ "_g": 146,
214
+ "da": 147,
215
+ "*↑": 148,
216
+ "ɪˈ": 149,
217
+ "_o": 150,
218
+ "_ʃ": 151,
219
+ "iŋ": 152,
220
+ "ja": 153,
221
+ "əm": 154,
222
+ "_ˌ": 155,
223
+ "aʊ": 156,
224
+ "_əˈ": 157,
225
+ "`↑": 158,
226
+ "ət": 159,
227
+ "_aɪ": 160,
228
+ "oo": 161,
229
+ "sɯ": 162,
230
+ "↓.": 163,
231
+ "_ɪn": 164,
232
+ "_hi": 165,
233
+ "_wɪ": 166,
234
+ "ɪz": 167,
235
+ "_na": 168,
236
+ "wan": 169,
237
+ "_ko": 170,
238
+ "_wo": 171,
239
+ "ɪd": 172,
240
+ "ɾi": 173,
241
+ "_ju": 174,
242
+ "mə": 175,
243
+ "_lə": 176,
244
+ "_hæ": 177,
245
+ "_ðət": 178,
246
+ "ɑɹ": 179,
247
+ "tʰ": 180,
248
+ "ki": 181,
249
+ "……": 182,
250
+ "ɑz": 183,
251
+ "_ɔ": 184,
252
+ "_mi": 185,
253
+ "_wɑz": 186,
254
+ "_ˈs": 187,
255
+ "↓,": 188,
256
+ "_tʰ": 189,
257
+ "əˈ": 190,
258
+ "dʑ": 191,
259
+ "ɪt": 192,
260
+ "_kʰ": 193,
261
+ "iɛ": 194,
262
+ "_ma": 195,
263
+ "ɪs": 196,
264
+ "tsɯ": 197,
265
+ "_ni": 198,
266
+ "_ɪt": 199,
267
+ "ke": 200,
268
+ "iɑʊ": 201,
269
+ "_ka": 202,
270
+ "_əɹ": 203,
271
+ "nd": 204,
272
+ "_ˈp": 205,
273
+ "ko": 206,
274
+ "jo": 207,
275
+ "ɹi": 208,
276
+ "mən": 209,
277
+ "ʊd": 210,
278
+ "_ˈm": 211,
279
+ "_fəɹ": 212,
280
+ "tʃʰi": 213,
281
+ "sa": 214,
282
+ "ʰɥ": 215,
283
+ "kʰ": 216,
284
+ "ˈs": 217,
285
+ "ɑt": 218,
286
+ "ɛd": 219,
287
+ "se": 220,
288
+ "tʃi": 221,
289
+ "ɛɫ": 222,
290
+ "_ˈk": 223,
291
+ "_joʊ": 224,
292
+ "təɹ": 225,
293
+ "ɛz": 226,
294
+ "--": 227,
295
+ "vəɹ": 228,
296
+ "`→": 229,
297
+ "ʃən": 230,
298
+ "_ɪz": 231,
299
+ "_meɪ": 232,
300
+ "_æ": 233,
301
+ "dʒ": 234,
302
+ "_ki": 235,
303
+ "_hɪz": 236,
304
+ "_bi": 237,
305
+ "uɑŋ": 238,
306
+ "_ˈf": 239,
307
+ "↓↑.": 240,
308
+ "_wɪθ": 241,
309
+ "ju": 242,
310
+ "iɑŋ": 243,
311
+ "→.": 244,
312
+ "_so": 245,
313
+ "_həɹ": 246,
314
+ "↑.": 247,
315
+ "ni": 248,
316
+ "_mo": 249,
317
+ "_maɪ": 250,
318
+ "laɪ": 251,
319
+ "ɥɛ": 252,
320
+ "_ta": 253,
321
+ "ənt": 254,
322
+ "_tʃʰi": 255,
323
+ "_sɯ": 256,
324
+ "_θ": 257,
325
+ "_ɛz": 258,
326
+ "wən": 259,
327
+ "me": 260,
328
+ "mi": 261,
329
+ "_hæd": 262,
330
+ "_ha": 263,
331
+ "əs": 264,
332
+ "_ˈl": 265,
333
+ "_st": 266,
334
+ "ðəɹ": 267,
335
+ "oʊn": 268,
336
+ "_wa": 269,
337
+ "ʰəŋ": 270,
338
+ "_nɑt": 271,
339
+ "*.": 272,
340
+ "kt": 273,
341
+ "_ˈh": 274,
342
+ "do": 275,
343
+ "ɥæn": 276,
344
+ "ne": 277,
345
+ "_to": 278,
346
+ "_wən": 279,
347
+ "_no": 280,
348
+ "_laɪ": 281,
349
+ "_wəɹ": 282,
350
+ "↑,": 283,
351
+ "→,": 284,
352
+ "ɛs": 285,
353
+ "↓↑,": 286,
354
+ "_ɔn": 287,
355
+ "ʰu": 288,
356
+ "so": 289,
357
+ "_ˈb": 290,
358
+ "ɫd": 291,
359
+ "ɪk": 292,
360
+ "ɪst": 293,
361
+ "_fɹ": 294,
362
+ "_ðɛɹ": 295,
363
+ "_weɪ": 296,
364
+ "kaɾa": 297,
365
+ "_ˈd": 298,
366
+ "_hæv": 299,
367
+ "tsʰ": 300,
368
+ "waɪ": 301,
369
+ "ɾo": 302,
370
+ "ɛm": 303,
371
+ "_æt": 304,
372
+ "ʊɹ": 305,
373
+ "_ˈw": 306,
374
+ "ba": 307,
375
+ "_noʊ": 308,
376
+ "ʰjɛn": 309,
377
+ "ɹeɪ": 310,
378
+ "_jo": 311,
379
+ "ɸɯ": 312,
380
+ "_sa": 313,
381
+ "_ɹɪˈ": 314,
382
+ "_ˈn": 315,
383
+ "ai": 316,
384
+ "_bət": 317,
385
+ "ɪɹ": 318,
386
+ "tʃʰɥ": 319,
387
+ "_dʑ": 320,
388
+ "əˌ": 321,
389
+ "_ðɪs": 322,
390
+ "..": 323,
391
+ "xwa": 324,
392
+ "_ɪm": 325,
393
+ "_dɪˈ": 326,
394
+ "_kən": 327,
395
+ "dʑi": 328,
396
+ "*,": 329,
397
+ "ɑn": 330,
398
+ "_ʃiɑŋ": 331,
399
+ "_kɯ": 332,
400
+ "ʃin": 333,
401
+ "_soʊ": 334,
402
+ "bi": 335,
403
+ "tʰjɛn": 336,
404
+ "te_i": 337,
405
+ "_tsʰ": 338,
406
+ "_ɯ": 339,
407
+ "aɪt": 340,
408
+ "ʰiŋ": 341,
409
+ "ðə": 342,
410
+ "_ɔɫ": 343,
411
+ "_ˈɹ": 344,
412
+ "nai": 345,
413
+ "əɹd": 346,
414
+ "_ˈt": 347,
415
+ "_ən": 348,
416
+ "_tʃʰɥ": 349,
417
+ "_iɛ": 350,
418
+ "leɪ": 351,
419
+ "ɛɹi": 352,
420
+ "ˈt": 353,
421
+ "ha": 354,
422
+ "ʃiŋ": 355,
423
+ "ɛvəɹ": 356,
424
+ "zɯ": 357,
425
+ "_wi": 358,
426
+ "_ja": 359,
427
+ "ɛk": 360,
428
+ "ʰɑŋ": 361,
429
+ "_tsɯ": 362,
430
+ "_əv_ðə": 363,
431
+ "taʃi": 364,
432
+ "_sɛd": 365,
433
+ "_xə": 366,
434
+ "_li": 367,
435
+ "_si": 368,
436
+ "desɯ": 369,
437
+ "_ˌɪn": 370,
438
+ "ʃjɛn": 371,
439
+ "_baɪ": 372,
440
+ "on": 373,
441
+ "_xɑʊ": 374,
442
+ "_ðeɪ": 375,
443
+ "_xaɪ": 376,
444
+ "`↓↑": 377,
445
+ "xweɪ": 378,
446
+ "hi": 379,
447
+ "_se": 380,
448
+ "ə_s": 381,
449
+ "_fɹəm": 382,
450
+ "ʊt": 383,
451
+ "di": 384,
452
+ "aʊt": 385,
453
+ "əb": 386,
454
+ "sɹ": 387,
455
+ "əz": 388,
456
+ "_xweɪ": 389,
457
+ "_kʰə": 390,
458
+ "ɹu": 391,
459
+ "_u": 392,
460
+ "_de": 393,
461
+ "aɪd": 394,
462
+ "ɪv": 395,
463
+ "bɯ": 396,
464
+ "_ho": 397,
465
+ "əɹz": 398,
466
+ "joo": 399,
467
+ "_bɪˈ": 400,
468
+ "_tʰa": 401,
469
+ "ɛt": 402,
470
+ "en": 403,
471
+ "ɛni": 404,
472
+ "əst": 405,
473
+ "æk": 406,
474
+ "ə_ts": 407,
475
+ "_ˈɪn": 408,
476
+ "ti": 409,
477
+ "ɥn": 410,
478
+ "_dʒ": 411,
479
+ "xɑʊ": 412,
480
+ "_ˈv": 413,
481
+ "ʃiɑŋ": 414,
482
+ "pʰ": 415,
483
+ "_wɪtʃ": 416,
484
+ "eɪm": 417,
485
+ "oʊz": 418,
486
+ "əðəɹ": 419,
487
+ "fɑŋ": 420,
488
+ "_ˈg": 421,
489
+ "_do": 422,
490
+ "_ʃiɑʊ": 423,
491
+ "_ˈæ": 424,
492
+ "_jʊɹ": 425,
493
+ "_ðɛm": 426,
494
+ "ɪm": 427,
495
+ "ɛst": 428,
496
+ "ænd": 429,
497
+ "_du": 430,
498
+ "ɯɯ": 431,
499
+ "kan": 432,
500
+ "_da": 433,
501
+ "ino": 434,
502
+ "_e": 435,
503
+ "_wʊd": 436,
504
+ "ɛnd": 437,
505
+ "meɪ": 438,
506
+ "θɪŋ": 439,
507
+ "_ʃjɛn": 440,
508
+ "iz": 441,
509
+ "aɪm": 442,
510
+ "_hu": 443,
511
+ "_əˈb": 444,
512
+ "əns": 445,
513
+ "_wɪɫ": 446,
514
+ "tʰi": 447,
515
+ "go": 448,
516
+ "ɛnt": 449,
517
+ "fu": 450,
518
+ "æp": 451,
519
+ "xoʊ": 452,
520
+ "eɪk": 453,
521
+ "ʊk": 454,
522
+ "əɹˈ": 455,
523
+ "_θɪŋ": 456,
524
+ "əl": 457,
525
+ "pɹ": 458,
526
+ "ətʃ": 459,
527
+ "nt": 460,
528
+ "_ɸɯ": 461,
529
+ "lu": 462,
530
+ "_ˈɔ": 463,
531
+ "_iɑʊ": 464,
532
+ "lə": 465,
533
+ "tu": 466,
534
+ "_dʑi": 467,
535
+ "eɪt": 468,
536
+ "_ʃin": 469,
537
+ "nna": 470,
538
+ "_ˈpɹ": 471,
539
+ "fən": 472,
540
+ "_əp": 473,
541
+ "njɛn": 474,
542
+ "_aʊt": 475,
543
+ "fɔɹ": 476,
544
+ "_tu": 477,
545
+ "eɪʃən": 478,
546
+ "ɪɫ": 479,
547
+ "_wət": 480,
548
+ "_ɪf": 481,
549
+ "_ɥ": 482,
550
+ "_fa": 483,
551
+ "ˈw": 484,
552
+ "tʃʰjɛn": 485,
553
+ "_wɪn": 486,
554
+ "oʊɫd": 487,
555
+ "_əˈp": 488,
556
+ "aʊnd": 489,
557
+ "san": 490,
558
+ "he": 491,
559
+ "_bɪn": 492,
560
+ "fa": 493,
561
+ "ɪf": 494,
562
+ "ɔŋ": 495,
563
+ "ge": 496,
564
+ "_ɪn_ðə": 497,
565
+ "miŋ": 498,
566
+ "_pɹ": 499,
567
+ "ina": 500,
568
+ "ano": 501,
569
+ "əbəɫ": 502,
570
+ "kˈs": 503,
571
+ "_ˈɛni": 504,
572
+ "nəŋ": 505,
573
+ "əd": 506,
574
+ "_əv_ðə_ˈ": 507,
575
+ "_waɪ": 508,
576
+ "_taɪm": 509,
577
+ "ˈsɛɫ": 510,
578
+ "ʃiɛ": 511,
579
+ "_kəm": 512,
580
+ "æst": 513,
581
+ "_goʊ": 514,
582
+ "mɯ": 515,
583
+ "ˈp": 516,
584
+ "_ˈst": 517,
585
+ "ə_t": 518,
586
+ "pt": 519,
587
+ "_pʰ": 520,
588
+ "ʰɹ": 521,
589
+ "ʃja": 522,
590
+ "iwa": 523,
591
+ "ɪl": 524,
592
+ "bət": 525,
593
+ "_fɑŋ": 526,
594
+ "ho": 527,
595
+ "iv": 528,
596
+ "loʊ": 529,
597
+ "be": 530,
598
+ "_laɪk": 531,
599
+ "ɪʃ": 532,
600
+ "_fu": 533,
601
+ "ze": 534,
602
+ "ə_tʃ": 535,
603
+ "ɑɹt": 536,
604
+ "ɔɹd": 537,
605
+ "tʃʰiŋ": 538,
606
+ "mp": 539,
607
+ "_ðə_s": 540,
608
+ "_əˈbaʊt": 541,
609
+ "_ˈoʊ": 542,
610
+ "kʰə": 543,
611
+ "d_tɪ": 544,
612
+ "ŋga": 545,
613
+ "əli": 546,
614
+ "_kʰan": 547,
615
+ "çi": 548,
616
+ "_ˈju": 549,
617
+ "_kʊd": 550,
618
+ "ɔɫ": 551,
619
+ "ɔt": 552,
620
+ "_ɪts": 553,
621
+ "_san": 554,
622
+ "tʃa": 555,
623
+ "i_na": 556,
624
+ "xə": 557,
625
+ "ɛkt": 558,
626
+ "_mɔɹ": 559,
627
+ "te_kɯ": 560,
628
+ "ɪdʒ": 561,
629
+ "jʊŋ": 562,
630
+ "_wan": 563,
631
+ "æt": 564,
632
+ "kat": 565,
633
+ "ˈsɛɫf": 566,
634
+ "_ke": 567,
635
+ "aɪnd": 568,
636
+ "it": 569,
637
+ "_ɑɹ": 570,
638
+ "sp": 571,
639
+ "oʊnt": 572,
640
+ "_tʃi": 573,
641
+ "tsʰɹ": 574,
642
+ "_xən": 575,
643
+ "_əˈg": 576,
644
+ "ə_k": 577,
645
+ "to_i": 578,
646
+ "_tʰi": 579,
647
+ "_iŋ": 580,
648
+ "aʊn": 581,
649
+ "gɯ": 582,
650
+ "_ɪkˈs": 583,
651
+ "ɛv": 584,
652
+ "gi": 585,
653
+ "ks": 586,
654
+ "_səm": 587,
655
+ "ana": 588,
656
+ "ɪtəɫ": 589,
657
+ "nan": 590,
658
+ "_ˈɪntu": 591,
659
+ "_hiɹ": 592,
660
+ "_te": 593,
661
+ "_naʊ": 594,
662
+ "ʃiɑʊ": 595,
663
+ "ʃo": 596,
664
+ "ɹe": 597,
665
+ "xaɪ": 598,
666
+ "_tʃʰiŋ": 599,
667
+ "_sɹ": 600,
668
+ "_haʊ": 601,
669
+ "?.": 602,
670
+ "_feɪ": 603,
671
+ "liŋ": 604,
672
+ "_ʃja": 605,
673
+ "_ˈdʒ": 606,
674
+ "_seɪ": 607,
675
+ "ˈn": 608,
676
+ "soʊ": 609,
677
+ "tʰʊŋ": 610,
678
+ "_ljoʊ": 611,
679
+ "maɪ": 612,
680
+ "_bɹ": 613,
681
+ "ɹeɪt": 614,
682
+ "_nəŋ": 615,
683
+ "ʰə": 616,
684
+ "æns": 617,
685
+ "_ˈɔl": 618,
686
+ "tatʃi": 619,
687
+ "nto": 620,
688
+ "_ˌɪnˈ": 621,
689
+ "le": 622,
690
+ "nde": 623,
691
+ "_ˈvɛɹi": 624,
692
+ "mənt": 625,
693
+ "ɾima": 626,
694
+ "_ðɛn": 627,
695
+ "_həz": 628,
696
+ "_ɹi": 629,
697
+ "ftəɹ": 630,
698
+ "_sp": 631,
699
+ "ɾewa": 632,
700
+ "ga_a": 633,
701
+ "z_əv": 634,
702
+ "_miŋ": 635,
703
+ "_tɪ_ðə": 636,
704
+ "ɹaɪ": 637,
705
+ "ɛl": 638,
706
+ "ɹæ": 639,
707
+ "_hoʊ": 640,
708
+ "xu": 641,
709
+ "oʊnli": 642,
710
+ "ŋk": 643,
711
+ "i_i": 644,
712
+ "_dɪd": 645,
713
+ "_dʒɪst": 646,
714
+ "ing": 647,
715
+ "kai": 648,
716
+ "_mæn": 649,
717
+ "_in": 650,
718
+ "zo": 651,
719
+ "əf": 652,
720
+ "dake": 653,
721
+ "_ˈsəm": 654,
722
+ "ɾɯ_no": 655,
723
+ "_go": 656,
724
+ "tʃəɹ": 657,
725
+ "ite": 658,
726
+ "`↓.": 659,
727
+ "_kʰaɪ": 660,
728
+ "sk": 661,
729
+ "ɔɹs": 662,
730
+ "_tʰiŋ": 663,
731
+ "_nə": 664,
732
+ "pəɫ": 665,
733
+ "_tɪ_bi": 666,
734
+ "ˈfɔɹ": 667,
735
+ "mu": 668,
736
+ "su": 669,
737
+ "aa": 670,
738
+ "ɪstəɹ": 671,
739
+ "ʰan": 672,
740
+ "pəɹ": 673,
741
+ "ə_p": 674,
742
+ "liɑŋ": 675,
743
+ "_v": 676,
744
+ "oʊst": 677,
745
+ "_əˈgɛn": 678,
746
+ "ənz": 679,
747
+ "No": 680,
748
+ "ɔɹt": 681,
749
+ "_səˈ": 682,
750
+ "_mɯ": 683,
751
+ "tʃʰ": 684,
752
+ "_ˈlɪtəɫ": 685,
753
+ "_xwo": 686,
754
+ "_ˌbi": 687,
755
+ "_ˈoʊvəɹ": 688,
756
+ "_çi": 689,
757
+ "_deɪ": 690,
758
+ "aɪn": 691,
759
+ "_ʃiŋ": 692,
760
+ "i_ʃi": 693,
761
+ "_tsʰaɪ": 694,
762
+ "ʃoo": 695,
763
+ "ɾoo": 696,
764
+ "bəɹ": 697,
765
+ "ʰa": 698,
766
+ "ˈɛs": 699,
767
+ "_ɪn_ðə_ˈ": 700,
768
+ "Nwa": 701,
769
+ "_ðən": 702,
770
+ "saɪ": 703,
771
+ "_ˈjuˈɛs": 704,
772
+ "nda": 705,
773
+ "_pleɪ": 706,
774
+ "ɪŋ_tɪ": 707,
775
+ "ɪti": 708,
776
+ "_me": 709,
777
+ "_ʃʊd": 710,
778
+ "_nu": 711,
779
+ "_ðə_k": 712,
780
+ "za": 713,
781
+ "_ˈɛvəɹ": 714,
782
+ "əɹn": 715,
783
+ "æd": 716,
784
+ "ˈm": 717,
785
+ "_doʊnt": 718,
786
+ "_məst": 719,
787
+ "jɯɯ": 720,
788
+ "ɑɹd": 721,
789
+ "_jɛn": 722,
790
+ "ʃɥ": 723,
791
+ "_ˈoʊnli": 724,
792
+ "_ʃo": 725,
793
+ "_liŋ": 726,
794
+ "ss": 727,
795
+ "ɑl": 728,
796
+ "dea": 729,
797
+ "ɾeta": 730,
798
+ "mjɛn": 731,
799
+ "_gʊd": 732,
800
+ "_wɔ": 733,
801
+ "imo": 734,
802
+ "no_ko": 735,
803
+ "_ɥæn": 736,
804
+ "ndʒ": 737,
805
+ "ɪʃən": 738,
806
+ "o_ʃi": 739,
807
+ "_θɪŋk": 740,
808
+ "_nan": 741,
809
+ "to_o": 742,
810
+ "_tʰʊŋ": 743,
811
+ "ljoʊ": 744,
812
+ "tai": 745,
813
+ "mə_s": 746,
814
+ "_jɯ": 747,
815
+ "_uɑŋ": 748,
816
+ "_ˌbiˈfɔɹ": 749,
817
+ "æs": 750,
818
+ "_tʃʰjɛn": 751,
819
+ "ik": 752,
820
+ "_bæk": 753,
821
+ "_ˈiv": 754,
822
+ "eɪn": 755,
823
+ "un": 756,
824
+ "la": 757,
825
+ "ˈk": 758,
826
+ "_daʊn": 759,
827
+ "anai": 760,
828
+ "_lɛ": 761,
829
+ "əɹt": 762,
830
+ "ðɛɹ": 763,
831
+ "_ˈæftəɹ": 764,
832
+ "dat": 765,
833
+ "fan": 766,
834
+ "bəɫ": 767,
835
+ "temo": 768,
836
+ "tʰa": 769,
837
+ "ɾɯ_ko": 770,
838
+ "ˈv": 771,
839
+ "feɪ": 772,
840
+ "_mətʃ": 773,
841
+ "xwo": 774,
842
+ "ɹoʊ": 775,
843
+ "_ba": 776,
844
+ "_ˈnɛvəɹ": 777,
845
+ "_meɪd": 778,
846
+ "_jʊŋ": 779,
847
+ "_əˈpɑn": 780,
848
+ "!?": 781,
849
+ "_ˈʃ": 782,
850
+ "_ðə_ˈk": 783,
851
+ "ft": 784,
852
+ "_bo": 785,
853
+ "_ɪn_ə": 786,
854
+ "tʃʰɥæn": 787,
855
+ "ˈz": 788,
856
+ "`↓,": 789,
857
+ "_bɪˈk": 790,
858
+ "ɪg": 791,
859
+ "kin": 792,
860
+ "_kl": 793,
861
+ "ɾɯ_n": 794,
862
+ "_lɑʊ": 795,
863
+ "----": 796,
864
+ "ika": 797,
865
+ "_ɹaɪt": 798,
866
+ "zd": 799,
867
+ "z_ənd": 800,
868
+ "_kjo": 801,
869
+ "xwan": 802,
870
+ "too": 803,
871
+ "_gɪt": 804,
872
+ "_liɑŋ": 805,
873
+ "ta_n": 806,
874
+ "_keɪm": 807,
875
+ "_ˈəðəɹ": 808,
876
+ "_wɛɫ": 809,
877
+ "teki": 810,
878
+ "see": 811,
879
+ "jɯ": 812,
880
+ "i_o": 813,
881
+ "to_ʃi": 814,
882
+ "fəɫ": 815,
883
+ "bo": 816,
884
+ "ˌt": 817,
885
+ "ɪp": 818,
886
+ "ane": 819,
887
+ "_tʰjɛn": 820,
888
+ "_tʃo": 821,
889
+ "ɾjo": 822,
890
+ "ɪns": 823,
891
+ "_he": 824,
892
+ "ŋka": 825,
893
+ "ʃɥɛ": 826,
894
+ "dʑa": 827,
895
+ "vd": 828,
896
+ "ʰwan": 829,
897
+ "_gɹeɪt": 830,
898
+ "_əv_ə": 831,
899
+ "əndəɹ": 832,
900
+ "kedo": 833,
901
+ "_ðə_b": 834,
902
+ "ək": 835,
903
+ "_teɪk": 836,
904
+ "kʰan": 837,
905
+ "_ˈɔlˌ": 838,
906
+ "swo": 839,
907
+ "_ɪt_wɑz": 840,
908
+ "_ʃɥ": 841,
909
+ "_sim": 842,
910
+ "_ˈfɑ": 843,
911
+ "min": 844,
912
+ "i_a": 845,
913
+ "soo": 846,
914
+ "ɛns": 847,
915
+ "_sətʃ": 848,
916
+ "tʰaɪ": 849,
917
+ "_ga": 850,
918
+ "i_ka": 851,
919
+ "koo": 852,
920
+ "_fəɹst": 853,
921
+ "_ˈtʃ": 854,
922
+ "nno": 855,
923
+ "ə_ɹ": 856,
924
+ "taɾa": 857,
925
+ "tʃʰjoʊ": 858,
926
+ "_æm": 859,
927
+ "_mu": 860,
928
+ "_meɪk": 861,
929
+ "↓…": 862,
930
+ "ɪˈθ": 863,
931
+ "ɑb": 864,
932
+ "ɹa": 865,
933
+ "_wɛɹ": 866,
934
+ "_ðə_ˈs": 867,
935
+ "_əˈl": 868,
936
+ "_oʊɫd": 869,
937
+ "æl": 870,
938
+ "_ˈpi": 871,
939
+ "_lɔŋ": 872,
940
+ "dʑo": 873,
941
+ "_tʰaɪ": 874,
942
+ "ɔɹn": 875,
943
+ "əɫz": 876,
944
+ "_təˈ": 877,
945
+ "_əˈweɪ": 878,
946
+ "pa": 879,
947
+ "_ðiz": 880,
948
+ "_ˈsp": 881,
949
+ "nn": 882,
950
+ "mae": 883,
951
+ "towa": 884,
952
+ "ta_no": 885,
953
+ "_an": 886,
954
+ "kʰaɪ": 887,
955
+ "ɾaɾe": 888,
956
+ "eɪs": 889,
957
+ "ɑd": 890,
958
+ "_wɪˈθ": 891,
959
+ "_ˈivɪn": 892,
960
+ "_lu": 893,
961
+ "ɔɪ": 894,
962
+ "lɪŋ": 895,
963
+ "əti": 896,
964
+ "_ðə_f": 897,
965
+ "oʃi": 898,
966
+ "_la": 899,
967
+ "si": 900,
968
+ "tɪd": 901,
969
+ "haʊ": 902,
970
+ "pʰin": 903,
971
+ "ˈst": 904,
972
+ "_ˈpəɹ": 905,
973
+ "eɹ": 906,
974
+ "*!": 907,
975
+ "_ˈmɪstəɹ": 908,
976
+ "ʃa": 909,
977
+ "_ˌɪm": 910,
978
+ "ˌθɪŋ": 911,
979
+ "_neɪ": 912,
980
+ "_nɥ": 913,
981
+ "ɑk": 914,
982
+ "_ɹu": 915,
983
+ "_ʃɯ": 916,
984
+ "_ðə_ˈm": 917,
985
+ "demo": 918,
986
+ "_dɹ": 919,
987
+ "dʑoo": 920,
988
+ "_stɪɫ": 921,
989
+ "_pʰiŋ": 922,
990
+ "ə_i": 923,
991
+ "_ɪkˈsp": 924,
992
+ "_wɛnt": 925,
993
+ "ɪɹi": 926,
994
+ "əˈm": 927,
995
+ "o_ka": 928,
996
+ "_əˈk": 929,
997
+ "ɔk": 930,
998
+ "_ɥɛ": 931,
999
+ "_lʊk": 932,
1000
+ "ˈd": 933,
1001
+ "kaʃi": 934,
1002
+ "_wɪθ_ə": 935,
1003
+ "ljɛn": 936,
1004
+ "ɔn": 937,
1005
+ "_ljɛn": 938,
1006
+ "_hɛɫ": 939,
1007
+ "uɹ": 940,
1008
+ "_tʰoʊ": 941,
1009
+ "_tʃʰɥæn": 942,
1010
+ "_sk": 943,
1011
+ "tsʰaɪ": 944,
1012
+ "ɛtəɹ": 945,
1013
+ "_min": 946,
1014
+ "noʊ": 947,
1015
+ "ʃɯ": 948,
1016
+ "_θɹu": 949,
1017
+ "_θɔt": 950,
1018
+ "dajo": 951,
1019
+ "wi": 952,
1020
+ "i_ko": 953,
1021
+ "_tɹ": 954,
1022
+ "_fan": 955,
1023
+ "ɹɛ": 956,
1024
+ "saN": 957,
1025
+ "_hi_wɑz": 958,
1026
+ "_ɾe": 959,
1027
+ "_əm": 960,
1028
+ "te_ki": 961,
1029
+ "_xoʊ": 962,
1030
+ "ˈl": 963,
1031
+ "ˈg": 964,
1032
+ "ga_i": 965,
1033
+ "_ɔn_ðə": 966,
1034
+ "_xwa": 967,
1035
+ "vɪŋ": 968,
1036
+ "man": 969,
1037
+ "fəɹ": 970,
1038
+ "_oʊn": 971,
1039
+ "ˈɹ": 972,
1040
+ "_kɹ": 973,
1041
+ "te_o": 974,
1042
+ "ɪli": 975,
1043
+ "_ʃɥɛ": 976,
1044
+ "_fəŋ": 977,
1045
+ "æɫ": 978,
1046
+ "ɑp": 979,
1047
+ "_ˈɛv": 980,
1048
+ "eɪndʒ": 981,
1049
+ "iɫ": 982,
1050
+ "wət": 983,
1051
+ "ɛðəɹ": 984,
1052
+ "_fən": 985,
1053
+ "ɾee": 986,
1054
+ "_hi_hæd": 987,
1055
+ "_maɪt": 988,
1056
+ "_ge": 989,
1057
+ "ækt": 990,
1058
+ "ɪts": 991,
1059
+ "_hɪm": 992,
1060
+ "_ze": 993,
1061
+ "ii": 994,
1062
+ "_N": 995,
1063
+ "_əv_hɪz": 996,
1064
+ "_gɹ": 997,
1065
+ "ænt": 998,
1066
+ "ɪˌ": 999,
1067
+ "_hɪmˈsɛɫf": 1000,
1068
+ "wa_na": 1001,
1069
+ "aɪəɹ": 1002,
1070
+ "dʑanai": 1003,
1071
+ "kana": 1004,
1072
+ "aɪz": 1005,
1073
+ "_ɪt_ɪz": 1006,
1074
+ "mase": 1007,
1075
+ "wɪn": 1008,
1076
+ "əθɪŋ": 1009,
1077
+ "_pɹəˈ": 1010,
1078
+ "kɯn": 1011,
1079
+ "ˈju": 1012,
1080
+ "_fɔɹ": 1013,
1081
+ "pʰi": 1014,
1082
+ "pʰiŋ": 1015,
1083
+ "o_i": 1016,
1084
+ "vz": 1017,
1085
+ "ɔɪn": 1018,
1086
+ "tʰiŋ": 1019,
1087
+ "_ne": 1020,
1088
+ "gəɹ": 1021,
1089
+ "æts": 1022,
1090
+ "_ˈɹi": 1023
1091
+ },
1092
+ "merges": [
1093
+ "_ t",
1094
+ "↓ ↑",
1095
+ "_ ˈ",
1096
+ "ə n",
1097
+ "_ s",
1098
+ "a ɪ",
1099
+ "ə ɹ",
1100
+ "e ɪ",
1101
+ "o ʊ",
1102
+ "_ k",
1103
+ "ʃ i",
1104
+ "_ w",
1105
+ "_ ð",
1106
+ "t s",
1107
+ "t ʃ",
1108
+ "_t s",
1109
+ "_ h",
1110
+ "_ ə",
1111
+ "_ m",
1112
+ "a n",
1113
+ "_ n",
1114
+ "_ð ə",
1115
+ "ɛ n",
1116
+ "ɑ ʊ",
1117
+ "ɑ ŋ",
1118
+ "` ⁼",
1119
+ "_ p",
1120
+ "_ i",
1121
+ "_ ɪ",
1122
+ "_t ʃ",
1123
+ "_ l",
1124
+ "j ɛn",
1125
+ "_ d",
1126
+ "_ f",
1127
+ "_ j",
1128
+ "w o",
1129
+ "_ b",
1130
+ "t a",
1131
+ "` ↓",
1132
+ "t e",
1133
+ "ən d",
1134
+ "_ ʃi",
1135
+ "w a",
1136
+ "k a",
1137
+ "ɪ ŋ",
1138
+ "i n",
1139
+ "s t",
1140
+ "l i",
1141
+ "ʊ ŋ",
1142
+ "_t ɪ",
1143
+ "t o",
1144
+ "w eɪ",
1145
+ "_ ənd",
1146
+ "ʰ i",
1147
+ "_ə v",
1148
+ "ə ŋ",
1149
+ "n o",
1150
+ "_ x",
1151
+ "ɾ ɯ",
1152
+ "n a",
1153
+ "_ a",
1154
+ "_ ɹ",
1155
+ "ɪ n",
1156
+ "g a",
1157
+ "d e",
1158
+ "j oʊ",
1159
+ "æ n",
1160
+ "k ɯ",
1161
+ "ɾ e",
1162
+ "m a",
1163
+ "_ðə _ˈ",
1164
+ "ɾ a",
1165
+ "ɛ ɹ",
1166
+ "m o",
1167
+ "ɔ ɹ",
1168
+ "ə ɫ",
1169
+ "_ g",
1170
+ "d a",
1171
+ "* ↑",
1172
+ "ɪ ˈ",
1173
+ "_ o",
1174
+ "_ ʃ",
1175
+ "i ŋ",
1176
+ "j a",
1177
+ "ə m",
1178
+ "_ ˌ",
1179
+ "a ʊ",
1180
+ "_ə ˈ",
1181
+ "` ↑",
1182
+ "ə t",
1183
+ "_ aɪ",
1184
+ "o o",
1185
+ "s ɯ",
1186
+ "↓ .",
1187
+ "_ɪ n",
1188
+ "_h i",
1189
+ "_w ɪ",
1190
+ "ɪ z",
1191
+ "_n a",
1192
+ "w an",
1193
+ "_k o",
1194
+ "_w o",
1195
+ "ɪ d",
1196
+ "ɾ i",
1197
+ "_j u",
1198
+ "m ə",
1199
+ "_l ə",
1200
+ "_h æ",
1201
+ "_ðə t",
1202
+ "ɑ ɹ",
1203
+ "t ʰ",
1204
+ "k i",
1205
+ "… …",
1206
+ "ɑ z",
1207
+ "_ ɔ",
1208
+ "_m i",
1209
+ "_w ɑz",
1210
+ "_ˈ s",
1211
+ "↓ ,",
1212
+ "_t ʰ",
1213
+ "ə ˈ",
1214
+ "d ʑ",
1215
+ "ɪ t",
1216
+ "_k ʰ",
1217
+ "i ɛ",
1218
+ "_m a",
1219
+ "ɪ s",
1220
+ "ts ɯ",
1221
+ "_n i",
1222
+ "_ɪ t",
1223
+ "k e",
1224
+ "i ɑʊ",
1225
+ "_k a",
1226
+ "_ əɹ",
1227
+ "n d",
1228
+ "_ˈ p",
1229
+ "k o",
1230
+ "j o",
1231
+ "ɹ i",
1232
+ "m ən",
1233
+ "ʊ d",
1234
+ "_ˈ m",
1235
+ "_f əɹ",
1236
+ "tʃ ʰi",
1237
+ "s a",
1238
+ "ʰ ɥ",
1239
+ "k ʰ",
1240
+ "ˈ s",
1241
+ "ɑ t",
1242
+ "ɛ d",
1243
+ "s e",
1244
+ "t ʃi",
1245
+ "ɛ ɫ",
1246
+ "_ˈ k",
1247
+ "_j oʊ",
1248
+ "t əɹ",
1249
+ "ɛ z",
1250
+ "- -",
1251
+ "v əɹ",
1252
+ "` →",
1253
+ "ʃ ən",
1254
+ "_ɪ z",
1255
+ "_m eɪ",
1256
+ "_ æ",
1257
+ "d ʒ",
1258
+ "_k i",
1259
+ "_h ɪz",
1260
+ "_b i",
1261
+ "u ɑŋ",
1262
+ "_ˈ f",
1263
+ "↓↑ .",
1264
+ "_wɪ θ",
1265
+ "j u",
1266
+ "i ɑŋ",
1267
+ "→ .",
1268
+ "_s o",
1269
+ "_h əɹ",
1270
+ "↑ .",
1271
+ "n i",
1272
+ "_m o",
1273
+ "_m aɪ",
1274
+ "l aɪ",
1275
+ "ɥ ɛ",
1276
+ "_t a",
1277
+ "ən t",
1278
+ "_tʃ ʰi",
1279
+ "_s ɯ",
1280
+ "_ θ",
1281
+ "_ ɛz",
1282
+ "w ən",
1283
+ "m e",
1284
+ "m i",
1285
+ "_hæ d",
1286
+ "_h a",
1287
+ "ə s",
1288
+ "_ˈ l",
1289
+ "_s t",
1290
+ "ð əɹ",
1291
+ "oʊ n",
1292
+ "_w a",
1293
+ "ʰ əŋ",
1294
+ "_n ɑt",
1295
+ "* .",
1296
+ "k t",
1297
+ "_ˈ h",
1298
+ "d o",
1299
+ "ɥ æn",
1300
+ "n e",
1301
+ "_t o",
1302
+ "_w ən",
1303
+ "_n o",
1304
+ "_l aɪ",
1305
+ "_w əɹ",
1306
+ "↑ ,",
1307
+ "→ ,",
1308
+ "ɛ s",
1309
+ "↓↑ ,",
1310
+ "_ɔ n",
1311
+ "ʰ u",
1312
+ "s o",
1313
+ "_ˈ b",
1314
+ "ɫ d",
1315
+ "ɪ k",
1316
+ "ɪ st",
1317
+ "_f ɹ",
1318
+ "_ð ɛɹ",
1319
+ "_w eɪ",
1320
+ "ka ɾa",
1321
+ "_ˈ d",
1322
+ "_hæ v",
1323
+ "ts ʰ",
1324
+ "w aɪ",
1325
+ "ɾ o",
1326
+ "ɛ m",
1327
+ "_æ t",
1328
+ "ʊ ɹ",
1329
+ "_ˈ w",
1330
+ "b a",
1331
+ "_n oʊ",
1332
+ "ʰ jɛn",
1333
+ "ɹ eɪ",
1334
+ "_j o",
1335
+ "ɸ ɯ",
1336
+ "_s a",
1337
+ "_ɹ ɪˈ",
1338
+ "_ˈ n",
1339
+ "a i",
1340
+ "_b ət",
1341
+ "ɪ ɹ",
1342
+ "tʃ ʰɥ",
1343
+ "_d ʑ",
1344
+ "ə ˌ",
1345
+ "_ð ɪs",
1346
+ ". .",
1347
+ "x wa",
1348
+ "_ɪ m",
1349
+ "_d ɪˈ",
1350
+ "_k ən",
1351
+ "dʑ i",
1352
+ "* ,",
1353
+ "ɑ n",
1354
+ "_ʃi ɑŋ",
1355
+ "_k ɯ",
1356
+ "ʃi n",
1357
+ "_s oʊ",
1358
+ "b i",
1359
+ "tʰ jɛn",
1360
+ "te _i",
1361
+ "_ts ʰ",
1362
+ "_ ɯ",
1363
+ "aɪ t",
1364
+ "ʰi ŋ",
1365
+ "ð ə",
1366
+ "_ɔ ɫ",
1367
+ "_ˈ ɹ",
1368
+ "na i",
1369
+ "əɹ d",
1370
+ "_ˈ t",
1371
+ "_ ən",
1372
+ "_tʃ ʰɥ",
1373
+ "_i ɛ",
1374
+ "l eɪ",
1375
+ "ɛɹ i",
1376
+ "ˈ t",
1377
+ "h a",
1378
+ "ʃi ŋ",
1379
+ "ɛ vəɹ",
1380
+ "z ɯ",
1381
+ "_w i",
1382
+ "_j a",
1383
+ "ɛ k",
1384
+ "ʰ ɑŋ",
1385
+ "_ts ɯ",
1386
+ "_əv _ðə",
1387
+ "ta ʃi",
1388
+ "_s ɛd",
1389
+ "_x ə",
1390
+ "_l i",
1391
+ "_s i",
1392
+ "de sɯ",
1393
+ "_ˌ ɪn",
1394
+ "ʃ jɛn",
1395
+ "_b aɪ",
1396
+ "o n",
1397
+ "_x ɑʊ",
1398
+ "_ð eɪ",
1399
+ "_x aɪ",
1400
+ "` ↓↑",
1401
+ "x weɪ",
1402
+ "h i",
1403
+ "_s e",
1404
+ "ə _s",
1405
+ "_fɹ əm",
1406
+ "ʊ t",
1407
+ "d i",
1408
+ "aʊ t",
1409
+ "ə b",
1410
+ "s ɹ",
1411
+ "ə z",
1412
+ "_x weɪ",
1413
+ "_kʰ ə",
1414
+ "ɹ u",
1415
+ "_ u",
1416
+ "_d e",
1417
+ "aɪ d",
1418
+ "ɪ v",
1419
+ "b ɯ",
1420
+ "_h o",
1421
+ "əɹ z",
1422
+ "j oo",
1423
+ "_b ɪˈ",
1424
+ "_tʰ a",
1425
+ "ɛ t",
1426
+ "e n",
1427
+ "ɛn i",
1428
+ "ə st",
1429
+ "æ k",
1430
+ "ə _ts",
1431
+ "_ˈ ɪn",
1432
+ "t i",
1433
+ "ɥ n",
1434
+ "_d ʒ",
1435
+ "x ɑʊ",
1436
+ "_ˈ v",
1437
+ "ʃi ɑŋ",
1438
+ "p ʰ",
1439
+ "_wɪ tʃ",
1440
+ "eɪ m",
1441
+ "oʊ z",
1442
+ "ə ðəɹ",
1443
+ "f ɑŋ",
1444
+ "_ˈ g",
1445
+ "_d o",
1446
+ "_ʃi ɑʊ",
1447
+ "_ˈ æ",
1448
+ "_j ʊɹ",
1449
+ "_ð ɛm",
1450
+ "ɪ m",
1451
+ "ɛ st",
1452
+ "æn d",
1453
+ "_d u",
1454
+ "ɯ ɯ",
1455
+ "k an",
1456
+ "_d a",
1457
+ "in o",
1458
+ "_ e",
1459
+ "_w ʊd",
1460
+ "ɛn d",
1461
+ "m eɪ",
1462
+ "θ ɪŋ",
1463
+ "_ʃ jɛn",
1464
+ "i z",
1465
+ "aɪ m",
1466
+ "_h u",
1467
+ "_əˈ b",
1468
+ "ən s",
1469
+ "_wɪ ɫ",
1470
+ "t ʰi",
1471
+ "g o",
1472
+ "ɛn t",
1473
+ "f u",
1474
+ "æ p",
1475
+ "x oʊ",
1476
+ "eɪ k",
1477
+ "ʊ k",
1478
+ "əɹ ˈ",
1479
+ "_θ ɪŋ",
1480
+ "ə l",
1481
+ "p ɹ",
1482
+ "ə tʃ",
1483
+ "n t",
1484
+ "_ ɸɯ",
1485
+ "l u",
1486
+ "_ˈ ɔ",
1487
+ "_i ɑʊ",
1488
+ "l ə",
1489
+ "t u",
1490
+ "_dʑ i",
1491
+ "eɪ t",
1492
+ "_ʃi n",
1493
+ "n na",
1494
+ "_ˈp ɹ",
1495
+ "f ən",
1496
+ "_ə p",
1497
+ "n jɛn",
1498
+ "_a ʊt",
1499
+ "f ɔɹ",
1500
+ "_t u",
1501
+ "eɪ ʃən",
1502
+ "ɪ ɫ",
1503
+ "_w ət",
1504
+ "_ɪ f",
1505
+ "_ ɥ",
1506
+ "_f a",
1507
+ "ˈ w",
1508
+ "tʃ ʰjɛn",
1509
+ "_w ɪn",
1510
+ "oʊ ɫd",
1511
+ "_əˈ p",
1512
+ "aʊ nd",
1513
+ "s an",
1514
+ "h e",
1515
+ "_b ɪn",
1516
+ "f a",
1517
+ "ɪ f",
1518
+ "ɔ ŋ",
1519
+ "g e",
1520
+ "_ɪn _ðə",
1521
+ "m iŋ",
1522
+ "_p ɹ",
1523
+ "in a",
1524
+ "an o",
1525
+ "əb əɫ",
1526
+ "k ˈs",
1527
+ "_ˈ ɛni",
1528
+ "n əŋ",
1529
+ "ə d",
1530
+ "_əv _ðə_ˈ",
1531
+ "_w aɪ",
1532
+ "_t aɪm",
1533
+ "ˈs ɛɫ",
1534
+ "ʃi ɛ",
1535
+ "_k əm",
1536
+ "æ st",
1537
+ "_g oʊ",
1538
+ "m ɯ",
1539
+ "ˈ p",
1540
+ "_ˈ st",
1541
+ "ə _t",
1542
+ "p t",
1543
+ "_p ʰ",
1544
+ "ʰ ɹ",
1545
+ "ʃ ja",
1546
+ "i wa",
1547
+ "ɪ l",
1548
+ "b ət",
1549
+ "_f ɑŋ",
1550
+ "h o",
1551
+ "i v",
1552
+ "l oʊ",
1553
+ "b e",
1554
+ "_laɪ k",
1555
+ "ɪ ʃ",
1556
+ "_f u",
1557
+ "z e",
1558
+ "ə _tʃ",
1559
+ "ɑɹ t",
1560
+ "ɔɹ d",
1561
+ "tʃʰi ŋ",
1562
+ "m p",
1563
+ "_ðə _s",
1564
+ "_əˈb aʊt",
1565
+ "_ˈ oʊ",
1566
+ "kʰ ə",
1567
+ "d _tɪ",
1568
+ "ŋ ga",
1569
+ "ə li",
1570
+ "_kʰ an",
1571
+ "ç i",
1572
+ "_ˈ ju",
1573
+ "_k ʊd",
1574
+ "ɔ ɫ",
1575
+ "ɔ t",
1576
+ "_ɪ ts",
1577
+ "_s an",
1578
+ "tʃ a",
1579
+ "i _na",
1580
+ "x ə",
1581
+ "ɛ kt",
1582
+ "_m ɔɹ",
1583
+ "te _kɯ",
1584
+ "ɪd ʒ",
1585
+ "j ʊŋ",
1586
+ "_w an",
1587
+ "æ t",
1588
+ "ka t",
1589
+ "ˈsɛɫ f",
1590
+ "_k e",
1591
+ "aɪ nd",
1592
+ "i t",
1593
+ "_ ɑɹ",
1594
+ "s p",
1595
+ "oʊn t",
1596
+ "_t ʃi",
1597
+ "tsʰ ɹ",
1598
+ "_x ən",
1599
+ "_əˈ g",
1600
+ "ə _k",
1601
+ "to _i",
1602
+ "_t ʰi",
1603
+ "_i ŋ",
1604
+ "aʊ n",
1605
+ "g ɯ",
1606
+ "_ɪ kˈs",
1607
+ "ɛ v",
1608
+ "g i",
1609
+ "k s",
1610
+ "_s əm",
1611
+ "an a",
1612
+ "ɪt əɫ",
1613
+ "n an",
1614
+ "_ˈɪn tu",
1615
+ "_hi ɹ",
1616
+ "_t e",
1617
+ "_n aʊ",
1618
+ "ʃi ɑʊ",
1619
+ "ʃ o",
1620
+ "ɹ e",
1621
+ "x aɪ",
1622
+ "_tʃʰi ŋ",
1623
+ "_s ɹ",
1624
+ "_h aʊ",
1625
+ "? .",
1626
+ "_f eɪ",
1627
+ "li ŋ",
1628
+ "_ʃ ja",
1629
+ "_ˈ dʒ",
1630
+ "_s eɪ",
1631
+ "ˈ n",
1632
+ "s oʊ",
1633
+ "tʰ ʊŋ",
1634
+ "_l joʊ",
1635
+ "m aɪ",
1636
+ "_b ɹ",
1637
+ "ɹeɪ t",
1638
+ "_n əŋ",
1639
+ "ʰ ə",
1640
+ "æn s",
1641
+ "_ˈɔ l",
1642
+ "ta tʃi",
1643
+ "n to",
1644
+ "_ˌɪn ˈ",
1645
+ "l e",
1646
+ "n de",
1647
+ "_ˈv ɛɹi",
1648
+ "mən t",
1649
+ "ɾi ma",
1650
+ "_ð ɛn",
1651
+ "_h əz",
1652
+ "_ɹ i",
1653
+ "f təɹ",
1654
+ "_s p",
1655
+ "ɾe wa",
1656
+ "ga _a",
1657
+ "z _əv",
1658
+ "_m iŋ",
1659
+ "_tɪ _ðə",
1660
+ "ɹ aɪ",
1661
+ "ɛ l",
1662
+ "ɹ æ",
1663
+ "_h oʊ",
1664
+ "x u",
1665
+ "oʊn li",
1666
+ "ŋ k",
1667
+ "i _i",
1668
+ "_d ɪd",
1669
+ "_dʒ ɪst",
1670
+ "in g",
1671
+ "ka i",
1672
+ "_m æn",
1673
+ "_i n",
1674
+ "z o",
1675
+ "ə f",
1676
+ "da ke",
1677
+ "_ˈs əm",
1678
+ "ɾɯ _no",
1679
+ "_g o",
1680
+ "tʃ əɹ",
1681
+ "i te",
1682
+ "`↓ .",
1683
+ "_kʰ aɪ",
1684
+ "s k",
1685
+ "ɔɹ s",
1686
+ "_t ʰiŋ",
1687
+ "_n ə",
1688
+ "p əɫ",
1689
+ "_tɪ _bi",
1690
+ "ˈ fɔɹ",
1691
+ "m u",
1692
+ "s u",
1693
+ "a a",
1694
+ "ɪst əɹ",
1695
+ "ʰ an",
1696
+ "p əɹ",
1697
+ "ə _p",
1698
+ "li ɑŋ",
1699
+ "_ v",
1700
+ "oʊ st",
1701
+ "_əˈg ɛn",
1702
+ "ən z",
1703
+ "N o",
1704
+ "ɔɹ t",
1705
+ "_s əˈ",
1706
+ "_m ɯ",
1707
+ "tʃ ʰ",
1708
+ "_ˈl ɪtəɫ",
1709
+ "_x wo",
1710
+ "_ˌ bi",
1711
+ "_ˈoʊ vəɹ",
1712
+ "_ çi",
1713
+ "_d eɪ",
1714
+ "aɪ n",
1715
+ "_ʃi ŋ",
1716
+ "i _ʃi",
1717
+ "_tsʰ aɪ",
1718
+ "ʃ oo",
1719
+ "ɾ oo",
1720
+ "b əɹ",
1721
+ "ʰ a",
1722
+ "ˈ ɛs",
1723
+ "_ɪn _ðə_ˈ",
1724
+ "N wa",
1725
+ "_ð ən",
1726
+ "s aɪ",
1727
+ "_ˈju ˈɛs",
1728
+ "n da",
1729
+ "_p leɪ",
1730
+ "ɪŋ _tɪ",
1731
+ "ɪt i",
1732
+ "_m e",
1733
+ "_ʃ ʊd",
1734
+ "_n u",
1735
+ "_ðə _k",
1736
+ "z a",
1737
+ "_ˈ ɛvəɹ",
1738
+ "əɹ n",
1739
+ "æ d",
1740
+ "ˈ m",
1741
+ "_d oʊnt",
1742
+ "_m əst",
1743
+ "j ɯɯ",
1744
+ "ɑɹ d",
1745
+ "_ jɛn",
1746
+ "ʃ ɥ",
1747
+ "_ˈ oʊnli",
1748
+ "_ʃ o",
1749
+ "_l iŋ",
1750
+ "s s",
1751
+ "ɑ l",
1752
+ "de a",
1753
+ "ɾe ta",
1754
+ "m jɛn",
1755
+ "_g ʊd",
1756
+ "_w ɔ",
1757
+ "i mo",
1758
+ "no _ko",
1759
+ "_ ɥæn",
1760
+ "nd ʒ",
1761
+ "ɪ ʃən",
1762
+ "o _ʃi",
1763
+ "_θɪŋ k",
1764
+ "_n an",
1765
+ "to _o",
1766
+ "_tʰ ʊŋ",
1767
+ "l joʊ",
1768
+ "ta i",
1769
+ "mə _s",
1770
+ "_j ɯ",
1771
+ "_ uɑŋ",
1772
+ "_ˌbi ˈfɔɹ",
1773
+ "æ s",
1774
+ "_tʃ ʰjɛn",
1775
+ "i k",
1776
+ "_b æk",
1777
+ "_ˈ iv",
1778
+ "eɪ n",
1779
+ "u n",
1780
+ "l a",
1781
+ "ˈ k",
1782
+ "_d aʊn",
1783
+ "an ai",
1784
+ "_l ɛ",
1785
+ "əɹ t",
1786
+ "ð ɛɹ",
1787
+ "_ˈæ ftəɹ",
1788
+ "da t",
1789
+ "f an",
1790
+ "b əɫ",
1791
+ "te mo",
1792
+ "tʰ a",
1793
+ "ɾɯ _ko",
1794
+ "ˈ v",
1795
+ "f eɪ",
1796
+ "_m ətʃ",
1797
+ "x wo",
1798
+ "ɹ oʊ",
1799
+ "_b a",
1800
+ "_ˈn ɛvəɹ",
1801
+ "_meɪ d",
1802
+ "_j ʊŋ",
1803
+ "_əˈp ɑn",
1804
+ "! ?",
1805
+ "_ˈ ʃ",
1806
+ "_ðə_ˈ k",
1807
+ "f t",
1808
+ "_b o",
1809
+ "_ɪn _ə",
1810
+ "tʃʰɥ æn",
1811
+ "ˈ z",
1812
+ "`↓ ,",
1813
+ "_bɪˈ k",
1814
+ "ɪ g",
1815
+ "k in",
1816
+ "_k l",
1817
+ "ɾɯ _n",
1818
+ "_l ɑʊ",
1819
+ "-- --",
1820
+ "i ka",
1821
+ "_ɹ aɪt",
1822
+ "z d",
1823
+ "z _ənd",
1824
+ "_k jo",
1825
+ "x wan",
1826
+ "to o",
1827
+ "_g ɪt",
1828
+ "_l iɑŋ",
1829
+ "ta _n",
1830
+ "_k eɪm",
1831
+ "_ˈ əðəɹ",
1832
+ "_w ɛɫ",
1833
+ "te ki",
1834
+ "se e",
1835
+ "j ɯ",
1836
+ "i _o",
1837
+ "to _ʃi",
1838
+ "f əɫ",
1839
+ "b o",
1840
+ "ˌ t",
1841
+ "ɪ p",
1842
+ "an e",
1843
+ "_tʰ jɛn",
1844
+ "_tʃ o",
1845
+ "ɾ jo",
1846
+ "ɪn s",
1847
+ "_h e",
1848
+ "ŋ ka",
1849
+ "ʃ ɥɛ",
1850
+ "dʑ a",
1851
+ "v d",
1852
+ "ʰ wan",
1853
+ "_g ɹeɪt",
1854
+ "_əv _ə",
1855
+ "ənd əɹ",
1856
+ "ke do",
1857
+ "_ðə _b",
1858
+ "ə k",
1859
+ "_t eɪk",
1860
+ "kʰ an",
1861
+ "_ˈɔl ˌ",
1862
+ "s wo",
1863
+ "_ɪt _wɑz",
1864
+ "_ʃ ɥ",
1865
+ "_si m",
1866
+ "_ˈf ɑ",
1867
+ "m in",
1868
+ "i _a",
1869
+ "s oo",
1870
+ "ɛn s",
1871
+ "_s ətʃ",
1872
+ "tʰ aɪ",
1873
+ "_ ga",
1874
+ "i _ka",
1875
+ "k oo",
1876
+ "_fəɹ st",
1877
+ "_ˈ tʃ",
1878
+ "n no",
1879
+ "ə _ɹ",
1880
+ "ta ɾa",
1881
+ "tʃʰ joʊ",
1882
+ "_æ m",
1883
+ "_m u",
1884
+ "_meɪ k",
1885
+ "↓ …",
1886
+ "ɪˈ θ",
1887
+ "ɑ b",
1888
+ "ɹ a",
1889
+ "_w ɛɹ",
1890
+ "_ðə_ˈ s",
1891
+ "_əˈ l",
1892
+ "_ oʊɫd",
1893
+ "æ l",
1894
+ "_ˈp i",
1895
+ "_l ɔŋ",
1896
+ "dʑ o",
1897
+ "_tʰ aɪ",
1898
+ "ɔɹ n",
1899
+ "əɫ z",
1900
+ "_t əˈ",
1901
+ "_əˈ weɪ",
1902
+ "p a",
1903
+ "_ð iz",
1904
+ "_ˈs p",
1905
+ "n n",
1906
+ "ma e",
1907
+ "to wa",
1908
+ "ta _no",
1909
+ "_ an",
1910
+ "kʰ aɪ",
1911
+ "ɾa ɾe",
1912
+ "eɪ s",
1913
+ "ɑ d",
1914
+ "_w ɪˈθ",
1915
+ "_ˈiv ɪn",
1916
+ "_l u",
1917
+ "ɔ ɪ",
1918
+ "l ɪŋ",
1919
+ "ət i",
1920
+ "_ðə _f",
1921
+ "o ʃi",
1922
+ "_l a",
1923
+ "s i",
1924
+ "t ɪd",
1925
+ "h aʊ",
1926
+ "pʰ in",
1927
+ "ˈ st",
1928
+ "_ˈp əɹ",
1929
+ "e ɹ",
1930
+ "* !",
1931
+ "_ˈm ɪstəɹ",
1932
+ "ʃ a",
1933
+ "_ˌ ɪm",
1934
+ "ˌ θɪŋ",
1935
+ "_n eɪ",
1936
+ "_n ɥ",
1937
+ "ɑ k",
1938
+ "_ɹ u",
1939
+ "_ʃ ɯ",
1940
+ "_ðə_ˈ m",
1941
+ "de mo",
1942
+ "_d ɹ",
1943
+ "dʑ oo",
1944
+ "_st ɪɫ",
1945
+ "_p ʰiŋ",
1946
+ "ə _i",
1947
+ "_ɪkˈs p",
1948
+ "_w ɛnt",
1949
+ "ɪ ɹi",
1950
+ "əˈ m",
1951
+ "o _ka",
1952
+ "_əˈ k",
1953
+ "ɔ k",
1954
+ "_ ɥɛ",
1955
+ "_l ʊk",
1956
+ "ˈ d",
1957
+ "ka ʃi",
1958
+ "_wɪθ _ə",
1959
+ "l jɛn",
1960
+ "ɔ n",
1961
+ "_l jɛn",
1962
+ "_h ɛɫ",
1963
+ "u ɹ",
1964
+ "_tʰ oʊ",
1965
+ "_tʃʰɥ æn",
1966
+ "_s k",
1967
+ "tsʰ aɪ",
1968
+ "ɛ təɹ",
1969
+ "_m in",
1970
+ "n oʊ",
1971
+ "ʃ ɯ",
1972
+ "_θ ɹu",
1973
+ "_θ ɔt",
1974
+ "da jo",
1975
+ "w i",
1976
+ "i _ko",
1977
+ "_t ɹ",
1978
+ "_f an",
1979
+ "ɹ ɛ",
1980
+ "sa N",
1981
+ "_hi _wɑz",
1982
+ "_ ɾe",
1983
+ "_ə m",
1984
+ "te _ki",
1985
+ "_x oʊ",
1986
+ "ˈ l",
1987
+ "ˈ g",
1988
+ "ga _i",
1989
+ "_ɔn _ðə",
1990
+ "_x wa",
1991
+ "v ɪŋ",
1992
+ "m an",
1993
+ "f əɹ",
1994
+ "_ oʊn",
1995
+ "ˈ ɹ",
1996
+ "_k ɹ",
1997
+ "te _o",
1998
+ "ɪ li",
1999
+ "_ʃ ɥɛ",
2000
+ "_f əŋ",
2001
+ "æ ɫ",
2002
+ "ɑ p",
2003
+ "_ˈ ɛv",
2004
+ "eɪ ndʒ",
2005
+ "i ɫ",
2006
+ "w ət",
2007
+ "ɛ ðəɹ",
2008
+ "_f ən",
2009
+ "ɾe e",
2010
+ "_hi _hæd",
2011
+ "_maɪ t",
2012
+ "_g e",
2013
+ "æ kt",
2014
+ "ɪ ts",
2015
+ "_h ɪm",
2016
+ "_ ze",
2017
+ "i i",
2018
+ "_ N",
2019
+ "_əv _hɪz",
2020
+ "_g ɹ",
2021
+ "æn t",
2022
+ "ɪ ˌ",
2023
+ "_hɪm ˈsɛɫf",
2024
+ "wa _na",
2025
+ "aɪ əɹ",
2026
+ "dʑ anai",
2027
+ "kan a",
2028
+ "aɪ z",
2029
+ "_ɪt _ɪz",
2030
+ "ma se",
2031
+ "w ɪn",
2032
+ "ə θɪŋ",
2033
+ "_pɹ əˈ",
2034
+ "kɯ n",
2035
+ "ˈ ju",
2036
+ "_f ɔɹ",
2037
+ "p ʰi",
2038
+ "p ʰiŋ",
2039
+ "o _i",
2040
+ "v z",
2041
+ "ɔ ɪn",
2042
+ "t ʰiŋ",
2043
+ "_n e",
2044
+ "g əɹ",
2045
+ "æ ts",
2046
+ "_ˈ ɹi"
2047
+ ]
2048
+ }
2049
+ }
utils/g2p/cleaners.py CHANGED
@@ -2,7 +2,7 @@ import re
2
  from utils.g2p.japanese import japanese_to_romaji_with_accent, japanese_to_ipa, japanese_to_ipa2, japanese_to_ipa3
3
  from utils.g2p.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
4
  from utils.g2p.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
5
-
6
  def japanese_cleaners(text):
7
  text = japanese_to_romaji_with_accent(text)
8
  text = re.sub(r'([A-Za-z])$', r'\1.', text)
@@ -20,6 +20,33 @@ def chinese_cleaners(text):
20
  return text
21
 
22
  def cje_cleaners(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  if text.find('[ZH]') != -1:
24
  text = re.sub(r'\[ZH\](.*?)\[ZH\]',
25
  lambda x: chinese_to_ipa(x.group(1))+' ', text)
 
2
  from utils.g2p.japanese import japanese_to_romaji_with_accent, japanese_to_ipa, japanese_to_ipa2, japanese_to_ipa3
3
  from utils.g2p.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
4
  from utils.g2p.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
5
+ patterns = [r'\[EN\](.*?)\[EN\]', r'\[ZH\](.*?)\[ZH\]', r'\[JA\](.*?)\[JA\]']
6
  def japanese_cleaners(text):
7
  text = japanese_to_romaji_with_accent(text)
8
  text = re.sub(r'([A-Za-z])$', r'\1.', text)
 
20
  return text
21
 
22
  def cje_cleaners(text):
23
+ matches = []
24
+ for pattern in patterns:
25
+ matches.extend(re.finditer(pattern, text))
26
+
27
+ matches.sort(key=lambda x: x.start()) # Sort matches by their start positions
28
+
29
+ outputs = ""
30
+ output_langs = []
31
+
32
+ for match in matches:
33
+ text_segment = text[match.start():match.end()]
34
+ phon = clean_one(text_segment)
35
+ if "[EN]" in text_segment:
36
+ lang = 'en'
37
+ elif "[ZH]" in text_segment:
38
+ lang = 'zh'
39
+ elif "[JA]" in text_segment:
40
+ lang = 'ja'
41
+ else:
42
+ raise ValueError("If you see this error, please report this bug to issues.")
43
+ outputs += phon
44
+ output_langs += [lang] * len(phon)
45
+ assert len(outputs) == len(output_langs)
46
+ return outputs, output_langs
47
+
48
+
49
+ def clean_one(text):
50
  if text.find('[ZH]') != -1:
51
  text = re.sub(r'\[ZH\](.*?)\[ZH\]',
52
  lambda x: chinese_to_ipa(x.group(1))+' ', text)