Respair committed on
Commit 43c5871
1 Parent(s): 4028449

Update ljspeechimportable.py

Files changed (1)
  1. ljspeechimportable.py +585 -18
ljspeechimportable.py CHANGED
@@ -67,11 +67,575 @@ def compute_style(ref_dicts):
67
  return reference_embeddings
68
 
69
  # load phonemizer
70
- import phonemizer
71
- global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
72
 
73
  # phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
74
-
75
 
76
  config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
77
 
@@ -86,7 +650,7 @@ pitch_extractor = load_F0_models(F0_path)
86
 
87
  # load BERT model
88
  from Utils.PLBERT.util import load_plbert
89
- BERT_path = config.get('PLBERT_dir', False)
90
  plbert = load_plbert(BERT_path)
91
 
92
  model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)
@@ -94,7 +658,7 @@ _ = [model[key].eval() for key in model]
94
  _ = [model[key].to(device) for key in model]
95
 
96
  # params_whole = torch.load("Models/LJSpeech/epoch_2nd_00100.pth", map_location='cpu')
97
- params_whole = torch.load(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/epoch_2nd_00100.pth')), map_location='cpu')
98
  params = params_whole['net']
99
 
100
  for key in model:
@@ -125,13 +689,15 @@ sampler = DiffusionSampler(
125
  )
126
 
127
  def inference(text, noise, diffusion_steps=5, embedding_scale=1):
128
- text = text.strip()
129
- text = text.replace('"', '')
130
- ps = global_phonemizer.phonemize([text])
131
- ps = word_tokenize(ps[0])
132
- ps = ' '.join(ps)
 
 
133
 
134
- tokens = textclenaer(ps)
135
  tokens.insert(0, 0)
136
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
137
 
@@ -174,13 +740,14 @@ def inference(text, noise, diffusion_steps=5, embedding_scale=1):
174
  return out.squeeze().cpu().numpy()
175
 
176
  def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
177
- text = text.strip()
178
- text = text.replace('"', '')
179
- ps = global_phonemizer.phonemize([text])
180
- ps = word_tokenize(ps[0])
181
- ps = ' '.join(ps)
182
-
183
- tokens = textclenaer(ps)
 
184
  tokens.insert(0, 0)
185
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
186
 
 
67
  return reference_embeddings
68
 
69
  # load phonemizer
70
+ # import phonemizer
71
+ # global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
72
 
73
  # phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
74
+ import fugashi
75
+ import pykakasi
76
+ from collections import OrderedDict
77
+
78
+
79
+ # MB-iSTFT-VITS2
80
+
81
+ import re
82
+ from unidecode import unidecode
83
+ import pyopenjtalk
84
+
85
+
86
+ # Regular expression matching Japanese without punctuation marks:
87
+ _japanese_characters = re.compile(
88
+ r'[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
89
+
90
+ # Regular expression matching non-Japanese characters or punctuation marks:
91
+ _japanese_marks = re.compile(
92
+ r'[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
93
+
94
+ # List of (symbol, Japanese) pairs for marks:
95
+ _symbols_to_japanese = [(re.compile('%s' % x[0]), x[1]) for x in [
96
+ ('%', 'パーセント')
97
+ ]]
98
+
99
+ # List of (romaji, ipa) pairs for marks:
100
+ _romaji_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
101
+ ('ts', 'ʦ'),
102
+ ('u', 'ɯ'),
103
+ ('j', 'ʥ'),
104
+ ('y', 'j'),
105
+ ('ni', 'n^i'),
106
+ ('nj', 'n^'),
107
+ ('hi', 'çi'),
108
+ ('hj', 'ç'),
109
+ ('f', 'ɸ'),
110
+ ('I', 'i*'),
111
+ ('U', 'ɯ*'),
112
+ ('r', 'ɾ')
113
+ ]]
114
+
115
+ # List of (romaji, ipa2) pairs for marks:
116
+ _romaji_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
117
+ ('u', 'ɯ'),
118
+ ('ʧ', 'tʃ'),
119
+ ('j', 'dʑ'),
120
+ ('y', 'j'),
121
+ ('ni', 'n^i'),
122
+ ('nj', 'n^'),
123
+ ('hi', 'çi'),
124
+ ('hj', 'ç'),
125
+ ('f', 'ɸ'),
126
+ ('I', 'i*'),
127
+ ('U', 'ɯ*'),
128
+ ('r', 'ɾ')
129
+ ]]
130
+
131
+ # List of (consonant, sokuon) pairs:
132
+ _real_sokuon = [(re.compile('%s' % x[0]), x[1]) for x in [
133
+ (r'Q([↑↓]*[kg])', r'k#\1'),
134
+ (r'Q([↑↓]*[tdjʧ])', r't#\1'),
135
+ (r'Q([↑↓]*[sʃ])', r's\1'),
136
+ (r'Q([↑↓]*[pb])', r'p#\1')
137
+ ]]
138
+
139
+ # List of (consonant, hatsuon) pairs:
140
+ _real_hatsuon = [(re.compile('%s' % x[0]), x[1]) for x in [
141
+ (r'N([↑↓]*[pbm])', r'm\1'),
142
+ (r'N([↑↓]*[ʧʥj])', r'n^\1'),
143
+ (r'N([↑↓]*[tdn])', r'n\1'),
144
+ (r'N([↑↓]*[kg])', r'ŋ\1')
145
+ ]]
146
+
147
+
148
+ def symbols_to_japanese(text):
149
+ for regex, replacement in _symbols_to_japanese:
150
+ text = re.sub(regex, replacement, text)
151
+ return text
152
+
153
+
154
+ def japanese_to_romaji_with_accent(text):
155
+ '''Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html'''
156
+ text = symbols_to_japanese(text)
157
+ sentences = re.split(_japanese_marks, text)
158
+ marks = re.findall(_japanese_marks, text)
159
+ text = ''
160
+ for i, sentence in enumerate(sentences):
161
+ if re.match(_japanese_characters, sentence):
162
+ if text != '':
163
+ text += ' '
164
+ labels = pyopenjtalk.extract_fullcontext(sentence)
165
+ for n, label in enumerate(labels):
166
+ phoneme = re.search(r'\-([^\+]*)\+', label).group(1)
167
+ if phoneme not in ['sil', 'pau']:
168
+ text += phoneme.replace('ch', 'ʧ').replace('sh',
169
+ 'ʃ').replace('cl', 'Q')
170
+ else:
171
+ continue
172
+ # n_moras = int(re.search(r'/F:(\d+)_', label).group(1))
173
+ a1 = int(re.search(r"/A:(\-?[0-9]+)\+", label).group(1))
174
+ a2 = int(re.search(r"\+(\d+)\+", label).group(1))
175
+ a3 = int(re.search(r"\+(\d+)/", label).group(1))
176
+ if re.search(r'\-([^\+]*)\+', labels[n + 1]).group(1) in ['sil', 'pau']:
177
+ a2_next = -1
178
+ else:
179
+ a2_next = int(
180
+ re.search(r"\+(\d+)\+", labels[n + 1]).group(1))
181
+ # Accent phrase boundary
182
+ if a3 == 1 and a2_next == 1:
183
+ text += ' '
184
+ # Falling
185
+ elif a1 == 0 and a2_next == a2 + 1:
186
+ text += '↓'
187
+ # Rising
188
+ elif a2 == 1 and a2_next == 2:
189
+ text += '↑'
190
+ if i < len(marks):
191
+ text += unidecode(marks[i]).replace(' ', '')
192
+ return text
193
+
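A quick way to inspect the intermediate string this function produces (the exact output depends on the installed pyopenjtalk dictionary, so the value is illustrative only):

romaji = japanese_to_romaji_with_accent('ありがとうございます')
print(romaji)
# '↑' marks a pitch rise, '↓' a fall, and spaces separate accent phrases;
# 'Q' (sokuon) and 'N' (moraic nasal) are still unresolved at this stage.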
194
+
195
+ def get_real_sokuon(text):
196
+ for regex, replacement in _real_sokuon:
197
+ text = re.sub(regex, replacement, text)
198
+ return text
199
+
200
+
201
+ def get_real_hatsuon(text):
202
+ for regex, replacement in _real_hatsuon:
203
+ text = re.sub(regex, replacement, text)
204
+ return text
205
+
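A minimal sketch of what the two helpers above do to those placeholder consonants (hand-written romaji inputs, not real pipeline output):

print(get_real_sokuon('kiQte'))   # -> 'kit#te' (geminate resolved before t/d/j/ʧ)
print(get_real_hatsuon('saNpo'))  # -> 'sampo'  (N assimilates to m before p/b/m)
print(get_real_hatsuon('aNka'))   # -> 'aŋka'   (N becomes ŋ before k/g)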
206
+
207
+ def japanese_to_ipa(text):
208
+ text = japanese_to_romaji_with_accent(text).replace('...', '…')
209
+ text = re.sub(
210
+ r'([aiueo])\1+', lambda x: x.group(0)[0]+'ː'*(len(x.group(0))-1), text)
211
+ text = get_real_sokuon(text)
212
+ text = get_real_hatsuon(text)
213
+ for regex, replacement in _romaji_to_ipa:
214
+ text = re.sub(regex, replacement, text)
215
+ return text
216
+
217
+
218
+ def japanese_to_ipa2(text):
219
+ text = japanese_to_romaji_with_accent(text).replace('...', '…')
220
+ text = get_real_sokuon(text)
221
+ text = get_real_hatsuon(text)
222
+ for regex, replacement in _romaji_to_ipa2:
223
+ text = re.sub(regex, replacement, text)
224
+ return text
225
+
226
+
227
+ def japanese_to_ipa3(text):
228
+ text = japanese_to_ipa2(text).replace('n^', 'ȵ').replace(
229
+ 'ʃ', 'ɕ').replace('*', '\u0325').replace('#', '\u031a')
230
+ text = re.sub(
231
+ r'([aiɯeo])\1+', lambda x: x.group(0)[0]+'ː'*(len(x.group(0))-1), text)
232
+ text = re.sub(r'((?:^|\s)(?:ts|tɕ|[kpt]))', r'\1ʰ', text)
233
+ return text
234
+
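The long-vowel substitution shared by japanese_to_ipa and japanese_to_ipa3 collapses repeated vowels into one vowel plus length marks; a standalone check of that regex on a hand-written romaji string:

lengthened = re.sub(r'([aiueo])\1+',
                    lambda x: x.group(0)[0] + 'ː' * (len(x.group(0)) - 1),
                    'ohayoo gozaimasu')
print(lengthened)  # -> 'ohayoː gozaimasu'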
235
+
236
+ """ from https://github.com/keithito/tacotron """
237
+
238
+ '''
239
+ Cleaners are transformations that run over the input text at both training and eval time.
240
+
241
+ Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
242
+ hyperparameter. Some cleaners are English-specific. You'll typically want to use:
243
+ 1. "english_cleaners" for English text
244
+ 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
245
+ the Unidecode library (https://pypi.python.org/pypi/Unidecode)
246
+ 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
247
+ the symbols in symbols.py to match your data).
248
+ '''
249
+
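The docstring above refers to selecting cleaners by name, but this file only defines the cleaner functions themselves; a hypothetical dispatcher (the _clean_text name is an assumption, not part of this commit) could resolve a comma-delimited list like so:

def _clean_text(text, cleaner_names):
    # Hypothetical helper: look up each named cleaner in this module and apply it in order.
    for name in cleaner_names.split(','):
        cleaner = globals().get(name.strip())
        if cleaner is None:
            raise KeyError('Unknown cleaner: %s' % name)
        text = cleaner(text)
    return text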
250
+
251
+ # Regular expression matching whitespace:
252
+
253
+
254
+ import re
255
+ import inflect
256
+ from unidecode import unidecode
257
+
258
+ _inflect = inflect.engine()
259
+ _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
260
+ _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
261
+ _pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
262
+ _dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
263
+ _ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
264
+ _number_re = re.compile(r'[0-9]+')
265
+
266
+ # List of (regular expression, replacement) pairs for abbreviations:
267
+ _abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
268
+ ('mrs', 'misess'),
269
+ ('mr', 'mister'),
270
+ ('dr', 'doctor'),
271
+ ('st', 'saint'),
272
+ ('co', 'company'),
273
+ ('jr', 'junior'),
274
+ ('maj', 'major'),
275
+ ('gen', 'general'),
276
+ ('drs', 'doctors'),
277
+ ('rev', 'reverend'),
278
+ ('lt', 'lieutenant'),
279
+ ('hon', 'honorable'),
280
+ ('sgt', 'sergeant'),
281
+ ('capt', 'captain'),
282
+ ('esq', 'esquire'),
283
+ ('ltd', 'limited'),
284
+ ('col', 'colonel'),
285
+ ('ft', 'fort'),
286
+ ]]
287
+
288
+
289
+ # List of (ipa, lazy ipa) pairs:
290
+ _lazy_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
291
+ ('r', 'ɹ'),
292
+ ('æ', 'e'),
293
+ ('ɑ', 'a'),
294
+ ('ɔ', 'o'),
295
+ ('ð', 'z'),
296
+ ('θ', 's'),
297
+ ('ɛ', 'e'),
298
+ ('ɪ', 'i'),
299
+ ('ʊ', 'u'),
300
+ ('ʒ', 'ʥ'),
301
+ ('ʤ', 'ʥ'),
302
+ ('', '↓'),
303
+ ]]
304
+
305
+ # List of (ipa, lazy ipa2) pairs:
306
+ _lazy_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
307
+ ('r', 'ɹ'),
308
+ ('ð', 'z'),
309
+ ('θ', 's'),
310
+ ('ʒ', 'ʑ'),
311
+ ('ʤ', 'dʑ'),
312
+ ('', '↓'),
313
+ ]]
314
+
315
+ # List of (ipa, ipa2) pairs
316
+ _ipa_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [
317
+ ('r', 'ɹ'),
318
+ ('ʤ', 'dʒ'),
319
+ ('ʧ', 'tʃ')
320
+ ]]
321
+
322
+
323
+ def expand_abbreviations(text):
324
+ for regex, replacement in _abbreviations:
325
+ text = re.sub(regex, replacement, text)
326
+ return text
327
+
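For example, the abbreviation pass rewrites titles in place, case-insensitively, leaving the rest of the sentence untouched:

print(expand_abbreviations('Dr. Jones met Mr. Smith.'))
# -> 'doctor Jones met mister Smith.'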
328
+
329
+ def collapse_whitespace(text):
330
+ return re.sub(r'\s+', ' ', text)
331
+
332
+
333
+ def _remove_commas(m):
334
+ return m.group(1).replace(',', '')
335
+
336
+
337
+ def _expand_decimal_point(m):
338
+ return m.group(1).replace('.', ' point ')
339
+
340
+
341
+ def _expand_dollars(m):
342
+ match = m.group(1)
343
+ parts = match.split('.')
344
+ if len(parts) > 2:
345
+ return match + ' dollars' # Unexpected format
346
+ dollars = int(parts[0]) if parts[0] else 0
347
+ cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
348
+ if dollars and cents:
349
+ dollar_unit = 'dollar' if dollars == 1 else 'dollars'
350
+ cent_unit = 'cent' if cents == 1 else 'cents'
351
+ return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
352
+ elif dollars:
353
+ dollar_unit = 'dollar' if dollars == 1 else 'dollars'
354
+ return '%s %s' % (dollars, dollar_unit)
355
+ elif cents:
356
+ cent_unit = 'cent' if cents == 1 else 'cents'
357
+ return '%s %s' % (cents, cent_unit)
358
+ else:
359
+ return 'zero dollars'
360
+
361
+
362
+ def _expand_ordinal(m):
363
+ return _inflect.number_to_words(m.group(0))
364
+
365
+
366
+ def _expand_number(m):
367
+ num = int(m.group(0))
368
+ if num > 1000 and num < 3000:
369
+ if num == 2000:
370
+ return 'two thousand'
371
+ elif num > 2000 and num < 2010:
372
+ return 'two thousand ' + _inflect.number_to_words(num % 100)
373
+ elif num % 100 == 0:
374
+ return _inflect.number_to_words(num // 100) + ' hundred'
375
+ else:
376
+ return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
377
+ else:
378
+ return _inflect.number_to_words(num, andword='')
379
+
380
+
381
+ def normalize_numbers(text):
382
+ text = re.sub(_comma_number_re, _remove_commas, text)
383
+ text = re.sub(_pounds_re, r'\1 pounds', text)
384
+ text = re.sub(_dollars_re, _expand_dollars, text)
385
+ text = re.sub(_decimal_number_re, _expand_decimal_point, text)
386
+ text = re.sub(_ordinal_re, _expand_ordinal, text)
387
+ text = re.sub(_number_re, _expand_number, text)
388
+ return text
389
+
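Putting the number helpers together, normalize_numbers spells out currency and plain numbers in one pass; a small worked example:

print(normalize_numbers('It cost £100 and $2.50.'))
# -> 'It cost one hundred pounds and two dollars, fifty cents.'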
390
+
391
+ def mark_dark_l(text):
392
+ return re.sub(r'l([^aeiouæɑɔəɛɪʊ ]*(?: |$))', lambda x: 'ɫ'+x.group(1), text)
393
+
394
+
395
+ import re
396
+ #from text.thai import num_to_thai, latin_to_thai
397
+ #from text.shanghainese import shanghainese_to_ipa
398
+ #from text.cantonese import cantonese_to_ipa
399
+ #from text.ngu_dialect import ngu_dialect_to_ipa
400
+ from unidecode import unidecode
401
+
402
+
403
+ _whitespace_re = re.compile(r'\s+')
404
+
405
+ # Regular expression matching Japanese without punctuation marks:
406
+ _japanese_characters = re.compile(r'[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
407
+
408
+ # Regular expression matching non-Japanese characters or punctuation marks:
409
+ _japanese_marks = re.compile(r'[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]')
410
+
411
+ # List of (regular expression, replacement) pairs for abbreviations:
412
+ _abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
413
+ ('mrs', 'misess'),
414
+ ('mr', 'mister'),
415
+ ('dr', 'doctor'),
416
+ ('st', 'saint'),
417
+ ('co', 'company'),
418
+ ('jr', 'junior'),
419
+ ('maj', 'major'),
420
+ ('gen', 'general'),
421
+ ('drs', 'doctors'),
422
+ ('rev', 'reverend'),
423
+ ('lt', 'lieutenant'),
424
+ ('hon', 'honorable'),
425
+ ('sgt', 'sergeant'),
426
+ ('capt', 'captain'),
427
+ ('esq', 'esquire'),
428
+ ('ltd', 'limited'),
429
+ ('col', 'colonel'),
430
+ ('ft', 'fort'),
431
+ ]]
432
+
433
+
434
+ def expand_abbreviations(text):
435
+ for regex, replacement in _abbreviations:
436
+ text = re.sub(regex, replacement, text)
437
+ return text
438
+
439
+ def collapse_whitespace(text):
440
+ return re.sub(_whitespace_re, ' ', text)
441
+
442
+
443
+ def convert_to_ascii(text):
444
+ return unidecode(text)
445
+
446
+
447
+ def basic_cleaners(text):
448
+ # - For replication of https://github.com/FENRlR/MB-iSTFT-VITS2/issues/2
449
+ # you may need to replace the symbols with Russian ones
450
+ '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
451
+ text = text.lower()
452
+ text = collapse_whitespace(text)
453
+ return text
454
+
455
+ '''
456
+ def fix_g2pk2_error(text):
457
+ new_text = ""
458
+ i = 0
459
+ while i < len(text) - 4:
460
+ if (text[i:i+3] == 'ㅇㅡㄹ' or text[i:i+3] == 'ㄹㅡㄹ') and text[i+3] == ' ' and text[i+4] == 'ㄹ':
461
+ new_text += text[i:i+3] + ' ' + 'ㄴ'
462
+ i += 5
463
+ else:
464
+ new_text += text[i]
465
+ i += 1
466
+
467
+ new_text += text[i:]
468
+ return new_text
469
+ '''
470
+
471
+
472
+
473
+ def japanese_cleaners(text):
474
+ text = japanese_to_romaji_with_accent(text)
475
+ text = re.sub(r'([A-Za-z])$', r'\1.', text)
476
+ return text
477
+
478
+
479
+ def japanese_cleaners2(text):
480
+ return japanese_cleaners(text).replace('ts', 'ʦ').replace('...', '…')
481
+
482
+ def japanese_cleaners3(text):
483
+ text = japanese_to_ipa3(text)
484
+ if "<<" in text or ">>" in text or "¡" in text or "¿" in text:
485
+ text = text.replace("<<","«")
486
+ text = text.replace(">>","»")
487
+ text = text.replace("!","¡")
488
+ text = text.replace("?","¿")
489
+
490
+ if '"' in text:
491
+ text = text.replace('"','”')
492
+
493
+ if '--' in text:
494
+ text = text.replace('--','—')
495
+ if ' ' in text:
496
+ text = text.replace(' ','')
497
+ return text
498
+
499
+
500
+
501
+ # ------------------------------
502
+ ''' cjke type cleaners below '''
503
+ #- text for these cleaners must be labeled first
504
+ # ex1 (single) : some.wav|[EN]put some text here[EN]
505
+ # ex2 (multi) : some.wav|0|[EN]put some text here[EN]
506
+ # ------------------------------
507
+
508
+
509
+ def kej_cleaners(text):
510
+ text = re.sub(r'\[KO\](.*?)\[KO\]',
511
+ lambda x: korean_to_ipa(x.group(1))+' ', text)
512
+ text = re.sub(r'\[EN\](.*?)\[EN\]',
513
+ lambda x: english_to_ipa2(x.group(1)) + ' ', text)
514
+ text = re.sub(r'\[JA\](.*?)\[JA\]',
515
+ lambda x: japanese_to_ipa2(x.group(1)) + ' ', text)
516
+ text = re.sub(r'\s+$', '', text)
517
+ text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
518
+ return text
519
+
520
+
521
+ def cjks_cleaners(text):
522
+ text = re.sub(r'\[JA\](.*?)\[JA\]',
523
+ lambda x: japanese_to_ipa(x.group(1))+' ', text)
524
+ #text = re.sub(r'\[SA\](.*?)\[SA\]',
525
+ # lambda x: devanagari_to_ipa(x.group(1))+' ', text)
526
+ text = re.sub(r'\[EN\](.*?)\[EN\]',
527
+ lambda x: english_to_lazy_ipa(x.group(1))+' ', text)
528
+ text = re.sub(r'\s+$', '', text)
529
+ text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
530
+ return text
531
+
532
+ '''
533
+ #- reserves
534
+
535
+ def thai_cleaners(text):
536
+ text = num_to_thai(text)
537
+ text = latin_to_thai(text)
538
+ return text
539
+
540
+
541
+ def shanghainese_cleaners(text):
542
+ text = shanghainese_to_ipa(text)
543
+ text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
544
+ return text
545
+
546
+
547
+ def chinese_dialect_cleaners(text):
548
+ text = re.sub(r'\[ZH\](.*?)\[ZH\]',
549
+ lambda x: chinese_to_ipa2(x.group(1))+' ', text)
550
+ text = re.sub(r'\[JA\](.*?)\[JA\]',
551
+ lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
552
+ text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
553
+ '˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
554
+ text = re.sub(r'\[GD\](.*?)\[GD\]',
555
+ lambda x: cantonese_to_ipa(x.group(1))+' ', text)
556
+ text = re.sub(r'\[EN\](.*?)\[EN\]',
557
+ lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
558
+ text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
559
+ 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
560
+ text = re.sub(r'\s+$', '', text)
561
+ text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
562
+ return text
563
+ '''
564
+ def japanese_cleaners3(text):
565
+
566
+ global orig
567
+
568
+ orig = text # saving the original unmodified text for future use
569
+
570
+ text = japanese_to_ipa2(text)
571
+
572
+ if '' in text:
573
+ text = text.replace('','')
574
+ if "<<" in text or ">>" in text or "¡" in text or "¿" in text:
575
+ text = text.replace("<<","«")
576
+ text = text.replace(">>","»")
577
+ text = text.replace("!","¡")
578
+ text = text.replace("?","¿")
579
+
580
+ if '"' in text:
581
+ text = text.replace('"','”')
582
+
583
+ if '--' in text:
584
+ text = text.replace('--','—')
585
+
586
+ text = text.replace("#","ʔ")
587
+ text = text.replace("^","")
588
+
589
+ text = text.replace("kj","kʲ")
590
+ text = text.replace("kj","kʲ")
591
+ text = text.replace("ɾj","ɾʲ")
592
+
593
+ text = text.replace("mj","mʲ")
594
+ text = text.replace("ʃ","ɕ")
595
+ text = text.replace("*","")
596
+ text = text.replace("bj","bʲ")
597
+ text = text.replace("h","ç")
598
+ text = text.replace("gj","gʲ")
599
+
600
+
601
+ return text
602
+
603
+ def japanese_cleaners4(text):
604
+
605
+ text = japanese_cleaners3(text)
606
+
607
+ if "にゃ" in orig:
608
+ text = text.replace("na","nʲa")
609
+
610
+ elif "にゅ" in orig:
611
+ text = text.replace("n","nʲ")
612
+
613
+ elif "にょ" in orig:
614
+ text = text.replace("n","nʲ")
615
+ elif "にぃ" in orig:
616
+ text = text.replace("ni i","niː")
617
+
618
+ elif "いゃ" in orig:
619
+ text = text.replace("i↑ja","ja")
620
+
621
+ elif "いゃ" in orig:
622
+ text = text.replace("i↑ja","ja")
623
+
624
+ elif "ひょ" in orig:
625
+ text = text.replace("ço","çʲo")
626
+
627
+ elif "しょ" in orig:
628
+ text = text.replace("ɕo","ɕʲo")
629
+
630
+
631
+ text = text.replace("Q","ʔ")
632
+ text = text.replace("N","ɴ")
633
+
634
+ text = re.sub(r'.ʔ', 'ʔ', text)
635
+ text = text.replace('" ', '"')
636
+ text = text.replace('” ', '”')
637
+
638
+ return text
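Note that japanese_cleaners4 depends on the module-level orig set inside japanese_cleaners3, so it is the intended entry point (the inference functions below call it directly); a minimal usage sketch, with output depending on pyopenjtalk:

ipa = japanese_cleaners4('こんにちは、世界。')
print(ipa)  # palatalized IPA string that is then fed to textclenaer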
639
 
640
  config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
641
 
 
650
 
651
  # load BERT model
652
  from Utils.PLBERT.util import load_plbert
653
+ BERT_path = "Utils/PLBERT/step_1040000.t7"
654
  plbert = load_plbert(BERT_path)
655
 
656
  model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)
 
658
  _ = [model[key].to(device) for key in model]
659
 
660
  # params_whole = torch.load("Models/LJSpeech/epoch_2nd_00100.pth", map_location='cpu')
661
+ params_whole = torch.load("Models/Kaede.pth", map_location='cpu')
662
  params = params_whole['net']
663
 
664
  for key in model:
 
689
  )
690
 
691
  def inference(text, noise, diffusion_steps=5, embedding_scale=1):
692
+ # text = text.strip()
693
+ # text = text.replace('"', '')
694
+ # ps = global_phonemizer.phonemize([text])
695
+ # ps = word_tokenize(ps[0])
696
+ # ps = ' '.join(ps)
697
+
698
+ text = japanese_cleaners4(text)
699
 
700
+ tokens = textclenaer(text)
701
  tokens.insert(0, 0)
702
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
703
 
 
740
  return out.squeeze().cpu().numpy()
741
 
742
  def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
743
+ # text = text.strip()
744
+ # text = text.replace('"', '')
745
+ # ps = global_phonemizer.phonemize([text])
746
+ # ps = word_tokenize(ps[0])
747
+ # ps = ' '.join(ps)
748
+ text = japanese_cleaners4(text)
749
+
750
+ tokens = textclenaer(text)
751
  tokens.insert(0, 0)
752
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
753