Plachta committed
Commit c0d010f
1 Parent(s): f32f220

Replaced Encodec with Vocos
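For context: Vocos is a vocoder that can reconstruct a waveform directly from EnCodec tokens, which is what makes this drop-in replacement possible. A minimal sketch of the decode path, following the vocos README (the token shape and bandwidth_id value below are illustrative assumptions, not values taken from this repo):

import torch
from vocos import Vocos

# Vocos checkpoint trained to invert EnCodec tokens at 24 kHz.
vocos = Vocos.from_pretrained("charactr/vocos-encodec-24khz")

# Placeholder EnCodec codes: 8 codebooks x 200 frames (illustrative shape).
codes = torch.randint(0, 1024, (8, 200))

# Map discrete codes to continuous features, then vocode to audio.
features = vocos.codes_to_features(codes)
bandwidth_id = torch.tensor([2])  # index into [1.5, 3.0, 6.0, 12.0] kbps
audio = vocos.decode(features, bandwidth_id=bandwidth_id)  # (1, num_samples)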

app.py CHANGED
@@ -36,7 +36,7 @@ import gradio as gr
 from vocos import Vocos
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
-from utils.sentence_cutter import split_text_into_sentences
+
 
 torch._C._jit_set_profiling_executor(False)
 torch._C._jit_set_profiling_mode(False)
@@ -331,6 +331,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
     fixed-prompt: This mode will keep using the same prompt the user has provided, and generate audio sentence by sentence.
     sliding-window: This mode will use the last sentence as the prompt for the next sentence, but has some concern on speaker maintenance.
     """
+    from utils.sentence_cutter import split_text_into_sentences
     if len(text) > 1000:
         return "Rejected, Text too long (should be less than 1000 characters)", None
     mode = 'fixed-prompt'
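The two modes named in the infer_long_text docstring differ only in how the prompt is updated between sentences. A minimal sketch, assuming hypothetical helpers generate_one (per-sentence synthesis) and make_prompt_from (prompt extraction), neither of which is part of app.py:

import numpy as np

def infer_long_text_sketch(text, prompt, mode='fixed-prompt'):
    sentences = split_text_into_sentences(text)
    chunks = []
    for sentence in sentences:
        audio = generate_one(sentence, prompt)  # hypothetical synthesis call
        chunks.append(audio)
        if mode == 'sliding-window':
            # Reuse the sentence just generated as the next prompt; this adapts
            # over long passages but can drift away from the original speaker.
            prompt = make_prompt_from(sentence, audio)  # hypothetical
        # 'fixed-prompt' keeps the user's prompt unchanged for every sentence.
    return np.concatenate(chunks)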
utils/g2p/english.py CHANGED
@@ -19,7 +19,6 @@ hyperparameter. Some cleaners are English-specific. You'll typically want to use
 import re
 from unidecode import unidecode
 import inflect
-import eng_to_ipa as ipa
 _inflect = inflect.engine()
 _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
 _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
@@ -158,6 +157,7 @@ def mark_dark_l(text):
 
 
 def english_to_ipa(text):
+    import eng_to_ipa as ipa
     text = unidecode(text).lower()
     text = expand_abbreviations(text)
     text = normalize_numbers(text)
utils/g2p/japanese.py CHANGED
@@ -1,6 +1,5 @@
 import re
 from unidecode import unidecode
-import pyopenjtalk
 
 
 
@@ -74,7 +73,7 @@ def symbols_to_japanese(text):
 
 def japanese_to_romaji_with_accent(text):
     '''Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html'''
-
+    import pyopenjtalk
     text = symbols_to_japanese(text)
     sentences = re.split(_japanese_marks, text)
     marks = re.findall(_japanese_marks, text)
utils/g2p/mandarin.py CHANGED
@@ -4,7 +4,6 @@ import re
 import jieba
 import cn2an
 import logging
-from pypinyin import lazy_pinyin, BOPOMOFO
 
 
 # List of (Latin alphabet, bopomofo) pairs:
@@ -241,7 +240,7 @@ def number_to_chinese(text):
 
 
 def chinese_to_bopomofo(text):
-
+    from pypinyin import lazy_pinyin, BOPOMOFO
     text = text.replace('、', ',').replace(';', ',').replace(':', ',')
     words = jieba.lcut(text, cut_all=False)
     text = ''
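All three g2p edits above apply the same pattern: a heavy, language-specific dependency (eng_to_ipa, pyopenjtalk, pypinyin) moves from module scope into the one function that needs it, so importing the module no longer pays for backends a caller never uses. The general shape, with my_heavy_dep as a placeholder:

import re  # cheap, always-needed imports stay at module scope

def convert(text):
    # Deferred import: resolved on first call, then cached in sys.modules,
    # and skipped entirely if this code path is never exercised.
    import my_heavy_dep
    return my_heavy_dep.process(text)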
utils/sentence_cutter.py CHANGED
@@ -40,16 +40,4 @@ def split_text_into_sentences(text):
 
     return sentences
 
-    raise RuntimeError("It is impossible to reach here.")
-
-long_text = """
-This is a very long paragraph, so most TTS model is unable to handle it. Hence, we have to split it into several sentences. With the help of NLTK, we can split it into sentences. However, the punctuation is not preserved, so we have to add it back. How are we going to do write this code? Let's see.
-"""
-
-long_text = """
-现在我们要来尝试一下中文分句。因为很不幸的是,NLTK不支持中文分句。幸运的是,我们可以使用jieba来分句。但是,jieba分句后,标点符号会丢失,所以我们要手动添加回去。我现在正在想办法把这个例句写的更长更复杂一点,来测试jieba分句的性能。嗯......省略号,感觉不太好,因为省略号不是句号,所以jieba不会把它当作句子的结尾。会这样吗?我们来试试看。
-"""
-
-long_text = """
-これなら、英語と中国語の分句もできる。でも、日本語はどうする?まつわ、ChatGPTに僕と教えてください。ちょーと待ってください。あ、出来た!
-"""
+    raise RuntimeError("It is impossible to reach here.")