mrfakename committed on
Commit
addff22
1 Parent(s): 5cf7b18

Gruut doesn't work

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. styletts2importable.py +6 -18
app.py CHANGED
@@ -16,13 +16,13 @@ voices = {}
16
  # else:
17
  for v in voicelist:
18
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
19
- def synthesize(text, voice, use_gruut):
20
  if text.strip() == "":
21
  raise gr.Error("You must enter some text")
22
  if len(text) > 300:
23
  raise gr.Error("Text must be under 300 characters")
24
  v = voice.lower()
25
- return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1, use_gruut=use_gruut))
26
  def clsynthesize(text, voice):
27
  if text.strip() == "":
28
  raise gr.Error("You must enter some text")
@@ -43,11 +43,11 @@ with gr.Blocks() as vctk:
43
  with gr.Column(scale=1):
44
  inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
45
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
46
- use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
47
  with gr.Column(scale=1):
48
  btn = gr.Button("Synthesize", variant="primary")
49
  audio = gr.Audio(interactive=False, label="Synthesized Audio")
50
- btn.click(synthesize, inputs=[inp, voice, use_gruut], outputs=[audio], concurrency_limit=4)
51
  with gr.Blocks() as clone:
52
  with gr.Row():
53
  with gr.Column(scale=1):
 
16
  # else:
17
  for v in voicelist:
18
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
19
+ def synthesize(text, voice):
20
  if text.strip() == "":
21
  raise gr.Error("You must enter some text")
22
  if len(text) > 300:
23
  raise gr.Error("Text must be under 300 characters")
24
  v = voice.lower()
25
+ return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
26
  def clsynthesize(text, voice):
27
  if text.strip() == "":
28
  raise gr.Error("You must enter some text")
 
43
  with gr.Column(scale=1):
44
  inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
45
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
46
+ # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
47
  with gr.Column(scale=1):
48
  btn = gr.Button("Synthesize", variant="primary")
49
  audio = gr.Audio(interactive=False, label="Synthesized Audio")
50
+ btn.click(synthesize, inputs=[inp, voice], outputs=[audio], concurrency_limit=4)
51
  with gr.Blocks() as clone:
52
  with gr.Row():
53
  with gr.Column(scale=1):
styletts2importable.py CHANGED
@@ -1,6 +1,6 @@
1
  from cached_path import cached_path
2
- print("GRUUT")
3
- from gruut_phonemize import gphonemize
4
 
5
  # from dp.phonemizer import Phonemizer
6
  print("NLTK")
@@ -135,10 +135,7 @@ sampler = DiffusionSampler(
135
 
136
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
137
  text = text.strip()
138
- if use_gruut:
139
- ps = gphonemize(text)
140
- else:
141
- ps = global_phonemizer.phonemize([text])
142
  ps = word_tokenize(ps[0])
143
  ps = ' '.join(ps)
144
  tokens = textclenaer(ps)
@@ -207,10 +204,7 @@ def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding
207
 
208
  def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
209
  text = text.strip()
210
- if use_gruut:
211
- ps = gphonemize(text)
212
- else:
213
- ps = global_phonemizer.phonemize([text])
214
  ps = word_tokenize(ps[0])
215
  ps = ' '.join(ps)
216
  ps = ps.replace('``', '"')
@@ -287,10 +281,7 @@ def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion
287
 
288
  def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
289
  text = text.strip()
290
- if use_gruut:
291
- ps = gphonemize(text)
292
- else:
293
- ps = global_phonemizer.phonemize([text])
294
  ps = word_tokenize(ps[0])
295
  ps = ' '.join(ps)
296
 
@@ -299,10 +290,7 @@ def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=
299
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
300
 
301
  ref_text = ref_text.strip()
302
- if use_gruut:
303
- ps = gphonemize(text)
304
- else:
305
- ps = global_phonemizer.phonemize([ref_text])
306
  ps = word_tokenize(ps[0])
307
  ps = ' '.join(ps)
308
 
 
1
  from cached_path import cached_path
2
+ # print("GRUUT")
3
+ # from gruut_phonemize import gphonemize
4
 
5
  # from dp.phonemizer import Phonemizer
6
  print("NLTK")
 
135
 
136
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
137
  text = text.strip()
138
+ ps = global_phonemizer.phonemize([text])
 
 
 
139
  ps = word_tokenize(ps[0])
140
  ps = ' '.join(ps)
141
  tokens = textclenaer(ps)
 
204
 
205
  def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
206
  text = text.strip()
207
+ ps = global_phonemizer.phonemize([text])
 
 
 
208
  ps = word_tokenize(ps[0])
209
  ps = ' '.join(ps)
210
  ps = ps.replace('``', '"')
 
281
 
282
  def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
283
  text = text.strip()
284
+ ps = global_phonemizer.phonemize([text])
 
 
 
285
  ps = word_tokenize(ps[0])
286
  ps = ' '.join(ps)
287
 
 
290
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
291
 
292
  ref_text = ref_text.strip()
293
+ ps = global_phonemizer.phonemize([ref_text])
 
 
 
294
  ps = word_tokenize(ps[0])
295
  ps = ' '.join(ps)
296