mrfakename committed on
Commit
efc3af1
·
verified ·
1 Parent(s): 3c7b087

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -86,13 +86,19 @@ def clsynthesize(text, voice, vcsteps, embscale, alpha, beta, progress=gr.Progre
86
  raise gr.Error("You must enter some text")
87
  if len(text) > 50000:
88
  raise gr.Error("Text must be <50k characters")
 
 
89
  print("*** saying ***")
90
  print(text)
91
  print("*** end ***")
92
  texts = txtsplit(text)
93
  audios = []
 
 
 
94
  for t in progress.tqdm(texts):
95
- audios.append(styletts2importable.inference(t, styletts2importable.compute_style(voice), alpha=alpha, beta=beta, diffusion_steps=vcsteps, embedding_scale=embscale))
 
96
  return (24000, np.concatenate(audios))
97
  def ljsynthesize(text, steps, progress=gr.Progress()):
98
  # if text.strip() == "":
@@ -133,7 +139,7 @@ with gr.Blocks() as clone:
133
  clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
134
  clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300, waveform_options={'waveform_progress_color': '#3C82F6'})
135
  vcsteps = gr.Slider(minimum=3, maximum=20, value=20, step=1, label="Diffusion Steps", info="Theoretically, higher should be better quality but slower, but we cannot notice a difference. Try with lower steps first - it is faster", interactive=True)
136
- embscale = gr.Slider(minimum=1, maximum=1.2, value=1, step=0.1, label="Embedding Scale", info="Defaults to 1", interactive=True)
137
  alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3", interactive=True)
138
  beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7", interactive=True)
139
  with gr.Column(scale=1):
 
86
  raise gr.Error("You must enter some text")
87
  if len(text) > 50000:
88
  raise gr.Error("Text must be <50k characters")
89
+ if embscale > 1.3 and len(text) < 20:
90
+ gr.Warning("WARNING: You entered short text, you may get static!")
91
  print("*** saying ***")
92
  print(text)
93
  print("*** end ***")
94
  texts = txtsplit(text)
95
  audios = []
96
+ # vs = styletts2importable.compute_style(voice)
97
+ vs = styletts2importable.compute_style('voices/m-us-2.wav')
98
+ # print(vs)
99
  for t in progress.tqdm(texts):
100
+ audios.append(styletts2importable.inference(t, vs, alpha=alpha, beta=beta, diffusion_steps=vcsteps, embedding_scale=embscale))
101
+ # audios.append(styletts2importable.inference(t, vs, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=5))
102
  return (24000, np.concatenate(audios))
103
  def ljsynthesize(text, steps, progress=gr.Progress()):
104
  # if text.strip() == "":
 
139
  clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
140
  clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300, waveform_options={'waveform_progress_color': '#3C82F6'})
141
  vcsteps = gr.Slider(minimum=3, maximum=20, value=20, step=1, label="Diffusion Steps", info="Theoretically, higher should be better quality but slower, but we cannot notice a difference. Try with lower steps first - it is faster", interactive=True)
142
+ embscale = gr.Slider(minimum=1, maximum=2, value=1, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="Defaults to 1. WARNING: If you set this too high and generate text that's too short you will get static!", interactive=True)
143
  alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3", interactive=True)
144
  beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7", interactive=True)
145
  with gr.Column(scale=1):