Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,7 @@ theme = gr.themes.Base(
|
|
39 |
|
40 |
from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule
|
41 |
|
42 |
-
voicelist = ['
|
43 |
voices = {}
|
44 |
# import phonemizer
|
45 |
# global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
|
@@ -73,7 +73,7 @@ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
|
73 |
audios = []
|
74 |
for t in progress.tqdm(texts):
|
75 |
print(t)
|
76 |
-
audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.
|
77 |
return (24000, np.concatenate(audios))
|
78 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
79 |
# if password == os.environ['ACCESS_CODE']:
|
@@ -165,19 +165,35 @@ def ljsynthesize(text, steps,embscale, progress=gr.Progress()):
|
|
165 |
return (24000, np.concatenate(audios))
|
166 |
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
with gr.Blocks() as vctk:
|
169 |
with gr.Row():
|
170 |
with gr.Column(scale=1):
|
171 |
-
|
172 |
-
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", interactive=True)
|
173 |
-
|
174 |
-
|
175 |
-
alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
|
176 |
-
beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
|
177 |
with gr.Column(scale=1):
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
181 |
# with gr.Blocks() as clone:
|
182 |
# with gr.Row():
|
183 |
# with gr.Column(scale=1):
|
|
|
39 |
|
40 |
from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule
|
41 |
|
42 |
+
voicelist = ['1','2','3']
|
43 |
voices = {}
|
44 |
# import phonemizer
|
45 |
# global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
|
|
|
73 |
audios = []
|
74 |
for t in progress.tqdm(texts):
|
75 |
print(t)
|
76 |
+
audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.4, diffusion_steps=lngsteps, embedding_scale=1.5))
|
77 |
return (24000, np.concatenate(audios))
|
78 |
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
79 |
# if password == os.environ['ACCESS_CODE']:
|
|
|
165 |
return (24000, np.concatenate(audios))
|
166 |
|
167 |
|
168 |
+
# with gr.Blocks() as vctk:
|
169 |
+
# with gr.Row():
|
170 |
+
# with gr.Column(scale=1):
|
171 |
+
# clinp = gr.Textbox(label="Text", info="Enter the text | テキストを入れてください、短すぎるとひどくなります",value="あなたがいないと、世界は色褪せて見えます。あなたの笑顔が私の日々を明るく照らしています。あなたがいない日は、まるで冬のように寒く、暗いです.", interactive=True)
|
172 |
+
# voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", interactive=True)
|
173 |
+
# vcsteps = gr.Slider(minimum=3, maximum=20, value=5, step=1, label="Diffusion Steps", info="You'll get more variation in the results if you increase it, doesn't necessarily improve anything.| これを上げたらもっとエモーショナルな音声になります（下げたらその逆）、増やしすぎるとだめになるので、ご注意ください", interactive=True)
|
174 |
+
# embscale = gr.Slider(minimum=1, maximum=10, value=1.8, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="これを上げたらもっとエモーショナルな音声になります（下げたらその逆）、増やしすぎるとだめになるので、ご注意ください", interactive=True)
|
175 |
+
# alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
|
176 |
+
# beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
|
177 |
+
# with gr.Column(scale=1):
|
178 |
+
# clbtn = gr.Button("Synthesize", variant="primary")
|
179 |
+
# claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
180 |
+
# clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
|
181 |
with gr.Blocks() as vctk:
|
182 |
with gr.Row():
|
183 |
with gr.Column(scale=1):
|
184 |
+
inp = gr.Textbox(label="Text", info="Enter the text | テキストを入れてください、短すぎるとひどくなります.", value="あなたがいないと、世界は色褪せて見えます。あなたの笑顔が私の日々を明るく照らしています。あなたがいない日は、まるで冬のように寒く、暗いです.", interactive=True)
|
185 |
+
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
|
186 |
+
multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
|
187 |
+
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
|
|
|
|
188 |
with gr.Column(scale=1):
|
189 |
+
btn = gr.Button("Synthesize", variant="primary")
|
190 |
+
audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
|
191 |
+
btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
|
192 |
+
|
193 |
+
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
# with gr.Blocks() as clone:
|
198 |
# with gr.Row():
|
199 |
# with gr.Column(scale=1):
|