Spaces:
Running
Running
add wav/ogg option
Browse files
app.py
CHANGED
@@ -134,7 +134,7 @@ def load_checkpoints():
|
|
134 |
|
135 |
return model, hps, net_g_vctk, hps_vctk
|
136 |
|
137 |
-
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
138 |
if len(text.strip())==0:
|
139 |
return []
|
140 |
language = language.split()[0]
|
@@ -145,7 +145,6 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
145 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
|
146 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
147 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
|
148 |
-
result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
|
149 |
elif voice == 'LJS':
|
150 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
151 |
with torch.no_grad():
|
@@ -163,6 +162,9 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
163 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
164 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
165 |
result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
|
|
|
|
|
|
|
166 |
return result
|
167 |
|
168 |
# download_pretrained()
|
@@ -175,7 +177,7 @@ defaults = {
|
|
175 |
"noise_scale_w": .8,
|
176 |
"speed": 1.8,
|
177 |
"voice": "LJS",
|
178 |
-
"example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS"]
|
179 |
}
|
180 |
|
181 |
inputs = []
|
@@ -227,6 +229,8 @@ with gr.Blocks(css=css) as demo:
|
|
227 |
ipa_block = gr.Textbox(label="International Phonetic Alphabet")
|
228 |
audio = gr.Audio(type="numpy", label="Output audio")
|
229 |
outputs = [ ipa_block, audio ]
|
|
|
|
|
230 |
btn = gr.Button("Vocalize")
|
231 |
btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
|
232 |
|
|
|
134 |
|
135 |
return model, hps, net_g_vctk, hps_vctk
|
136 |
|
137 |
+
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice, file_format):
|
138 |
if len(text.strip())==0:
|
139 |
return []
|
140 |
language = language.split()[0]
|
|
|
145 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
|
146 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
147 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
|
|
|
148 |
elif voice == 'LJS':
|
149 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
150 |
with torch.no_grad():
|
|
|
162 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
163 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
164 |
result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
|
165 |
+
if file_format == 'ogg':
|
166 |
+
result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
|
167 |
+
|
168 |
return result
|
169 |
|
170 |
# download_pretrained()
|
|
|
177 |
"noise_scale_w": .8,
|
178 |
"speed": 1.8,
|
179 |
"voice": "LJS",
|
180 |
+
"example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS","wav"]
|
181 |
}
|
182 |
|
183 |
inputs = []
|
|
|
229 |
ipa_block = gr.Textbox(label="International Phonetic Alphabet")
|
230 |
audio = gr.Audio(type="numpy", label="Output audio")
|
231 |
outputs = [ ipa_block, audio ]
|
232 |
+
file_format = gr.Radio(["wav", "ogg"], value="wav", label="File format")
|
233 |
+
inputs.append(file_format)
|
234 |
btn = gr.Button("Vocalize")
|
235 |
btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
|
236 |
|
dev.sh
CHANGED
@@ -8,6 +8,7 @@ docker rm -f jboselvoha 2> /dev/null
|
|
8 |
# -p 7860:7860 \
|
9 |
# jboselvoha
|
10 |
docker run -d -it --name jboselvoha \
|
|
|
11 |
-v $(pwd)/libs:/home/user/app/libs:Z \
|
12 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
13 |
-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
|
|
|
8 |
# -p 7860:7860 \
|
9 |
# jboselvoha
|
10 |
docker run -d -it --name jboselvoha \
|
11 |
+
-v $(pwd)/lfs:/home/user/app/lfs:Z \
|
12 |
-v $(pwd)/libs:/home/user/app/libs:Z \
|
13 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
14 |
-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
|
prod.sh
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
docker kill jboselvoha 2> /dev/null
|
2 |
-
docker rm -f jboselvoha 2> /dev/null
|
3 |
-
docker run -it --name jboselvoha \
|
4 |
-
-v $(pwd)/lfs:/home/user/app/lfs/:Z \
|
5 |
-
-v $(pwd)/app.py:/home/user/app/app.py:Z \
|
6 |
-
-v $(pwd)/lojban/lojban.py:/home/user/app/lojban/lojban.py:Z \
|
7 |
-
-v $(pwd)/vits:/home/user/app/vits:Z \
|
8 |
-
-v $(pwd)/nix-tts:/home/user/app/nix-tts:Z \
|
9 |
-
-p 7860:7860 \
|
10 |
-
jboselvoha
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|