lojban committed on
Commit
11efcdb
1 Parent(s): e85d807

add wav/ogg option

Browse files
Files changed (3) hide show
  1. app.py +7 -3
  2. dev.sh +1 -0
  3. prod.sh +0 -10
app.py CHANGED
@@ -134,7 +134,7 @@ def load_checkpoints():
134
 
135
  return model, hps, net_g_vctk, hps_vctk
136
 
137
- def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
138
  if len(text.strip())==0:
139
  return []
140
  language = language.split()[0]
@@ -145,7 +145,6 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
145
  result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
146
  elif voice == 'Nix-Stochastic' and language == 'jbo':
147
  result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
148
- result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
149
  elif voice == 'LJS':
150
  ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
151
  with torch.no_grad():
@@ -163,6 +162,9 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
163
  audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
164
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
165
  result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
 
 
 
166
  return result
167
 
168
  # download_pretrained()
@@ -175,7 +177,7 @@ defaults = {
175
  "noise_scale_w": .8,
176
  "speed": 1.8,
177
  "voice": "LJS",
178
- "example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS"]
179
  }
180
 
181
  inputs = []
@@ -227,6 +229,8 @@ with gr.Blocks(css=css) as demo:
227
  ipa_block = gr.Textbox(label="International Phonetic Alphabet")
228
  audio = gr.Audio(type="numpy", label="Output audio")
229
  outputs = [ ipa_block, audio ]
 
 
230
  btn = gr.Button("Vocalize")
231
  btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
232
 
 
134
 
135
  return model, hps, net_g_vctk, hps_vctk
136
 
137
+ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice, file_format):
138
  if len(text.strip())==0:
139
  return []
140
  language = language.split()[0]
 
145
  result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
146
  elif voice == 'Nix-Stochastic' and language == 'jbo':
147
  result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
 
148
  elif voice == 'LJS':
149
  ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
150
  with torch.no_grad():
 
162
  audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
163
  noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
164
  result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
165
+ if file_format == 'ogg':
166
+ result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
167
+
168
  return result
169
 
170
  # download_pretrained()
 
177
  "noise_scale_w": .8,
178
  "speed": 1.8,
179
  "voice": "LJS",
180
+ "example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS","wav"]
181
  }
182
 
183
  inputs = []
 
229
  ipa_block = gr.Textbox(label="International Phonetic Alphabet")
230
  audio = gr.Audio(type="numpy", label="Output audio")
231
  outputs = [ ipa_block, audio ]
232
+ file_format = gr.Radio(["wav", "ogg"], value="wav", label="File format")
233
+ inputs.append(file_format)
234
  btn = gr.Button("Vocalize")
235
  btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
236
 
dev.sh CHANGED
@@ -8,6 +8,7 @@ docker rm -f jboselvoha 2> /dev/null
8
  # -p 7860:7860 \
9
  # jboselvoha
10
  docker run -d -it --name jboselvoha \
 
11
  -v $(pwd)/libs:/home/user/app/libs:Z \
12
  -v $(pwd)/assets:/home/user/app/assets:Z \
13
  -v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
 
8
  # -p 7860:7860 \
9
  # jboselvoha
10
  docker run -d -it --name jboselvoha \
11
+ -v $(pwd)/lfs:/home/user/app/lfs:Z \
12
  -v $(pwd)/libs:/home/user/app/libs:Z \
13
  -v $(pwd)/assets:/home/user/app/assets:Z \
14
  -v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
prod.sh DELETED
@@ -1,10 +0,0 @@
1
- docker kill jboselvoha 2> /dev/null
2
- docker rm -f jboselvoha 2> /dev/null
3
- docker run -it --name jboselvoha \
4
- -v $(pwd)/lfs:/home/user/app/lfs/:Z \
5
- -v $(pwd)/app.py:/home/user/app/app.py:Z \
6
- -v $(pwd)/lojban/lojban.py:/home/user/app/lojban/lojban.py:Z \
7
- -v $(pwd)/vits:/home/user/app/vits:Z \
8
- -v $(pwd)/nix-tts:/home/user/app/nix-tts:Z \
9
- -p 7860:7860 \
10
- jboselvoha