RaushanA committed on
Commit
df7ded0
·
1 Parent(s): cbb8cf8

Update app_latest.py

Browse files

remove_audio_generation

Files changed (1) hide show
  1. app_latest.py +2 -121
app_latest.py CHANGED
@@ -240,98 +240,6 @@ def tts(
240
  return info, None
241
 
242
 
243
- def tts1(
244
- model_name,
245
- speed,
246
- audio,
247
-
248
- pitch=0.5,
249
- f0_up_key=0,
250
- f0_method="rmvpe",
251
- index_rate=0,
252
- protect=0.33,
253
-
254
- filter_radius=3,
255
- resample_sr=0,
256
- rms_mix_rate=0.25,
257
-
258
- ):
259
-
260
-
261
- try:
262
- tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
263
- t0 = time.time()
264
- if speed >= 0:
265
- speed_str = f"+{speed}%"
266
- else:
267
- speed_str = f"{speed}%"
268
- # audio=separate(inp=audio, outp="voice")
269
- edge_output_filename=audio
270
- unpaudio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
271
- print(pitch)
272
- audio = librosa.effects.pitch_shift(unpaudio, sr, int(pitch))
273
- soundfile.write(edge_output_filename, audio, sr, )
274
- audio, sr = librosa.load(edge_output_filename, sr=16000, mono=True)
275
-
276
- duration = len(audio) / sr
277
- print(f"Audio duration: {duration}s")
278
- if limitation and duration >= 20:
279
- print("Error: Audio too long")
280
- return (
281
- f"Audio should be less than 20 seconds in this huggingface space, but got {duration}s.",
282
-
283
- None,
284
- )
285
-
286
- f0_up_key = int(f0_up_key)
287
-
288
- if not hubert_model:
289
- load_hubert()
290
- if f0_method == "rmvpe":
291
- vc.model_rmvpe = rmvpe_model
292
- times = [0, 0, 0]
293
- audio_opt = vc.pipeline(
294
- hubert_model,
295
- net_g,
296
- 0,
297
- audio,
298
- edge_output_filename,
299
- times,
300
- f0_up_key,
301
- f0_method,
302
- index_file,
303
- # file_big_npy,
304
- index_rate,
305
- if_f0,
306
- filter_radius,
307
- tgt_sr,
308
- resample_sr,
309
- rms_mix_rate,
310
- version,
311
- protect,
312
- None,
313
- )
314
- if tgt_sr != resample_sr >= 16000:
315
- tgt_sr = resample_sr
316
- info = f"Success."
317
- print(info)
318
- return (
319
- info,
320
-
321
- (tgt_sr, audio_opt),
322
- )
323
- except EOFError:
324
- info = (
325
- "It seems that the edge-tts output is not valid. "
326
- "This may occur when the input text and the speaker do not match. "
327
- "For example, maybe you entered Japanese (without alphabets) text but chose non-Japanese speaker?"
328
- )
329
- print(info)
330
- return info, None
331
- except:
332
- info = traceback.format_exc()
333
- print(info)
334
- return info, None
335
 
336
  initial_md = """
337
  # Hatch new voice sound
@@ -380,35 +288,8 @@ with app:
380
  ],
381
  [info_text, tts_output],
382
  )
383
- with gr.Tab("Audio"):
384
- with gr.Row():
385
- with gr.Column():
386
- speed = gr.Slider(
387
- minimum=-100,
388
- maximum=100,
389
- label="Speech speed (%)",
390
- value=0,
391
- step=10,
392
- interactive=True,
393
- )
394
- audio = gr.Audio(source="microphone", type="filepath")
395
- with gr.Column():
396
- but0 = gr.Button("Convert", variant="primary")
397
- info_text = gr.Textbox(label="Output info")
398
- with gr.Column():
399
- tts_output = gr.Audio(label="Result")
400
- but0.click(
401
- tts1,
402
- [
403
- model_name,
404
- speed,
405
- audio,
406
-
407
- ],
408
- [info_text, tts_output],
409
- )
410
-
411
 
412
 
413
 
414
- app.launch(inbrowser=True, share=True)
 
240
  return info, None
241
 
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  initial_md = """
245
  # Hatch new voice sound
 
288
  ],
289
  [info_text, tts_output],
290
  )
291
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
 
294
 
295
+ app.launch()