ggoknar committed on
Commit
3f2e1a8
·
1 Parent(s): bd470e7

limit speech to 250 characters for now

Browse files
Files changed (1) hide show
  1. app.py +49 -36
app.py CHANGED
@@ -399,7 +399,13 @@ def generate_speech(history):
399
  for sentence, history in get_sentence(history):
400
  print(sentence)
401
  # Sometimes the prompt's </s> token leaks into the output; remove it
 
402
  sentence = sentence.replace("</s>", "")
 
 
 
 
 
403
  # A fast fix for the last character, may produce weird sounds if it is with text
404
  if sentence[-1] in ["!", "?", ".", ","]:
405
  # just add a space
@@ -410,49 +416,56 @@ def generate_speech(history):
410
  # generate speech using precomputed latents
411
  # This is not streaming but it will be fast
412
  # wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
413
- audio_stream = get_voice_streaming(
414
- sentence, language, latent_map["Female_Voice"]
415
- )
416
- wav_chunks = wave_header_chunk()
417
- frame_length = 0
418
- for chunk in audio_stream:
419
- try:
420
- wav_bytestream += chunk
421
- if DIRECT_STREAM:
422
- yield (
423
- gr.Audio.update(
424
- value=wave_header_chunk() + chunk, autoplay=True
425
- ),
426
- history,
427
- )
428
- wait_time = len(chunk) / 2 / 24000
429
- wait_time = AUDIO_WAIT_MODIFIER * wait_time
430
- print("Sleeping till chunk end")
431
- time.sleep(wait_time)
432
-
433
- else:
434
- wav_chunks += chunk
435
- frame_length += len(chunk)
436
- except:
437
- # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
438
- continue
 
 
 
 
 
 
439
 
440
  if not DIRECT_STREAM:
441
  yield (
442
  gr.Audio.update(value=None, autoplay=True),
443
  history,
444
  ) # hack to switch autoplay
445
- yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
446
- # Streaming wait time calculation
447
- # audio_length = frame_length / sample_width/ frame_rate
448
- wait_time = frame_length / 2 / 24000
 
449
 
450
- # for non streaming
451
- # wait_time= librosa.get_duration(path=wav)
452
 
453
- wait_time = AUDIO_WAIT_MODIFIER * wait_time
454
- print("Sleeping till audio end")
455
- time.sleep(wait_time)
456
 
457
  except RuntimeError as e:
458
  if "device-side assert" in str(e):
@@ -480,7 +493,7 @@ def generate_speech(history):
480
  # yield (combined_file_name, history
481
 
482
  wav_bytestream = wave_header_chunk() + wav_bytestream
483
- time.sleep(0.5)
484
  yield (gr.Audio.update(value=None, autoplay=False), history)
485
  yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
486
 
 
399
  for sentence, history in get_sentence(history):
400
  print(sentence)
401
  # Sometimes the prompt's </s> token leaks into the output; remove it
402
+ # Some post process for speech only
403
  sentence = sentence.replace("</s>", "")
404
+ sentence = sentence.replace("```", "")
405
+ sentence = sentence.replace("```", "")
406
+ sentence = sentence.replace("(", " ")
407
+ sentence = sentence.replace(")", " ")
408
+
409
  # A fast fix for the last character, may produce weird sounds if it is with text
410
  if sentence[-1] in ["!", "?", ".", ","]:
411
  # just add a space
 
416
  # generate speech using precomputed latents
417
  # This is not streaming but it will be fast
418
  # wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
419
+ if len(sentence) > 250:
420
+ # Do not generate voice for this sentence: it would hit the token limit
421
+ # No audio should be generated for it
422
+ audio_stream = None
423
+ else:
424
+ audio_stream = get_voice_streaming(
425
+ sentence, language, latent_map["Female_Voice"]
426
+ )
427
+ if audio_stream is not None:
428
+ wav_chunks = wave_header_chunk()
429
+ frame_length = 0
430
+ for chunk in audio_stream:
431
+ try:
432
+ wav_bytestream += chunk
433
+ if DIRECT_STREAM:
434
+ yield (
435
+ gr.Audio.update(
436
+ value=wave_header_chunk() + chunk, autoplay=True
437
+ ),
438
+ history,
439
+ )
440
+ wait_time = len(chunk) / 2 / 24000
441
+ wait_time = AUDIO_WAIT_MODIFIER * wait_time
442
+ print("Sleeping till chunk end")
443
+ time.sleep(wait_time)
444
+
445
+ else:
446
+ wav_chunks += chunk
447
+ frame_length += len(chunk)
448
+ except:
449
+ # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
450
+ continue
451
 
452
  if not DIRECT_STREAM:
453
  yield (
454
  gr.Audio.update(value=None, autoplay=True),
455
  history,
456
  ) # hack to switch autoplay
457
+ if audio_stream is not None:
458
+ yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
459
+ # Streaming wait time calculation
460
+ # audio_length = frame_length / sample_width/ frame_rate
461
+ wait_time = frame_length / 2 / 24000
462
 
463
+ # for non streaming
464
+ # wait_time= librosa.get_duration(path=wav)
465
 
466
+ wait_time = AUDIO_WAIT_MODIFIER * wait_time
467
+ print("Sleeping till audio end")
468
+ time.sleep(wait_time)
469
 
470
  except RuntimeError as e:
471
  if "device-side assert" in str(e):
 
493
  # yield (combined_file_name, history
494
 
495
  wav_bytestream = wave_header_chunk() + wav_bytestream
496
+ time.sleep(0.7)
497
  yield (gr.Audio.update(value=None, autoplay=False), history)
498
  yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
499