Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -557,131 +557,131 @@ second_of_silence = AudioSegment.silent() # use default
|
|
557 |
# Export `second_of_silence` (built with AudioSegment.silent() defaults) to the file "sil.wav" in WAV format.
second_of_silence.export("sil.wav", format='wav')
|
558 |
|
559 |
|
560 |
-
|
561 |
-
#
|
562 |
-
|
563 |
|
564 |
-
|
565 |
-
|
566 |
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
|
573 |
-
|
574 |
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
#
|
579 |
-
|
580 |
|
581 |
|
582 |
-
#
|
583 |
-
|
584 |
|
585 |
-
|
586 |
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
|
591 |
-
#
|
592 |
-
#
|
593 |
-
|
594 |
-
#
|
595 |
-
|
596 |
-
|
597 |
|
598 |
-
|
599 |
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
|
610 |
-
#
|
611 |
-
|
612 |
-
#
|
613 |
-
|
614 |
-
|
615 |
|
616 |
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
#
|
621 |
-
|
622 |
-
|
623 |
-
#
|
624 |
-
#
|
625 |
-
#
|
626 |
-
|
627 |
-
|
628 |
|
629 |
-
|
630 |
|
631 |
-
|
632 |
-
|
633 |
-
#
|
634 |
-
|
635 |
|
636 |
-
#
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
#
|
642 |
-
|
643 |
|
644 |
-
#
|
645 |
-
#
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
#
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
#
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
#
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
#
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
|
686 |
|
687 |
# Module-level dict, empty at this point. generate_speech_for_sentence reads
# latent_map[chatbot_role] and passes that entry to get_voice_streaming.
latent_map = {}
|
|
|
557 |
# Export `second_of_silence` (built with AudioSegment.silent() defaults) to the file "sil.wav" in WAV format.
second_of_silence.export("sil.wav", format='wav')
|
558 |
|
559 |
|
560 |
+
def generate_speech(history,chatbot_role):
    """Generate speech sentence by sentence and yield it for streaming playback.

    This is a generator. Every item it yields is a 4-tuple
    ``(history, chatbot_role, sentence, audio)``:

    * The first item always has an empty sentence and the result of
      ``wave_header_chunk()`` as its audio part.
    * After that, one item is yielded for every non-empty sentence produced by
      ``get_sentence(history, chatbot_role)`` for which
      ``generate_speech_for_sentence`` returned a result; the audio part is the
      ``"value"`` entry of the audio update it returned.

    The language is detected once, with ``detect_language`` on the first
    non-empty sentence, and reused for all following sentences.
    """
    # Original note: autoplay must be set to True first, so a header-only
    # item is sent before any real audio.
    yield (history, chatbot_role, "", wave_header_chunk())

    language = "autodetect"  # replaced by the prediction from the first sentence
    language_pending = True

    for sentence, history in get_sentence(history, chatbot_role):
        if sentence == "":
            continue

        if language_pending:
            language = detect_language(sentence)
            language_pending = False

        print("BG: inserting sentence to queue")

        speech = generate_speech_for_sentence(
            history,
            chatbot_role,
            sentence,
            return_as_byte=True,
            language=language,
        )
        if speech is None:
            # Nothing was synthesized for this sentence.
            continue

        # Byte streaming: only the "value" entry of the audio update is forwarded.
        _unused_history, audio_update = speech
        yield (history, chatbot_role, sentence, audio_update["value"])
|
580 |
|
581 |
|
582 |
+
# Generates the speech audio for a single sentence (returned as raw bytes or written to a file)
|
583 |
+
def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte=True, language="autodetect"):
    """Synthesize speech for one sentence and wrap it in a Gradio audio update.

    The sentence is first stripped of text that should not be spoken
    (``</s>``, back-tick code spans, parenthesised text, ``...`` and
    ``<|assistant|>``). It is then split into pieces of at most 350
    characters, and each piece that contains at least one alphanumeric
    character is sent to ``get_voice_streaming``.

    Args:
        history: Chat history; handed back unchanged as the first element of
            the returned tuple.
        chatbot_role: Key used to look up ``latent_map[chatbot_role]``, which
            is passed to ``get_voice_streaming``.
        sentence: Text to speak.
        return_as_byte: If true, the audio update carries the concatenated
            streamed chunks as bytes. Otherwise the chunks, prefixed with
            ``wave_header_chunk()``, are written to ``/tmp/<uuid4>.wav`` and
            the update carries that file name.
        language: Language passed to ``get_voice_streaming``. With
            ``"autodetect"`` it is determined by ``detect_language`` on the
            first speakable piece.

    Returns:
        ``(history, gr.Audio.update(value=..., autoplay=True))`` when audio
        was produced, otherwise ``None`` (empty sentence, nothing speakable,
        or a CUDA device-side assert that triggered a space restart).

    Raises:
        RuntimeError: Re-raised when it is not a CUDA "device-side assert".
    """
    wav_bytestream = b""

    if len(sentence) == 0:
        print("EMPTY SENTENCE")
        return

    # Sometimes the prompt terminator </s> leaks into the output; remove it.
    # Everything below is post-processing for speech only.
    sentence = sentence.replace("</s>", "")
    # Remove code from speech (raw strings keep the regex escapes valid).
    sentence = re.sub(r"```.*```", "", sentence, flags=re.DOTALL)
    sentence = re.sub(r"`.*`", "", sentence, flags=re.DOTALL)

    # Drop parenthesised asides entirely.
    sentence = re.sub(r"\(.*\)", "", sentence, flags=re.DOTALL)

    # Clean up whatever unmatched markers are left.
    sentence = sentence.replace("```", "")
    sentence = sentence.replace("...", " ")
    sentence = sentence.replace("(", " ")
    sentence = sentence.replace(")", " ")
    sentence = sentence.replace("<|assistant|>", "")

    if len(sentence) == 0:
        print("EMPTY SENTENCE after processing")
        return

    # A fast fix for the last character: it may produce weird sounds if it is
    # glued to the text, so put a space in front of it. Slicing with [-2:]
    # (instead of indexing [-2]) keeps one-character sentences from raising
    # IndexError.
    trailing_punctuation = ("!", "?", ".", ",")
    if any(char in trailing_punctuation for char in sentence[-2:]):
        # just add a space
        sentence = sentence[:-1] + " " + sentence[-1]
    print("Sentence for speech:", sentence)

    try:
        SENTENCE_SPLIT_LENGTH = 350
        if len(sentence) < SENTENCE_SPLIT_LENGTH:
            # no problem, continue on
            sentence_list = [sentence]
        else:
            # Until now nltk likely split sentences properly, but we need an
            # additional check for longer sentences and split at the last
            # possible position. Do whatever is necessary: first break at
            # hyphens, then spaces, and then even split very long words.
            sentence_list = textwrap.wrap(sentence, SENTENCE_SPLIT_LENGTH)
            print("SPLITTED LONG SENTENCE:", sentence_list)

        # The loop variable deliberately reuses the name `sentence` so the
        # error message in the handler below reports the piece being processed.
        for sentence in sentence_list:

            if any(c.isalnum() for c in sentence):
                if language == "autodetect":
                    # On the first call autodetect; following pieces use the
                    # same language.
                    language = detect_language(sentence)

                # There is at least one alphanumeric character (utf-8).
                audio_stream = get_voice_streaming(
                    sentence, language, latent_map[chatbot_role]
                )
            else:
                # Likely got a ' or " or some other text without any
                # alphanumeric character in it.
                audio_stream = None

            # XTTS is actually using a streaming response, but we are playing
            # audio by sentence. If you want direct XTTS voice streaming (send
            # each chunk to voice) you may set the DIRECT_STREAM=1 environment
            # variable.
            if audio_stream is not None:
                wav_chunks = wave_header_chunk()
                for chunk in audio_stream:
                    try:
                        wav_bytestream += chunk
                        wav_chunks += chunk
                    except Exception:
                        # Hack to continue playing: sometimes the last chunk
                        # is empty; will be fixed on the next TTS. Narrowed
                        # from a bare `except:` so KeyboardInterrupt and
                        # SystemExit are no longer swallowed.
                        continue

            # NOTE(review): this return is placed inside the per-piece loop, so
            # for a split long sentence only the first piece that yields audio
            # is returned. The nesting was reconstructed from a listing without
            # indentation -- TODO confirm it is intended.
            if audio_stream is not None:
                if not return_as_byte:
                    audio_unique_filename = "/tmp/" + str(uuid.uuid4()) + ".wav"
                    with open(audio_unique_filename, "wb") as f:
                        f.write(wav_chunks)
                    # Will write the filename to the context variable.
                    return (history, gr.Audio.update(value=audio_unique_filename, autoplay=True))
                else:
                    return (history, gr.Audio.update(value=wav_bytestream, autoplay=True))
    except RuntimeError as e:
        if "device-side assert" in str(e):
            # Cannot do anything on a CUDA device-side error; need to restart.
            print(
                f"Exit due to: Unrecoverable exception caused by prompt:{sentence}",
                flush=True,
            )
            gr.Warning("Unhandled Exception encounter, please retry in a minute")
            print("Cuda device-assert Runtime encountered need restart")

            # HF Space specific: this error is unrecoverable, the space needs
            # to be restarted.
            api.restart_space(repo_id=repo_id)
        else:
            print("RuntimeError: non device-side assert error:", str(e))
            raise e

    print("All speech ended")
    return
|
685 |
|
686 |
|
687 |
# Module-level dict, empty at this point. generate_speech_for_sentence reads
# latent_map[chatbot_role] and passes that entry to get_voice_streaming.
latent_map = {}
|