Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

App Files Community

fffiloni committed on Sep 8, 2023

Commit

89f999f

•

1 Parent(s): a8a3c1e

limit max sentences allowed to infer

Browse files

Files changed (1) hide show

app.py +22 -3

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from scipy.io.wavfile import write, read
 from pydub import AudioSegment
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
 import json
 with open("characters.json", "r") as file:
@@ -151,8 +152,17 @@ def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
     # Move the WAV file to the new directory
     shutil.move(source_path, os.path.join(destination_path, f"{file_name}.wav"))
-    tts.tts_to_file(text=prompt,
                 file_path="output.wav",
                 voice_dir="bark_voices/",
                 speaker=f"{file_name}")
@@ -169,8 +179,17 @@ def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
 def infer_from_c(prompt, c_name):
-    tts.tts_to_file(text=prompt,
                 file_path="output.wav",
                 voice_dir="examples/library/",
                 speaker=f"{c_name}")
@@ -303,7 +322,7 @@ with gr.Blocks(css=css) as demo:
             with gr.Column():
                 prompt = gr.Textbox(
                     label = "Text to speech prompt",
-                    info = "One or two sentences at a time is better*",
                     placeholder = "Hello friend! How are you today?",
                     elem_id = "tts-prompt"
                 )

 from pydub import AudioSegment
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
+MAX_NUMBER_SENTENCES = 10
 import json
 with open("characters.json", "r") as file:
     # Move the WAV file to the new directory
     shutil.move(source_path, os.path.join(destination_path, f"{file_name}.wav"))
+    # Split the text into sentences based on common punctuation marks
+    sentences = re.split(r'(?<=[.!?])\s+', prompt)
+    # Keep only the first MAX_NUMBER_SENTENCES sentences
+    first_nb_sentences = sentences[:MAX_NUMBER_SENTENCES]
+    # Join the selected sentences back into a single string
+    limited_prompt = ' '.join(first_nb_sentences)
+    tts.tts_to_file(text=limited_prompt,
                 file_path="output.wav",
                 voice_dir="bark_voices/",
                 speaker=f"{file_name}")
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
 def infer_from_c(prompt, c_name):
+    # Split the text into sentences based on common punctuation marks
+    sentences = re.split(r'(?<=[.!?])\s+', prompt)
+    # Keep only the first MAX_NUMBER_SENTENCES sentences
+    first_nb_sentences = sentences[:MAX_NUMBER_SENTENCES]
+    # Join the selected sentences back into a single string
+    limited_prompt = ' '.join(first_nb_sentences)
+    tts.tts_to_file(text=limited_prompt,
                 file_path="output.wav",
                 voice_dir="examples/library/",
                 speaker=f"{c_name}")
             with gr.Column():
                 prompt = gr.Textbox(
                     label = "Text to speech prompt",
+                    info = "One or two sentences at a time is better* (max: 10)",
                     placeholder = "Hello friend! How are you today?",
                     elem_id = "tts-prompt"
                 )