moretts

Sleeping

App Files Files Community

Gregniuki committed on Nov 28, 2024

Commit

89a95a4

verified ·

1 Parent(s): cf18133

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -11

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ from model.utils import (
 from infer.utils_infer import (
     load_vocoder,
     load_model,
-    preprocess_ref_audio_text,
     infer_process,
     remove_silence_edges,
     remove_silence_for_generated_wav,
@@ -136,17 +136,17 @@ E2TTS_ema_model2 = load_custom(
     "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
 )
-import re
 def chunk_text(text, max_chars=100):
     """
-    Splits the input text into chunks, each with a maximum number of characters
-    (but splits only after a space if the chunk exceeds the character limit).
     Args:
         text (str): The text to be split.
         max_chars (int): The maximum number of characters per chunk.
     Returns:
         List[str]: A list of text chunks.
     """
@@ -157,15 +157,23 @@ def chunk_text(text, max_chars=100):
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
-        # Check if adding this sentence to the current chunk will exceed the max_chars
         if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for the space
             current_chunk += sentence + " "
         else:
             if current_chunk:
-                # Add the current chunk to the list and reset it
-                chunks.append(current_chunk.strip())
-            # Start a new chunk with the current sentence
-            current_chunk = sentence + " "
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
@@ -174,6 +182,7 @@ def chunk_text(text, max_chars=100):
     return chunks
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":

 from infer.utils_infer import (
     load_vocoder,
     load_model,
+#    preprocess_ref_audio_text,
     infer_process,
     remove_silence_edges,
     remove_silence_for_generated_wav,
     "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
 )
 def chunk_text(text, max_chars=100):
     """
+    Splits the input text into chunks, each with a maximum number of characters.
+    If a chunk exceeds the character limit, it will split at a space after the limit is exceeded,
+    but only if no punctuation mark is present at the split point.
     Args:
         text (str): The text to be split.
         max_chars (int): The maximum number of characters per chunk.
     Returns:
         List[str]: A list of text chunks.
     """
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
+        # Check if adding this sentence exceeds the max_chars limit
         if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for the space
             current_chunk += sentence + " "
         else:
+            # If the chunk exceeds max_chars and no punctuation at the end, split at the last space
             if current_chunk:
+                # Find the last space in the current chunk and split there
+                split_index = current_chunk.rfind(" ")
+                if split_index != -1:
+                    chunks.append(current_chunk[:split_index].strip())
+                    current_chunk = current_chunk[split_index:].strip() + sentence
+                else:
+                    # If no space is found (unusual case), append the chunk as is
+                    chunks.append(current_chunk.strip())
+            else:
+                # If no chunk is being built, just append the sentence
+                current_chunk = sentence + " "
     # Append any remaining text in current_chunk to chunks
     if current_chunk:
     return chunks
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":